diff --git a/ggml/src/ggml-openvino/decoder.h b/ggml/src/ggml-openvino/decoder.h index c0641e2662..b0775d43aa 100644 --- a/ggml/src/ggml-openvino/decoder.h +++ b/ggml/src/ggml-openvino/decoder.h @@ -1,6 +1,6 @@ #pragma once -#include +#include #include "openvino/core/node.hpp" #include "openvino/frontend/decoder.hpp" @@ -57,8 +57,8 @@ public: virtual bool check_if_continuous() const = 0; - virtual const std::unordered_map>& get_model_inputs() const = 0; - virtual const std::unordered_map>& get_model_weights() const = 0; + virtual const std::map>& get_model_inputs() const = 0; + virtual const std::map>& get_model_weights() const = 0; virtual const std::vector& get_model_output_names() const = 0; }; diff --git a/ggml/src/ggml-openvino/ggml-decoder.cpp b/ggml/src/ggml-openvino/ggml-decoder.cpp index d42aaf4664..44b46f2c63 100644 --- a/ggml/src/ggml-openvino/ggml-decoder.cpp +++ b/ggml/src/ggml-openvino/ggml-decoder.cpp @@ -8,12 +8,14 @@ #include #include #include +#include #include #include #include #include +#include +#include #include -#include #include "ggml-backend-impl.h" #include "ggml-backend.h" @@ -22,16 +24,24 @@ GgmlOvDecoder::GgmlOvDecoder(struct ggml_tensor* node, struct ggml_cgraph* cgrap : m_cgraph(cgraph), m_node(node), m_op_name(m_node ? std::string(m_node->name) : "NONE_OP") { - static std::unordered_map> model_weights; + static std::map> model_weights; + if (m_node) { set_input_output(m_node, model_weights); } else { + static bool printed = false; + if (!printed && getenv("GGML_OPENVINO_PRINT_CGRAPH_TENSOR_ADDRESS")) { + print_tensor_address_map(m_cgraph); + printed = true; + } + for (int node_n = 0; node_n < m_cgraph->n_nodes; node_n++) { auto* cur_node = m_cgraph->nodes[node_n]; m_nodes.push_back(cur_node); set_input_output(cur_node, model_weights); } m_model_weights = model_weights; + if (getenv("GGML_OPENVINO_DUMP_CGRAPH")) { dump_cgraph(m_cgraph); } @@ -41,7 +51,7 @@ GgmlOvDecoder::GgmlOvDecoder(struct ggml_tensor* node, struct ggml_cgraph* cgrap // Called in GgmlOvDecoder constructor. Two cases: 1. constructing a decoder for the whole graph; // 2. constructing a decoder for a node. void GgmlOvDecoder::set_input_output(ggml_tensor* node, - std::unordered_map>& model_weights) { + std::map>& model_weights) { std::string node_name; if (node->op == GGML_OP_CPY) { // CPY updates the input tensor in place. For later ov op that uses the @@ -100,9 +110,10 @@ void GgmlOvDecoder::set_input_output(ggml_tensor* node, } if (!m_node) { + static std::set debug_output_names = {}; // Workaround: the final tensor "result_output" does not have GGML_TENSOR_FLAG_OUTPUT flag set in cgraph if (node->buffer->usage == GGML_BACKEND_BUFFER_USAGE_ANY || node->flags & GGML_TENSOR_FLAG_OUTPUT || - std::string(node->name).find("result") == 0) { + std::string(node->name).find("result") == 0 || debug_output_names.count(node->name)) { auto name = node->view_src ? std::string(node->view_src->name) : std::string(node->name); if (node->buffer->usage == GGML_BACKEND_BUFFER_USAGE_ANY) { assert(name.find("cache_k") == 0 || name.find("cache_v") == 0); @@ -249,7 +260,7 @@ void GgmlOvDecoder::dump_cgraph(const struct ggml_cgraph* cgraph) { void print_tensor_address_map(const struct ggml_cgraph* cgraph) { std::map> address_map; - for (int node_n = 0; node_n <= cgraph->n_nodes; node_n++) { + for (int node_n = 0; node_n < cgraph->n_nodes; node_n++) { auto* node = cgraph->nodes[node_n]; if (node->data) { auto it = address_map.find(node->data); diff --git a/ggml/src/ggml-openvino/ggml-decoder.h b/ggml/src/ggml-openvino/ggml-decoder.h index a71c5e4e1f..c4f7612d76 100644 --- a/ggml/src/ggml-openvino/ggml-decoder.h +++ b/ggml/src/ggml-openvino/ggml-decoder.h @@ -1,7 +1,7 @@ #pragma once +#include #include -#include #include #include "decoder.h" @@ -76,10 +76,10 @@ public: return m_continuous; } - virtual const std::unordered_map>& get_model_inputs() const override { + virtual const std::map>& get_model_inputs() const override { return m_model_inputs; } - virtual const std::unordered_map>& get_model_weights() const override { + virtual const std::map>& get_model_weights() const override { return m_model_weights; } virtual const std::vector& get_model_output_names() const override { @@ -87,7 +87,7 @@ public: } private: - void set_input_output(ggml_tensor* node, std::unordered_map>& model_weights); + void set_input_output(ggml_tensor* node, std::map>& model_weights); static void dump_cgraph(const struct ggml_cgraph* cgraph); static std::vector get_shape(const ggml_tensor* tensor); static std::vector get_stride(const ggml_tensor* tensor); @@ -105,7 +105,9 @@ private: mutable std::string m_name; bool m_continuous; std::vector> m_op_node_name; - std::unordered_map> m_model_inputs; - std::unordered_map> m_model_weights; + std::map> m_model_inputs; + std::map> m_model_weights; std::vector m_model_output_names; }; + +void print_tensor_address_map(const struct ggml_cgraph* cgraph); diff --git a/ggml/src/ggml-openvino/utils.cpp b/ggml/src/ggml-openvino/utils.cpp index 5feb67d681..32fa7cf481 100644 --- a/ggml/src/ggml-openvino/utils.cpp +++ b/ggml/src/ggml-openvino/utils.cpp @@ -135,10 +135,12 @@ enum ggml_status openvino_frontend_compute(ggml_backend_t backend, struct ggml_c << ", Address: " << output_tensors[output_names[i]] << std::endl; switch (output_tensor.get_element_type()) { case ov::element::f32: - std::cout << *(float*)(output_tensors[output_names[i]]) << std::endl; + std::cout << *(float*)(output_tensor.data()) << std::endl; + std::cout << checksum(output_tensor.data(), output_tensor.get_byte_size()) << std::endl; break; case ov::element::f16: - std::cout << ov::float16::from_bits(*(uint16_t*)(output_tensors[output_names[i]])) << std::endl; + std::cout << ov::float16::from_bits(*(uint16_t*)(output_tensor.data())) << std::endl; + std::cout << checksum(output_tensor.data(), output_tensor.get_byte_size()) << std::endl; break; default: break; @@ -161,3 +163,12 @@ enum ggml_status openvino_frontend_compute(ggml_backend_t backend, struct ggml_c return GGML_STATUS_SUCCESS; GGML_UNUSED(backend); } + +size_t checksum(const void* data, size_t size) { + const uint8_t* bytes = static_cast(data); + size_t sum = 0; + for (size_t i = 0; i < size; ++i) { + sum += bytes[i]; + } + return sum; +} diff --git a/ggml/src/ggml-openvino/utils.h b/ggml/src/ggml-openvino/utils.h index b4174c9f21..4458e71f54 100644 --- a/ggml/src/ggml-openvino/utils.h +++ b/ggml/src/ggml-openvino/utils.h @@ -2,3 +2,5 @@ #include "ggml-backend-impl.h" enum ggml_status openvino_frontend_compute(ggml_backend_t backend, struct ggml_cgraph* cgraph); + +size_t checksum(const void* data, size_t size);