From 65348b5d2029117347721d664ba3f5a00458a9d2 Mon Sep 17 00:00:00 2001 From: "Yu, Zijun" Date: Thu, 27 Nov 2025 15:52:20 +0800 Subject: [PATCH] fallback naive run with accuracy issue --- ggml/src/ggml-openvino/ggml-decoder.cpp | 97 ++++++++++--------- ggml/src/ggml-openvino/ggml-decoder.h | 37 ++++--- ggml/src/ggml-openvino/openvino/decoder.hpp | 6 +- .../ggml-openvino/openvino/node_context.hpp | 8 +- ggml/src/ggml-openvino/openvino/op/cont.cpp | 2 +- .../openvino/op/flash_attn_ext.cpp | 2 +- .../ggml-openvino/openvino/op/glu_geglu.cpp | 2 +- .../ggml-openvino/openvino/op/glu_swiglu.cpp | 2 +- .../src/ggml-openvino/openvino/op/permute.cpp | 29 ++++-- .../src/ggml-openvino/openvino/op/reshape.cpp | 10 +- .../ggml-openvino/openvino/op/rms_norm.cpp | 2 +- ggml/src/ggml-openvino/openvino/op/rope.cpp | 4 +- ggml/src/ggml-openvino/openvino/op/scale.cpp | 2 +- .../ggml-openvino/openvino/op/set_rows.cpp | 2 +- .../src/ggml-openvino/openvino/op/softmax.cpp | 2 +- ggml/src/ggml-openvino/openvino/op/view.cpp | 2 +- .../openvino/translate_session.cpp | 11 ++- ggml/src/ggml-openvino/utils.cpp | 12 +-- 18 files changed, 134 insertions(+), 98 deletions(-) diff --git a/ggml/src/ggml-openvino/ggml-decoder.cpp b/ggml/src/ggml-openvino/ggml-decoder.cpp index 4c0258c4e3..ece1fc8a54 100644 --- a/ggml/src/ggml-openvino/ggml-decoder.cpp +++ b/ggml/src/ggml-openvino/ggml-decoder.cpp @@ -89,32 +89,60 @@ GgmlOvDecoder::GgmlOvDecoder(ggml_cgraph * cgraph, std::map data_addr_map; + std::unordered_set output_name_set; + for (const auto & node_info : m_node_info_list) { + for (const auto & it : node_info.node_inputs) { + const auto & src_name = it.first; + const auto & src_node = it.second; + + if (output_name_set.find(src_name) == output_name_set.end() && + m_model_weights.find(src_name) == m_model_weights.end() && + m_model_inputs.find(src_name) == m_model_inputs.end()) { + auto param_node = + std::make_shared(get_ov_type(src_node), ov::Shape(get_shape(src_node))); + param_node->set_friendly_name(src_name); + param_node->output(0).get_tensor().set_names({src_name}); + m_model_inputs[src_name] = param_node; + } + } + output_name_set.emplace(node_info.node_output_name); + data_addr_map[node_info.data_addr] = node_info.node_output; + } + for (const auto & it : data_addr_map) { + // No need to add view tensors as model outputs + if (it.second->op != GGML_OP_VIEW) { + m_model_outputs[std::string(it.second->name)] = it.second; + } + } } -// Called in GgmlOvDecoder constructor. Two cases: 1. constructing a decoder for the whole graph; -// 2. constructing a decoder for a node; -// 3. constructing a decoder for the whole graph naively (op test case) void GgmlOvDecoder::set_input_output(ggml_tensor * node, bool naive) { - std::string node_name; NodeInfo current_node_info; + auto node_name = std::string(node->name); + auto node_output_name = node_name; + auto * node_output = node; if (node->op == GGML_OP_SET_ROWS) { // SET_ROWS updates the tensor in place. For later ov op that uses the // the view_src of SET_ROWS, we need to make sure they get the updated tensor // by putting the view_src name in the tensor_map in // /src/frontends/ggml/src/translate_session.cpp - node_name = std::string(node->view_src->name); - } else { - node_name = std::string(node->name); + node_output_name = std::string(node->view_src->name); + node_output = node->view_src; } - m_output_names.push_back(node_name); - m_outputs[node_name] = node; + m_output_names.push_back(node_output_name); + m_outputs[node_output_name] = node_output; current_node_info.node = node; current_node_info.node_name = node_name; - current_node_info.node_outputs[node_name] = node; - current_node_info.node_outputs_names.push_back(node_name); + current_node_info.node_output = node_output; + current_node_info.node_output_name = node_output_name; current_node_info.node_op_case = 0; + current_node_info.data_addr = node->data; for (int i = 0; i < GGML_MAX_SRC; i++) { auto * src = node->src[i]; @@ -127,17 +155,8 @@ void GgmlOvDecoder::set_input_output(ggml_tensor * node, bool naive) { current_node_info.node_inputs[src_name] = src; current_node_info.node_inputs_names.push_back(src_name); - // Add model inputs and weights constants, if called for the whole graph - if (naive) { - if (m_model_weights.find(src_name) == m_model_weights.end()) { - auto param_node = - std::make_shared(get_ov_type(src), get_graph_input_shape(node, src)); - param_node->set_friendly_name(src_name); - param_node->output(0).get_tensor().set_names({src_name}); - m_model_inputs[src_name] = param_node; - } - - } else if (!src->view_src) { + // Add model inputs + if (!naive && !src->view_src) { ggml_backend_buffer * buffer = src->buffer; if (buffer->usage == GGML_BACKEND_BUFFER_USAGE_ANY || src->flags & GGML_TENSOR_FLAG_INPUT) { @@ -157,18 +176,15 @@ void GgmlOvDecoder::set_input_output(ggml_tensor * node, bool naive) { } } - // Add model outputs, if called for the whole graph - if (naive) { - m_model_output_names.push_back(node_name); - } else { + // Add model outputs + if (!naive) { // Model outputs are tensors with GGML_TENSOR_FLAG_OUTPUT flag and kv_caches static std::set debug_output_names = {}; // Workaround: the final tensor "result_output" does not have GGML_TENSOR_FLAG_OUTPUT flag set in cgraph if (node->op == GGML_OP_SET_ROWS || node->flags & GGML_TENSOR_FLAG_OUTPUT || - node_name.find("output") != std::string::npos || debug_output_names.count(node_name)) { - if (auto it = std::find(m_model_output_names.begin(), m_model_output_names.end(), node_name); - it == m_model_output_names.end()) { - m_model_output_names.push_back(node_name); + node_output_name.find("output") != std::string::npos || debug_output_names.count(node_output_name)) { + if (m_model_outputs.find(node_output_name) == m_model_outputs.end()) { + m_model_outputs[node_output_name] = node_output; } } } @@ -176,7 +192,7 @@ void GgmlOvDecoder::set_input_output(ggml_tensor * node, bool naive) { m_node_info_list.push_back(current_node_info); } -int GgmlOvDecoder::compute_op_case(const ggml_tensor * node) { +int GgmlOvDecoder::compute_op_case(const ggml_tensor * node) const { int op_case = 0; switch (node->op) { case GGML_OP_RESHAPE: { @@ -370,9 +386,6 @@ ov::PartialShape GgmlOvDecoder::get_graph_input_shape(const ggml_tensor * op, co int len = m_is_static ? (m_is_prefill ? m_prefill_chunk_size : 1) : -1; input_shape = ov::PartialShape{1, 1, 1, len}; - } else if (input->op == GGML_OP_VIEW) { - // This case is added to make test-backend-ops work - input_shape = ov::PartialShape{get_shape(input->view_src)}; } else { input_shape = ov::PartialShape{get_shape(input)}; } @@ -762,17 +775,11 @@ std::vector GgmlOvDecoder::get_output_stride(const std::string & name) c ov::PartialShape GgmlOvDecoder::get_output_shape(const std::string & name) const { auto * ggml_tensor = m_outputs.at(name); - if (ggml_tensor->op == GGML_OP_SET_ROWS) { - ggml_tensor = ggml_tensor->view_src; - } return ov::PartialShape(get_shape(ggml_tensor)); } -ov::PartialShape GgmlOvDecoder::get_output_shape(int node_idx, const std::string & name) const { - auto * ggml_tensor = m_node_info_list[node_idx].node_outputs.at(name); - if (ggml_tensor->op == GGML_OP_SET_ROWS) { - ggml_tensor = ggml_tensor->view_src; - } +ov::PartialShape GgmlOvDecoder::get_output_shape(int node_idx) const { + auto * ggml_tensor = m_node_info_list[node_idx].node_output; return ov::PartialShape(get_shape(ggml_tensor)); } @@ -785,7 +792,7 @@ std::vector GgmlOvDecoder::get_output_names() const { } std::vector GgmlOvDecoder::get_output_names(int node_idx) const { - return m_node_info_list[node_idx].node_outputs_names; + return {m_node_info_list[node_idx].node_output_name}; } const std::string & GgmlOvDecoder::get_op_name() const { @@ -809,8 +816,8 @@ int32_t * GgmlOvDecoder::get_output_op_params(const std::string & name) const { return m_outputs.at(name)->op_params; } -int32_t * GgmlOvDecoder::get_output_op_params(int node_idx, const std::string & name) const { - return m_node_info_list[node_idx].node_outputs.at(name)->op_params; +int32_t * GgmlOvDecoder::get_output_op_params(int node_idx) const { + return m_node_info_list[node_idx].node->op_params; } void GgmlOvDecoder::visit_subgraph(std::function, int node_idx)> node_visitor) const { diff --git a/ggml/src/ggml-openvino/ggml-decoder.h b/ggml/src/ggml-openvino/ggml-decoder.h index f2efb65a23..8e680b5c20 100644 --- a/ggml/src/ggml-openvino/ggml-decoder.h +++ b/ggml/src/ggml-openvino/ggml-decoder.h @@ -51,13 +51,14 @@ class GgmlOvDecoder : public ov::frontend::ggml::GgmlDecoder { public: struct NodeInfo { ggml_tensor * node; + std::string node_name; + std::string node_op_type; std::map node_inputs; std::vector node_inputs_names; - std::map node_outputs; - std::vector node_outputs_names; + ggml_tensor * node_output; + std::string node_output_name; int node_op_case = 0; - std::string node_op_type; - std::string node_name; + void * data_addr; }; // Graph decoder GgmlOvDecoder(ggml_cgraph * cgraph, @@ -106,7 +107,7 @@ public: virtual ov::PartialShape get_output_shape(const std::string & name) const override; - virtual ov::PartialShape get_output_shape(int node_idx, const std::string & name) const override; + virtual ov::PartialShape get_output_shape(int node_idx) const override; virtual std::vector get_output_stride(const std::string & name) const override; @@ -118,7 +119,7 @@ public: virtual int32_t * get_output_op_params(const std::string & name) const override; - virtual int32_t * get_output_op_params(int node_idx, const std::string & name) const override; + virtual int32_t * get_output_op_params(int node_idx) const override; virtual std::vector get_output_names() const override; @@ -156,7 +157,16 @@ public: return m_model_weights; } - virtual const std::vector & get_model_output_names() const override { return m_model_output_names; } + virtual std::vector get_model_output_names() const override { + std::vector output_names; + output_names.reserve(m_model_outputs.size()); + for (const auto & [name, tensor] : m_model_outputs) { + output_names.push_back(name); + } + return output_names; + } + + const std::map & get_model_outputs() const { return m_model_outputs; } virtual int get_ctx_size() const { return m_model_params.ctx; } @@ -214,14 +224,15 @@ public: bool m_is_prefill = false; int m_prefill_chunk_size = 0; -private: - void set_input_output(ggml_tensor * node, bool naive = false); - void add_extra_inputs(); static std::vector get_shape(const ggml_tensor * tensor); static std::vector get_stride(const ggml_tensor * tensor); static ov::element::Type get_ov_type(const ggml_tensor * tensor); - int compute_op_case(const ggml_tensor * node); - std::string compute_op_type(const ggml_tensor * node); + static std::string compute_op_type(const ggml_tensor * node); + +private: + void set_input_output(ggml_tensor * node, bool naive = false); + void add_extra_inputs(); + int compute_op_case(const ggml_tensor * node) const; void validate_cgraph() const; @@ -236,7 +247,7 @@ private: std::map> m_model_extra_inputs; std::map> m_model_extra_input_values; std::map> m_model_weights; - std::vector m_model_output_names; + std::map m_model_outputs; std::vector m_node_info_list; ModelParams m_model_params; diff --git a/ggml/src/ggml-openvino/openvino/decoder.hpp b/ggml/src/ggml-openvino/openvino/decoder.hpp index 9c455a3724..2ecc4401df 100644 --- a/ggml/src/ggml-openvino/openvino/decoder.hpp +++ b/ggml/src/ggml-openvino/openvino/decoder.hpp @@ -39,7 +39,7 @@ public: virtual PartialShape get_output_shape(const std::string& name) const = 0; - virtual PartialShape get_output_shape(int node_idx, const std::string& name) const = 0; + virtual PartialShape get_output_shape(int node_idx) const = 0; virtual std::vector get_output_stride(const std::string& name) const = 0; @@ -51,7 +51,7 @@ public: virtual int32_t* get_output_op_params(const std::string& name) const = 0; - virtual int32_t* get_output_op_params(int node_idx, const std::string& name) const = 0; + virtual int32_t * get_output_op_params(int node_idx) const = 0; virtual std::vector get_output_names() const = 0; @@ -72,7 +72,7 @@ public: virtual const std::map>& get_model_inputs() const = 0; virtual const std::map>& get_model_extra_inputs() const = 0; virtual const std::map>& get_model_weights() const = 0; - virtual const std::vector& get_model_output_names() const = 0; + virtual std::vector get_model_output_names() const = 0; virtual int32_t* get_rope_params() const = 0; // virtual std::map get_kv_param_res_names() const = 0; diff --git a/ggml/src/ggml-openvino/openvino/node_context.hpp b/ggml/src/ggml-openvino/openvino/node_context.hpp index 64e3d550c5..42d950c3eb 100644 --- a/ggml/src/ggml-openvino/openvino/node_context.hpp +++ b/ggml/src/ggml-openvino/openvino/node_context.hpp @@ -53,17 +53,13 @@ public: std::string get_output_name() const { return m_output_names[0]; } - PartialShape get_output_shape(size_t index) const { - return m_decoder->get_output_shape(m_node_idx, m_output_names[index]); - } + PartialShape get_output_shape() const { return m_decoder->get_output_shape(m_node_idx); } int32_t* get_input_op_params(size_t index) const { return m_decoder->get_input_op_params(m_node_idx, m_input_names[index]); } - int32_t* get_output_op_params(size_t index) const { - return m_decoder->get_output_op_params(m_node_idx, m_output_names[index]); - } + int32_t * get_output_op_params() const { return m_decoder->get_output_op_params(m_node_idx); } ov::element::Type get_output_type(size_t index) const { return m_decoder->get_output_type(m_output_names[index]); diff --git a/ggml/src/ggml-openvino/openvino/op/cont.cpp b/ggml/src/ggml-openvino/openvino/op/cont.cpp index 618b4efdea..d4c47d4bf1 100644 --- a/ggml/src/ggml-openvino/openvino/op/cont.cpp +++ b/ggml/src/ggml-openvino/openvino/op/cont.cpp @@ -22,7 +22,7 @@ OutputVector translate_cont(const NodeContext & context) { FRONT_END_CHECK_IMPLEMENTED(op_case == 1 || op_case == 2 || op_case == 3, "Unsupported CONT case"); auto src_shape = context.get_input_shape(0).to_shape(); - auto dst_shape = context.get_output_shape(0).to_shape(); + auto dst_shape = context.get_output_shape().to_shape(); ov::Output res; if (op_case == 1) { diff --git a/ggml/src/ggml-openvino/openvino/op/flash_attn_ext.cpp b/ggml/src/ggml-openvino/openvino/op/flash_attn_ext.cpp index efbdf421c6..342da882aa 100644 --- a/ggml/src/ggml-openvino/openvino/op/flash_attn_ext.cpp +++ b/ggml/src/ggml-openvino/openvino/op/flash_attn_ext.cpp @@ -26,7 +26,7 @@ OutputVector translate_flash_attn_ext(const NodeContext & context) { auto v = context.get_input(2); auto mask = context.get_input(3); - float * params = reinterpret_cast(context.get_output_op_params(0)); + float * params = reinterpret_cast(context.get_output_op_params()); float scale = params[0]; // float max_bias = params[1]; // float logit_softcap = params[2]; diff --git a/ggml/src/ggml-openvino/openvino/op/glu_geglu.cpp b/ggml/src/ggml-openvino/openvino/op/glu_geglu.cpp index 80bfbafd83..ad5cd3f6ba 100644 --- a/ggml/src/ggml-openvino/openvino/op/glu_geglu.cpp +++ b/ggml/src/ggml-openvino/openvino/op/glu_geglu.cpp @@ -32,7 +32,7 @@ OutputVector translate_glu_geglu(const NodeContext & context) { src1 = split->output(1); } - int32_t * params = context.get_output_op_params(0); + int32_t * params = context.get_output_op_params(); const int32_t swapped = params[1]; if (swapped) { std::swap(src0, src1); diff --git a/ggml/src/ggml-openvino/openvino/op/glu_swiglu.cpp b/ggml/src/ggml-openvino/openvino/op/glu_swiglu.cpp index 2148931246..2b7f13629f 100644 --- a/ggml/src/ggml-openvino/openvino/op/glu_swiglu.cpp +++ b/ggml/src/ggml-openvino/openvino/op/glu_swiglu.cpp @@ -32,7 +32,7 @@ OutputVector translate_glu_swiglu(const NodeContext & context) { src1 = split->output(1); } - int32_t * params = context.get_output_op_params(0); + int32_t * params = context.get_output_op_params(); const int32_t swapped = params[1]; if (swapped) { std::swap(src0, src1); diff --git a/ggml/src/ggml-openvino/openvino/op/permute.cpp b/ggml/src/ggml-openvino/openvino/op/permute.cpp index d156e48e3c..bfe09a2b84 100644 --- a/ggml/src/ggml-openvino/openvino/op/permute.cpp +++ b/ggml/src/ggml-openvino/openvino/op/permute.cpp @@ -32,10 +32,12 @@ OutputVector translate_permute(const NodeContext & context) { if (op_case == 1) { res = std::make_shared(src, perm); } else if (op_case == 4) { - auto output_shape = context.get_output_shape(0).to_shape(); + auto output_shape = context.get_output_shape().to_shape(); auto n_heads = ov::op::v0::Constant::create(ov::element::i64, {1}, {output_shape[1]}); auto head_size = ov::op::v0::Constant::create(ov::element::i64, {1}, {output_shape[3]}); - auto n_seq_active = context.get_input("n_seq_active"); + auto n_seq_active = context.has_input("n_seq_active") ? + context.get_input("n_seq_active") : + ov::op::v0::Constant::create(ov::element::i64, {1}, {output_shape[0]}); auto neg_one = ov::op::v0::Constant::create(ov::element::i64, {1}, {-1}); auto new_shape = @@ -49,26 +51,39 @@ OutputVector translate_permute(const NodeContext & context) { res = std::make_shared(reshaped, perm); } else { auto cache_shape = src.get_partial_shape(); - auto output_shape = context.get_output_shape(0).to_shape(); + auto output_shape = context.get_output_shape().to_shape(); int64_t head_size = output_shape[3]; int64_t n_heads = output_shape[1]; int64_t ctx_per_seq = cache_shape[2].is_static() ? cache_shape[2].get_length() : -1; int64_t n_seq = cache_shape[1].get_length(); Output attention_size; - if (op_case == 2) { + if (!context.has_input("attention_size")) { + attention_size = ov::op::v0::Constant::create(ov::element::i64, {1}, {output_shape[2]}); + } else if (op_case == 2) { attention_size = context.get_input("attention_size"); } else { attention_size = context.get_input("attention_size_swa"); } + Output seq_active_start; + Output seq_active_end; + if (context.has_input("seq_active_start")) { + seq_active_start = context.get_input("seq_active_start"); + seq_active_end = context.get_input("seq_active_end"); + } else { + int64_t n_seq_active = output_shape[0]; + size_t offset = *((size_t *) context.get_input_op_params(0)); + int64_t seq_active_start_val = offset / context.get_input_stride(0)[0]; + int64_t seq_active_end_val = seq_active_start_val + n_seq_active; + seq_active_start = ov::op::v0::Constant::create(ov::element::i64, {1}, {seq_active_start_val}); + seq_active_end = ov::op::v0::Constant::create(ov::element::i64, {1}, {seq_active_end_val}); + } + // 1. reshape to [n_seq, ctx_per_seq, n_heads, head_size] // 2. slice out the active sequences // 3. slice out the attention part in each sequence // 4. permute - auto seq_active_start = context.get_input("seq_active_start"); - auto seq_active_end = context.get_input("seq_active_end"); - auto zero = ov::op::v0::Constant::create(ov::element::i64, {1}, {0}); auto one = ov::op::v0::Constant::create(ov::element::i64, {1}, {1}); diff --git a/ggml/src/ggml-openvino/openvino/op/reshape.cpp b/ggml/src/ggml-openvino/openvino/op/reshape.cpp index b34fa626f1..e26a8c778c 100644 --- a/ggml/src/ggml-openvino/openvino/op/reshape.cpp +++ b/ggml/src/ggml-openvino/openvino/op/reshape.cpp @@ -20,7 +20,7 @@ namespace op { OutputVector translate_reshape(const NodeContext & context) { num_inputs_check(context, 1, 1); - if (context.get_input_shape(0) == context.get_output_shape(0)) { + if (context.get_input_shape(0) == context.get_output_shape()) { return {context.get_input(0)}; } @@ -29,7 +29,7 @@ OutputVector translate_reshape(const NodeContext & context) { op_case == 1 || op_case == 2 || op_case == 3 || op_case == 4 || op_case == 5 || op_case == 6, "Unsupported RESHAPE case"); - auto output_shape = context.get_output_shape(0).to_shape(); + auto output_shape = context.get_output_shape().to_shape(); std::shared_ptr new_shape_node; if (op_case == 1) { new_shape_node = ov::op::v0::Constant::create( @@ -50,18 +50,18 @@ OutputVector translate_reshape(const NodeContext & context) { return {context.get_input(0).get_node_shared_ptr()->input_value(0)}; } else if (op_case == 5) { - std::vector shape_vec = {1, 1, -1, (int64_t) context.get_output_shape(0).to_shape()[3]}; + std::vector shape_vec = {1, 1, -1, (int64_t) context.get_output_shape().to_shape()[3]}; new_shape_node = ov::op::v0::Constant::create(ov::element::i64, {4}, shape_vec); // // Alternative // auto token_len = context.get_input("token_len"); // auto emb_size = - // ov::op::v0::Constant::create(ov::element::i64, {1}, {(int64_t) context.get_output_shape(0).to_shape()[3]}); + // ov::op::v0::Constant::create(ov::element::i64, {1}, {(int64_t) context.get_output_shape().to_shape()[3]}); // auto one = ov::op::v0::Constant::create(ov::element::i64, {1}, {1}); // new_shape_node = std::make_shared(ov::OutputVector{one, one, token_len, emb_size}, 0); } else if (op_case == 6) { - new_shape_node = ov::op::v0::Constant::create(ov::element::i64, {4}, context.get_output_shape(0).to_shape()); + new_shape_node = ov::op::v0::Constant::create(ov::element::i64, {4}, context.get_output_shape().to_shape()); } auto res = std::make_shared(context.get_input(0), new_shape_node, false); return rename_outputs_with_suffix({res}, context.get_name()); diff --git a/ggml/src/ggml-openvino/openvino/op/rms_norm.cpp b/ggml/src/ggml-openvino/openvino/op/rms_norm.cpp index 3ac96d0c22..99c97e06ae 100644 --- a/ggml/src/ggml-openvino/openvino/op/rms_norm.cpp +++ b/ggml/src/ggml-openvino/openvino/op/rms_norm.cpp @@ -27,7 +27,7 @@ OutputVector translate_rms_norm(const NodeContext & context) { square, ov::op::v0::Constant::create(ov::element::i64, ov::Shape{1}, {-1}), true); float eps; - memcpy(&eps, context.get_output_op_params(0), sizeof(float)); + memcpy(&eps, context.get_output_op_params(), sizeof(float)); auto rms = std::make_shared( std::make_shared(mean, ov::op::v0::Constant::create(ov::element::f32, ov::Shape{1}, {eps}))); diff --git a/ggml/src/ggml-openvino/openvino/op/rope.cpp b/ggml/src/ggml-openvino/openvino/op/rope.cpp index 5c83867d18..96fbb6b795 100644 --- a/ggml/src/ggml-openvino/openvino/op/rope.cpp +++ b/ggml/src/ggml-openvino/openvino/op/rope.cpp @@ -31,8 +31,8 @@ OutputVector translate_rope(const NodeContext & context) { ov::Output res; auto data_node = context.get_input(0).get_node_shared_ptr(); - auto output_shape = context.get_output_shape(0).to_shape(); - int32_t * op_params = context.get_output_op_params(0); + auto output_shape = context.get_output_shape().to_shape(); + int32_t * op_params = context.get_output_op_params(); Output cos_theta_node; Output sin_theta_node; diff --git a/ggml/src/ggml-openvino/openvino/op/scale.cpp b/ggml/src/ggml-openvino/openvino/op/scale.cpp index f52381786a..01e59cedd9 100644 --- a/ggml/src/ggml-openvino/openvino/op/scale.cpp +++ b/ggml/src/ggml-openvino/openvino/op/scale.cpp @@ -15,7 +15,7 @@ OutputVector translate_scale(const NodeContext & context) { num_inputs_check(context, 1, 1); float scale; - memcpy(&scale, context.get_output_op_params(0), sizeof(float)); + memcpy(&scale, context.get_output_op_params(), sizeof(float)); auto scale_node = std::make_shared(ov::element::f32, ov::Shape{}, std::vector{scale}); auto res = std::make_shared(context.get_input(0), scale_node); diff --git a/ggml/src/ggml-openvino/openvino/op/set_rows.cpp b/ggml/src/ggml-openvino/openvino/op/set_rows.cpp index a323e5ed38..eb128f04a3 100644 --- a/ggml/src/ggml-openvino/openvino/op/set_rows.cpp +++ b/ggml/src/ggml-openvino/openvino/op/set_rows.cpp @@ -34,7 +34,7 @@ OutputVector translate_set_rows(const NodeContext & context) { data = std::make_shared(data, context.get_output_type(0)); - auto dst_shape = context.get_output_shape(0).to_shape(); + auto dst_shape = context.get_output_shape().to_shape(); auto ind_squeezed = std::make_shared(indices, ov::op::v0::Constant::create(ov::element::i64, {3}, {0, 1, 2})); diff --git a/ggml/src/ggml-openvino/openvino/op/softmax.cpp b/ggml/src/ggml-openvino/openvino/op/softmax.cpp index 591bcb46c4..921475e51a 100644 --- a/ggml/src/ggml-openvino/openvino/op/softmax.cpp +++ b/ggml/src/ggml-openvino/openvino/op/softmax.cpp @@ -31,7 +31,7 @@ OutputVector translate_soft_max(const NodeContext & context) { float scale = 1.0f; float max_bias = 0.0f; - auto * op_params = context.get_output_op_params(0); + auto * op_params = context.get_output_op_params(); memcpy(&scale, (float *) op_params + 0, sizeof(float)); memcpy(&max_bias, (float *) op_params + 1, sizeof(float)); auto src0_shape = context.get_input_shape(0).get_shape(); diff --git a/ggml/src/ggml-openvino/openvino/op/view.cpp b/ggml/src/ggml-openvino/openvino/op/view.cpp index 6bf980cab6..f0b8938bef 100644 --- a/ggml/src/ggml-openvino/openvino/op/view.cpp +++ b/ggml/src/ggml-openvino/openvino/op/view.cpp @@ -10,7 +10,7 @@ OutputVector translate_view(const NodeContext & context) { num_inputs_check(context, 1, 1); if (context.get_op_case() == 2) { - auto dst_shape = context.get_output_shape(0).to_shape(); + auto dst_shape = context.get_output_shape().to_shape(); return rename_outputs_with_suffix({process_view_input(context, 0, dst_shape[2] * dst_shape[3])}, context.get_name()); } diff --git a/ggml/src/ggml-openvino/openvino/translate_session.cpp b/ggml/src/ggml-openvino/openvino/translate_session.cpp index d03c9358b0..546778a470 100644 --- a/ggml/src/ggml-openvino/openvino/translate_session.cpp +++ b/ggml/src/ggml-openvino/openvino/translate_session.cpp @@ -203,7 +203,16 @@ std::shared_ptr TranslateSession::translate_graph(const frontend::InputMo results.push_back(result); } - resulting_model = std::make_shared(results, params); + ov::ParameterVector used_params; + for (const auto & param : params) { + if (!param->output(0).get_target_inputs().empty()) { + used_params.push_back(param); + } + } + // if (auto diff = params.size() - used_params.size()) { + // GGML_LOG_INFO("%zu parameters are not used in the model.", diff); + // } + resulting_model = std::make_shared(results, used_params); apply_transformations(resulting_model); return resulting_model; diff --git a/ggml/src/ggml-openvino/utils.cpp b/ggml/src/ggml-openvino/utils.cpp index e90073a1f2..92e8ce80b3 100644 --- a/ggml/src/ggml-openvino/utils.cpp +++ b/ggml/src/ggml-openvino/utils.cpp @@ -362,7 +362,7 @@ std::map get_types_to_requant(const std::string & dev } bool is_naive(ggml_cgraph * cgraph) { - constexpr int naive_graph_size_threshold = 20; + constexpr int naive_graph_size_threshold = 100; return cgraph->n_nodes < naive_graph_size_threshold; } @@ -412,7 +412,7 @@ ov::Tensor convert_ggml_input_to_ov(std::shared_ptr ggml_decoder, ov::Shape input_shape; if (ggml_tensor->op == GGML_OP_VIEW) { // This case is added to make test-backend-ops work - input_shape = ggml_decoder->get_graph_input_shape(ggml_tensor, ggml_tensor->view_src).to_shape(); + input_shape = ggml_decoder->get_shape(ggml_tensor->view_src); } else { input_shape = ggml_decoder->get_input_shape(name).to_shape(); } @@ -545,15 +545,13 @@ ov::Tensor get_ov_input_tensor_static_prefill(std::shared_ptr ggm } ov::Tensor get_ov_output_tensor(std::shared_ptr ggml_decoder, const std::string & result_name) { - auto * ggml_tensor = ggml_decoder->get_output_ggml_tensor(result_name); - auto output_type = ggml_decoder->get_output_type(result_name); - ov::Shape output_shape; - output_shape = ggml_decoder->get_output_shape(result_name).to_shape(); + auto * ggml_tensor = ggml_decoder->get_model_outputs().at(result_name); + auto output_type = ggml_decoder->get_ov_type(ggml_tensor); + auto output_shape = ggml_decoder->get_shape(ggml_tensor); if (ggml_decoder->is_static() && result_name == "result_output") { output_shape[1] = 1; } - ov::Tensor output_tensor(output_type, output_shape, ggml_tensor->data); return output_tensor; }