From 8ac5c225aa26a4a58a9d296842524be2f3e756a5 Mon Sep 17 00:00:00 2001 From: "Yu, Zijun" Date: Fri, 16 May 2025 10:12:22 +0800 Subject: [PATCH] FIX: set_max_token_len --- ggml/src/ggml-openvino/ggml-decoder.cpp | 5 +++-- ggml/src/ggml-openvino/utils.cpp | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/ggml/src/ggml-openvino/ggml-decoder.cpp b/ggml/src/ggml-openvino/ggml-decoder.cpp index a8e1ad5556..e6474d6def 100644 --- a/ggml/src/ggml-openvino/ggml-decoder.cpp +++ b/ggml/src/ggml-openvino/ggml-decoder.cpp @@ -44,13 +44,14 @@ GgmlOvDecoder::GgmlOvDecoder(struct ggml_tensor* node, struct ggml_cgraph* cgrap dump_cgraph(m_cgraph); } + set_max_token_len(); + static bool weight_created = false; if (!getenv("GGML_OPENVINO_WEIGHT_AS_INPUT") && !weight_created) { add_weight_const_parallel(model_weights); weight_created = true; } - set_max_token_len(); for (int node_n = 0; node_n < m_cgraph->n_nodes; node_n++) { auto* cur_node = m_cgraph->nodes[node_n]; m_nodes.push_back(cur_node); @@ -197,7 +198,7 @@ void GgmlOvDecoder::set_max_token_len() { auto* node = m_cgraph->nodes[i]; if (std::string(node->name) == "k-0") { auto* cache_k = node->src[0]; - m_max_token_len = cache_k->ne[0] / node->ne[0] / node->ne[1]; + m_max_token_len = cache_k->ne[0] / node->ne[0] / node->ne[2]; break; } } diff --git a/ggml/src/ggml-openvino/utils.cpp b/ggml/src/ggml-openvino/utils.cpp index 09bf0d0ac5..040ca1961e 100644 --- a/ggml/src/ggml-openvino/utils.cpp +++ b/ggml/src/ggml-openvino/utils.cpp @@ -209,4 +209,4 @@ void print_output_tensor_info(const std::string& name, default: break; } -} \ No newline at end of file +}