Fix decoder can_reuse for llama-bench

2025-12-11 11:30:25 +08:00 · 2025-12-11 11:30:25 +08:00 · 0ef2e5e4d4
parent 9e3163e846
commit 0ef2e5e4d4
2 changed files with 7 additions and 7 deletions
--- a/ggml/src/ggml-openvino/ggml-decoder.h
+++ b/ggml/src/ggml-openvino/ggml-decoder.h
@@ -25,15 +25,15 @@ struct ModelParams {

    // std::vector<std::string> kv_names;

-    bool can_reuse_dynamically(const ModelParams & other) const {
+    bool operator==(const ModelParams & other) const {
        return n_seq == other.n_seq && n_heads == other.n_heads && n_heads_kv == other.n_heads_kv &&
-               head_size == other.head_size && rope_params == other.rope_params && swa_layers == other.swa_layers;
+               head_size == other.head_size && rope_params == other.rope_params && swa_layers == other.swa_layers &&
+               ctx_per_seq == other.ctx_per_seq && ctx_per_seq_swa == other.ctx_per_seq_swa;
    }

-    bool can_reuse_statically(const ModelParams & other) const {
-        return can_reuse_dynamically(other) && ctx_per_seq == other.ctx_per_seq &&
-               ctx_per_seq_swa == other.ctx_per_seq_swa;
-    }
+    bool can_reuse_dynamically(const ModelParams & other) const { return *this == other; }
+
+    bool can_reuse_statically(const ModelParams & other) const { return *this == other; }
 };

 struct ComputeParams {
--- a/ggml/src/ggml-openvino/utils.cpp
+++ b/ggml/src/ggml-openvino/utils.cpp
@@ -97,7 +97,7 @@ enum ggml_status ov_graph_compute_dynamic(ggml_cgraph * cgraph, const std::strin
        cache_hit = it != decoder_cache.end();
        if (cache_hit) {
            ggml_decoder = it->second;
-            cache_hit = ggml_decoder->get_model_params().can_reuse_statically(m_params);
+            cache_hit = ggml_decoder->get_model_params().can_reuse_dynamically(m_params);
        }

        if (cache_hit) {