Fix decoder can_reuse for llama-bench

This commit is contained in:
Yu, Zijun 2025-12-11 11:30:25 +08:00 committed by Mustafa Cavus
parent 9e3163e846
commit 0ef2e5e4d4
2 changed files with 7 additions and 7 deletions

View File

@@ -25,15 +25,15 @@ struct ModelParams {
// std::vector<std::string> kv_names; // std::vector<std::string> kv_names;
bool can_reuse_dynamically(const ModelParams & other) const { bool operator==(const ModelParams & other) const {
return n_seq == other.n_seq && n_heads == other.n_heads && n_heads_kv == other.n_heads_kv && return n_seq == other.n_seq && n_heads == other.n_heads && n_heads_kv == other.n_heads_kv &&
head_size == other.head_size && rope_params == other.rope_params && swa_layers == other.swa_layers; head_size == other.head_size && rope_params == other.rope_params && swa_layers == other.swa_layers &&
ctx_per_seq == other.ctx_per_seq && ctx_per_seq_swa == other.ctx_per_seq_swa;
} }
bool can_reuse_statically(const ModelParams & other) const { bool can_reuse_dynamically(const ModelParams & other) const { return *this == other; }
return can_reuse_dynamically(other) && ctx_per_seq == other.ctx_per_seq &&
ctx_per_seq_swa == other.ctx_per_seq_swa; bool can_reuse_statically(const ModelParams & other) const { return *this == other; }
}
}; };
struct ComputeParams { struct ComputeParams {

View File

@@ -97,7 +97,7 @@ enum ggml_status ov_graph_compute_dynamic(ggml_cgraph * cgraph, const std::strin
cache_hit = it != decoder_cache.end(); cache_hit = it != decoder_cache.end();
if (cache_hit) { if (cache_hit) {
ggml_decoder = it->second; ggml_decoder = it->second;
cache_hit = ggml_decoder->get_model_params().can_reuse_statically(m_params); cache_hit = ggml_decoder->get_model_params().can_reuse_dynamically(m_params);
} }
if (cache_hit) { if (cache_hit) {