Fix decoder can_reuse for llama-bench

Fold the field-by-field comparison into a new ModelParams::operator== that also covers ctx_per_seq and ctx_per_seq_swa, and let can_reuse_dynamically() and can_reuse_statically() both delegate to it. The dynamic graph-compute path now calls the matching can_reuse_dynamically() helper, so decoder-cache reuse requires the per-sequence context lengths to match as well.

Author: Yu, Zijun (2025-12-11 11:30:25 +08:00); committed by Mustafa Cavus
parent 9e3163e846
commit 0ef2e5e4d4
2 changed files with 7 additions and 7 deletions


@@ -25,15 +25,15 @@ struct ModelParams {
     // std::vector<std::string> kv_names;
-    bool can_reuse_dynamically(const ModelParams & other) const {
+    bool operator==(const ModelParams & other) const {
         return n_seq == other.n_seq && n_heads == other.n_heads && n_heads_kv == other.n_heads_kv &&
-               head_size == other.head_size && rope_params == other.rope_params && swa_layers == other.swa_layers;
+               head_size == other.head_size && rope_params == other.rope_params && swa_layers == other.swa_layers &&
+               ctx_per_seq == other.ctx_per_seq && ctx_per_seq_swa == other.ctx_per_seq_swa;
     }
-    bool can_reuse_statically(const ModelParams & other) const {
-        return can_reuse_dynamically(other) && ctx_per_seq == other.ctx_per_seq &&
-               ctx_per_seq_swa == other.ctx_per_seq_swa;
-    }
+    bool can_reuse_dynamically(const ModelParams & other) const { return *this == other; }
+    bool can_reuse_statically(const ModelParams & other) const { return *this == other; }
 };
 struct ComputeParams {
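
For context, a minimal compilable sketch of what the reuse check amounts to after this hunk. The field types and values below are assumptions for illustration only; the real struct lives in the ggml OpenVINO decoder and has more members:

    // Minimal sketch (assumed field types): after the change, both reuse
    // predicates reduce to full equality, including the per-sequence
    // context lengths ctx_per_seq and ctx_per_seq_swa.
    #include <array>
    #include <cstdint>
    #include <iostream>

    struct ModelParams {
        int64_t n_seq = 1, n_heads = 32, n_heads_kv = 8, head_size = 128;
        std::array<float, 2> rope_params{};  // assumed: POD rope config
        std::array<bool, 32> swa_layers{};   // assumed: per-layer SWA flags
        int64_t ctx_per_seq = 0, ctx_per_seq_swa = 0;

        bool operator==(const ModelParams & other) const {
            return n_seq == other.n_seq && n_heads == other.n_heads &&
                   n_heads_kv == other.n_heads_kv && head_size == other.head_size &&
                   rope_params == other.rope_params && swa_layers == other.swa_layers &&
                   ctx_per_seq == other.ctx_per_seq && ctx_per_seq_swa == other.ctx_per_seq_swa;
        }
        bool can_reuse_dynamically(const ModelParams & other) const { return *this == other; }
        bool can_reuse_statically(const ModelParams & other) const { return *this == other; }
    };

    int main() {
        ModelParams a, b;
        b.ctx_per_seq = 4096;  // same model, different context length
        // Previously can_reuse_dynamically() ignored the context lengths and
        // would return true here; now both predicates reject the reuse.
        std::cout << a.can_reuse_dynamically(b) << ' '
                  << a.can_reuse_statically(b) << '\n';  // prints: 0 0
    }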


@@ -97,7 +97,7 @@ enum ggml_status ov_graph_compute_dynamic(ggml_cgraph * cgraph, const std::strin
     cache_hit = it != decoder_cache.end();
     if (cache_hit) {
         ggml_decoder = it->second;
-        cache_hit = ggml_decoder->get_model_params().can_reuse_statically(m_params);
+        cache_hit = ggml_decoder->get_model_params().can_reuse_dynamically(m_params);
     }
     if (cache_hit) {
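
For reference, a hedged sketch of the decoder-cache lookup this hunk sits in. Only the identifiers visible in the hunk (decoder_cache, ggml_decoder, get_model_params, m_params, cache_hit) come from the source; the map type, key type, and Decoder class are stand-ins, not the backend's real declarations:

    // Hypothetical sketch of the lookup around the changed line; the real
    // decoder type and cache key come from the ggml OpenVINO backend.
    #include <map>
    #include <memory>
    #include <string>

    struct ModelParams {
        long ctx_per_seq = 0;  // stand-in for the full parameter set
        bool can_reuse_dynamically(const ModelParams & other) const {
            return ctx_per_seq == other.ctx_per_seq;  // full equality in the real code
        }
    };

    struct Decoder {
        ModelParams params;
        const ModelParams & get_model_params() const { return params; }
    };

    static std::map<std::string, std::shared_ptr<Decoder>> decoder_cache;

    std::shared_ptr<Decoder> lookup_decoder(const std::string & key,
                                            const ModelParams & m_params) {
        auto it = decoder_cache.find(key);
        bool cache_hit = it != decoder_cache.end();
        std::shared_ptr<Decoder> ggml_decoder;
        if (cache_hit) {
            ggml_decoder = it->second;
            // The fix: the dynamic-shape path now checks can_reuse_dynamically(),
            // which (after this commit) also requires matching context lengths.
            cache_hit = ggml_decoder->get_model_params().can_reuse_dynamically(m_params);
        }
        return cache_hit ? ggml_decoder : nullptr;  // caller rebuilds on a miss
    }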