Fix decoder can_reuse for llama-bench
This commit is contained in:
parent
9e3163e846
commit
0ef2e5e4d4
|
|
@@ -25,15 +25,15 @@ struct ModelParams {
|
|||
|
||||
// std::vector<std::string> kv_names;
|
||||
|
||||
bool can_reuse_dynamically(const ModelParams & other) const {
|
||||
bool operator==(const ModelParams & other) const {
|
||||
return n_seq == other.n_seq && n_heads == other.n_heads && n_heads_kv == other.n_heads_kv &&
|
||||
head_size == other.head_size && rope_params == other.rope_params && swa_layers == other.swa_layers;
|
||||
head_size == other.head_size && rope_params == other.rope_params && swa_layers == other.swa_layers &&
|
||||
ctx_per_seq == other.ctx_per_seq && ctx_per_seq_swa == other.ctx_per_seq_swa;
|
||||
}
|
||||
|
||||
bool can_reuse_statically(const ModelParams & other) const {
|
||||
return can_reuse_dynamically(other) && ctx_per_seq == other.ctx_per_seq &&
|
||||
ctx_per_seq_swa == other.ctx_per_seq_swa;
|
||||
}
|
||||
bool can_reuse_dynamically(const ModelParams & other) const { return *this == other; }
|
||||
|
||||
bool can_reuse_statically(const ModelParams & other) const { return *this == other; }
|
||||
};
|
||||
|
||||
struct ComputeParams {
|
||||
|
|
|
|||
|
|
@@ -97,7 +97,7 @@ enum ggml_status ov_graph_compute_dynamic(ggml_cgraph * cgraph, const std::strin
|
|||
cache_hit = it != decoder_cache.end();
|
||||
if (cache_hit) {
|
||||
ggml_decoder = it->second;
|
||||
cache_hit = ggml_decoder->get_model_params().can_reuse_statically(m_params);
|
||||
cache_hit = ggml_decoder->get_model_params().can_reuse_dynamically(m_params);
|
||||
}
|
||||
|
||||
if (cache_hit) {
|
||||
|
|
|
|||
Loading…
Reference in New Issue