Fix decoder can_reuse for llama-bench
This commit is contained in:
parent
9e3163e846
commit
0ef2e5e4d4
|
|
@ -25,15 +25,15 @@ struct ModelParams {
|
||||||
|
|
||||||
// std::vector<std::string> kv_names;
|
// std::vector<std::string> kv_names;
|
||||||
|
|
||||||
bool can_reuse_dynamically(const ModelParams & other) const {
|
bool operator==(const ModelParams & other) const {
|
||||||
return n_seq == other.n_seq && n_heads == other.n_heads && n_heads_kv == other.n_heads_kv &&
|
return n_seq == other.n_seq && n_heads == other.n_heads && n_heads_kv == other.n_heads_kv &&
|
||||||
head_size == other.head_size && rope_params == other.rope_params && swa_layers == other.swa_layers;
|
head_size == other.head_size && rope_params == other.rope_params && swa_layers == other.swa_layers &&
|
||||||
|
ctx_per_seq == other.ctx_per_seq && ctx_per_seq_swa == other.ctx_per_seq_swa;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool can_reuse_statically(const ModelParams & other) const {
|
bool can_reuse_dynamically(const ModelParams & other) const { return *this == other; }
|
||||||
return can_reuse_dynamically(other) && ctx_per_seq == other.ctx_per_seq &&
|
|
||||||
ctx_per_seq_swa == other.ctx_per_seq_swa;
|
bool can_reuse_statically(const ModelParams & other) const { return *this == other; }
|
||||||
}
|
|
||||||
};
|
};
|
||||||
|
|
||||||
struct ComputeParams {
|
struct ComputeParams {
|
||||||
|
|
|
||||||
|
|
@ -97,7 +97,7 @@ enum ggml_status ov_graph_compute_dynamic(ggml_cgraph * cgraph, const std::strin
|
||||||
cache_hit = it != decoder_cache.end();
|
cache_hit = it != decoder_cache.end();
|
||||||
if (cache_hit) {
|
if (cache_hit) {
|
||||||
ggml_decoder = it->second;
|
ggml_decoder = it->second;
|
||||||
cache_hit = ggml_decoder->get_model_params().can_reuse_statically(m_params);
|
cache_hit = ggml_decoder->get_model_params().can_reuse_dynamically(m_params);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (cache_hit) {
|
if (cache_hit) {
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue