Fix decoder cache error on cache hit

This commit is contained in:
Xuejun Zhai 2025-12-04 22:14:05 -08:00 committed by Mustafa Cavus
parent 47c91db31f
commit 91a1b20c82
2 changed files with 5 additions and 3 deletions

View File

@@ -210,10 +210,10 @@ public:
static std::vector<size_t> get_stride(const ggml_tensor * tensor);
static ov::element::Type get_ov_type(const ggml_tensor * tensor);
static std::string compute_op_type(const ggml_tensor * node);
void add_extra_inputs();
private:
void set_input_output(ggml_tensor * node, bool naive = false);
void add_extra_inputs();
int compute_op_case(const ggml_tensor * node) const;
void validate_cgraph() const;

View File

@@ -102,8 +102,10 @@ enum ggml_status ov_graph_compute_dynamic(ggml_cgraph * cgraph, const std::strin
if (cache_hit) {
std::map<std::string, std::shared_ptr<ov::Node>> model_weights;
ggml_decoder = std::make_shared<GgmlOvDecoder>(cgraph, m_params, c_params, model_weights, is_static);
decoder_cache[key] = ggml_decoder;
ggml_decoder = decoder_cache[key];
ggml_decoder->set_compute_params(c_params);
ggml_decoder->set_model_params(m_params);
ggml_decoder->add_extra_inputs();
infer_request = infer_request_cache[key];
decoder_end_time = ggml_time_us();