From 28da9a9adc0efe5f6f11e31b688d602db3680771 Mon Sep 17 00:00:00 2001
From: "Yu, Zijun"
Date: Fri, 5 Dec 2025 14:34:10 +0800
Subject: [PATCH] Reuse cached decoder

---
 ggml/src/ggml-openvino/utils.cpp | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/ggml/src/ggml-openvino/utils.cpp b/ggml/src/ggml-openvino/utils.cpp
index 935404136c..1f94d4bad6 100644
--- a/ggml/src/ggml-openvino/utils.cpp
+++ b/ggml/src/ggml-openvino/utils.cpp
@@ -248,9 +248,11 @@ enum ggml_status ov_graph_compute_static(ggml_cgraph * cgraph) {
 
     if (cache_hit) {
         std::map<std::string, std::shared_ptr<ov::Node>> model_weights;
-        ggml_decoder = std::make_shared<GgmlOvDecoder>(cgraph, m_params, c_params, model_weights, is_static,
-                                                       is_prefill, prefill_chunk_size);
-        decoder_cache[key] = ggml_decoder;
+        ggml_decoder = decoder_cache[key];
+        ggml_decoder->m_is_prefill = is_prefill;
+        ggml_decoder->set_model_params(m_params);
+        ggml_decoder->set_compute_params(c_params);
+        ggml_decoder->add_extra_inputs();
         infer_request = is_prefill ? infer_request_cache_prefill[key] : infer_request_cache[key];
 
         decoder_end_time = ggml_time_us();