From 28da9a9adc0efe5f6f11e31b688d602db3680771 Mon Sep 17 00:00:00 2001
From: "Yu, Zijun" <zijun.yu@intel.com>
Date: Fri, 5 Dec 2025 14:34:10 +0800
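Subject: [PATCH] Reuse cached decoder

On a cache hit in ov_graph_compute_static, stop constructing a new
GgmlOvDecoder and overwriting decoder_cache[key]. Instead, reuse the
decoder already stored in decoder_cache[key] and refresh its state for
the current call: update m_is_prefill, set the model and compute
params, and call add_extra_inputs().
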
---
 ggml/src/ggml-openvino/utils.cpp | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)
diff --git a/ggml/src/ggml-openvino/utils.cpp b/ggml/src/ggml-openvino/utils.cpp
index 935404136c..1f94d4bad6 100644
--- a/ggml/src/ggml-openvino/utils.cpp
+++ b/ggml/src/ggml-openvino/utils.cpp
@@ -248,9 +248,11 @@ enum ggml_status ov_graph_compute_static(ggml_cgraph * cgraph) {
 
         if (cache_hit) {
             std::map<std::string, std::shared_ptr<ov::Node>> model_weights;
-            ggml_decoder = std::make_shared<GgmlOvDecoder>(cgraph, m_params, c_params, model_weights, is_static,
-                                                           is_prefill, prefill_chunk_size);
-            decoder_cache[key] = ggml_decoder;
+            ggml_decoder = decoder_cache[key];
+            ggml_decoder->m_is_prefill = is_prefill;
+            ggml_decoder->set_model_params(m_params);
+            ggml_decoder->set_compute_params(c_params);
+            ggml_decoder->add_extra_inputs();
             infer_request = is_prefill ? infer_request_cache_prefill[key] : infer_request_cache[key];
 
             decoder_end_time = ggml_time_us();