From 2b6d2daa6ac60a33760503dbbd3b8ad9d7177876 Mon Sep 17 00:00:00 2001
From: Xuejun Zhai <Xuejun.Zhai@intel.com>
Date: Thu, 5 Mar 2026 17:37:21 -0800
Subject: [PATCH] Add interface is_model_splitted() to check the c-graph is
 splited or not

---
 ggml/src/ggml-openvino/ggml-decoder.cpp |  4 +-
 ggml/src/ggml-openvino/ggml-decoder.h   |  2 +
 ggml/src/ggml-openvino/utils.cpp        | 55 ++++++++++++++++++++++---
 ggml/src/ggml-openvino/utils.h          |  2 +
 4 files changed, 57 insertions(+), 6 deletions(-)
diff --git a/ggml/src/ggml-openvino/ggml-decoder.cpp b/ggml/src/ggml-openvino/ggml-decoder.cpp
index 0938d2273e..b663cd67f6 100644
--- a/ggml/src/ggml-openvino/ggml-decoder.cpp
+++ b/ggml/src/ggml-openvino/ggml-decoder.cpp
@@ -45,6 +45,7 @@ GgmlOvDecoder::GgmlOvDecoder(ggml_cgraph * cgraph,
                              std::map<std::string, std::shared_ptr<ov::Node>> & model_weights,
                              bool is_static,
                              bool is_stateful,
+                             bool model_is_splitted,
                              bool is_prefill,
                              int prefill_chunk_size) :
     m_is_static(is_static),
@@ -52,6 +53,7 @@ GgmlOvDecoder::GgmlOvDecoder(ggml_cgraph * cgraph,
     m_is_prefill(is_prefill),
     m_naive(false),
     m_prefill_chunk_size(prefill_chunk_size),
+    m_model_is_splitted(model_is_splitted),
     m_cgraph(cgraph),
     m_model_weights(model_weights),
     m_model_params(model_params),
@@ -972,4 +974,4 @@ const std::string & GgmlOvDecoder::get_op_type(int node_idx) const {
 const std::string & GgmlOvDecoder::get_op_type() const {
     static const std::string unknown_op = "UNKNOWN_GGML_OP";
     return unknown_op;
-}
+}
\ No newline at end of file
diff --git a/ggml/src/ggml-openvino/ggml-decoder.h b/ggml/src/ggml-openvino/ggml-decoder.h
index 3ae25ddda3..5c44f4b9ac 100644
--- a/ggml/src/ggml-openvino/ggml-decoder.h
+++ b/ggml/src/ggml-openvino/ggml-decoder.h
@@ -69,6 +69,7 @@ public:
                   std::map<std::string, std::shared_ptr<ov::Node>> & model_weights,
                   bool is_static,
                   bool is_stateful = false,
+                  bool model_is_splitted = false,
                   bool is_prefill = false,
                   int prefill_chunk_size = 256);
 
@@ -205,6 +206,7 @@ public:
     bool m_is_prefill = false;
     bool m_naive = false;
     int m_prefill_chunk_size = 0;
+    bool m_model_is_splitted = false; // label the cgraph is splited or not
 
     static ov::Shape get_shape(const ggml_tensor * tensor);
     static std::vector<size_t> get_stride(const ggml_tensor * tensor);
diff --git a/ggml/src/ggml-openvino/utils.cpp b/ggml/src/ggml-openvino/utils.cpp
index 1b553a0de0..c126005bce 100644
--- a/ggml/src/ggml-openvino/utils.cpp
+++ b/ggml/src/ggml-openvino/utils.cpp
@@ -85,7 +85,9 @@ enum ggml_status ov_graph_compute_dynamic(ggml_cgraph * cgraph, std::shared_ptr<
     bool stateful = r_ctx->stateful;
     static auto is_static = false;
 
-    if (is_naive(cgraph)) {
+    bool model_is_splitted = is_model_splitted(cgraph);
+
+    if (is_naive(cgraph) && !model_is_splitted) {
         return naive_compute(cgraph, core, device, config);
     }
 
@@ -175,8 +177,8 @@ enum ggml_status ov_graph_compute_dynamic(ggml_cgraph * cgraph, std::shared_ptr<
             std::shared_ptr<ov::Model> model;
             auto model_weights = GgmlOvDecoder::create_weight_nodes(cgraph);
 
-            ggml_decoder = std::make_shared<GgmlOvDecoder>(cgraph, m_params, c_params, model_weights, is_static, stateful);
-            decoder_end_time = ggml_time_us();
+        ggml_decoder = std::make_shared<GgmlOvDecoder>(cgraph, m_params, c_params, model_weights, is_static, stateful, model_is_splitted);
+        decoder_end_time = ggml_time_us();
 
             auto input_model = std::make_shared<ov::frontend::ggml::InputModel>(ggml_decoder);
             model = ov::frontend::ggml::FrontEnd::convert(input_model);
@@ -338,9 +340,9 @@ enum ggml_status ov_graph_compute_static(ggml_cgraph * cgraph, std::shared_ptr<o
         auto model_weights = GgmlOvDecoder::create_weight_nodes(cgraph);
 
         auto ggml_decoder_prefill = std::make_shared<GgmlOvDecoder>(cgraph, m_params, c_params, model_weights,
-                                                                    is_static, stateful, true, prefill_chunk_size);
+                                                                    is_static, stateful, false, true, prefill_chunk_size);
         auto ggml_decoder_decode = std::make_shared<GgmlOvDecoder>(cgraph, m_params, c_params, model_weights, is_static,
-                                                                   stateful, false, prefill_chunk_size);
+                                                                   stateful, false, false, prefill_chunk_size);
         decoder_end_time = ggml_time_us();
 
         auto input_model_prefill = std::make_shared<ov::frontend::ggml::InputModel>(ggml_decoder_prefill);
@@ -470,6 +472,49 @@ enum ggml_status ov_graph_compute_static(ggml_cgraph * cgraph, std::shared_ptr<o
     return GGML_STATUS_SUCCESS;
 }
 
+bool is_model_splitted(ggml_cgraph * cgraph) {
+    // check the nodes of the model are used by the following nodes, through compare the node's use count and the count of nodes that use it as input. If does not match, return true, else return false.
+    for (int i = 0; i < cgraph->n_nodes; i++) {
+        ggml_tensor * node = cgraph->nodes[i];
+        int use_count = cgraph->use_counts[ggml_hash_find(&cgraph->visited_hash_set, node)];
+        // TODO: this is a workround for the tests case from llama.cpp, fix should from the root cause in the future.
+        if ((cgraph->n_nodes <= 1 && use_count==0) || (cgraph->n_nodes <= 1 && node->op == GGML_OP_VIEW && use_count == 1 && node->src[0] != nullptr && node->src[0]->op == GGML_OP_NONE)) {
+            return false;
+        }
+        int input_use_count = 0;
+        for (int j = 0; j < cgraph->n_nodes; j++) {
+            ggml_tensor * other_node = cgraph->nodes[j];
+            for (int k = 0; k < GGML_MAX_SRC; k++) {
+                if (other_node->src[k] == node) {
+                    input_use_count++;
+                }
+            }
+        }
+        if (use_count != input_use_count && node->op != GGML_OP_NONE) {
+            return true;
+        }
+    }
+    // if all nodes's src node's src is not come from the nodes in the model, we think the model is splitted. This is a complementary check for the above check, because for some special case like the output node is not used by any node, the use count and input use count are both 0, we can not determine whether the model is splitted or not just based on the first check.
+    auto model_weights = GgmlOvDecoder::create_weight_nodes(cgraph);
+    std::set<ggml_tensor *> model_nodes(cgraph->nodes, cgraph->nodes + cgraph->n_nodes);
+    // leaf nodes
+    std::set<ggml_tensor *> model_leafs(cgraph->leafs, cgraph->leafs + cgraph->n_leafs);
+    for (int i = 0; i < cgraph->n_nodes; i++) {
+        ggml_tensor * node = cgraph->nodes[i];
+        for (int j = 0; j < GGML_MAX_SRC; j++) {
+            ggml_tensor * src = node->src[j];
+            // the src is also not the model weights, we think the model is splitted.
+            // the src is also not in model leafs, we think the model is splitted.
+            if (src != nullptr && model_nodes.find(src) == model_nodes.end() &&
+                model_weights.find(std::string(src->name)) == model_weights.end() && !model_leafs.empty() == false &&
+                model_leafs.find(src) == model_leafs.end()) {
+                return true;
+            }
+        }
+    }
+    return false;
+}
+
 bool is_naive(ggml_cgraph * cgraph) {
     constexpr int naive_graph_size_threshold = 20;
     int count = 0;
diff --git a/ggml/src/ggml-openvino/utils.h b/ggml/src/ggml-openvino/utils.h
index 656573d138..b5ab395002 100644
--- a/ggml/src/ggml-openvino/utils.h
+++ b/ggml/src/ggml-openvino/utils.h
@@ -117,6 +117,8 @@ ov::Tensor create_ov_output_tensor(std::shared_ptr<GgmlOvDecoder> ggml_decoder,
 
 bool is_naive(struct ggml_cgraph * cgraph);
 
+bool is_model_splitted(struct ggml_cgraph * cgraph);
+
 enum ggml_status naive_compute(struct ggml_cgraph * cgraph,
                                ov::Core & core,
                                const std::string & device,