From d12cc3d1ca6bba741cd77887ac9c9ee18c8415c7 Mon Sep 17 00:00:00 2001
From: Aman Gupta <amangupta052@gmail.com>
Date: Thu, 9 Apr 2026 01:01:56 +0800
Subject: [PATCH] CUDA: also store `node->src->data` ptrs for equality check
 (#21635)

* CUDA: also store node->src->data ptrs for equality check

* address review comments
---
 ggml/src/ggml-cuda/common.cuh   |  6 +++++-
 ggml/src/ggml-cuda/ggml-cuda.cu | 21 ++++++++++++++-------
 2 files changed, 19 insertions(+), 8 deletions(-)

diff --git a/ggml/src/ggml-cuda/common.cuh b/ggml/src/ggml-cuda/common.cuh
index a2960e5ae3..65d7a6e22a 100644
--- a/ggml/src/ggml-cuda/common.cuh
+++ b/ggml/src/ggml-cuda/common.cuh
@@ -1173,7 +1173,11 @@ struct ggml_cuda_graph {
     std::vector<cudaGraphNode_t> nodes;
     bool disable_due_to_gpu_arch = false;
     bool warmup_complete = false;
-    std::vector<ggml_tensor> nodes_copy;
+    struct node_properties { // per-node snapshot; compared with memcmp against the previous one to decide whether a graph update is required
+        ggml_tensor node; // byte-wise copy (memcpy) of the graph node's ggml_tensor
+        void * node_src_data_ptrs[GGML_MAX_SRC]; // src[j]->data for each source of the node, nullptr where src[j] is null
+    };
+    std::vector<node_properties> node_props;
 
     bool is_enabled() const {
         static const bool disable_cuda_graphs_due_to_env = (getenv("GGML_CUDA_DISABLE_GRAPHS") != nullptr);
diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu
index b21196bb4f..648124c0d3 100644
--- a/ggml/src/ggml-cuda/ggml-cuda.cu
+++ b/ggml/src/ggml-cuda/ggml-cuda.cu
@@ -2979,18 +2979,25 @@ static bool ggml_cuda_graph_update_required(ggml_backend_cuda_context * cuda_ctx
     ggml_cuda_graph * graph = cuda_ctx->cuda_graph(graph_key);
 
     // Check if the graph size has changed
-    if ((int)graph->nodes_copy.size() != cgraph->n_nodes) {
+    if ((int)graph->node_props.size() != cgraph->n_nodes) {
         res = true;
-        graph->nodes_copy.resize(cgraph->n_nodes);
+        graph->node_props.resize(cgraph->n_nodes);
     }
 
     for (int i = 0; i < cgraph->n_nodes; i++) {
-        if (!res) {
-            if (memcmp(&graph->nodes_copy[i], cgraph->nodes[i], sizeof(ggml_tensor)) != 0) {
-                res = true;
-            }
+        ggml_cuda_graph::node_properties prop = {};
+        memcpy(&prop.node, cgraph->nodes[i], sizeof(ggml_tensor));
+
+        // if the backend scheduler is making copies of CPU tensors, the src tensor pointers can stay the same while their data pointers change, see:
+        // https://github.com/ggml-org/llama.cpp/pull/21472#discussion_r3052235188
+        for (int j = 0; j < GGML_MAX_SRC; ++j) {
+            prop.node_src_data_ptrs[j] = cgraph->nodes[i]->src[j] ? cgraph->nodes[i]->src[j]->data : nullptr;
         }
-        memcpy(&graph->nodes_copy[i], cgraph->nodes[i], sizeof(ggml_tensor));
+
+        if (!res && memcmp(&graph->node_props[i], &prop, sizeof(prop)) != 0) {
+            res = true;
+        }
+        graph->node_props[i] = prop;
     }
 
     return res;