From d12cc3d1ca6bba741cd77887ac9c9ee18c8415c7 Mon Sep 17 00:00:00 2001
From: Aman Gupta
Date: Thu, 9 Apr 2026 01:01:56 +0800
Subject: [PATCH] CUDA: also store `node->src->data` ptrs for equality check (#21635)

* CUDA: also store node->src->data ptrs for equality check

* address review comments
---
 ggml/src/ggml-cuda/common.cuh   |  6 +++++-
 ggml/src/ggml-cuda/ggml-cuda.cu | 21 ++++++++++++++-------
 2 files changed, 19 insertions(+), 8 deletions(-)

diff --git a/ggml/src/ggml-cuda/common.cuh b/ggml/src/ggml-cuda/common.cuh
index a2960e5ae3..65d7a6e22a 100644
--- a/ggml/src/ggml-cuda/common.cuh
+++ b/ggml/src/ggml-cuda/common.cuh
@@ -1173,7 +1173,11 @@ struct ggml_cuda_graph {
     std::vector<cudaGraphNode_t> nodes;
     bool disable_due_to_gpu_arch = false;
     bool warmup_complete = false;
-    std::vector<ggml_tensor> nodes_copy;
+    struct node_properties {
+        ggml_tensor node;
+        void * node_src_data_ptrs[GGML_MAX_SRC];
+    };
+    std::vector<node_properties> node_props;
 
     bool is_enabled() const {
         static const bool disable_cuda_graphs_due_to_env = (getenv("GGML_CUDA_DISABLE_GRAPHS") != nullptr);
diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu
index b21196bb4f..648124c0d3 100644
--- a/ggml/src/ggml-cuda/ggml-cuda.cu
+++ b/ggml/src/ggml-cuda/ggml-cuda.cu
@@ -2979,18 +2979,25 @@ static bool ggml_cuda_graph_update_required(ggml_backend_cuda_context * cuda_ctx
     ggml_cuda_graph * graph = cuda_ctx->cuda_graph(graph_key);
 
     // Check if the graph size has changed
-    if ((int)graph->nodes_copy.size() != cgraph->n_nodes) {
+    if ((int)graph->node_props.size() != cgraph->n_nodes) {
         res = true;
-        graph->nodes_copy.resize(cgraph->n_nodes);
+        graph->node_props.resize(cgraph->n_nodes);
     }
 
     for (int i = 0; i < cgraph->n_nodes; i++) {
-        if (!res) {
-            if (memcmp(&graph->nodes_copy[i], cgraph->nodes[i], sizeof(ggml_tensor)) != 0) {
-                res = true;
-            }
+        ggml_cuda_graph::node_properties prop = {};
+        memcpy(&prop.node, cgraph->nodes[i], sizeof(ggml_tensor));
+
+        // if the backend scheduler is making copies of CPU tensors, the src pointers can be the same but with different data, see:
+        // https://github.com/ggml-org/llama.cpp/pull/21472#discussion_r3052235188
+        for (int j = 0; j < GGML_MAX_SRC; ++j) {
+            prop.node_src_data_ptrs[j] = cgraph->nodes[i]->src[j] ? cgraph->nodes[i]->src[j]->data : nullptr;
         }
-        memcpy(&graph->nodes_copy[i], cgraph->nodes[i], sizeof(ggml_tensor));
+
+        if (!res && memcmp(&graph->node_props[i], &prop, sizeof(prop)) != 0) {
+            res = true;
+        }
+        graph->node_props[i] = prop;
     }
 
     return res;