From a29e4c0b7b23e020107058480dabbe03b7cba6e1 Mon Sep 17 00:00:00 2001 From: Aman Gupta Date: Sat, 11 Apr 2026 10:30:30 +0800 Subject: [PATCH] CUDA: also store node->src ne/nb for graph equality (#21736) --- ggml/src/ggml-cuda/common.cuh | 4 +++- ggml/src/ggml-cuda/ggml-cuda.cu | 12 +++++++----- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/ggml/src/ggml-cuda/common.cuh b/ggml/src/ggml-cuda/common.cuh index 56a67f1edc..8a4246223b 100644 --- a/ggml/src/ggml-cuda/common.cuh +++ b/ggml/src/ggml-cuda/common.cuh @@ -1185,7 +1185,9 @@ struct ggml_cuda_graph { bool warmup_complete = false; struct node_properties { ggml_tensor node; - void * node_src_data_ptrs[GGML_MAX_SRC]; + void * node_src_data_ptrs[GGML_MAX_SRC]; + int64_t node_src_ne[GGML_MAX_SRC][GGML_MAX_DIMS]; + size_t node_src_nb[GGML_MAX_SRC][GGML_MAX_DIMS]; }; std::vector node_props; diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu index 8613d20b9f..3113de017f 100644 --- a/ggml/src/ggml-cuda/ggml-cuda.cu +++ b/ggml/src/ggml-cuda/ggml-cuda.cu @@ -3070,16 +3070,18 @@ static bool ggml_cuda_graph_update_required(ggml_backend_cuda_context * cuda_ctx ggml_cuda_graph::node_properties prop = {}; memcpy(&prop.node, cgraph->nodes[i], sizeof(ggml_tensor)); - // if the backend scheduler is making copies of CPU tensors, the src pointers can be the same but with different data, see: - // https://github.com/ggml-org/llama.cpp/pull/21472#discussion_r3052235188 for (int j = 0; j < GGML_MAX_SRC; ++j) { - prop.node_src_data_ptrs[j] = cgraph->nodes[i]->src[j] ? cgraph->nodes[i]->src[j]->data : nullptr; + if (cgraph->nodes[i]->src[j]) { + prop.node_src_data_ptrs[j] = cgraph->nodes[i]->src[j]->data; + memcpy(prop.node_src_ne[j], cgraph->nodes[i]->src[j]->ne, sizeof(prop.node_src_ne[j])); + memcpy(prop.node_src_nb[j], cgraph->nodes[i]->src[j]->nb, sizeof(prop.node_src_nb[j])); + } } - if (!res && memcmp(&graph->node_props[i], &prop, sizeof(prop)) != 0) { + if (res || memcmp(&graph->node_props[i], &prop, sizeof(prop)) != 0) { + graph->node_props[i] = prop; res = true; } - graph->node_props[i] = prop; } return res;