CUDA: also store `node->src->data` ptrs for equality check (#21635)

* CUDA: also store node->src->data ptrs for equality check

* address review comments
This commit is contained in:
Aman Gupta 2026-04-09 01:01:56 +08:00 committed by GitHub
parent 2dcb7f74ed
commit d12cc3d1ca
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 19 additions and 8 deletions

View File

@ -1173,7 +1173,11 @@ struct ggml_cuda_graph {
std::vector<cudaGraphNode_t> nodes;
bool disable_due_to_gpu_arch = false;
bool warmup_complete = false;
std::vector<ggml_tensor> nodes_copy;
struct node_properties {
ggml_tensor node;
void * node_src_data_ptrs[GGML_MAX_SRC];
};
std::vector<node_properties> node_props;
bool is_enabled() const {
static const bool disable_cuda_graphs_due_to_env = (getenv("GGML_CUDA_DISABLE_GRAPHS") != nullptr);

View File

@ -2979,18 +2979,25 @@ static bool ggml_cuda_graph_update_required(ggml_backend_cuda_context * cuda_ctx
ggml_cuda_graph * graph = cuda_ctx->cuda_graph(graph_key);
// Check if the graph size has changed
if ((int)graph->nodes_copy.size() != cgraph->n_nodes) {
if ((int)graph->node_props.size() != cgraph->n_nodes) {
res = true;
graph->nodes_copy.resize(cgraph->n_nodes);
graph->node_props.resize(cgraph->n_nodes);
}
for (int i = 0; i < cgraph->n_nodes; i++) {
if (!res) {
if (memcmp(&graph->nodes_copy[i], cgraph->nodes[i], sizeof(ggml_tensor)) != 0) {
res = true;
}
ggml_cuda_graph::node_properties prop = {};
memcpy(&prop.node, cgraph->nodes[i], sizeof(ggml_tensor));
// if the backend scheduler is making copies of CPU tensors, the src pointers can be the same but with different data, see:
// https://github.com/ggml-org/llama.cpp/pull/21472#discussion_r3052235188
for (int j = 0; j < GGML_MAX_SRC; ++j) {
prop.node_src_data_ptrs[j] = cgraph->nodes[i]->src[j] ? cgraph->nodes[i]->src[j]->data : nullptr;
}
memcpy(&graph->nodes_copy[i], cgraph->nodes[i], sizeof(ggml_tensor));
if (!res && memcmp(&graph->node_props[i], &prop, sizeof(prop)) != 0) {
res = true;
}
graph->node_props[i] = prop;
}
return res;