From 79f5eaf55eb8e20be0a64cc9a5135711849bc70f Mon Sep 17 00:00:00 2001 From: Oliver Simons Date: Thu, 12 Feb 2026 16:08:12 +0100 Subject: [PATCH] Do not mutate cgraph for fused ADDs 1. We should try to minimize in-place changes to the incoming ggml_cgraph where possible (those should happen in graph_optimize) 2. Modifying in-place leads to an additional, unnecessary graph capture step as we store the properties before modifying the graph in-place in the cuda-backend --- ggml/src/ggml-cuda/ggml-cuda.cu | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu index b163468789..7dc688483a 100644 --- a/ggml/src/ggml-cuda/ggml-cuda.cu +++ b/ggml/src/ggml-cuda/ggml-cuda.cu @@ -3640,11 +3640,13 @@ static void ggml_cuda_graph_evaluate_and_capture(ggml_backend_cuda_context * cud n_fuse++; if (n_fuse > 1) { + ggml_tensor fused_add_node; + memcpy(&fused_add_node, node, sizeof(ggml_tensor)); for (int j = 0; j < n_fuse - 1; ++j) { - node->src[j + 2] = cgraph->nodes[i + j + 1]->src[1]; + fused_add_node.src[j + 2] = cgraph->nodes[i + j + 1]->src[1]; } - cgraph->nodes[i + n_fuse - 1]->data = node->data; - ggml_cuda_op_fused_add(*cuda_ctx, node, n_fuse); + fused_add_node.data = cgraph->nodes[i + n_fuse - 1]->data; + ggml_cuda_op_fused_add(*cuda_ctx, &fused_add_node, n_fuse); i += n_fuse - 1; continue;