Do not mutate cgraph for fused ADDs

1. We should try to minimize in-place changes to the incoming
   ggml_cgraph where possible (those should happen in graph_optimize)
2. Modifying in-place leads to an additional, unnecessary graph capture
   step as we store the properties before modifying the graph in-place
   in the cuda-backend
This commit is contained in:
Oliver Simons 2026-02-12 16:08:12 +01:00
parent 4b385bfcf8
commit 79f5eaf55e
1 changed files with 5 additions and 3 deletions

View File

@ -3640,11 +3640,13 @@ static void ggml_cuda_graph_evaluate_and_capture(ggml_backend_cuda_context * cud
n_fuse++;
if (n_fuse > 1) {
ggml_tensor fused_add_node;
memcpy(&fused_add_node, node, sizeof(ggml_tensor));
for (int j = 0; j < n_fuse - 1; ++j) {
node->src[j + 2] = cgraph->nodes[i + j + 1]->src[1];
fused_add_node.src[j + 2] = cgraph->nodes[i + j + 1]->src[1];
}
cgraph->nodes[i + n_fuse - 1]->data = node->data;
ggml_cuda_op_fused_add(*cuda_ctx, node, n_fuse);
fused_add_node.data = cgraph->nodes[i + n_fuse - 1]->data;
ggml_cuda_op_fused_add(*cuda_ctx, &fused_add_node, n_fuse);
i += n_fuse - 1;
continue;