Fix faulty handling of consecutive CUDA graph updates.

This commit is contained in:
Xiangyan Sun 2025-10-23 09:56:45 -07:00
parent 5c89e8dd71
commit b7f366fad1
2 changed files with 7 additions and 2 deletions

View File

@ -941,6 +941,7 @@ struct ggml_cuda_graph {
std::vector<cudaGraphNode_t> nodes;
std::vector<cudaKernelNodeParams> params;
int number_consecutive_updates = 0;
int number_consecutive_computes = 0;
std::vector<ggml_graph_node_properties> ggml_graph_properties;
#endif
};

View File

@ -3149,8 +3149,7 @@ static void ggml_backend_cuda_graph_plan_update(ggml_backend_t backend, ggml_bac
} else {
cuda_graph_update_required = true;
}
} else {
cuda_graph->number_consecutive_updates = 0;
cuda_graph->number_consecutive_computes = 0;
}
if (use_cuda_graph && cuda_graph_update_required) {
@ -3175,6 +3174,11 @@ static enum ggml_status ggml_backend_cuda_graph_plan_compute(ggml_backend_t back
ggml_cuda_set_device(cuda_ctx->device);
cuda_graph->number_consecutive_computes++;
if (cuda_graph->number_consecutive_computes > 1) {
cuda_graph->number_consecutive_updates = 0;
}
if (cuda_graph->instance) {
CUDA_CHECK(cudaGraphLaunch(cuda_graph->instance, cuda_ctx->stream()));
} else {