diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu
index 1861f19688..de5a0bbd0f 100644
--- a/ggml/src/ggml-cuda/ggml-cuda.cu
+++ b/ggml/src/ggml-cuda/ggml-cuda.cu
@@ -2307,7 +2307,10 @@ static void ggml_cuda_mul_mat_id(ggml_backend_cuda_context & ctx, ggml_tensor *
     }
 
     // note: this path should not be reached when recording CUDA graphs, because it requires stream synchronization
-
     cudaStream_t stream = ctx.stream();
+    // enforce the note above: abort if this stream is currently being captured (query needs `stream`, so it must follow the declaration)
+    cudaStreamCaptureStatus capture_status;
+    CUDA_CHECK(cudaStreamIsCapturing(stream, &capture_status));
+    GGML_ASSERT(capture_status == cudaStreamCaptureStatusNone);
 
     GGML_ASSERT(nb12 % nb11 == 0);