Add the missing `__CUDA_ARCH__ >= GGML_CUDA_CC_AMPERE` guard around the `cp.async.wait_group` inline asm

This commit is contained in:
bssrdf 2025-11-15 01:24:09 -05:00
parent dbeb6ced46
commit e10b495dd2
1 changed file with 2 additions and 0 deletions

View File

@@ -914,7 +914,9 @@ static __global__ void conv2d_implicit_kernel(const half * __restrict__ input,
int s = 0;
int r = 0;
while (block_k < num_block_tiles_k){
#if __CUDA_ARCH__ >= GGML_CUDA_CC_AMPERE
asm volatile("cp.async.wait_group %0;\n" ::"n"(0));
#endif
__syncthreads();
// moves to the next tile