diff --git a/ggml/src/ggml-cuda/mma.cuh b/ggml/src/ggml-cuda/mma.cuh index d3df5d8c4f..e25a19b656 100644 --- a/ggml/src/ggml-cuda/mma.cuh +++ b/ggml/src/ggml-cuda/mma.cuh @@ -897,7 +897,7 @@ static __device__ __forceinline__ void mma( GGML_UNUSED(A); GGML_UNUSED(B); NO_DEVICE_CODE; -#endif // AMD_MFMA_AVAILABLE +#endif } } diff --git a/ggml/src/ggml-cuda/mmq.cuh b/ggml/src/ggml-cuda/mmq.cuh index a49451c391..3fa27ca3bf 100644 --- a/ggml/src/ggml-cuda/mmq.cuh +++ b/ggml/src/ggml-cuda/mmq.cuh @@ -1178,7 +1178,7 @@ static __device__ __forceinline__ void vec_dot_q8_0_16_q8_1_mma( } } } -#elif defined(AMD_WMMA_AVAILABLE) +#elif defined(AMD_WMMA_AVAILABLE) // WMMA instructions can handle 16x4 tiles and do not require loading 64x2 tiles typedef tile<16, 4, int> tile_A; typedef tile<16, 4, int> tile_B; typedef tile<16, 16, int> tile_C; @@ -1500,7 +1500,7 @@ static __device__ __forceinline__ void vec_dot_q2_K_q8_1_mma( } } } -#elif defined(AMD_WMMA_AVAILABLE) +#elif defined(AMD_WMMA_AVAILABLE) // WMMA instructions can handle 16x4 tiles and do not require loading 64x2 tiles typedef tile<16, 4, int> tile_A; typedef tile<16, 4, int> tile_B; @@ -2313,7 +2313,7 @@ static __device__ __forceinline__ void vec_dot_q6_K_q8_1_mma( } } } -#elif defined(AMD_WMMA_AVAILABLE) +#elif defined(AMD_WMMA_AVAILABLE) // WMMA instructions can handle 16x4 tiles and do not require loading 64x2 tiles typedef tile<16, 4, int> tile_A; typedef tile<16, 4, int> tile_B; typedef tile<16, 16, int> tile_C;