diff --git a/ggml/src/ggml-cuda/mma.cuh b/ggml/src/ggml-cuda/mma.cuh
index e25a19b656..37558f7478 100644
--- a/ggml/src/ggml-cuda/mma.cuh
+++ b/ggml/src/ggml-cuda/mma.cuh
@@ -785,11 +785,11 @@ namespace ggml_cuda_mma {
 
         acc[0] = __builtin_amdgcn_wmma_i32_16x16x16_iu8_w32_gfx12(
             true,
-            a_vec[0],        
-            true,           
-            b_vec[0],                  
-            acc[0],          
-            true           
+            a_vec[0],
+            true,
+            b_vec[0],
+            acc[0],
+            true
         );
 
         acc[0] = __builtin_amdgcn_wmma_i32_16x16x16_iu8_w32_gfx12(
@@ -897,7 +897,7 @@ static __device__ __forceinline__ void mma(
         GGML_UNUSED(A);
         GGML_UNUSED(B);
         NO_DEVICE_CODE;
-#endif 
+#endif
     }
 }
 
diff --git a/ggml/src/ggml-cuda/mmq.cuh b/ggml/src/ggml-cuda/mmq.cuh
index 7affff4ef8..99760d56c7 100644
--- a/ggml/src/ggml-cuda/mmq.cuh
+++ b/ggml/src/ggml-cuda/mmq.cuh
@@ -1888,7 +1888,7 @@ template <int mmq_y, bool need_check> static __device__ __forceinline__ void loa
 #else
         int i = (i0 + threadIdx.y*rows_per_warp + threadIdx.x/2) % mmq_y;
         {
-#endif // defined(AMD_MFMA_AVAILABLE) || defined(AMD_WMMA_AVAILABLE) 
+#endif // defined(AMD_MFMA_AVAILABLE) || defined(AMD_WMMA_AVAILABLE)
             if (need_check) {
                 i = min(i, i_max);
             }