diff --git a/ggml/src/ggml-cpu/simd-gemm.h b/ggml/src/ggml-cpu/simd-gemm.h
index 29defceb4e..70dd92eb5c 100644
--- a/ggml/src/ggml-cpu/simd-gemm.h
+++ b/ggml/src/ggml-cpu/simd-gemm.h
@@ -11,11 +11,11 @@
 // TODO: add support for sizeless vector types
 #if defined(GGML_SIMD) && !defined(__ARM_FEATURE_SVE) && !defined(__riscv_v_intrinsic)
 
-// TODO: untested on avx512 and arm
+// TODO: untested on avx512
 // These are in units of GGML_F32_EPR
 #if defined(__AVX512F__) || defined (__ARM_NEON__)
-    static constexpr int GEMM_RM = 6;
-    static constexpr int GEMM_RN = 4; // 24+4+1 = 29/32
+    static constexpr int GEMM_RM = 4;
+    static constexpr int GEMM_RN = 4; // 16+4+1 = 21/32
 #elif defined(__AVX2__) || defined(__AVX__)
     static constexpr int GEMM_RM = 6;
     static constexpr int GEMM_RN = 2; // 12+2+1 = 15/16
@@ -66,7 +66,7 @@ static void simd_gemm(
     float       * GGML_RESTRICT C,
     const float * GGML_RESTRICT A,
     const float * GGML_RESTRICT B,
-    int M, int64_t K, int64_t N)
+    int64_t M, int64_t K, int64_t N)
 {
     static constexpr int KN = GGML_F32_EPR;
 
@@ -115,7 +115,7 @@ static void simd_gemm(
     float       * GGML_RESTRICT C,
     const float * GGML_RESTRICT A,
     const float * GGML_RESTRICT B,
-    int M, int64_t K, int64_t N)
+    int64_t M, int64_t K, int64_t N)
 {
     for (int i = 0; i < M; i++) {
         for (int64_t j = 0; j < N; j++) {