If SVE 256 is not present, the generic function was used for the computation, which slowed down performance.

This change makes the code use the NEON implementation when SVE 256 is not present.
2026-02-06 10:04:41 +00:00 · 2026-02-06 10:04:41 +00:00 · 9e15d138f2
parent 0866740119
commit 9e15d138f2
1 changed files with 2 additions and 1 deletions
--- a/ggml/src/ggml-cpu/arch/arm/repack.cpp
+++ b/ggml/src/ggml-cpu/arch/arm/repack.cpp
@@ -3535,8 +3535,9 @@ void ggml_gemm_q4_K_8x8_q8_K(int                        n,
        }  // for y
        return;
    }
+#endif  // SVE compile-time end

-#elif defined(__aarch64__) && defined(__ARM_NEON) && defined(__ARM_FEATURE_MATMUL_INT8)
+#if defined(__aarch64__) && defined(__ARM_NEON) && defined(__ARM_FEATURE_MATMUL_INT8)
    constexpr int    q8_k_blocklen = 4;
    const uint8x16_t m4b           = vdupq_n_u8(0x0f);