When SVE 256 was not present, the generic function was used for the computation, which slowed down performance.

This change adds code so that the NEON implementation is used when SVE 256 is not present.
This commit is contained in:
Vithule, Prashant 2026-02-06 10:04:41 +00:00 committed by Vithulep
parent 0866740119
commit 9e15d138f2
1 changed files with 2 additions and 1 deletions

View File

@ -3535,8 +3535,9 @@ void ggml_gemm_q4_K_8x8_q8_K(int n,
} // for y
return;
}
#endif // SVE compile-time end
#elif defined(__aarch64__) && defined(__ARM_NEON) && defined(__ARM_FEATURE_MATMUL_INT8)
#if defined(__aarch64__) && defined(__ARM_NEON) && defined(__ARM_FEATURE_MATMUL_INT8)
constexpr int q8_k_blocklen = 4;
const uint8x16_t m4b = vdupq_n_u8(0x0f);