If SVE 256 not present then was using generic function to compute, hence slowing the performance.
So added code if SVE 256 is not present then use NEON code.
This commit is contained in:
parent
0866740119
commit
9e15d138f2
|
|
@ -3535,8 +3535,9 @@ void ggml_gemm_q4_K_8x8_q8_K(int n,
|
|||
} // for y
|
||||
return;
|
||||
}
|
||||
#endif // SVE compile-time end
|
||||
|
||||
#elif defined(__aarch64__) && defined(__ARM_NEON) && defined(__ARM_FEATURE_MATMUL_INT8)
|
||||
#if defined(__aarch64__) && defined(__ARM_NEON) && defined(__ARM_FEATURE_MATMUL_INT8)
|
||||
constexpr int q8_k_blocklen = 4;
|
||||
const uint8x16_t m4b = vdupq_n_u8(0x0f);
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue