ggml-cpu: add unroll to boost perf

Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>
This commit is contained in:
Aaron Teo 2025-09-23 14:42:52 +08:00
parent 5fb1bb99fe
commit 4f85c33d33
No known key found for this signature in database
1 changed file with 2 additions and 0 deletions

View File

@@ -284,6 +284,7 @@ void ggml_vec_dot_mxfp4_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const vo
float32x4_t v_acc = vec_splats(0.0f); float32x4_t v_acc = vec_splats(0.0f);
#pragma GCC unroll 8
for (; ib + 1 < nb; ib += 2) { for (; ib + 1 < nb; ib += 2) {
const block_mxfp4 * GGML_RESTRICT x0 = &x[ib + 0]; const block_mxfp4 * GGML_RESTRICT x0 = &x[ib + 0];
const block_mxfp4 * GGML_RESTRICT x1 = &x[ib + 1]; const block_mxfp4 * GGML_RESTRICT x1 = &x[ib + 1];
@@ -321,6 +322,7 @@ void ggml_vec_dot_mxfp4_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const vo
v_acc = vec_madd(v_xy1f, v_d1, v_acc); v_acc = vec_madd(v_xy1f, v_d1, v_acc);
} }
#pragma GCC unroll 8
for (; ib < nb; ++ib) { for (; ib < nb; ++ib) {
const block_mxfp4 * GGML_RESTRICT x0 = &x[ib + 0]; const block_mxfp4 * GGML_RESTRICT x0 = &x[ib + 0];
const block_q8_0 * GGML_RESTRICT y0 = &y[ib + 0]; const block_q8_0 * GGML_RESTRICT y0 = &y[ib + 0];