From 4f85c33d332f93be07a92af8459001e2d9569ce5 Mon Sep 17 00:00:00 2001 From: Aaron Teo Date: Tue, 23 Sep 2025 14:42:52 +0800 Subject: [PATCH] ggml-cpu: add unroll to boost perf Signed-off-by: Aaron Teo --- ggml/src/ggml-cpu/arch/s390/quants.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ggml/src/ggml-cpu/arch/s390/quants.c b/ggml/src/ggml-cpu/arch/s390/quants.c index 2638162bf1..9ad1cdf59a 100644 --- a/ggml/src/ggml-cpu/arch/s390/quants.c +++ b/ggml/src/ggml-cpu/arch/s390/quants.c @@ -284,6 +284,7 @@ void ggml_vec_dot_mxfp4_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const vo float32x4_t v_acc = vec_splats(0.0f); + #pragma GCC unroll 8 for (; ib + 1 < nb; ib += 2) { const block_mxfp4 * GGML_RESTRICT x0 = &x[ib + 0]; const block_mxfp4 * GGML_RESTRICT x1 = &x[ib + 1]; @@ -321,6 +322,7 @@ void ggml_vec_dot_mxfp4_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const vo v_acc = vec_madd(v_xy1f, v_d1, v_acc); } + #pragma GCC unroll 8 for (; ib < nb; ++ib) { const block_mxfp4 * GGML_RESTRICT x0 = &x[ib + 0]; const block_q8_0 * GGML_RESTRICT y0 = &y[ib + 0];