From d46f3637a30fac0ff9e555d0dd16a66ff194f4c1 Mon Sep 17 00:00:00 2001 From: vithulep Date: Wed, 25 Jun 2025 13:20:13 +0530 Subject: [PATCH] Updated Quantize_row_q8_0() function --- ggml/src/ggml-cpu/arch/arm/quants.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/ggml/src/ggml-cpu/arch/arm/quants.c b/ggml/src/ggml-cpu/arch/arm/quants.c index e630d4c98d..f100ba73a2 100644 --- a/ggml/src/ggml-cpu/arch/arm/quants.c +++ b/ggml/src/ggml-cpu/arch/arm/quants.c @@ -48,8 +48,10 @@ void quantize_row_q8_0(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, i const int sve_register_length = ggml_cpu_get_sve_cnt() * 8; const int ggml_f32_epr = sve_register_length / 32; const svfloat32_t inactive1 = svdup_n_f32(0.0f); - const svint32_t inactive2 = svdup_n_s32(0); const svbool_t pg = svptrue_b32(); + svfloat32_t zero = svdup_f32(0.0f); + svfloat32_t half = svdup_f32(0.5f); + for (int i = 0; i < nb; i+=1) { svfloat32_t srcv1, asrcv1; svfloat32_t sv_max = svdup_n_f32(0.0f); @@ -67,8 +69,14 @@ void quantize_row_q8_0(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, i for (int j = 0; j < QK8_0; j+=ggml_f32_epr) { srcv1 = svld1_f32(pg, x + i*32 + j); const svfloat32_t v1 = svmul_n_f32_m(pg, srcv1, id); - const svint32_t vi1 = svcvt_s32_f32_m(inactive2, pg, v1); - svst1b_s32(pg, &y[i].qs[j], vi1); + + svbool_t ge_zero = svcmpge_f32(pg, v1, zero); + svfloat32_t v_pos = svadd_f32_m(pg, v1, half); + svfloat32_t v_neg = svsub_f32_m(pg, v1, half); + + svfloat32_t v_rounded = svsel_f32(ge_zero, v_pos, v_neg); + svint32_t result = svcvt_s32_f32_x(pg, v_rounded); + svst1b_s32(pg, &y[i].qs[j], result); } } #elif defined(__ARM_NEON)