diff --git a/ggml/src/ggml-cpu/arch/arm/quants.c b/ggml/src/ggml-cpu/arch/arm/quants.c index b390ab61c7..6265249a56 100644 --- a/ggml/src/ggml-cpu/arch/arm/quants.c +++ b/ggml/src/ggml-cpu/arch/arm/quants.c @@ -45,7 +45,43 @@ void quantize_row_q8_0(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, i block_q8_0 * GGML_RESTRICT y = vy; -#if defined(__ARM_NEON) +#if defined(__ARM_FEATURE_SVE) + const int sve_register_length = svcntb() * 8; + const int ggml_f32_epr = sve_register_length / 32; + const svfloat32_t inactive1 = svdup_n_f32(0.0f); + const svbool_t pg = svptrue_b32(); + svfloat32_t zero = svdup_f32(0.0f); + svfloat32_t half = svdup_f32(0.5f); + + for (int i = 0; i < nb; i+=1) { + svfloat32_t srcv1, asrcv1; + svfloat32_t sv_max = svdup_n_f32(0.0f); + float32_t amax = 0.0; + + for (int j = 0; j < QK8_0; j+=ggml_f32_epr) { + srcv1 = svld1_f32(pg, x + i*32 + j); + asrcv1 = svabs_f32_m(inactive1, pg, srcv1); + sv_max = svmax_f32_m(pg, sv_max, asrcv1); + } + amax = svmaxv_f32(pg, sv_max); + float32_t d = amax / ((1 << 7) - 1); + float32_t id = d ? 1.0f/d : 0.0f; + y[i].d = GGML_FP32_TO_FP16(d); + for (int j = 0; j < QK8_0; j+=ggml_f32_epr) { + srcv1 = svld1_f32(pg, x + i*32 + j); + const svfloat32_t v1 = svmul_n_f32_m(pg, srcv1, id); + + svbool_t ge_zero = svcmpge_f32(pg, v1, zero); + svfloat32_t v_pos = svadd_f32_m(pg, v1, half); + svfloat32_t v_neg = svsub_f32_m(pg, v1, half); + + svfloat32_t v_rounded = svsel_f32(ge_zero, v_pos, v_neg); + svint32_t result = svcvt_s32_f32_x(pg, v_rounded); + svst1b_s32(pg, &y[i].qs[j], result); + } + } + +#elif defined(__ARM_NEON) for (int i = 0; i < nb; i++) { float32x4_t srcv [8]; float32x4_t asrcv[8]; diff --git a/ggml/src/ggml-quants.c b/ggml/src/ggml-quants.c index de5cbd75e8..5b70714d17 100644 --- a/ggml/src/ggml-quants.c +++ b/ggml/src/ggml-quants.c @@ -405,13 +405,31 @@ void dequantize_row_q8_0(const block_q8_0 * GGML_RESTRICT x, float * GGML_RESTRI const int nb = k / qk; - for (int i = 0; i < nb; i++) { - const float d = GGML_FP16_TO_FP32(x[i].d); + #if defined(__ARM_FEATURE_SVE) + svbool_t pg = svptrue_b32(); + const svfloat32_t inactive1 = svdup_n_f32(0.0f); + const int ggml_f32_epr = svcntw(); - for (int j = 0; j < qk; ++j) { - y[i*qk + j] = x[i].qs[j]*d; + for (int i = 0; i < nb; i+=1) { + const float d1 = GGML_FP16_TO_FP32(x[i].d); // d:0 + + const int8_t *x_data1 = x[i].qs; + float *y_base = y + i * qk; + for (int j = 0; j < qk; j+=ggml_f32_epr) { + svint32_t vec0 = svld1sb_s32(pg, x_data1 + j); + svfloat32_t fvec0 = svmul_n_f32_m(pg, svcvt_f32_s32_m(inactive1, pg, vec0), d1); // Convert to float and scale + svst1_f32(pg, y_base + j, fvec0); + } } - } + #else + for (int i = 0; i < nb; i++) { + const float d = GGML_FP16_TO_FP32(x[i].d); + + for (int j = 0; j < qk; ++j) { + y[i*qk + j] = x[i].qs[j]*d; + } + } + #endif } void dequantize_row_mxfp4(const block_mxfp4 * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {