diff --git a/ggml/src/ggml-cpu/arch/arm/quants.c b/ggml/src/ggml-cpu/arch/arm/quants.c index e630d4c98d..cb502d4b40 100644 --- a/ggml/src/ggml-cpu/arch/arm/quants.c +++ b/ggml/src/ggml-cpu/arch/arm/quants.c @@ -43,7 +43,8 @@ void quantize_row_q8_0(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, i const int nb = k / QK8_0; block_q8_0 * GGML_RESTRICT y = vy; - + + // printf("Here"); #if defined(__ARM_FEATURE_SVE) const int sve_register_length = ggml_cpu_get_sve_cnt() * 8; const int ggml_f32_epr = sve_register_length / 32; diff --git a/ggml/src/ggml-quants.c b/ggml/src/ggml-quants.c index a617f951e2..e389a46dbe 100644 --- a/ggml/src/ggml-quants.c +++ b/ggml/src/ggml-quants.c @@ -340,7 +340,6 @@ void dequantize_row_q5_1(const block_q5_1 * GGML_RESTRICT x, float * GGML_RESTRI } } -// SVE Support added for Scaler Implementation void dequantize_row_q8_0(const block_q8_0 * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) { static const int qk = QK8_0; @@ -348,29 +347,13 @@ void dequantize_row_q8_0(const block_q8_0 * GGML_RESTRICT x, float * GGML_RESTRI const int nb = k / qk; - #if defined(__ARM_FEATURE_SVE) - svbool_t pg = svptrue_b32(); - const svfloat32_t inactive1 = svdup_n_f32(0.0f); - const int ggml_f32_epr = svcntw(); - for (int i = 0; i < nb; i+=1) { - const float d1 = GGML_FP16_TO_FP32(x[i].d); // d:0 - const int8_t *x_data1 = x[i].qs; - float *y_base = y + i * qk; - for (int j = 0; j < qk; j+=ggml_f32_epr) { - svint32_t vec0 = svld1sb_s32(pg, x_data1 + j); - svfloat32_t fvec0 = svmul_n_f32_m(pg, svcvt_f32_s32_m(inactive1, pg, vec0), d1); // Convert to float and scale - svst1_f32(pg, y_base + j, fvec0); - } - } - #else - for (int i = 0; i < nb; i++) { - const float d = GGML_FP16_TO_FP32(x[i].d); + for (int i = 0; i < nb; i++) { + const float d = GGML_FP16_TO_FP32(x[i].d); - for (int j = 0; j < qk; ++j) { - y[i*qk + j] = x[i].qs[j]*d; - } + for (int j = 0; j < qk; ++j) { + y[i*qk + j] = x[i].qs[j]*d; } - #endif + } } //