Implementation of SVE for kernel quantize_row_q8_0() and dequantize_row_q8_0()

This commit is contained in:
vithulep 2025-06-10 15:17:58 +05:30
parent 1f63e75f3b
commit 397f61590d
3 changed files with 60 additions and 6 deletions

View File

@ -103,6 +103,15 @@ endif()
# TODO: probably these flags need to be tweaked on some architectures
# feel free to update the Makefile for your architecture and send a pull request or issue
message(STATUS "CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}")
# Enable SVE for ARMv8-A+ architectures
if (CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|arm64|ARM64")
message(STATUS "Enabling ARM SVE support")
add_compile_options(-march=native -fopenmp -O3 -ftree-vectorize)
add_compile_definitions(
__ARM_FEATURE_SVE=1
__ARM_FEATURE_FP16_VECTOR_ARITHMETIC=1
)
endif()
if (MSVC)
string(TOLOWER "${CMAKE_GENERATOR_PLATFORM}" CMAKE_GENERATOR_PLATFORM_LWR)
message(STATUS "CMAKE_GENERATOR_PLATFORM: ${CMAKE_GENERATOR_PLATFORM}")

View File

@ -44,7 +44,34 @@ void quantize_row_q8_0(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, i
block_q8_0 * GGML_RESTRICT y = vy;
#if defined(__ARM_NEON)
#if defined(__ARM_FEATURE_SVE)
const int sve_register_length = ggml_cpu_get_sve_cnt() * 8;
const int ggml_f32_epr = sve_register_length / 32;
const svfloat32_t inactive1 = svdup_n_f32(0.0f);
const svint32_t inactive2 = svdup_n_s32(0);
const svbool_t pg = svptrue_b32();
for (int i = 0; i < nb; i+=1) {
svfloat32_t srcv1, asrcv1;
svfloat32_t sv_max = svdup_n_f32(0.0f);
float32_t amax = 0.0;
for (int j = 0; j < QK8_0; j+=ggml_f32_epr) {
srcv1 = svld1_f32(pg, x + i*32 + j);
asrcv1 = svabs_f32_m(inactive1, pg, srcv1);
sv_max = svmax_f32_m(pg, sv_max, asrcv1);
}
amax = svmaxv_f32(pg, sv_max);
float32_t d = amax / ((1 << 7) - 1);
float32_t id = d ? 1.0f/d : 0.0f;
y[i].d = GGML_FP32_TO_FP16(d);
for (int j = 0; j < QK8_0; j+=ggml_f32_epr) {
srcv1 = svld1_f32(pg, x + i*32 + j);
const svfloat32_t v1 = svmul_n_f32_m(pg, srcv1, id);
const svint32_t vi1 = svcvt_s32_f32_m(inactive2, pg, v1);
svst1b_s32(pg, &y[i].qs[j], vi1);
}
}
#elif defined(__ARM_NEON)
for (int i = 0; i < nb; i++) {
float32x4_t srcv [8];
float32x4_t asrcv[8];

View File

@ -347,13 +347,31 @@ void dequantize_row_q8_0(const block_q8_0 * GGML_RESTRICT x, float * GGML_RESTRI
const int nb = k / qk;
for (int i = 0; i < nb; i++) {
const float d = GGML_FP16_TO_FP32(x[i].d);
#if defined(__ARM_FEATURE_SVE)
svbool_t pg = svptrue_b32();
const svfloat32_t inactive1 = svdup_n_f32(0.0f);
const int ggml_f32_epr = svcntw();
for (int j = 0; j < qk; ++j) {
y[i*qk + j] = x[i].qs[j]*d;
for (int i = 0; i < nb; i+=1) {
const float d1 = GGML_FP16_TO_FP32(x[i].d); // d:0
const int8_t *x_data1 = x[i].qs;
float *y_base = y + i * qk;
for (int j = 0; j < qk; j+=ggml_f32_epr) {
svint32_t vec0 = svld1sb_s32(pg, x_data1 + j);
svfloat32_t fvec0 = svmul_n_f32_m(pg, svcvt_f32_s32_m(inactive1, pg, vec0), d1); // Convert to float and scale
svst1_f32(pg, y_base + j, fvec0);
}
}
}
#else
for (int i = 0; i < nb; i++) {
const float d = GGML_FP16_TO_FP32(x[i].d);
for (int j = 0; j < qk; ++j) {
y[i*qk + j] = x[i].qs[j]*d;
}
}
#endif
}
//