From 2c7d721fb056fa122f6eeeed43355728f19d4a38 Mon Sep 17 00:00:00 2001 From: Xiongchuan Tan Date: Wed, 7 Jan 2026 14:58:22 +0800 Subject: [PATCH] refactor ggml_vec_dot_q5_K_q8_K --- ggml/src/ggml-cpu/arch/riscv/dispatch.cpp | 5 +++++ ggml/src/ggml-cpu/arch/riscv/kernels.inc | 2 ++ ggml/src/ggml-cpu/arch/riscv/quants.c | 21 +++++---------------- 3 files changed, 12 insertions(+), 16 deletions(-) diff --git a/ggml/src/ggml-cpu/arch/riscv/dispatch.cpp b/ggml/src/ggml-cpu/arch/riscv/dispatch.cpp index 6e1ebe278b..a7f5be3cc2 100644 --- a/ggml/src/ggml-cpu/arch/riscv/dispatch.cpp +++ b/ggml/src/ggml-cpu/arch/riscv/dispatch.cpp @@ -78,6 +78,10 @@ static int dispatch_vlenb = probe_vlenb(); #elif defined(__riscv_xtheadvector) +void ggml_vec_dot_q5_K_q8_K_071(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) { + ggml_vec_dot_q5_K_q8_K_generic(n, s, bs, vx, bx, vy, by, nrc); +} + #define RVV_VEC_DOT_DISPATCH(func_name, ...) \ void func_name(int n, float * GGML_RESTRICT s, size_t bs, \ const void * GGML_RESTRICT vx, size_t bx, \ @@ -101,6 +105,7 @@ extern "C" { RVV_VEC_DOT_DISPATCH(ggml_vec_dot_q2_K_q8_K, 32, _256, 16, _128) RVV_VEC_DOT_DISPATCH(ggml_vec_dot_q3_K_q8_K, 32, _256, 16, _128) RVV_VEC_DOT_DISPATCH(ggml_vec_dot_q4_K_q8_K, 32, _256, 16, _128) +RVV_VEC_DOT_DISPATCH(ggml_vec_dot_q5_K_q8_K, 16, _128) } diff --git a/ggml/src/ggml-cpu/arch/riscv/kernels.inc b/ggml/src/ggml-cpu/arch/riscv/kernels.inc index 5dddb0bcc7..3f2aae7ac9 100644 --- a/ggml/src/ggml-cpu/arch/riscv/kernels.inc +++ b/ggml/src/ggml-cpu/arch/riscv/kernels.inc @@ -7,3 +7,5 @@ void ggml_vec_dot_q3_K_q8_K_128(int n, float * GGML_RESTRICT s, size_t bs, const void ggml_vec_dot_q4_K_q8_K_071(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc); void ggml_vec_dot_q4_K_q8_K_256(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc); void ggml_vec_dot_q4_K_q8_K_128(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc); +void ggml_vec_dot_q5_K_q8_K_071(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc); +void ggml_vec_dot_q5_K_q8_K_128(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc); diff --git a/ggml/src/ggml-cpu/arch/riscv/quants.c b/ggml/src/ggml-cpu/arch/riscv/quants.c index b7094b4961..a120f2ead2 100644 --- a/ggml/src/ggml-cpu/arch/riscv/quants.c +++ b/ggml/src/ggml-cpu/arch/riscv/quants.c @@ -1590,7 +1590,9 @@ void ggml_vec_dot_q4_K_q8_K_128(int n, float * GGML_RESTRICT s, size_t bs, const #endif // ggml_vec_dot_q4_K_q8_K -void ggml_vec_dot_q5_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) { +#if defined __riscv_v + +void ggml_vec_dot_q5_K_q8_K_128(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) { assert(n % QK_K == 0); assert(nrc == 1); UNUSED(nrc); @@ -1609,8 +1611,6 @@ void ggml_vec_dot_q5_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi uint32_t utmp[4]; -#if defined __riscv_v - const uint8_t * scales = (const uint8_t*)&utmp[0]; const uint8_t * mins = (const uint8_t*)&utmp[2]; @@ -1694,21 +1694,10 @@ void ggml_vec_dot_q5_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi } *s = sumf+sums; - -#else - - UNUSED(x); - UNUSED(y); - UNUSED(kmask1); - UNUSED(kmask2); - UNUSED(kmask3); - UNUSED(nb); - UNUSED(utmp); - - ggml_vec_dot_q5_K_q8_K_generic(n, s, bs, vx, bx, vy, by, nrc); -#endif } +#endif // ggml_vec_dot_q5_K_q8_K + void ggml_vec_dot_q6_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) { assert(n % QK_K == 0); assert(nrc == 1);