refactor ggml_vec_dot_q5_K_q8_K

This commit is contained in:
Xiongchuan Tan 2026-01-07 14:58:22 +08:00
parent 3333056088
commit 2c7d721fb0
3 changed files with 12 additions and 16 deletions

View File

@ -78,6 +78,10 @@ static int dispatch_vlenb = probe_vlenb();
#elif defined(__riscv_xtheadvector)
// q5_K x q8_K dot product for builds where __riscv_xtheadvector is defined.
// This variant contains no vector code of its own: it forwards every argument
// unchanged to ggml_vec_dot_q5_K_q8_K_generic, so results and argument
// requirements are whatever the generic implementation provides.
// NOTE(review): the "_071" suffix presumably refers to the RVV 0.7.1 draft that
// XTheadVector implements — confirm against the dispatch macro.
void ggml_vec_dot_q5_K_q8_K_071(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {
ggml_vec_dot_q5_K_q8_K_generic(n, s, bs, vx, bx, vy, by, nrc);
}
#define RVV_VEC_DOT_DISPATCH(func_name, ...) \
void func_name(int n, float * GGML_RESTRICT s, size_t bs, \
const void * GGML_RESTRICT vx, size_t bx, \
@ -101,6 +105,7 @@ extern "C" {
RVV_VEC_DOT_DISPATCH(ggml_vec_dot_q2_K_q8_K, 32, _256, 16, _128)
RVV_VEC_DOT_DISPATCH(ggml_vec_dot_q3_K_q8_K, 32, _256, 16, _128)
RVV_VEC_DOT_DISPATCH(ggml_vec_dot_q4_K_q8_K, 32, _256, 16, _128)
RVV_VEC_DOT_DISPATCH(ggml_vec_dot_q5_K_q8_K, 16, _128)
}

View File

@ -7,3 +7,5 @@ void ggml_vec_dot_q3_K_q8_K_128(int n, float * GGML_RESTRICT s, size_t bs, const
void ggml_vec_dot_q4_K_q8_K_071(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
void ggml_vec_dot_q4_K_q8_K_256(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
void ggml_vec_dot_q4_K_q8_K_128(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
// q5_K x q8_K dot-product variants; parameter lists are identical to the q4_K
// declarations above.
// NOTE(review): the "_128" suffix presumably selects a 128-bit vector length
// (the dispatch table pairs it with the value 16) — confirm. No "_256" variant
// is declared for q5_K here.
void ggml_vec_dot_q5_K_q8_K_071(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
void ggml_vec_dot_q5_K_q8_K_128(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);

View File

@ -1590,7 +1590,9 @@ void ggml_vec_dot_q4_K_q8_K_128(int n, float * GGML_RESTRICT s, size_t bs, const
#endif // ggml_vec_dot_q4_K_q8_K
void ggml_vec_dot_q5_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {
#if defined __riscv_v
void ggml_vec_dot_q5_K_q8_K_128(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {
assert(n % QK_K == 0);
assert(nrc == 1);
UNUSED(nrc);
@ -1609,8 +1611,6 @@ void ggml_vec_dot_q5_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi
uint32_t utmp[4];
#if defined __riscv_v
const uint8_t * scales = (const uint8_t*)&utmp[0];
const uint8_t * mins = (const uint8_t*)&utmp[2];
@ -1694,21 +1694,10 @@ void ggml_vec_dot_q5_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi
}
*s = sumf+sums;
#else
UNUSED(x);
UNUSED(y);
UNUSED(kmask1);
UNUSED(kmask2);
UNUSED(kmask3);
UNUSED(nb);
UNUSED(utmp);
ggml_vec_dot_q5_K_q8_K_generic(n, s, bs, vx, bx, vy, by, nrc);
#endif
}
#endif // ggml_vec_dot_q5_K_q8_K
void ggml_vec_dot_q6_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {
assert(n % QK_K == 0);
assert(nrc == 1);