ggml-cpu: add vlen switch for rvv vec_dot
This commit is contained in:
parent
2031856102
commit
15604a89df
|
|
@ -2175,7 +2175,7 @@ void ggml_vec_dot_iq2_s_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const vo
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
void ggml_vec_dot_iq3_s_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {
|
static void ggml_vec_dot_iq3_s_q8_K_vl256(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {
|
||||||
assert(n % QK_K == 0);
|
assert(n % QK_K == 0);
|
||||||
UNUSED(nrc);
|
UNUSED(nrc);
|
||||||
UNUSED(bx);
|
UNUSED(bx);
|
||||||
|
|
@ -2187,7 +2187,6 @@ void ggml_vec_dot_iq3_s_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const vo
|
||||||
|
|
||||||
const int nb = n / QK_K;
|
const int nb = n / QK_K;
|
||||||
|
|
||||||
#if defined __riscv_v_intrinsic
|
|
||||||
const uint64_t * grid64 = (const uint64_t *)iq3s_grid;
|
const uint64_t * grid64 = (const uint64_t *)iq3s_grid;
|
||||||
|
|
||||||
// --- Pre-load Constants ---
|
// --- Pre-load Constants ---
|
||||||
|
|
@ -2269,16 +2268,24 @@ void ggml_vec_dot_iq3_s_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const vo
|
||||||
sumf += sum_block * combined_scale;
|
sumf += sum_block * combined_scale;
|
||||||
}
|
}
|
||||||
*s = 0.125f * sumf;
|
*s = 0.125f * sumf;
|
||||||
#else
|
}
|
||||||
UNUSED(x);
|
|
||||||
UNUSED(y);
|
|
||||||
UNUSED(nb);
|
|
||||||
|
|
||||||
|
void ggml_vec_dot_iq3_s_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {
|
||||||
|
#if defined __riscv_v_intrinsic
|
||||||
|
switch (__riscv_vlenb() * 8) {
|
||||||
|
case 256:
|
||||||
|
ggml_vec_dot_iq3_s_q8_K_vl256(n, s, bs, vx, bx, vy, by, nrc);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
ggml_vec_dot_iq3_s_q8_K_generic(n, s, bs, vx, bx, vy, by, nrc);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
#else
|
||||||
ggml_vec_dot_iq3_s_q8_K_generic(n, s, bs, vx, bx, vy, by, nrc);
|
ggml_vec_dot_iq3_s_q8_K_generic(n, s, bs, vx, bx, vy, by, nrc);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
void ggml_vec_dot_tq1_0_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {
|
static void ggml_vec_dot_tq1_0_q8_K_vl256(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {
|
||||||
assert(nrc == 1);
|
assert(nrc == 1);
|
||||||
UNUSED(nrc);
|
UNUSED(nrc);
|
||||||
UNUSED(bx);
|
UNUSED(bx);
|
||||||
|
|
@ -2290,7 +2297,6 @@ void ggml_vec_dot_tq1_0_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const vo
|
||||||
|
|
||||||
const int nb = n / QK_K;
|
const int nb = n / QK_K;
|
||||||
|
|
||||||
#if defined __riscv_v_intrinsic
|
|
||||||
float sumf = 0.0f;
|
float sumf = 0.0f;
|
||||||
uint8_t pow[16] = {1, 1, 1, 1, 3, 3, 3, 3, 9, 9, 9, 9, 27, 27, 27, 27};
|
uint8_t pow[16] = {1, 1, 1, 1, 3, 3, 3, 3, 9, 9, 9, 9, 27, 27, 27, 27};
|
||||||
|
|
||||||
|
|
@ -2383,17 +2389,24 @@ void ggml_vec_dot_tq1_0_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const vo
|
||||||
}
|
}
|
||||||
|
|
||||||
*s = sumf;
|
*s = sumf;
|
||||||
|
}
|
||||||
|
|
||||||
|
void ggml_vec_dot_tq1_0_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {
|
||||||
|
#if defined __riscv_v_intrinsic
|
||||||
|
switch (__riscv_vlenb() * 8) {
|
||||||
|
case 256:
|
||||||
|
ggml_vec_dot_tq1_0_q8_K_vl256(n, s, bs, vx, bx, vy, by, nrc);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
ggml_vec_dot_tq1_0_q8_K_generic(n, s, bs, vx, bx, vy, by, nrc);
|
||||||
|
break;
|
||||||
|
}
|
||||||
#else
|
#else
|
||||||
UNUSED(x);
|
|
||||||
UNUSED(y);
|
|
||||||
UNUSED(nb);
|
|
||||||
|
|
||||||
ggml_vec_dot_tq1_0_q8_K_generic(n, s, bs, vx, bx, vy, by, nrc);
|
ggml_vec_dot_tq1_0_q8_K_generic(n, s, bs, vx, bx, vy, by, nrc);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
void ggml_vec_dot_tq2_0_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {
|
static void ggml_vec_dot_tq2_0_q8_K_vl256(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {
|
||||||
assert(n % QK_K == 0);
|
assert(n % QK_K == 0);
|
||||||
assert(nrc == 1);
|
assert(nrc == 1);
|
||||||
UNUSED(nrc);
|
UNUSED(nrc);
|
||||||
|
|
@ -2406,7 +2419,6 @@ void ggml_vec_dot_tq2_0_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const vo
|
||||||
|
|
||||||
const int nb = n / QK_K;
|
const int nb = n / QK_K;
|
||||||
|
|
||||||
#if defined __riscv_v_intrinsic
|
|
||||||
float sumf = 0.0f;
|
float sumf = 0.0f;
|
||||||
for (int i = 0; i < nb; ++i) {
|
for (int i = 0; i < nb; ++i) {
|
||||||
int32_t sumi = 0;
|
int32_t sumi = 0;
|
||||||
|
|
@ -2463,17 +2475,24 @@ void ggml_vec_dot_tq2_0_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const vo
|
||||||
}
|
}
|
||||||
|
|
||||||
*s = sumf;
|
*s = sumf;
|
||||||
#else
|
|
||||||
UNUSED(x);
|
|
||||||
UNUSED(y);
|
|
||||||
UNUSED(nb);
|
|
||||||
|
|
||||||
ggml_vec_dot_tq2_0_q8_K_generic(n, s, bs, vx, bx, vy, by, nrc);
|
|
||||||
#endif // __riscv_vector
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void ggml_vec_dot_iq1_s_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {
|
void ggml_vec_dot_tq2_0_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {
|
||||||
|
#if defined __riscv_v_intrinsic
|
||||||
|
switch (__riscv_vlenb() * 8) {
|
||||||
|
case 256:
|
||||||
|
ggml_vec_dot_tq2_0_q8_K_vl256(n, s, bs, vx, bx, vy, by, nrc);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
ggml_vec_dot_tq2_0_q8_K_generic(n, s, bs, vx, bx, vy, by, nrc);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
ggml_vec_dot_tq2_0_q8_K_generic(n, s, bs, vx, bx, vy, by, nrc);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
static void ggml_vec_dot_iq1_s_q8_K_vl256(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {
|
||||||
assert(n % QK_K == 0);
|
assert(n % QK_K == 0);
|
||||||
assert(nrc == 1);
|
assert(nrc == 1);
|
||||||
UNUSED(nrc);
|
UNUSED(nrc);
|
||||||
|
|
@ -2486,7 +2505,6 @@ void ggml_vec_dot_iq1_s_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const vo
|
||||||
|
|
||||||
const int nb = n / QK_K;
|
const int nb = n / QK_K;
|
||||||
|
|
||||||
#if defined __riscv_v_intrinsic
|
|
||||||
float sumf = 0;
|
float sumf = 0;
|
||||||
for (int i = 0; i < nb; ++i) {
|
for (int i = 0; i < nb; ++i) {
|
||||||
// Load qh once for the entire superblock.
|
// Load qh once for the entire superblock.
|
||||||
|
|
@ -2568,16 +2586,24 @@ void ggml_vec_dot_iq1_s_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const vo
|
||||||
}
|
}
|
||||||
|
|
||||||
*s = sumf;
|
*s = sumf;
|
||||||
|
}
|
||||||
|
|
||||||
|
void ggml_vec_dot_iq1_s_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {
|
||||||
|
#if defined __riscv_v_intrinsic
|
||||||
|
switch (__riscv_vlenb() * 8) {
|
||||||
|
case 256:
|
||||||
|
ggml_vec_dot_iq1_s_q8_K_vl256(n, s, bs, vx, bx, vy, by, nrc);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
ggml_vec_dot_iq1_s_q8_K_generic(n, s, bs, vx, bx, vy, by, nrc);
|
||||||
|
break;
|
||||||
|
}
|
||||||
#else
|
#else
|
||||||
UNUSED(x);
|
|
||||||
UNUSED(y);
|
|
||||||
UNUSED(nb);
|
|
||||||
ggml_vec_dot_iq1_s_q8_K_generic(n, s, bs, vx, bx, vy, by, nrc);
|
ggml_vec_dot_iq1_s_q8_K_generic(n, s, bs, vx, bx, vy, by, nrc);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
void ggml_vec_dot_iq1_m_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {
|
static void ggml_vec_dot_iq1_m_q8_K_vl256(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {
|
||||||
assert(n % QK_K == 0);
|
assert(n % QK_K == 0);
|
||||||
assert(nrc == 1);
|
assert(nrc == 1);
|
||||||
UNUSED(nrc);
|
UNUSED(nrc);
|
||||||
|
|
@ -2590,7 +2616,6 @@ void ggml_vec_dot_iq1_m_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const vo
|
||||||
|
|
||||||
const int nb = n / QK_K;
|
const int nb = n / QK_K;
|
||||||
|
|
||||||
#if defined __riscv_v_intrinsic
|
|
||||||
iq1m_scale_t scale;
|
iq1m_scale_t scale;
|
||||||
float sumf = 0.0f;
|
float sumf = 0.0f;
|
||||||
for (int i = 0; i < nb; ++i) {
|
for (int i = 0; i < nb; ++i) {
|
||||||
|
|
@ -2683,12 +2708,19 @@ void ggml_vec_dot_iq1_m_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const vo
|
||||||
}
|
}
|
||||||
|
|
||||||
*s = sumf;
|
*s = sumf;
|
||||||
|
}
|
||||||
|
|
||||||
|
void ggml_vec_dot_iq1_m_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {
|
||||||
|
#if defined __riscv_v_intrinsic
|
||||||
|
switch (__riscv_vlenb() * 8) {
|
||||||
|
case 256:
|
||||||
|
ggml_vec_dot_iq1_m_q8_K_vl256(n, s, bs, vx, bx, vy, by, nrc);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
ggml_vec_dot_iq1_m_q8_K_generic(n, s, bs, vx, bx, vy, by, nrc);
|
||||||
|
break;
|
||||||
|
}
|
||||||
#else
|
#else
|
||||||
UNUSED(x);
|
|
||||||
UNUSED(y);
|
|
||||||
UNUSED(nb);
|
|
||||||
|
|
||||||
ggml_vec_dot_iq1_m_q8_K_generic(n, s, bs, vx, bx, vy, by, nrc);
|
ggml_vec_dot_iq1_m_q8_K_generic(n, s, bs, vx, bx, vy, by, nrc);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue