ggml-cpu: add repack GEMM and GEMV for floating-point (#4)

This commit is contained in:
Taimur Ahmad 2025-12-23 15:13:09 +05:00 committed by taimur-10x
parent 41bee681d9
commit fd94e4cdca
4 changed files with 140 additions and 239 deletions

View File

@ -37,8 +37,6 @@
#define ggml_quantize_mat_q8_0_4x8_generic ggml_quantize_mat_q8_0_4x8
#define ggml_quantize_mat_q8_K_4x4_generic ggml_quantize_mat_q8_K_4x4
#define ggml_quantize_mat_q8_K_4x8_generic ggml_quantize_mat_q8_K_4x8
#define ggml_repack_mat_f16_7x1_generic ggml_repack_mat_f16_7x1
#define ggml_repack_mat_f32_7x1_generic ggml_repack_mat_f32_7x1
#define ggml_gemv_q4_0_4x4_q8_0_generic ggml_gemv_q4_0_4x4_q8_0
#define ggml_gemv_q4_0_4x8_q8_0_generic ggml_gemv_q4_0_4x8_q8_0
#define ggml_gemv_q4_0_8x8_q8_0_generic ggml_gemv_q4_0_8x8_q8_0
@ -78,33 +76,15 @@
#define ggml_gemv_iq4_nl_8x8_q8_0_generic ggml_gemv_iq4_nl_8x8_q8_0
#define ggml_gemv_mxfp4_8x8_q8_0_generic ggml_gemv_mxfp4_8x8_q8_0
#define ggml_gemv_q2_K_8x8_q8_K_generic ggml_gemv_q2_K_8x8_q8_K
#define ggml_gemv_f16_1x16_f16_generic ggml_gemv_f16_1x16_f16
#define ggml_gemv_f16_1x32_f16_generic ggml_gemv_f16_1x32_f16
#define ggml_gemv_f16_1x64_f16_generic ggml_gemv_f16_1x64_f16
#define ggml_gemv_f16_1x128_f16_generic ggml_gemv_f16_1x128_f16
#define ggml_gemv_f32_1x16_f32_generic ggml_gemv_f32_1x16_f32
#define ggml_gemv_f32_1x32_f32_generic ggml_gemv_f32_1x32_f32
#define ggml_gemv_f32_1x64_f32_generic ggml_gemv_f32_1x64_f32
#define ggml_gemv_f32_1x128_f32_generic ggml_gemv_f32_1x128_f32
#define ggml_gemm_iq4_nl_8x8_q8_0_generic ggml_gemm_iq4_nl_8x8_q8_0
#define ggml_gemm_mxfp4_8x8_q8_0_generic ggml_gemm_mxfp4_8x8_q8_0
#define ggml_gemm_q2_K_8x8_q8_K_generic ggml_gemm_q2_K_8x8_q8_K
#define ggml_gemm_f16_7x1x16_f16_generic ggml_gemm_f16_7x1x16_f16
#define ggml_gemm_f16_7x1x32_f16_generic ggml_gemm_f16_7x1x32_f16
#define ggml_gemm_f16_7x1x64_f16_generic ggml_gemm_f16_7x1x64_f16
#define ggml_gemm_f16_7x1x128_f16_generic ggml_gemm_f16_7x1x128_f16
#define ggml_gemm_f32_7x1x16_f32_generic ggml_gemm_f32_7x1x16_f32
#define ggml_gemm_f32_7x1x32_f32_generic ggml_gemm_f32_7x1x32_f32
#define ggml_gemm_f32_7x1x64_f32_generic ggml_gemm_f32_7x1x64_f32
#define ggml_gemm_f32_7x1x128_f32_generic ggml_gemm_f32_7x1x128_f32
#elif defined(__x86_64__) || defined(__i386__) || defined(_M_IX86) || defined(_M_X64)
// quants.c
#define ggml_vec_dot_nvfp4_q8_0_generic ggml_vec_dot_nvfp4_q8_0
// repack.cpp
#define ggml_quantize_mat_q8_0_4x4_generic ggml_quantize_mat_q8_0_4x4
#define ggml_quantize_mat_q8_K_4x4_generic ggml_quantize_mat_q8_K_4x4
#define ggml_repack_mat_f16_7x1_generic ggml_repack_mat_f16_7x1
#define ggml_repack_mat_f32_7x1_generic ggml_repack_mat_f32_7x1
#define ggml_gemv_q4_0_4x4_q8_0_generic ggml_gemv_q4_0_4x4_q8_0
#define ggml_gemv_q4_0_4x8_q8_0_generic ggml_gemv_q4_0_4x8_q8_0
#define ggml_gemv_q4_K_8x4_q8_K_generic ggml_gemv_q4_K_8x4_q8_K
@ -140,8 +120,6 @@
#define ggml_quantize_mat_q8_0_4x8_generic ggml_quantize_mat_q8_0_4x8
#define ggml_quantize_mat_q8_K_4x4_generic ggml_quantize_mat_q8_K_4x4
#define ggml_quantize_mat_q8_K_4x8_generic ggml_quantize_mat_q8_K_4x8
#define ggml_repack_mat_f16_7x1_generic ggml_repack_mat_f16_7x1
#define ggml_repack_mat_f32_7x1_generic ggml_repack_mat_f32_7x1
#define ggml_gemv_q4_0_4x4_q8_0_generic ggml_gemv_q4_0_4x4_q8_0
#define ggml_gemv_q4_0_4x8_q8_0_generic ggml_gemv_q4_0_4x8_q8_0
#define ggml_gemv_q4_0_8x8_q8_0_generic ggml_gemv_q4_0_8x8_q8_0
@ -187,8 +165,6 @@
#define ggml_quantize_mat_q8_0_4x8_generic ggml_quantize_mat_q8_0_4x8
#define ggml_quantize_mat_q8_K_4x4_generic ggml_quantize_mat_q8_K_4x4
#define ggml_quantize_mat_q8_K_4x8_generic ggml_quantize_mat_q8_K_4x8
#define ggml_repack_mat_f16_7x1_generic ggml_repack_mat_f16_7x1
#define ggml_repack_mat_f32_7x1_generic ggml_repack_mat_f32_7x1
#define ggml_gemv_q4_0_4x4_q8_0_generic ggml_gemv_q4_0_4x4_q8_0
#define ggml_gemv_q4_0_4x8_q8_0_generic ggml_gemv_q4_0_4x8_q8_0
#define ggml_gemv_q4_0_8x8_q8_0_generic ggml_gemv_q4_0_8x8_q8_0
@ -281,8 +257,6 @@
#define ggml_quantize_mat_q8_0_4x8_generic ggml_quantize_mat_q8_0_4x8
#define ggml_quantize_mat_q8_K_4x4_generic ggml_quantize_mat_q8_K_4x4
#define ggml_quantize_mat_q8_K_4x8_generic ggml_quantize_mat_q8_K_4x8
#define ggml_repack_mat_f16_7x1_generic ggml_repack_mat_f16_7x1
#define ggml_repack_mat_f32_7x1_generic ggml_repack_mat_f32_7x1
#define ggml_gemv_q4_0_4x4_q8_0_generic ggml_gemv_q4_0_4x4_q8_0
#define ggml_gemv_q4_0_4x8_q8_0_generic ggml_gemv_q4_0_4x8_q8_0
#define ggml_gemv_q4_0_8x8_q8_0_generic ggml_gemv_q4_0_8x8_q8_0
@ -336,8 +310,6 @@
#define ggml_quantize_mat_q8_0_4x8_generic ggml_quantize_mat_q8_0_4x8
#define ggml_quantize_mat_q8_K_4x4_generic ggml_quantize_mat_q8_K_4x4
#define ggml_quantize_mat_q8_K_4x8_generic ggml_quantize_mat_q8_K_4x8
#define ggml_repack_mat_f16_7x1_generic ggml_repack_mat_f16_7x1
#define ggml_repack_mat_f32_7x1_generic ggml_repack_mat_f32_7x1
#define ggml_gemv_q4_0_4x4_q8_0_generic ggml_gemv_q4_0_4x4_q8_0
#define ggml_gemv_q4_0_4x8_q8_0_generic ggml_gemv_q4_0_4x8_q8_0
#define ggml_gemv_q4_0_8x8_q8_0_generic ggml_gemv_q4_0_8x8_q8_0

View File

@ -665,6 +665,97 @@ void ggml_gemv_q2_K_16x1_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const v
__riscv_vse32_v_f32m2(s + col_tile, v_sumf, vl);
}
}
// GEMV: multiply one f16 input row (vy) against an f16 weight matrix that
// was repacked into <ncols_interleaved x 1> column-interleaved blocks (vx),
// accumulating in f32 and writing nc floats to s. One template
// instantiation per interleave width. Uses RISC-V Vector intrinsics.
//
// n  : depth (number of elements in the input row)
// s  : output buffer, nc floats
// bs : unused here; kept so all gemv kernels share one signature
// vx : repacked weights (block_f16<ncols_interleaved, 1> array)
// vy : input row, n f16 values
// nr : number of input rows; must be 1 for gemv
// nc : output columns; must be a multiple of ncols_interleaved
template<int ncols_interleaved>
static inline void ggml_gemv_f16_1xM_f16(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) {
GGML_UNUSED(bs);
const int nb = n / 1;  // block count; the interleave layout uses 1-element blocks
assert (nr == 1);
assert(n % 1 == 0);
assert(nc % ncols_interleaved == 0);
const _Float16 * a_ptr = (const _Float16 *) vy;
for (int x = 0; x < nc / ncols_interleaved; x++) {
// x-th column tile of the repacked weight matrix.
const block_f16<ncols_interleaved, 1> * b_ptr = (const block_f16<ncols_interleaved, 1> *) vx + (x * nb);
// Accumulators
vfloat32m4_t sumf_0 = __riscv_vfmv_v_f_f32m4(0.0f, ncols_interleaved);
for (int l = 0; l < nb; l++) {
// Load ncols_interleaved f16 weights, then widening multiply-accumulate
// (f16 x f16 -> f32) with the scalar input element.
vfloat16m2_t b_0 = __riscv_vle16_v_f16m2((const _Float16 *)&b_ptr[l].d[0], ncols_interleaved);
sumf_0 = __riscv_vfwmacc_vf_f32m4(sumf_0, *(const _Float16*)(&a_ptr[l]), b_0, ncols_interleaved);
}
__riscv_vse32_v_f32m4(&s[x * ncols_interleaved], sumf_0, ncols_interleaved);
}
return;
}
// Concrete f16 gemv entry points for the interleave widths produced by the
// repack layouts; each forwards to the shared template instantiation.
void ggml_gemv_f16_1x16_f16(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) {
ggml_gemv_f16_1xM_f16<16>(n, s, bs, vx, vy, nr, nc);
}
void ggml_gemv_f16_1x32_f16(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) {
ggml_gemv_f16_1xM_f16<32>(n, s, bs, vx, vy, nr, nc);
}
void ggml_gemv_f16_1x64_f16(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) {
ggml_gemv_f16_1xM_f16<64>(n, s, bs, vx, vy, nr, nc);
}
void ggml_gemv_f16_1x128_f16(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) {
ggml_gemv_f16_1xM_f16<128>(n, s, bs, vx, vy, nr, nc);
}
template<int ncols_interleaved>
static inline void ggml_gemv_f32_1xM_f32(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) {
GGML_UNUSED(bs);
const int nb = n / 1;
assert (nr == 1);
assert(n % 1 == 0);
assert(nc % ncols_interleaved == 0);
const float * a_ptr = (const float *) vy;
for (int x = 0; x < nc / ncols_interleaved; x++) {
const block_f32<ncols_interleaved, 1> * b_ptr = (const block_f32<ncols_interleaved, 1> *) vx + (x * nb);
// Accumulators
vfloat32m4_t sumf_0 = __riscv_vfmv_v_f_f32m4(0.0f, ncols_interleaved);
for (int l = 0; l < nb; l++) {
vfloat32m4_t b_0 = __riscv_vle32_v_f32m4((const float *)&b_ptr[l].d[0], ncols_interleaved);
sumf_0 = __riscv_vfmacc_vf_f32m4(sumf_0, *(const float*)(&a_ptr[l]), b_0, ncols_interleaved);
}
__riscv_vse32_v_f32m4(&s[x * ncols_interleaved], sumf_0, ncols_interleaved);
}
return;
}
// Concrete f32 gemv entry points for the supported interleave widths;
// each forwards to the shared template instantiation.
void ggml_gemv_f32_1x16_f32(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) {
ggml_gemv_f32_1xM_f32<16>(n, s, bs, vx, vy, nr, nc);
}
void ggml_gemv_f32_1x32_f32(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) {
ggml_gemv_f32_1xM_f32<32>(n, s, bs, vx, vy, nr, nc);
}
void ggml_gemv_f32_1x64_f32(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) {
ggml_gemv_f32_1xM_f32<64>(n, s, bs, vx, vy, nr, nc);
}
void ggml_gemv_f32_1x128_f32(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) {
ggml_gemv_f32_1xM_f32<128>(n, s, bs, vx, vy, nr, nc);
}
#endif
void ggml_gemm_q4_0_8x8_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) {
@ -1700,125 +1791,7 @@ void ggml_gemm_q2_K_16x1_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const v
}
}
}
#endif
template<int ncols_interleaved>
static inline void ggml_gemv_f16_1xM_f16(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) {
const int nb = n / 1;
assert (nr == 1);
assert(n % 1 == 0);
assert(nc % ncols_interleaved == 0);
const _Float16 * a_ptr = (const _Float16 *) vy;
for (int x = 0; x < nc / ncols_interleaved; x++) {
const block_f16<ncols_interleaved, 1> * b_ptr = (const block_f16<ncols_interleaved, 1> *) vx + (x * nb);
// Accumulators
vfloat32m4_t sumf_0 = __riscv_vfmv_v_f_f32m4(0.0f, ncols_interleaved);
for (int l = 0; l < nb; l++) {
vfloat16m2_t b_0 = __riscv_vle16_v_f16m2((const _Float16 *)&b_ptr[l].d[0], ncols_interleaved);
sumf_0 = __riscv_vfwmacc_vf_f32m4(sumf_0, *(const _Float16*)(&a_ptr[l]), b_0, ncols_interleaved);
}
__riscv_vse32_v_f32m4(&s[x * ncols_interleaved], sumf_0, ncols_interleaved);
}
return;
}
// Public f16 gemv entry points: dispatch to the RVV template when vector
// intrinsics are available at compile time, otherwise fall through to the
// scalar generic implementation.
void ggml_gemv_f16_1x16_f16(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) {
#if defined __riscv_v_intrinsic
ggml_gemv_f16_1xM_f16<16>(n, s, bs, vx, vy, nr, nc);
return;
#endif
ggml_gemv_f16_1x16_f16_generic(n, s, bs, vx, vy, nr, nc);
}
void ggml_gemv_f16_1x32_f16(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) {
#if defined __riscv_v_intrinsic
ggml_gemv_f16_1xM_f16<32>(n, s, bs, vx, vy, nr, nc);
return;
#endif
ggml_gemv_f16_1x32_f16_generic(n, s, bs, vx, vy, nr, nc);
}
void ggml_gemv_f16_1x64_f16(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) {
#if defined __riscv_v_intrinsic
ggml_gemv_f16_1xM_f16<64>(n, s, bs, vx, vy, nr, nc);
return;
#endif
ggml_gemv_f16_1x64_f16_generic(n, s, bs, vx, vy, nr, nc);
}
void ggml_gemv_f16_1x128_f16(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) {
#if defined __riscv_v_intrinsic
ggml_gemv_f16_1xM_f16<128>(n, s, bs, vx, vy, nr, nc);
return;
#endif
ggml_gemv_f16_1x128_f16_generic(n, s, bs, vx, vy, nr, nc);
}
template<int ncols_interleaved>
static inline void ggml_gemv_f32_1xM_f32(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) {
const int nb = n / 1;
assert (nr == 1);
assert(n % 1 == 0);
assert(nc % ncols_interleaved == 0);
const float * a_ptr = (const float *) vy;
for (int x = 0; x < nc / ncols_interleaved; x++) {
const block_f32<ncols_interleaved, 1> * b_ptr = (const block_f32<ncols_interleaved, 1> *) vx + (x * nb);
// Accumulators
vfloat32m4_t sumf_0 = __riscv_vfmv_v_f_f32m4(0.0f, ncols_interleaved);
for (int l = 0; l < nb; l++) {
vfloat32m4_t b_0 = __riscv_vle32_v_f32m4((const float *)&b_ptr[l].d[0], ncols_interleaved);
sumf_0 = __riscv_vfmacc_vf_f32m4(sumf_0, *(const float*)(&a_ptr[l]), b_0, ncols_interleaved);
}
__riscv_vse32_v_f32m4(&s[x * ncols_interleaved], sumf_0, ncols_interleaved);
}
return;
}
// Public f32 gemv entry points: dispatch to the RVV template when vector
// intrinsics are available, otherwise fall through to the scalar generic.
void ggml_gemv_f32_1x16_f32(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) {
#if defined __riscv_v_intrinsic
ggml_gemv_f32_1xM_f32<16>(n, s, bs, vx, vy, nr, nc);
return;
#endif
ggml_gemv_f32_1x16_f32_generic(n, s, bs, vx, vy, nr, nc);
}
void ggml_gemv_f32_1x32_f32(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) {
#if defined __riscv_v_intrinsic
ggml_gemv_f32_1xM_f32<32>(n, s, bs, vx, vy, nr, nc);
return;
#endif
ggml_gemv_f32_1x32_f32_generic(n, s, bs, vx, vy, nr, nc);
}
void ggml_gemv_f32_1x64_f32(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) {
#if defined __riscv_v_intrinsic
ggml_gemv_f32_1xM_f32<64>(n, s, bs, vx, vy, nr, nc);
return;
#endif
ggml_gemv_f32_1x64_f32_generic(n, s, bs, vx, vy, nr, nc);
}
void ggml_gemv_f32_1x128_f32(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) {
#if defined __riscv_v_intrinsic
ggml_gemv_f32_1xM_f32<128>(n, s, bs, vx, vy, nr, nc);
return;
#endif
ggml_gemv_f32_1x128_f32_generic(n, s, bs, vx, vy, nr, nc);
}
template<int ncols_interleaved>
static inline void ggml_gemm_f16_7x1xM_f16(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) {
@ -1867,35 +1840,19 @@ static inline void ggml_gemm_f16_7x1xM_f16(int n, float * GGML_RESTRICT s, size_
}
// Public f16 gemm entry points (7-row x 1-block tiling): dispatch to the
// RVV template when vector intrinsics are available, otherwise fall
// through to the scalar generic implementation.
void ggml_gemm_f16_7x1x16_f16(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) {
#if defined __riscv_v_intrinsic
ggml_gemm_f16_7x1xM_f16<16>(n, s, bs, vx, vy, nr, nc);
return;
#endif
ggml_gemm_f16_7x1x16_f16_generic(n, s, bs, vx, vy, nr, nc);
}
void ggml_gemm_f16_7x1x32_f16(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) {
#if defined __riscv_v_intrinsic
ggml_gemm_f16_7x1xM_f16<32>(n, s, bs, vx, vy, nr, nc);
return;
#endif
ggml_gemm_f16_7x1x32_f16_generic(n, s, bs, vx, vy, nr, nc);
}
void ggml_gemm_f16_7x1x64_f16(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) {
#if defined __riscv_v_intrinsic
ggml_gemm_f16_7x1xM_f16<64>(n, s, bs, vx, vy, nr, nc);
return;
#endif
ggml_gemm_f16_7x1x64_f16_generic(n, s, bs, vx, vy, nr, nc);
}
void ggml_gemm_f16_7x1x128_f16(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) {
#if defined __riscv_v_intrinsic
ggml_gemm_f16_7x1xM_f16<128>(n, s, bs, vx, vy, nr, nc);
return;
#endif
ggml_gemm_f16_7x1x128_f16_generic(n, s, bs, vx, vy, nr, nc);
}
template<int ncols_interleaved>
@ -1945,33 +1902,18 @@ static inline void ggml_gemm_f32_7x1xM_f32(int n, float * GGML_RESTRICT s, size_
}
// Public f32 gemm entry points (7-row x 1-block tiling): dispatch to the
// RVV template when vector intrinsics are available, otherwise fall
// through to the scalar generic implementation.
void ggml_gemm_f32_7x1x16_f32(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) {
#if defined __riscv_v_intrinsic
ggml_gemm_f32_7x1xM_f32<16>(n, s, bs, vx, vy, nr, nc);
return;
#endif
ggml_gemm_f32_7x1x16_f32_generic(n, s, bs, vx, vy, nr, nc);
}
void ggml_gemm_f32_7x1x32_f32(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) {
#if defined __riscv_v_intrinsic
ggml_gemm_f32_7x1xM_f32<32>(n, s, bs, vx, vy, nr, nc);
return;
#endif
ggml_gemm_f32_7x1x32_f32_generic(n, s, bs, vx, vy, nr, nc);
}
void ggml_gemm_f32_7x1x64_f32(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) {
#if defined __riscv_v_intrinsic
ggml_gemm_f32_7x1xM_f32<64>(n, s, bs, vx, vy, nr, nc);
return;
#endif
ggml_gemm_f32_7x1x64_f32_generic(n, s, bs, vx, vy, nr, nc);
}
void ggml_gemm_f32_7x1x128_f32(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) {
#if defined __riscv_v_intrinsic
ggml_gemm_f32_7x1xM_f32<128>(n, s, bs, vx, vy, nr, nc);
return;
#endif
ggml_gemm_f32_7x1x128_f32_generic(n, s, bs, vx, vy, nr, nc);
}
#endif

View File

@ -164,6 +164,14 @@ void ggml_quantize_mat_q8_K_4x1_generic(const float * GGML_RESTRICT x, void * GG
}
}
}
// Generic (scalar) repack wrappers: forward to the NxK repack template
// instantiated with <7, 1> — presumably 7 rows per tile, 1-element blocks;
// confirm against the template definition.
void ggml_repack_mat_f16_7x1_generic(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t k) {
ggml_repack_mat_f16_NxK_generic<7, 1>(x, vy, k);
}
void ggml_repack_mat_f32_7x1_generic(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t k) {
ggml_repack_mat_f32_NxK_generic<7, 1>(x, vy, k);
}
#endif
void ggml_quantize_mat_q8_0_4x4_generic(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t k) {
@ -344,16 +352,6 @@ void ggml_quantize_mat_q8_K_4x8_generic(const float * GGML_RESTRICT x, void * GG
}
}
#if defined __riscv_zvfh
// Generic (scalar) repack wrappers for the 7x1 interleave layout; thin
// forwards to the NxK repack template with N=7, K=1.
void ggml_repack_mat_f16_7x1_generic(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t k) {
ggml_repack_mat_f16_NxK_generic<7, 1>(x, vy, k);
}
void ggml_repack_mat_f32_7x1_generic(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t k) {
ggml_repack_mat_f32_NxK_generic<7, 1>(x, vy, k);
}
#endif
} // extern "C"
template <int64_t NB_ROWS, int64_t INTER_SIZE, ggml_type PARAM_TYPE>
@ -384,20 +382,18 @@ template <> void ggml_repack_mat_t<4, 8, GGML_TYPE_Q8_K>(const float * GGML_REST
}
#if defined __riscv_zvfh
template <> void ggml_quantize_mat_t<1, GGML_TYPE_Q8_0>(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t nrow, int64_t n_per_row) {
template <> void ggml_repack_mat_t<4, 1, GGML_TYPE_Q8_0>(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t nrow, int64_t n_per_row) {
assert(nrow == 4);
UNUSED(nrow);
ggml_quantize_mat_q8_0_4x1(x, vy, n_per_row);
}
template <> void ggml_quantize_mat_t<1, GGML_TYPE_Q8_K>(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t nrow, int64_t n_per_row) {
template <> void ggml_repack_mat_t<4, 1, GGML_TYPE_Q8_K>(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t nrow, int64_t n_per_row) {
assert(nrow == 4);
UNUSED(nrow);
ggml_quantize_mat_q8_K_4x1(x, vy, n_per_row);
}
#endif
#if defined __riscv_zvfh
template <> void ggml_repack_mat_t<7, 1, GGML_TYPE_F16>(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t nrow, int64_t n_per_row) {
assert(nrow == 7);
UNUSED(nrow);
@ -1845,9 +1841,7 @@ void ggml_gemv_q2_K_16x1_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs,
}
}
}
#endif
#if defined __riscv_zvfh
// Scalar reference gemv for the f16 1x16 layout; forwards to the KxM template.
void ggml_gemv_f16_1x16_f16_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) {
ggml_gemv_f16_KxM_f16_generic<1, 16>(n, s, bs, vx, vy, nr, nc);
}
@ -2944,9 +2938,7 @@ void ggml_gemm_q2_K_16x1_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs,
}
}
}
#endif
#if defined __riscv_zvfh
// Scalar reference gemm for the f16 7x1x16 layout; forwards to the NxKxM template.
void ggml_gemm_f16_7x1x16_f16_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) {
ggml_gemm_f16_NxKxM_f16_generic<7, 1, 16>(n, s, bs, vx, vy, nr, nc);
}
@ -4286,9 +4278,7 @@ template <> int repack<block_q8_0, 1, 16>(struct ggml_tensor * t, const void * d
// Repack specialization for q2_K into the 16x1 interleaved layout.
template <> int repack<block_q2_K, 1, 16>(struct ggml_tensor * t, const void * data, size_t data_size) {
return repack_q2_K_to_q2_K_16_bl(t, 1, data, data_size);
}
#endif
#if defined __riscv_zvfh
// Repack specialization for f16 tensors into the 16x1 interleaved layout.
template <> int repack<ggml_half, 1, 16>(struct ggml_tensor * t, const void * data, size_t data_size) {
return repack_f16_to_f16_MxK_bl<16, 1>(t, data, data_size);
}
@ -4411,9 +4401,7 @@ template <> void gemv<block_q8_0, 1, 16, GGML_TYPE_Q8_0>(int n, float * s, size_
// gemv trait specialization: q2_K weights (16x1 layout) against q8_K activations.
template <> void gemv<block_q2_K, 1, 16, GGML_TYPE_Q8_K>(int n, float * s, size_t bs, const void * vx, const void * vy, int nr, int nc) {
ggml_gemv_q2_K_16x1_q8_K(n, s, bs, vx, vy, nr, nc);
}
#endif
#if defined __riscv_zvfh
// gemv trait specialization: f16 weights (1x16 layout) against f16 activations.
template <> void gemv<ggml_half, 1, 16, GGML_TYPE_F16>(int n, float * s, size_t bs, const void * vx, const void * vy, int nr, int nc) {
ggml_gemv_f16_1x16_f16(n, s, bs, vx, vy, nr, nc);
}
@ -4523,28 +4511,26 @@ template <> void gemm<block_q8_0, 4, 8, 4, GGML_TYPE_Q8_0>(int n, float * s, siz
}
#if defined __riscv_zvfh
template <> void gemm<block_q4_0, 1, 16, GGML_TYPE_Q8_0>(int n, float * s, size_t bs, const void * vx, const void * vy, int nr, int nc) {
template <> void gemm<block_q4_0, 4, 1, 16, GGML_TYPE_Q8_0>(int n, float * s, size_t bs, const void * vx, const void * vy, int nr, int nc) {
ggml_gemm_q4_0_16x1_q8_0(n, s, bs, vx, vy, nr, nc);
}
template <> void gemm<block_q4_K, 1, 16, GGML_TYPE_Q8_K>(int n, float * s, size_t bs, const void * vx, const void * vy, int nr, int nc) {
template <> void gemm<block_q4_K, 4, 1, 16, GGML_TYPE_Q8_K>(int n, float * s, size_t bs, const void * vx, const void * vy, int nr, int nc) {
ggml_gemm_q4_K_16x1_q8_K(n, s, bs, vx, vy, nr, nc);
}
template <> void gemm<block_iq4_nl, 1, 16, GGML_TYPE_Q8_0>(int n, float * s, size_t bs, const void * vx, const void * vy, int nr, int nc) {
template <> void gemm<block_iq4_nl, 4, 1, 16, GGML_TYPE_Q8_0>(int n, float * s, size_t bs, const void * vx, const void * vy, int nr, int nc) {
ggml_gemm_iq4_nl_16x1_q8_0(n, s, bs, vx, vy, nr, nc);
}
template <> void gemm<block_q8_0, 1, 16, GGML_TYPE_Q8_0>(int n, float * s, size_t bs, const void * vx, const void * vy, int nr, int nc) {
template <> void gemm<block_q8_0, 4, 1, 16, GGML_TYPE_Q8_0>(int n, float * s, size_t bs, const void * vx, const void * vy, int nr, int nc) {
ggml_gemm_q8_0_16x1_q8_0(n, s, bs, vx, vy, nr, nc);
}
template <> void gemm<block_q2_K, 1, 16, GGML_TYPE_Q8_K>(int n, float * s, size_t bs, const void * vx, const void * vy, int nr, int nc) {
template <> void gemm<block_q2_K, 4, 1, 16, GGML_TYPE_Q8_K>(int n, float * s, size_t bs, const void * vx, const void * vy, int nr, int nc) {
ggml_gemm_q2_K_16x1_q8_K(n, s, bs, vx, vy, nr, nc);
}
#endif
#if defined __riscv_zvfh
// gemm trait specialization: f16 weights (7x1x16 tiling) against f16 activations.
template <> void gemm<ggml_half, 7, 1, 16, GGML_TYPE_F16>(int n, float * s, size_t bs, const void * vx, const void * vy, int nr, int nc) {
ggml_gemm_f16_7x1x16_f16(n, s, bs, vx, vy, nr, nc);
}
@ -4991,23 +4977,19 @@ static const ggml::cpu::tensor_traits * ggml_repack_get_optimal_repack_type(cons
// These implement outer-product style matrix multiplication kernels with
// an interleave of 1.
#if defined __riscv_zvfh
static const ggml::cpu::repack::tensor_traits<block_q4_0, 1, 16, GGML_TYPE_Q8_0> q4_0_16x1_q8_0;
static const ggml::cpu::repack::tensor_traits<block_q4_K, 1, 16, GGML_TYPE_Q8_K> q4_K_16x1_q8_K;
static const ggml::cpu::repack::tensor_traits<block_iq4_nl, 1, 16, GGML_TYPE_Q8_0> iq4_nl_16x1_q8_0;
static const ggml::cpu::repack::tensor_traits<block_q8_0, 1, 16, GGML_TYPE_Q8_0> q8_0_16x1_q8_0;
static const ggml::cpu::repack::tensor_traits<block_q2_K, 1, 16, GGML_TYPE_Q8_K> q2_K_16x1_q8_K;
#endif
static const ggml::cpu::repack::tensor_traits<block_q4_0, 4, 1, 16, GGML_TYPE_Q8_0> q4_0_16x1_q8_0;
static const ggml::cpu::repack::tensor_traits<block_q4_K, 4, 1, 16, GGML_TYPE_Q8_K> q4_K_16x1_q8_K;
static const ggml::cpu::repack::tensor_traits<block_iq4_nl, 4, 1, 16, GGML_TYPE_Q8_0> iq4_nl_16x1_q8_0;
static const ggml::cpu::repack::tensor_traits<block_q8_0, 4, 1, 16, GGML_TYPE_Q8_0> q8_0_16x1_q8_0;
static const ggml::cpu::repack::tensor_traits<block_q2_K, 4, 1, 16, GGML_TYPE_Q8_K> q2_K_16x1_q8_K;
// instance for F16
#if defined __riscv_zvfh
static const ggml::cpu::repack::tensor_traits<ggml_half, 7, 1, 16, GGML_TYPE_F16> f16_7x16x1_f16;
static const ggml::cpu::repack::tensor_traits<ggml_half, 7, 1, 32, GGML_TYPE_F16> f16_7x32x1_f16;
static const ggml::cpu::repack::tensor_traits<ggml_half, 7, 1, 64, GGML_TYPE_F16> f16_7x64x1_f16;
static const ggml::cpu::repack::tensor_traits<ggml_half, 7, 1, 128, GGML_TYPE_F16> f16_7x128x1_f16;
#endif
// instance for F32
#if defined __riscv_zvfh
static const ggml::cpu::repack::tensor_traits<float, 7, 1, 16, GGML_TYPE_F32> f32_7x16x1_f32;
static const ggml::cpu::repack::tensor_traits<float, 7, 1, 32, GGML_TYPE_F32> f32_7x32x1_f32;
static const ggml::cpu::repack::tensor_traits<float, 7, 1, 64, GGML_TYPE_F32> f32_7x64x1_f32;

View File

@ -149,6 +149,7 @@ struct block_f32 {
using block_f32_32x1 = block_f32<32, 1>;
using block_f32_7x1 = block_f32<7, 1>;
using block_f32_4x1 = block_f32<4, 1>;
#if defined(__cplusplus)
extern "C" {
@ -190,6 +191,8 @@ void ggml_gemm_mxfp4_4x4_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const v
void ggml_gemm_mxfp4_8x8_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc);
void ggml_gemm_q8_0_4x4_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc);
void ggml_gemm_q8_0_4x8_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc);
// RISC-V
#if defined __riscv_zvfh
void ggml_quantize_mat_q8_0_4x1(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t k);
void ggml_quantize_mat_q8_K_4x1(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t k);
@ -203,6 +206,28 @@ void ggml_gemm_q4_K_16x1_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const v
void ggml_gemm_iq4_nl_16x1_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc);
void ggml_gemm_q8_0_16x1_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc);
void ggml_gemm_q2_K_16x1_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc);
// FP16
void ggml_repack_mat_f16_7x1_generic(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t k);
void ggml_gemv_f16_1x16_f16_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc);
void ggml_gemv_f16_1x32_f16_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc);
void ggml_gemv_f16_1x64_f16_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc);
void ggml_gemv_f16_1x128_f16_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc);
void ggml_gemm_f16_7x1x16_f16_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc);
void ggml_gemm_f16_7x1x32_f16_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc);
void ggml_gemm_f16_7x1x64_f16_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc);
void ggml_gemm_f16_7x1x128_f16_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc);
// FP32
void ggml_repack_mat_f32_7x1_generic(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t k);
void ggml_gemv_f32_1x16_f32_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc);
void ggml_gemv_f32_1x32_f32_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc);
void ggml_gemv_f32_1x64_f32_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc);
void ggml_gemv_f32_1x128_f32_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc);
void ggml_gemm_f32_7x1x16_f32_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc);
void ggml_gemm_f32_7x1x32_f32_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc);
void ggml_gemm_f32_7x1x64_f32_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc);
void ggml_gemm_f32_7x1x128_f32_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc);
#endif
// Native implementations
@ -242,6 +267,8 @@ void ggml_gemm_mxfp4_4x4_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs,
void ggml_gemm_mxfp4_8x8_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc);
void ggml_gemm_q8_0_4x4_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc);
void ggml_gemm_q8_0_4x8_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc);
// RISC-V
#if defined __riscv_zvfh
void ggml_quantize_mat_q8_0_4x1_generic(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t k);
void ggml_quantize_mat_q8_K_4x1_generic(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t k);
@ -255,42 +282,19 @@ void ggml_gemm_q4_K_16x1_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs,
void ggml_gemm_q8_0_16x1_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc);
void ggml_gemm_q2_K_16x1_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc);
void ggml_gemm_iq4_nl_16x1_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc);
#endif
// FP16
void ggml_repack_mat_f16_4x1_generic(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t k);
void ggml_repack_mat_f16_7x1_generic(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t k);
void ggml_gemv_f16_1x16_f16_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc);
void ggml_gemv_f16_1x32_f16_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc);
void ggml_gemv_f16_1x64_f16_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc);
void ggml_gemv_f16_1x128_f16_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc);
void ggml_gemm_f16_4x1x32_f16_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc);
void ggml_gemm_f16_7x1x16_f16_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc);
void ggml_gemm_f16_7x1x32_f16_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc);
void ggml_gemm_f16_7x1x64_f16_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc);
void ggml_gemm_f16_7x1x128_f16_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc);
void ggml_repack_mat_f16_4x1(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t k);
void ggml_repack_mat_f16_7x1(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t k);
void ggml_gemv_f16_1x16_f16(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc);
void ggml_gemv_f16_1x32_f16(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc);
void ggml_gemv_f16_1x64_f16(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc);
void ggml_gemv_f16_1x128_f16(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc);
void ggml_gemm_f16_4x1x32_f16(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc);
void ggml_gemm_f16_7x1x16_f16(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc);
void ggml_gemm_f16_7x1x32_f16(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc);
void ggml_gemm_f16_7x1x64_f16(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc);
void ggml_gemm_f16_7x1x128_f16(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc);
// FP32
void ggml_repack_mat_f32_7x1_generic(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t k);
void ggml_gemv_f32_1x16_f32_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc);
void ggml_gemv_f32_1x32_f32_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc);
void ggml_gemv_f32_1x64_f32_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc);
void ggml_gemv_f32_1x128_f32_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc);
void ggml_gemm_f32_7x1x16_f32_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc);
void ggml_gemm_f32_7x1x32_f32_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc);
void ggml_gemm_f32_7x1x64_f32_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc);
void ggml_gemm_f32_7x1x128_f32_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc);
void ggml_repack_mat_f32_7x1(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t k);
void ggml_gemv_f32_1x16_f32(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc);
void ggml_gemv_f32_1x32_f32(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc);
@ -300,6 +304,7 @@ void ggml_gemm_f32_7x1x16_f32(int n, float * GGML_RESTRICT s, size_t bs, const v
void ggml_gemm_f32_7x1x32_f32(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc);
void ggml_gemm_f32_7x1x64_f32(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc);
void ggml_gemm_f32_7x1x128_f32(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc);
#endif
#if defined(__cplusplus)
} // extern "C"