ggml-cpu : fix RISC-V Q4_0 repack select and RVV feature reporting (#17951)
* ggml-cpu:fix RISC-V Q4_0 repack select and RVV feature reporting
  Signed-off-by: Wang Yang <yangwang@iscas.ac.cn>
* using the name VLEN instead of CNT
* Update ggml/include/ggml-cpu.h
---------
Signed-off-by: Wang Yang <yangwang@iscas.ac.cn>
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
This commit is contained in:
parent
17158965ac
commit
51604435e8
|
|
@ -99,6 +99,7 @@ extern "C" {
|
||||||
GGML_BACKEND_API int ggml_cpu_has_sme (void);
|
GGML_BACKEND_API int ggml_cpu_has_sme (void);
|
||||||
// other
|
// other
|
||||||
GGML_BACKEND_API int ggml_cpu_has_riscv_v (void);
|
GGML_BACKEND_API int ggml_cpu_has_riscv_v (void);
|
||||||
|
GGML_BACKEND_API int ggml_cpu_get_rvv_vlen (void); // risc-v vector length in bytes
|
||||||
GGML_BACKEND_API int ggml_cpu_has_vsx (void);
|
GGML_BACKEND_API int ggml_cpu_has_vsx (void);
|
||||||
GGML_BACKEND_API int ggml_cpu_has_vxe (void);
|
GGML_BACKEND_API int ggml_cpu_has_vxe (void);
|
||||||
GGML_BACKEND_API int ggml_cpu_has_wasm_simd (void);
|
GGML_BACKEND_API int ggml_cpu_has_wasm_simd (void);
|
||||||
|
|
|
||||||
|
|
@ -81,6 +81,11 @@ struct ggml_arm_arch_features_type {
|
||||||
} ggml_arm_arch_features = { 0 };
|
} ggml_arm_arch_features = { 0 };
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
// RISC-V feature cache; only compiled when targeting RISC-V (__riscv defined).
#if defined(__riscv)
|
||||||
|
// Holds RISC-V values that are looked up once and then read back by the
// ggml_cpu_* query functions.
struct ggml_riscv_arch_features_type {
|
||||||
|
// Value reported by ggml_cpu_get_rvv_vlen(); its declaration documents it
// as the RISC-V vector length in bytes.
int rvv_vlen;
|
||||||
|
// Zero-initialized, so rvv_vlen reads as 0 until something assigns it.
} ggml_riscv_arch_features = { 0 };
|
||||||
|
#endif
|
||||||
|
|
||||||
#if defined(_WIN32)
|
#if defined(_WIN32)
|
||||||
|
|
||||||
|
|
@ -703,6 +708,15 @@ static void ggml_init_arm_arch_features(void) {}
|
||||||
#endif
|
#endif
|
||||||
#endif // __ARM_ARCH
|
#endif // __ARM_ARCH
|
||||||
|
|
||||||
|
// ggml_init_riscv_arch_features(): fill in ggml_riscv_arch_features.
// Two variants are provided so the function always exists:
//  - RISC-V build with vector intrinsics: records the vector length.
//  - any other build: empty stub.
#if defined(__riscv) && defined(__riscv_v_intrinsic)
|
||||||
|
#include <riscv_vector.h>
|
||||||
|
// Stores the result of __riscv_vlenb() in rvv_vlen.
static void ggml_init_riscv_arch_features(void) {
|
||||||
|
ggml_riscv_arch_features.rvv_vlen = __riscv_vlenb();
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
// No RVV intrinsics available at compile time: nothing to detect.
static void ggml_init_riscv_arch_features(void) {}
|
||||||
|
#endif
|
||||||
|
|
||||||
struct ggml_tensor * ggml_new_i32(struct ggml_context * ctx, int32_t value) {
|
struct ggml_tensor * ggml_new_i32(struct ggml_context * ctx, int32_t value) {
|
||||||
GGML_ASSERT(!ggml_get_no_alloc(ctx));
|
GGML_ASSERT(!ggml_get_no_alloc(ctx));
|
||||||
|
|
||||||
|
|
@ -3459,6 +3473,14 @@ int ggml_cpu_has_riscv_v(void) {
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Returns the stored RISC-V vector length (ggml_riscv_arch_features.rvv_vlen)
// when built for RISC-V with vector intrinsics, and 0 on every other build.
// The stored field is zero-initialized and only written by
// ggml_init_riscv_arch_features(), so 0 is also returned before that has run.
int ggml_cpu_get_rvv_vlen(void) {
|
||||||
|
#if defined(__riscv) && defined(__riscv_v_intrinsic)
|
||||||
|
return ggml_riscv_arch_features.rvv_vlen;
|
||||||
|
#else
|
||||||
|
// RVV not available in this build.
return 0;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
int ggml_cpu_has_f16c(void) {
|
int ggml_cpu_has_f16c(void) {
|
||||||
#if defined(__F16C__)
|
#if defined(__F16C__)
|
||||||
return 1;
|
return 1;
|
||||||
|
|
@ -3625,6 +3647,10 @@ void ggml_cpu_init(void) {
|
||||||
ggml_init_arm_arch_features();
|
ggml_init_arm_arch_features();
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if defined(__riscv)
|
||||||
|
ggml_init_riscv_arch_features();
|
||||||
|
#endif
|
||||||
|
|
||||||
is_first_call = false;
|
is_first_call = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -583,6 +583,10 @@ static ggml_backend_feature * ggml_backend_cpu_get_features(ggml_backend_reg_t r
|
||||||
if (ggml_cpu_has_riscv_v()) {
|
if (ggml_cpu_has_riscv_v()) {
|
||||||
features.push_back({ "RISCV_V", "1" });
|
features.push_back({ "RISCV_V", "1" });
|
||||||
}
|
}
|
||||||
|
if (ggml_cpu_get_rvv_vlen() > 0) {
|
||||||
|
static std::string rvv_vlen = std::to_string(ggml_cpu_get_rvv_vlen());
|
||||||
|
features.push_back({ "RVV_VLEN", rvv_vlen.c_str() });
|
||||||
|
}
|
||||||
if (ggml_cpu_has_vsx()) {
|
if (ggml_cpu_has_vsx()) {
|
||||||
features.push_back({ "VSX", "1" });
|
features.push_back({ "VSX", "1" });
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -2169,7 +2169,8 @@ static const ggml::cpu::tensor_traits * ggml_repack_get_optimal_repack_type(cons
|
||||||
static const ggml::cpu::repack::tensor_traits<block_iq4_nl, 8, 8, GGML_TYPE_Q8_0> iq4_nl_8x8_q8_0;
|
static const ggml::cpu::repack::tensor_traits<block_iq4_nl, 8, 8, GGML_TYPE_Q8_0> iq4_nl_8x8_q8_0;
|
||||||
|
|
||||||
if (cur->type == GGML_TYPE_Q4_0) {
|
if (cur->type == GGML_TYPE_Q4_0) {
|
||||||
if (ggml_cpu_has_avx2() || (ggml_cpu_has_sve() && ggml_cpu_has_matmul_int8() && ggml_cpu_get_sve_cnt() == QK8_0)) {
|
if (ggml_cpu_has_avx2() || (ggml_cpu_has_sve() && ggml_cpu_has_matmul_int8() && ggml_cpu_get_sve_cnt() == QK8_0)
|
||||||
|
|| (ggml_cpu_has_riscv_v() && (ggml_cpu_get_rvv_vlen() >= QK4_0))) {
|
||||||
if (cur->ne[1] % 8 == 0) {
|
if (cur->ne[1] % 8 == 0) {
|
||||||
return &q4_0_8x8_q8_0;
|
return &q4_0_8x8_q8_0;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue