From 51604435e8efffb4f2e1991ddd2bbc01774dd0fc Mon Sep 17 00:00:00 2001 From: ixgbe <1113177880@qq.com> Date: Fri, 12 Dec 2025 22:26:03 +0800 Subject: [PATCH] ggml-cpu : fix RISC-V Q4_0 repack select and RVV feature reporting (#17951) * ggml-cpu:fix RISC-V Q4_0 repack select and RVV feature reporting Signed-off-by: Wang Yang * using the name VLEN instead of CNT * Update ggml/include/ggml-cpu.h --------- Signed-off-by: Wang Yang Co-authored-by: Georgi Gerganov --- ggml/include/ggml-cpu.h | 1 + ggml/src/ggml-cpu/ggml-cpu.c | 26 ++++++++++++++++++++++++++ ggml/src/ggml-cpu/ggml-cpu.cpp | 4 ++++ ggml/src/ggml-cpu/repack.cpp | 3 ++- 4 files changed, 33 insertions(+), 1 deletion(-) diff --git a/ggml/include/ggml-cpu.h b/ggml/include/ggml-cpu.h index 9edd485136..4f3b99c8d0 100644 --- a/ggml/include/ggml-cpu.h +++ b/ggml/include/ggml-cpu.h @@ -99,6 +99,7 @@ extern "C" { GGML_BACKEND_API int ggml_cpu_has_sme (void); // other GGML_BACKEND_API int ggml_cpu_has_riscv_v (void); + GGML_BACKEND_API int ggml_cpu_get_rvv_vlen (void); // risc-v vector length in bytes GGML_BACKEND_API int ggml_cpu_has_vsx (void); GGML_BACKEND_API int ggml_cpu_has_vxe (void); GGML_BACKEND_API int ggml_cpu_has_wasm_simd (void); diff --git a/ggml/src/ggml-cpu/ggml-cpu.c b/ggml/src/ggml-cpu/ggml-cpu.c index c47511adcb..a59b518938 100644 --- a/ggml/src/ggml-cpu/ggml-cpu.c +++ b/ggml/src/ggml-cpu/ggml-cpu.c @@ -81,6 +81,11 @@ struct ggml_arm_arch_features_type { } ggml_arm_arch_features = { 0 }; #endif +#if defined(__riscv) +struct ggml_riscv_arch_features_type { + int rvv_vlen; +} ggml_riscv_arch_features = { 0 }; +#endif #if defined(_WIN32) @@ -703,6 +708,15 @@ static void ggml_init_arm_arch_features(void) {} #endif #endif // __ARM_ARCH +#if defined(__riscv) && defined(__riscv_v_intrinsic) +#include +static void ggml_init_riscv_arch_features(void) { + ggml_riscv_arch_features.rvv_vlen = __riscv_vlenb(); +} +#else +static void ggml_init_riscv_arch_features(void) {} +#endif + struct ggml_tensor * ggml_new_i32(struct ggml_context * ctx, int32_t value) { GGML_ASSERT(!ggml_get_no_alloc(ctx)); @@ -3459,6 +3473,14 @@ int ggml_cpu_has_riscv_v(void) { #endif } +int ggml_cpu_get_rvv_vlen(void) { +#if defined(__riscv) && defined(__riscv_v_intrinsic) + return ggml_riscv_arch_features.rvv_vlen; +#else + return 0; +#endif +} + int ggml_cpu_has_f16c(void) { #if defined(__F16C__) return 1; @@ -3625,6 +3647,10 @@ void ggml_cpu_init(void) { ggml_init_arm_arch_features(); #endif +#if defined(__riscv) + ggml_init_riscv_arch_features(); +#endif + is_first_call = false; } diff --git a/ggml/src/ggml-cpu/ggml-cpu.cpp b/ggml/src/ggml-cpu/ggml-cpu.cpp index 3191faaa4c..f4713a4218 100644 --- a/ggml/src/ggml-cpu/ggml-cpu.cpp +++ b/ggml/src/ggml-cpu/ggml-cpu.cpp @@ -583,6 +583,10 @@ static ggml_backend_feature * ggml_backend_cpu_get_features(ggml_backend_reg_t r if (ggml_cpu_has_riscv_v()) { features.push_back({ "RISCV_V", "1" }); } + if (ggml_cpu_get_rvv_vlen() > 0) { + static std::string rvv_vlen = std::to_string(ggml_cpu_get_rvv_vlen()); + features.push_back({ "RVV_VLEN", rvv_vlen.c_str() }); + } if (ggml_cpu_has_vsx()) { features.push_back({ "VSX", "1" }); } diff --git a/ggml/src/ggml-cpu/repack.cpp b/ggml/src/ggml-cpu/repack.cpp index 9f0d449bd6..b70ea7d78b 100644 --- a/ggml/src/ggml-cpu/repack.cpp +++ b/ggml/src/ggml-cpu/repack.cpp @@ -2169,7 +2169,8 @@ static const ggml::cpu::tensor_traits * ggml_repack_get_optimal_repack_type(cons static const ggml::cpu::repack::tensor_traits iq4_nl_8x8_q8_0; if (cur->type == GGML_TYPE_Q4_0) { - if (ggml_cpu_has_avx2() || (ggml_cpu_has_sve() && ggml_cpu_has_matmul_int8() && ggml_cpu_get_sve_cnt() == QK8_0)) { + if (ggml_cpu_has_avx2() || (ggml_cpu_has_sve() && ggml_cpu_has_matmul_int8() && ggml_cpu_get_sve_cnt() == QK8_0) + || (ggml_cpu_has_riscv_v() && (ggml_cpu_get_rvv_vlen() >= QK4_0))) { if (cur->ne[1] % 8 == 0) { return &q4_0_8x8_q8_0; }