diff --git a/ggml/src/ggml-cpu/arch/riscv/dispatch.cpp b/ggml/src/ggml-cpu/arch/riscv/dispatch.cpp index a028c8a006..55047c7455 100644 --- a/ggml/src/ggml-cpu/arch/riscv/dispatch.cpp +++ b/ggml/src/ggml-cpu/arch/riscv/dispatch.cpp @@ -9,7 +9,7 @@ extern "C" { #include "kernels.inc" } -#if defined(__riscv_v) +#if defined(__riscv_v) && __riscv_v >= 1000000 // helper macros for runtime kernel dispatch diff --git a/ggml/src/ggml-cpu/arch/riscv/quants.c b/ggml/src/ggml-cpu/arch/riscv/quants.c index 08ae4497ab..b1b987b721 100644 --- a/ggml/src/ggml-cpu/arch/riscv/quants.c +++ b/ggml/src/ggml-cpu/arch/riscv/quants.c @@ -32,7 +32,7 @@ void quantize_row_q8_0(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, i block_q8_0 * GGML_RESTRICT y = vy; -#if defined(__riscv_v) +#if defined(__riscv_v) && __riscv_v >= 1000000 size_t vl = QK8_0; @@ -72,7 +72,7 @@ void quantize_row_q8_1(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, i block_q8_1 * GGML_RESTRICT y = vy; -#if defined(__riscv_v) +#if defined(__riscv_v) && __riscv_v >= 1000000 size_t vl = QK8_1; @@ -118,7 +118,7 @@ void quantize_row_q8_1(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, i //===================================== Dot products ================================= void ggml_vec_dot_q4_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) { -#if defined(__riscv_v) +#if defined(__riscv_v) && __riscv_v >= 1000000 const int qk = QK8_0; const int nb = n / qk; @@ -173,7 +173,7 @@ void ggml_vec_dot_q4_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const voi } void ggml_vec_dot_q4_1_q8_1(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) { -#if defined(__riscv_v) +#if defined(__riscv_v) && __riscv_v >= 1000000 const int qk = QK8_1; const int nb = n / qk; @@ -224,7 +224,7 @@ void ggml_vec_dot_q4_1_q8_1(int n, float * GGML_RESTRICT s, size_t bs, const voi } void ggml_vec_dot_q5_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) { -#if defined(__riscv_v) +#if defined(__riscv_v) && __riscv_v >= 1000000 const int qk = QK8_0; const int nb = n / qk; @@ -278,7 +278,7 @@ void ggml_vec_dot_q5_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const voi } void ggml_vec_dot_q5_1_q8_1(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) { -#if defined(__riscv_v) +#if defined(__riscv_v) && __riscv_v >= 1000000 const int qk = QK8_1; const int nb = n / qk; @@ -347,7 +347,7 @@ void ggml_vec_dot_q8_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const voi int ib = 0; float sumf = 0; -#if defined(__riscv_v) +#if defined(__riscv_v) && __riscv_v >= 1000000 size_t vl = qk; for (; ib < nb; ++ib) { @@ -488,7 +488,7 @@ void ggml_vec_dot_q2_K_q8_K_071(int n, float * GGML_RESTRICT s, size_t bs, const *s = sumf; } -#elif defined(__riscv_v) +#elif defined(__riscv_v) && __riscv_v >= 1000000 void ggml_vec_dot_q2_K_q8_K_256(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) { assert(nrc == 1); @@ -885,7 +885,7 @@ void ggml_vec_dot_q3_K_q8_K_071(int n, float * GGML_RESTRICT s, size_t bs, const *s = sumf; } -#elif defined(__riscv_v) +#elif defined(__riscv_v) && __riscv_v >= 1000000 void ggml_vec_dot_q3_K_q8_K_256(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) { assert(n % QK_K == 0); @@ -1291,7 +1291,7 @@ void ggml_vec_dot_q4_K_q8_K_071(int n, float * GGML_RESTRICT s, size_t bs, const *s = sumf; } -#elif defined(__riscv_v) +#elif defined(__riscv_v) && __riscv_v >= 1000000 void ggml_vec_dot_q4_K_q8_K_256(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) { assert(n % QK_K == 0); @@ -1590,7 +1590,7 @@ void ggml_vec_dot_q4_K_q8_K_128(int n, float * GGML_RESTRICT s, size_t bs, const #endif // ggml_vec_dot_q4_K_q8_K -#if defined __riscv_v +#if defined(__riscv_v) && __riscv_v >= 1000000 void ggml_vec_dot_q5_K_q8_K_128(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) { assert(n % QK_K == 0); @@ -1793,7 +1793,7 @@ void ggml_vec_dot_q6_K_q8_K_071(int n, float * GGML_RESTRICT s, size_t bs, const *s = sumf; } -#elif defined(__riscv_v) +#elif defined(__riscv_v) && __riscv_v >= 1000000 void ggml_vec_dot_q6_K_q8_K_256(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) { assert(n % QK_K == 0); diff --git a/ggml/src/ggml-cpu/arch/riscv/repack.cpp b/ggml/src/ggml-cpu/arch/riscv/repack.cpp index 2a35ff9ad8..3e0d9059f5 100644 --- a/ggml/src/ggml-cpu/arch/riscv/repack.cpp +++ b/ggml/src/ggml-cpu/arch/riscv/repack.cpp @@ -43,7 +43,7 @@ void ggml_gemv_q4_0_8x8_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const vo UNUSED(ncols_interleaved); UNUSED(blocklen); -#if defined __riscv_v +#if defined(__riscv_v) && __riscv_v >= 1000000 if (__riscv_vlenb() >= QK4_0) { const size_t vl = QK4_0; @@ -135,7 +135,7 @@ void ggml_gemm_q4_0_8x8_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const vo UNUSED(ncols_interleaved); UNUSED(blocklen); -#if defined __riscv_v +#if defined(__riscv_v) && __riscv_v >= 1000000 if (__riscv_vlenb() >= QK4_0) { const size_t vl = QK4_0; diff --git a/ggml/src/ggml-cpu/llamafile/sgemm.cpp b/ggml/src/ggml-cpu/llamafile/sgemm.cpp index 7dc36d4f8a..77d0214acb 100644 --- a/ggml/src/ggml-cpu/llamafile/sgemm.cpp +++ b/ggml/src/ggml-cpu/llamafile/sgemm.cpp @@ -391,6 +391,8 @@ template <> inline vfloat16m2_t load(const ggml_fp16_t *p) { template <> inline vfloat16m4_t load(const ggml_fp16_t *p) { return __riscv_vle16_v_f16m4(reinterpret_cast(p), __riscv_vsetvlmax_e16m4()); } +#endif +#if defined(__riscv_v) && __riscv_v >= 1000000 template <> inline vfloat32m1_t load(const float *p) { return __riscv_vle32_v_f32m1(p, __riscv_vsetvlmax_e32m1()); } @@ -432,6 +434,8 @@ template <> inline vfloat16m2_t set_zero() { template <> inline vfloat16m4_t set_zero() { return __riscv_vfmv_v_f_f16m4(0, __riscv_vsetvlmax_e16m4()); } +#endif +#if defined(__riscv_v) && __riscv_v >= 1000000 template <> inline vfloat32m1_t set_zero() { return __riscv_vfmv_v_f_f32m1(0.0f, __riscv_vsetvlmax_e32m1()); } @@ -446,7 +450,7 @@ template <> inline vfloat32m8_t set_zero() { } #endif -#if defined(__riscv_v_intrinsic) +#if defined(__riscv_v) && __riscv_v >= 1000000 template size_t vlmax() { if constexpr (std::is_same_v) { return __riscv_vsetvlmax_e16mf2(); } else if constexpr (std::is_same_v) { return __riscv_vsetvlmax_e16m1(); } @@ -633,7 +637,7 @@ class tinyBLAS { const int64_t ldc; }; -#if defined(__riscv_v_intrinsic) +#if defined(__riscv_v) && __riscv_v >= 1000000 template class tinyBLAS_RVV { public: diff --git a/ggml/src/ggml-cpu/simd-mappings.h b/ggml/src/ggml-cpu/simd-mappings.h index 101a9c086b..0a27c8e9e1 100644 --- a/ggml/src/ggml-cpu/simd-mappings.h +++ b/ggml/src/ggml-cpu/simd-mappings.h @@ -97,6 +97,9 @@ extern "C" { return r; } #elif defined(__riscv) && defined(__riscv_zfhmin) + // suppress _Float16 warnings + #pragma GCC diagnostic push + #pragma GCC diagnostic ignored "-Wpedantic" static inline float riscv_compute_fp16_to_fp32(ggml_fp16_t h) { _Float16 hf; memcpy(&hf, &h, sizeof(ggml_fp16_t)); @@ -109,6 +112,7 @@ extern "C" { memcpy(&res, &hf, sizeof(ggml_fp16_t)); return res; } + #pragma GCC diagnostic pop #define GGML_CPU_COMPUTE_FP16_TO_FP32(x) riscv_compute_fp16_to_fp32(x) #define GGML_CPU_COMPUTE_FP32_TO_FP16(x) riscv_compute_fp32_to_fp16(x) @@ -1171,7 +1175,7 @@ static inline void __lzs_f16cx4_store(ggml_fp16_t * x, float32x4_t v_y) { #define GGML_F16_VEC_MUL GGML_F32x4_MUL #define GGML_F16_VEC_REDUCE GGML_F32x4_REDUCE -#elif defined(__riscv_v_intrinsic) +#elif defined(__riscv_v) && __riscv_v >= 1000000 // compatible with vlen >= 128