ggml-cpu: add ggml_thread_cpu_relax with Zihintpause support (#17784)

* ggml-cpu: add ggml_thread_cpu_relax with Zihintpause support

Signed-off-by: Wang Yang <yangwang@iscas.ac.cn>

* cmake: enable RISC-V zihintpause extension for Spacemit builds

* readme : add ZIHINTPAUSE support for RISC-V

---------

Signed-off-by: Wang Yang <yangwang@iscas.ac.cn>
This commit is contained in:
ixgbe 2025-12-08 16:41:34 +08:00 committed by GitHub
parent 4d3726278b
commit 79d61896d3
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 16 additions and 1 deletions

View File

@ -291,6 +291,7 @@ jobs:
-DGGML_RVV=ON \ -DGGML_RVV=ON \
-DGGML_RV_ZFH=ON \ -DGGML_RV_ZFH=ON \
-DGGML_RV_ZICBOP=ON \ -DGGML_RV_ZICBOP=ON \
-DGGML_RV_ZIHINTPAUSE=ON \
-DRISCV64_SPACEMIT_IME_SPEC=RISCV64_SPACEMIT_IME1 \ -DRISCV64_SPACEMIT_IME_SPEC=RISCV64_SPACEMIT_IME1 \
-DCMAKE_TOOLCHAIN_FILE=${PWD}/cmake/riscv64-spacemit-linux-gnu-gcc.cmake -DCMAKE_TOOLCHAIN_FILE=${PWD}/cmake/riscv64-spacemit-linux-gnu-gcc.cmake

View File

@ -61,7 +61,7 @@ range of hardware - locally and in the cloud.
- Plain C/C++ implementation without any dependencies - Plain C/C++ implementation without any dependencies
- Apple silicon is a first-class citizen - optimized via ARM NEON, Accelerate and Metal frameworks - Apple silicon is a first-class citizen - optimized via ARM NEON, Accelerate and Metal frameworks
- AVX, AVX2, AVX512 and AMX support for x86 architectures - AVX, AVX2, AVX512 and AMX support for x86 architectures
- RVV, ZVFH, ZFH and ZICBOP support for RISC-V architectures - RVV, ZVFH, ZFH, ZICBOP and ZIHINTPAUSE support for RISC-V architectures
- 1.5-bit, 2-bit, 3-bit, 4-bit, 5-bit, 6-bit, and 8-bit integer quantization for faster inference and reduced memory use - 1.5-bit, 2-bit, 3-bit, 4-bit, 5-bit, 6-bit, and 8-bit integer quantization for faster inference and reduced memory use
- Custom CUDA kernels for running LLMs on NVIDIA GPUs (support for AMD GPUs via HIP and Moore Threads GPUs via MUSA) - Custom CUDA kernels for running LLMs on NVIDIA GPUs (support for AMD GPUs via HIP and Moore Threads GPUs via MUSA)
- Vulkan and SYCL backend support - Vulkan and SYCL backend support

View File

@ -19,6 +19,7 @@ cmake -B build \
-DGGML_RVV=ON \ -DGGML_RVV=ON \
-DGGML_RV_ZFH=ON \ -DGGML_RV_ZFH=ON \
-DGGML_RV_ZICBOP=ON \ -DGGML_RV_ZICBOP=ON \
-DGGML_RV_ZIHINTPAUSE=ON \
-DRISCV64_SPACEMIT_IME_SPEC=RISCV64_SPACEMIT_IME1 \ -DRISCV64_SPACEMIT_IME_SPEC=RISCV64_SPACEMIT_IME1 \
-DCMAKE_TOOLCHAIN_FILE=${PWD}/cmake/riscv64-spacemit-linux-gnu-gcc.cmake \ -DCMAKE_TOOLCHAIN_FILE=${PWD}/cmake/riscv64-spacemit-linux-gnu-gcc.cmake \
-DCMAKE_INSTALL_PREFIX=build/installed -DCMAKE_INSTALL_PREFIX=build/installed

View File

@ -168,6 +168,7 @@ option(GGML_RVV "ggml: enable rvv" ON)
option(GGML_RV_ZFH "ggml: enable riscv zfh" ON) option(GGML_RV_ZFH "ggml: enable riscv zfh" ON)
option(GGML_RV_ZVFH "ggml: enable riscv zvfh" ON) option(GGML_RV_ZVFH "ggml: enable riscv zvfh" ON)
option(GGML_RV_ZICBOP "ggml: enable riscv zicbop" ON) option(GGML_RV_ZICBOP "ggml: enable riscv zicbop" ON)
option(GGML_RV_ZIHINTPAUSE "ggml: enable riscv zihintpause " ON)
option(GGML_XTHEADVECTOR "ggml: enable xtheadvector" OFF) option(GGML_XTHEADVECTOR "ggml: enable xtheadvector" OFF)
option(GGML_VXE "ggml: enable vxe" ${GGML_NATIVE}) option(GGML_VXE "ggml: enable vxe" ${GGML_NATIVE})

View File

@ -469,6 +469,9 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
if (GGML_RV_ZICBOP) if (GGML_RV_ZICBOP)
string(APPEND MARCH_STR "_zicbop") string(APPEND MARCH_STR "_zicbop")
endif() endif()
if (GGML_RV_ZIHINTPAUSE)
string(APPEND MARCH_STR "_zihintpause")
endif()
list(APPEND ARCH_FLAGS "-march=${MARCH_STR}" -mabi=lp64d) list(APPEND ARCH_FLAGS "-march=${MARCH_STR}" -mabi=lp64d)
else() else()
# Begin with the lowest baseline # Begin with the lowest baseline

View File

@ -490,6 +490,15 @@ static inline void ggml_thread_cpu_relax(void) {
static inline void ggml_thread_cpu_relax(void) { static inline void ggml_thread_cpu_relax(void) {
_mm_pause(); _mm_pause();
} }
#elif defined(__riscv)
/*
 * RISC-V flavour of the spin-wait hint: emits one `pause` instruction and
 * returns. Takes no arguments and yields no value.
 */
static inline void ggml_thread_cpu_relax(void) {
#if !defined(__riscv_zihintpause)
    /* The compiler is not targeting Zihintpause, so the `pause` mnemonic is
     * presumably not accepted by the assembler; emit the raw 32-bit encoding
     * of the pause instruction instead. */
    __asm__ volatile (".4byte 0x100000F");
#else
    /* Zihintpause is enabled for this build, so use the mnemonic directly. */
    __asm__ volatile ("pause");
#endif
}
#else #else
static inline void ggml_thread_cpu_relax(void) {;} static inline void ggml_thread_cpu_relax(void) {;}
#endif #endif