ggml-cpu: add ggml_thread_cpu_relax with Zihintpause support (#17784)
* ggml-cpu: add ggml_thread_cpu_relax with Zihintpause support

  Signed-off-by: Wang Yang <yangwang@iscas.ac.cn>

* cmake: enable RISC-V zihintpause extension for Spacemit builds

* readme : add ZIHINTPAUSE support for RISC-V

---------

Signed-off-by: Wang Yang <yangwang@iscas.ac.cn>
This commit is contained in:
parent
4d3726278b
commit
79d61896d3
|
|
@ -291,6 +291,7 @@ jobs:
|
||||||
-DGGML_RVV=ON \
|
-DGGML_RVV=ON \
|
||||||
-DGGML_RV_ZFH=ON \
|
-DGGML_RV_ZFH=ON \
|
||||||
-DGGML_RV_ZICBOP=ON \
|
-DGGML_RV_ZICBOP=ON \
|
||||||
|
-DGGML_RV_ZIHINTPAUSE=ON \
|
||||||
-DRISCV64_SPACEMIT_IME_SPEC=RISCV64_SPACEMIT_IME1 \
|
-DRISCV64_SPACEMIT_IME_SPEC=RISCV64_SPACEMIT_IME1 \
|
||||||
-DCMAKE_TOOLCHAIN_FILE=${PWD}/cmake/riscv64-spacemit-linux-gnu-gcc.cmake
|
-DCMAKE_TOOLCHAIN_FILE=${PWD}/cmake/riscv64-spacemit-linux-gnu-gcc.cmake
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -61,7 +61,7 @@ range of hardware - locally and in the cloud.
|
||||||
- Plain C/C++ implementation without any dependencies
|
- Plain C/C++ implementation without any dependencies
|
||||||
- Apple silicon is a first-class citizen - optimized via ARM NEON, Accelerate and Metal frameworks
|
- Apple silicon is a first-class citizen - optimized via ARM NEON, Accelerate and Metal frameworks
|
||||||
- AVX, AVX2, AVX512 and AMX support for x86 architectures
|
- AVX, AVX2, AVX512 and AMX support for x86 architectures
|
||||||
- RVV, ZVFH, ZFH and ZICBOP support for RISC-V architectures
|
- RVV, ZVFH, ZFH, ZICBOP and ZIHINTPAUSE support for RISC-V architectures
|
||||||
- 1.5-bit, 2-bit, 3-bit, 4-bit, 5-bit, 6-bit, and 8-bit integer quantization for faster inference and reduced memory use
|
- 1.5-bit, 2-bit, 3-bit, 4-bit, 5-bit, 6-bit, and 8-bit integer quantization for faster inference and reduced memory use
|
||||||
- Custom CUDA kernels for running LLMs on NVIDIA GPUs (support for AMD GPUs via HIP and Moore Threads GPUs via MUSA)
|
- Custom CUDA kernels for running LLMs on NVIDIA GPUs (support for AMD GPUs via HIP and Moore Threads GPUs via MUSA)
|
||||||
- Vulkan and SYCL backend support
|
- Vulkan and SYCL backend support
|
||||||
|
|
|
||||||
|
|
@ -19,6 +19,7 @@ cmake -B build \
|
||||||
-DGGML_RVV=ON \
|
-DGGML_RVV=ON \
|
||||||
-DGGML_RV_ZFH=ON \
|
-DGGML_RV_ZFH=ON \
|
||||||
-DGGML_RV_ZICBOP=ON \
|
-DGGML_RV_ZICBOP=ON \
|
||||||
|
-DGGML_RV_ZIHINTPAUSE=ON \
|
||||||
-DRISCV64_SPACEMIT_IME_SPEC=RISCV64_SPACEMIT_IME1 \
|
-DRISCV64_SPACEMIT_IME_SPEC=RISCV64_SPACEMIT_IME1 \
|
||||||
-DCMAKE_TOOLCHAIN_FILE=${PWD}/cmake/riscv64-spacemit-linux-gnu-gcc.cmake \
|
-DCMAKE_TOOLCHAIN_FILE=${PWD}/cmake/riscv64-spacemit-linux-gnu-gcc.cmake \
|
||||||
-DCMAKE_INSTALL_PREFIX=build/installed
|
-DCMAKE_INSTALL_PREFIX=build/installed
|
||||||
|
|
|
||||||
|
|
@ -168,6 +168,7 @@ option(GGML_RVV "ggml: enable rvv" ON)
|
||||||
option(GGML_RV_ZFH "ggml: enable riscv zfh" ON)
|
option(GGML_RV_ZFH "ggml: enable riscv zfh" ON)
|
||||||
option(GGML_RV_ZVFH "ggml: enable riscv zvfh" ON)
|
option(GGML_RV_ZVFH "ggml: enable riscv zvfh" ON)
|
||||||
option(GGML_RV_ZICBOP "ggml: enable riscv zicbop" ON)
|
option(GGML_RV_ZICBOP "ggml: enable riscv zicbop" ON)
|
||||||
|
option(GGML_RV_ZIHINTPAUSE "ggml: enable riscv zihintpause " ON)
|
||||||
option(GGML_XTHEADVECTOR "ggml: enable xtheadvector" OFF)
|
option(GGML_XTHEADVECTOR "ggml: enable xtheadvector" OFF)
|
||||||
option(GGML_VXE "ggml: enable vxe" ${GGML_NATIVE})
|
option(GGML_VXE "ggml: enable vxe" ${GGML_NATIVE})
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -469,6 +469,9 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
|
||||||
if (GGML_RV_ZICBOP)
|
if (GGML_RV_ZICBOP)
|
||||||
string(APPEND MARCH_STR "_zicbop")
|
string(APPEND MARCH_STR "_zicbop")
|
||||||
endif()
|
endif()
|
||||||
|
if (GGML_RV_ZIHINTPAUSE)
|
||||||
|
string(APPEND MARCH_STR "_zihintpause")
|
||||||
|
endif()
|
||||||
list(APPEND ARCH_FLAGS "-march=${MARCH_STR}" -mabi=lp64d)
|
list(APPEND ARCH_FLAGS "-march=${MARCH_STR}" -mabi=lp64d)
|
||||||
else()
|
else()
|
||||||
# Begin with the lowest baseline
|
# Begin with the lowest baseline
|
||||||
|
|
|
||||||
|
|
@ -490,6 +490,15 @@ static inline void ggml_thread_cpu_relax(void) {
|
||||||
static inline void ggml_thread_cpu_relax(void) {
|
static inline void ggml_thread_cpu_relax(void) {
|
||||||
_mm_pause();
|
_mm_pause();
|
||||||
}
|
}
|
||||||
|
#elif defined(__riscv)
|
||||||
|
static inline void ggml_thread_cpu_relax(void) {
|
||||||
|
#ifdef __riscv_zihintpause
|
||||||
|
__asm__ __volatile__ ("pause");
|
||||||
|
#else
|
||||||
|
/* Encoding of the pause instruction */
|
||||||
|
__asm__ __volatile__ (".4byte 0x100000F");
|
||||||
|
#endif
|
||||||
|
}
|
||||||
#else
|
#else
|
||||||
static inline void ggml_thread_cpu_relax(void) {;}
|
static inline void ggml_thread_cpu_relax(void) {;}
|
||||||
#endif
|
#endif
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue