Build with CCCL 3.2 for CUDA backends
Gives best perf for backend-sampling on CUDA. Flag can be removed once CCCL 3.2 is bundled within CTK and that CTK version is used in llama.cpp
This commit is contained in:
parent
b5ec0fd76c
commit
1da013c66e
|
|
@ -1079,6 +1079,7 @@ jobs:
|
|||
evict-old-files: 1d
|
||||
|
||||
- name: Build with CMake
|
||||
# Remove GGML_CUDA_CUB_3DOT2 flag once CCCL 3.2 is bundled withing CTK and that CTK version is used in this project
|
||||
run: |
|
||||
cmake -S . -B build -G Ninja \
|
||||
-DLLAMA_CURL=OFF \
|
||||
|
|
@ -1088,7 +1089,8 @@ jobs:
|
|||
-DCMAKE_CUDA_ARCHITECTURES=89-real \
|
||||
-DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined \
|
||||
-DGGML_NATIVE=OFF \
|
||||
-DGGML_CUDA=ON
|
||||
-DGGML_CUDA=ON \
|
||||
-DGGML_CUDA_CUB_3DOT2=ON
|
||||
cmake --build build
|
||||
|
||||
windows-2022-cmake-cuda:
|
||||
|
|
@ -1123,6 +1125,7 @@ jobs:
|
|||
- name: Build
|
||||
id: cmake_build
|
||||
shell: cmd
|
||||
# Remove GGML_CUDA_CUB_3DOT2 flag once CCCL 3.2 is bundled withing CTK and that CTK version is used in this project
|
||||
run: |
|
||||
call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" x64
|
||||
cmake -S . -B build -G "Ninja Multi-Config" ^
|
||||
|
|
@ -1133,7 +1136,8 @@ jobs:
|
|||
-DGGML_BACKEND_DL=ON ^
|
||||
-DGGML_CPU_ALL_VARIANTS=ON ^
|
||||
-DGGML_CUDA=ON ^
|
||||
-DGGML_RPC=ON
|
||||
-DGGML_RPC=ON ^
|
||||
-DGGML_CUDA_CUB_3DOT2=ON
|
||||
set /A NINJA_JOBS=%NUMBER_OF_PROCESSORS%-1
|
||||
cmake --build build --config Release -j %NINJA_JOBS% -t ggml
|
||||
cmake --build build --config Release
|
||||
|
|
|
|||
|
|
@ -448,6 +448,7 @@ jobs:
|
|||
- name: Build
|
||||
id: cmake_build
|
||||
shell: cmd
|
||||
# Remove GGML_CUDA_CUB_3DOT2 flag once CCCL 3.2 is bundled withing CTK and that CTK version is used in this project
|
||||
run: |
|
||||
call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" x64
|
||||
cmake -S . -B build -G "Ninja Multi-Config" ^
|
||||
|
|
@ -455,7 +456,8 @@ jobs:
|
|||
-DGGML_NATIVE=OFF ^
|
||||
-DGGML_CPU=OFF ^
|
||||
-DGGML_CUDA=ON ^
|
||||
-DLLAMA_CURL=OFF
|
||||
-DLLAMA_CURL=OFF ^
|
||||
-DGGML_CUDA_CUB_3DOT2=ON
|
||||
set /A NINJA_JOBS=%NUMBER_OF_PROCESSORS%-1
|
||||
cmake --build build --config Release -j %NINJA_JOBS% --target ggml-cuda
|
||||
|
||||
|
|
|
|||
|
|
@ -52,7 +52,8 @@ if [ ! -z ${GG_BUILD_METAL} ]; then
|
|||
fi
|
||||
|
||||
if [ ! -z ${GG_BUILD_CUDA} ]; then
|
||||
CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_CUDA=ON"
|
||||
# Remove GGML_CUDA_CUB_3DOT2 flag once CCCL 3.2 is bundled withing CTK and that CTK version is used in this project
|
||||
CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_CUDA=ON -DGGML_CUDA_CUB_3DOT2=ON"
|
||||
|
||||
if command -v nvidia-smi >/dev/null 2>&1; then
|
||||
CUDA_ARCH=$(nvidia-smi --query-gpu=compute_cap --format=csv,noheader,nounits 2>/dev/null | head -1 | tr -d '.')
|
||||
|
|
|
|||
Loading…
Reference in New Issue