Build with CCCL 3.2 for CUDA backends

Gives the best performance for backend sampling on CUDA. The flag can be removed once
CCCL 3.2 is bundled within the CTK and that CTK version is used in llama.cpp.
This commit is contained in:
Oliver Simons 2025-12-19 16:10:51 +01:00
parent b5ec0fd76c
commit 1da013c66e
3 changed files with 11 additions and 4 deletions

View File

@ -1079,6 +1079,7 @@ jobs:
evict-old-files: 1d
- name: Build with CMake
# Remove GGML_CUDA_CUB_3DOT2 flag once CCCL 3.2 is bundled within CTK and that CTK version is used in this project
run: |
cmake -S . -B build -G Ninja \
-DLLAMA_CURL=OFF \
@ -1088,7 +1089,8 @@ jobs:
-DCMAKE_CUDA_ARCHITECTURES=89-real \
-DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined \
-DGGML_NATIVE=OFF \
-DGGML_CUDA=ON
-DGGML_CUDA=ON \
-DGGML_CUDA_CUB_3DOT2=ON
cmake --build build
windows-2022-cmake-cuda:
@ -1123,6 +1125,7 @@ jobs:
- name: Build
id: cmake_build
shell: cmd
# Remove GGML_CUDA_CUB_3DOT2 flag once CCCL 3.2 is bundled within CTK and that CTK version is used in this project
run: |
call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" x64
cmake -S . -B build -G "Ninja Multi-Config" ^
@ -1133,7 +1136,8 @@ jobs:
-DGGML_BACKEND_DL=ON ^
-DGGML_CPU_ALL_VARIANTS=ON ^
-DGGML_CUDA=ON ^
-DGGML_RPC=ON
-DGGML_RPC=ON ^
-DGGML_CUDA_CUB_3DOT2=ON
set /A NINJA_JOBS=%NUMBER_OF_PROCESSORS%-1
cmake --build build --config Release -j %NINJA_JOBS% -t ggml
cmake --build build --config Release

View File

@ -448,6 +448,7 @@ jobs:
- name: Build
id: cmake_build
shell: cmd
# Remove GGML_CUDA_CUB_3DOT2 flag once CCCL 3.2 is bundled within CTK and that CTK version is used in this project
run: |
call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" x64
cmake -S . -B build -G "Ninja Multi-Config" ^
@ -455,7 +456,8 @@ jobs:
-DGGML_NATIVE=OFF ^
-DGGML_CPU=OFF ^
-DGGML_CUDA=ON ^
-DLLAMA_CURL=OFF
-DLLAMA_CURL=OFF ^
-DGGML_CUDA_CUB_3DOT2=ON
set /A NINJA_JOBS=%NUMBER_OF_PROCESSORS%-1
cmake --build build --config Release -j %NINJA_JOBS% --target ggml-cuda

View File

@ -52,7 +52,8 @@ if [ ! -z ${GG_BUILD_METAL} ]; then
fi
if [ ! -z ${GG_BUILD_CUDA} ]; then
CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_CUDA=ON"
# Remove GGML_CUDA_CUB_3DOT2 flag once CCCL 3.2 is bundled within CTK and that CTK version is used in this project
CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_CUDA=ON -DGGML_CUDA_CUB_3DOT2=ON"
if command -v nvidia-smi >/dev/null 2>&1; then
CUDA_ARCH=$(nvidia-smi --query-gpu=compute_cap --format=csv,noheader,nounits 2>/dev/null | head -1 | tr -d '.')