Build with CCCL 3.2 for CUDA backends
Gives best perf for backend-sampling on CUDA. Flag can be removed once CCCL 3.2 is bundled within CTK and that CTK version is used in llama.cpp
This commit is contained in:
parent
b5ec0fd76c
commit
1da013c66e
|
|
@ -1079,6 +1079,7 @@ jobs:
|
||||||
evict-old-files: 1d
|
evict-old-files: 1d
|
||||||
|
|
||||||
- name: Build with CMake
|
- name: Build with CMake
|
||||||
|
# Remove GGML_CUDA_CUB_3DOT2 flag once CCCL 3.2 is bundled within CTK and that CTK version is used in this project
|
||||||
run: |
|
run: |
|
||||||
cmake -S . -B build -G Ninja \
|
cmake -S . -B build -G Ninja \
|
||||||
-DLLAMA_CURL=OFF \
|
-DLLAMA_CURL=OFF \
|
||||||
|
|
@ -1088,7 +1089,8 @@ jobs:
|
||||||
-DCMAKE_CUDA_ARCHITECTURES=89-real \
|
-DCMAKE_CUDA_ARCHITECTURES=89-real \
|
||||||
-DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined \
|
-DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined \
|
||||||
-DGGML_NATIVE=OFF \
|
-DGGML_NATIVE=OFF \
|
||||||
-DGGML_CUDA=ON
|
-DGGML_CUDA=ON \
|
||||||
|
-DGGML_CUDA_CUB_3DOT2=ON
|
||||||
cmake --build build
|
cmake --build build
|
||||||
|
|
||||||
windows-2022-cmake-cuda:
|
windows-2022-cmake-cuda:
|
||||||
|
|
@ -1123,6 +1125,7 @@ jobs:
|
||||||
- name: Build
|
- name: Build
|
||||||
id: cmake_build
|
id: cmake_build
|
||||||
shell: cmd
|
shell: cmd
|
||||||
|
# Remove GGML_CUDA_CUB_3DOT2 flag once CCCL 3.2 is bundled within CTK and that CTK version is used in this project
|
||||||
run: |
|
run: |
|
||||||
call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" x64
|
call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" x64
|
||||||
cmake -S . -B build -G "Ninja Multi-Config" ^
|
cmake -S . -B build -G "Ninja Multi-Config" ^
|
||||||
|
|
@ -1133,7 +1136,8 @@ jobs:
|
||||||
-DGGML_BACKEND_DL=ON ^
|
-DGGML_BACKEND_DL=ON ^
|
||||||
-DGGML_CPU_ALL_VARIANTS=ON ^
|
-DGGML_CPU_ALL_VARIANTS=ON ^
|
||||||
-DGGML_CUDA=ON ^
|
-DGGML_CUDA=ON ^
|
||||||
-DGGML_RPC=ON
|
-DGGML_RPC=ON ^
|
||||||
|
-DGGML_CUDA_CUB_3DOT2=ON
|
||||||
set /A NINJA_JOBS=%NUMBER_OF_PROCESSORS%-1
|
set /A NINJA_JOBS=%NUMBER_OF_PROCESSORS%-1
|
||||||
cmake --build build --config Release -j %NINJA_JOBS% -t ggml
|
cmake --build build --config Release -j %NINJA_JOBS% -t ggml
|
||||||
cmake --build build --config Release
|
cmake --build build --config Release
|
||||||
|
|
|
||||||
|
|
@ -448,6 +448,7 @@ jobs:
|
||||||
- name: Build
|
- name: Build
|
||||||
id: cmake_build
|
id: cmake_build
|
||||||
shell: cmd
|
shell: cmd
|
||||||
|
# Remove GGML_CUDA_CUB_3DOT2 flag once CCCL 3.2 is bundled within CTK and that CTK version is used in this project
|
||||||
run: |
|
run: |
|
||||||
call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" x64
|
call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" x64
|
||||||
cmake -S . -B build -G "Ninja Multi-Config" ^
|
cmake -S . -B build -G "Ninja Multi-Config" ^
|
||||||
|
|
@ -455,7 +456,8 @@ jobs:
|
||||||
-DGGML_NATIVE=OFF ^
|
-DGGML_NATIVE=OFF ^
|
||||||
-DGGML_CPU=OFF ^
|
-DGGML_CPU=OFF ^
|
||||||
-DGGML_CUDA=ON ^
|
-DGGML_CUDA=ON ^
|
||||||
-DLLAMA_CURL=OFF
|
-DLLAMA_CURL=OFF ^
|
||||||
|
-DGGML_CUDA_CUB_3DOT2=ON
|
||||||
set /A NINJA_JOBS=%NUMBER_OF_PROCESSORS%-1
|
set /A NINJA_JOBS=%NUMBER_OF_PROCESSORS%-1
|
||||||
cmake --build build --config Release -j %NINJA_JOBS% --target ggml-cuda
|
cmake --build build --config Release -j %NINJA_JOBS% --target ggml-cuda
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -52,7 +52,8 @@ if [ ! -z ${GG_BUILD_METAL} ]; then
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [ ! -z ${GG_BUILD_CUDA} ]; then
|
if [ ! -z ${GG_BUILD_CUDA} ]; then
|
||||||
CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_CUDA=ON"
|
# Remove GGML_CUDA_CUB_3DOT2 flag once CCCL 3.2 is bundled within CTK and that CTK version is used in this project
|
||||||
|
CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_CUDA=ON -DGGML_CUDA_CUB_3DOT2=ON"
|
||||||
|
|
||||||
if command -v nvidia-smi >/dev/null 2>&1; then
|
if command -v nvidia-smi >/dev/null 2>&1; then
|
||||||
CUDA_ARCH=$(nvidia-smi --query-gpu=compute_cap --format=csv,noheader,nounits 2>/dev/null | head -1 | tr -d '.')
|
CUDA_ARCH=$(nvidia-smi --query-gpu=compute_cap --format=csv,noheader,nounits 2>/dev/null | head -1 | tr -d '.')
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue