CUDA: fix replacment of bad archs in CMake (#18457)
This commit is contained in:
parent
5b1248c9af
commit
0bd1212a43
|
|
@ -150,19 +150,38 @@ We also have a [guide](./backend/CUDA-FEDORA.md) for setting up CUDA toolkit in
|
|||
|
||||
|
||||
### Compilation
|
||||
|
||||
Make sure to read the notes about the CPU build for general instructions for e.g. speeding up the compilation.
|
||||
|
||||
```bash
|
||||
cmake -B build -DGGML_CUDA=ON
|
||||
cmake --build build --config Release
|
||||
```
|
||||
|
||||
### Non-Native Builds
|
||||
|
||||
By default llama.cpp will be built for the hardware that is connected to the system at that time.
|
||||
For a build covering all CUDA GPUs, disable `GGML_NATIVE`:
|
||||
|
||||
```bash
|
||||
cmake -B build -DGGML_CUDA=ON -DGGML_NATIVE=OFF
|
||||
```
|
||||
|
||||
The resulting binary should run on all CUDA GPUs with optimal performance, though some just-in-time compilation may be required.
|
||||
|
||||
### Override Compute Capability Specifications
|
||||
|
||||
If `nvcc` cannot detect your gpu, you may get compile-warnings such as:
|
||||
If `nvcc` cannot detect your gpu, you may get compile warnings such as:
|
||||
```text
|
||||
nvcc warning : Cannot find valid GPU for '-arch=native', default arch is used
|
||||
```
|
||||
|
||||
To override the `native` GPU detection:
|
||||
One option is to do a non-native build as described above.
|
||||
However, this will result in a large binary that takes a long time to compile.
|
||||
Alternatively it is also possible to explicitly specify CUDA architectures.
|
||||
This may also make sense for a non-native build, for that one should look at the logic in `ggml/src/ggml-cuda/CMakeLists.txt` as a starting point.
|
||||
|
||||
To override the default CUDA architectures:
|
||||
|
||||
#### 1. Take note of the `Compute Capability` of your NVIDIA devices: ["CUDA: Your GPU Compute > Capability"](https://developer.nvidia.com/cuda-gpus).
|
||||
|
||||
|
|
|
|||
|
|
@ -51,35 +51,35 @@ if (CUDAToolkit_FOUND)
|
|||
endif()
|
||||
endif()
|
||||
endif()
|
||||
message(STATUS "Using CUDA architectures: ${CMAKE_CUDA_ARCHITECTURES}")
|
||||
|
||||
enable_language(CUDA)
|
||||
|
||||
# Replace any 12x-real architectures with 12x{a}-real. FP4 ptx instructions are not available in just 12x
|
||||
if (GGML_NATIVE)
|
||||
set(PROCESSED_ARCHITECTURES "")
|
||||
if (CMAKE_CUDA_ARCHITECTURES_NATIVE)
|
||||
set(ARCH_LIST ${CMAKE_CUDA_ARCHITECTURES_NATIVE})
|
||||
else()
|
||||
set(ARCH_LIST ${CMAKE_CUDA_ARCHITECTURES})
|
||||
endif()
|
||||
foreach(ARCH ${ARCH_LIST})
|
||||
# Replace any plain 12X CUDA architectures with their "architecture-specific" equivalents 12Xa.
|
||||
# 12X is forwards-compatible, 12Xa is not.
|
||||
# Notably the Blackwell FP4 tensor core instructions are not forwards compatible and therefore need 12Xa.
|
||||
# But while 12X vs. 12Xa can be checked in device code there is (to my knowledge) no easy way to do the same check in host code.
|
||||
# So for now just replace all instances of 12X with 12Xa, this should be fine until Rubin is released.
|
||||
foreach(ARCHS IN ITEMS CMAKE_CUDA_ARCHITECTURES CMAKE_CUDA_ARCHITECTURES_NATIVE)
|
||||
set(FIXED_ARCHS "")
|
||||
foreach(ARCH IN LISTS ${ARCHS})
|
||||
if (ARCH MATCHES "^12[0-9](-real|-virtual)?$")
|
||||
string(REGEX REPLACE "^(12[0-9]).*$" "\\1" BASE_ARCH ${ARCH})
|
||||
message(STATUS "Replacing ${ARCH} with ${BASE_ARCH}a-real")
|
||||
list(APPEND PROCESSED_ARCHITECTURES "${BASE_ARCH}a-real")
|
||||
string(REGEX REPLACE "^(12[0-9])((-real|-virtual)?)$" "\\1a\\2" FIXED_ARCH ${ARCH})
|
||||
message(STATUS "Replacing ${ARCH} in ${ARCHS} with ${FIXED_ARCH}")
|
||||
list(APPEND FIXED_ARCHS "${FIXED_ARCH}")
|
||||
else()
|
||||
list(APPEND PROCESSED_ARCHITECTURES ${ARCH})
|
||||
endif()
|
||||
endforeach()
|
||||
set(CMAKE_CUDA_ARCHITECTURES ${PROCESSED_ARCHITECTURES})
|
||||
else()
|
||||
foreach(ARCH ${CMAKE_CUDA_ARCHITECTURES})
|
||||
if(ARCH MATCHES "^12[0-9](-real|-virtual)?$")
|
||||
message(FATAL_ERROR "Compute capability ${ARCH} used, use ${ARCH}a or ${ARCH}f for Blackwell-specific optimizations")
|
||||
list(APPEND FIXED_ARCHS "${ARCH}")
|
||||
endif()
|
||||
endforeach()
|
||||
set(${ARCHS} ${FIXED_ARCHS})
|
||||
endforeach()
|
||||
|
||||
# If we try to compile a "native" build it will use the 12X architectures and fail.
|
||||
# So we should instead use the native architectures as determined by CMake after replacing 12X with 12Xa.
|
||||
# But if at the time of the build no GPUs are connected at all CMAKE_CUDA_ARCHITECTURES will contain garbage that we should not use.
|
||||
if (CMAKE_CUDA_ARCHITECTURES STREQUAL "native" AND CMAKE_CUDA_ARCHITECTURES_NATIVE MATCHES "^[0-9]+(a|f)?(-real|-virtual)?(;[0-9]+(a|f)?(-real|-virtual)?|;)*$")
|
||||
set(CMAKE_CUDA_ARCHITECTURES ${CMAKE_CUDA_ARCHITECTURES_NATIVE})
|
||||
endif()
|
||||
message(STATUS "Using CMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES} CMAKE_CUDA_ARCHITECTURES_NATIVE=${CMAKE_CUDA_ARCHITECTURES_NATIVE}")
|
||||
|
||||
file(GLOB GGML_HEADERS_CUDA "*.cuh")
|
||||
list(APPEND GGML_HEADERS_CUDA "../../include/ggml-cuda.h")
|
||||
|
|
|
|||
Loading…
Reference in New Issue