From e70e640db3868fbffea1e9499148c168c98aa5a4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20G=C3=A4=C3=9Fler?= Date: Mon, 29 Dec 2025 09:35:42 +0100 Subject: [PATCH] CUDA: Blackwell features for non-native builds (#18436) --- ggml/src/ggml-cuda/CMakeLists.txt | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/ggml/src/ggml-cuda/CMakeLists.txt b/ggml/src/ggml-cuda/CMakeLists.txt index 3b438c30ce..73f2910b7c 100644 --- a/ggml/src/ggml-cuda/CMakeLists.txt +++ b/ggml/src/ggml-cuda/CMakeLists.txt @@ -35,6 +35,20 @@ if (CUDAToolkit_FOUND) if (CUDAToolkit_VERSION VERSION_GREATER_EQUAL "11.8") list(APPEND CMAKE_CUDA_ARCHITECTURES 89-real) endif() + + if (CUDAToolkit_VERSION VERSION_GREATER_EQUAL "12.8") + # The CUDA architecture 120f-virtual would in principle work for Blackwell support + # but the newly added "f" suffix conflicted with a preexisting regex for validating CUDA architectures in CMake. + # So either a recent CMake version or one with the backported fix is needed. + # The following versions should work: + # - CMake >= v3.31.8 && CMake < v4.0.0 + # - CMake >= v4.0.2 + # This is NOT documented in the CMake release notes, + # check Modules/Internal/CMakeCUDAArchitecturesValidate.cmake in the CMake git repository instead. + # However, the architectures 120a-real and 121a-real should work with basically any CMake version and + # until the release of e.g. Rubin there is no benefit to shipping virtual architectures for Blackwell. 
+ list(APPEND CMAKE_CUDA_ARCHITECTURES 120a-real 121a-real) + endif() endif() endif() message(STATUS "Using CUDA architectures: ${CMAKE_CUDA_ARCHITECTURES}") @@ -62,7 +76,7 @@ if (CUDAToolkit_FOUND) else() foreach(ARCH ${CMAKE_CUDA_ARCHITECTURES}) if(ARCH MATCHES "^12[0-9](-real|-virtual)?$") - message(FATAL_ERROR "Compute capability ${ARCH} used, use ${ARCH}a or ${ARCH}f for Blackwell specific optimizations") + message(FATAL_ERROR "Compute capability ${ARCH} used, use ${ARCH}a or ${ARCH}f for Blackwell-specific optimizations") endif() endforeach() endif()