From 94de74e7b1b9ee6404b54ebb0df273a3ef35a555 Mon Sep 17 00:00:00 2001 From: Boian Berberov <7432115+bberberov@users.noreply.github.com> Date: Sun, 28 Dec 2025 07:33:29 +0000 Subject: [PATCH] cmake: Added more x86_64 CPU backends when building with `GGML_CPU_ALL_VARIANTS=On` (#18186) * minor: Consolidated `#include <immintrin.h>` under `ggml-cpu-impl.h` * cmake: Added more x86-64 CPU backends when building with `GGML_CPU_ALL_VARIANTS=On` - `ivybridge` - `piledriver` - `cannonlake` - `cascadelake` - `cooperlake` - `zen4` Resolves: #17966 --- ggml/CMakeLists.txt | 12 ++++++++++++ ggml/src/CMakeLists.txt | 28 +++++++++++++++++++++------- ggml/src/ggml-cpu/ggml-cpu-impl.h | 2 +- ggml/src/ggml-cpu/simd-mappings.h | 4 ---- ggml/src/ggml-impl.h | 4 ---- 5 files changed, 34 insertions(+), 16 deletions(-) diff --git a/ggml/CMakeLists.txt b/ggml/CMakeLists.txt index 18d117f7cc..cb46c32100 100644 --- a/ggml/CMakeLists.txt +++ b/ggml/CMakeLists.txt @@ -430,10 +430,22 @@ if (MSVC) configure_msvc_target(ggml-cpu-x64) configure_msvc_target(ggml-cpu-sse42) configure_msvc_target(ggml-cpu-sandybridge) + # __FMA__ and __F16C__ are not defined in MSVC, however they are implied with AVX2/AVX512 + # skipping ggml-cpu-ivybridge + # skipping ggml-cpu-piledriver configure_msvc_target(ggml-cpu-haswell) configure_msvc_target(ggml-cpu-skylakex) + configure_msvc_target(ggml-cpu-cannonlake) + configure_msvc_target(ggml-cpu-cascadelake) configure_msvc_target(ggml-cpu-icelake) + # MSVC 2022 doesn't support BF16 intrinsics without `/arch:AVX10.1` ?! 
+ # https://learn.microsoft.com/en-us/cpp/intrinsics/x64-amd64-intrinsics-list?view=msvc-170 + # https://learn.microsoft.com/en-us/cpp/build/reference/arch-x64?view=msvc-170 + # skipping ggml-cpu-cooperlake + # skipping ggml-cpu-zen4 configure_msvc_target(ggml-cpu-alderlake) + # MSVC doesn't support AMX + # skipping ggml-cpu-sapphirerapids if (GGML_BUILD_EXAMPLES) configure_msvc_target(common-ggml) diff --git a/ggml/src/CMakeLists.txt b/ggml/src/CMakeLists.txt index 262d78a4cf..25f25c4236 100644 --- a/ggml/src/CMakeLists.txt +++ b/ggml/src/CMakeLists.txt @@ -357,15 +357,29 @@ if (GGML_CPU_ALL_VARIANTS) endif() if (GGML_SYSTEM_ARCH STREQUAL "x86") ggml_add_cpu_backend_variant(x64) - ggml_add_cpu_backend_variant(sse42 SSE42) - ggml_add_cpu_backend_variant(sandybridge SSE42 AVX) - ggml_add_cpu_backend_variant(haswell SSE42 AVX F16C AVX2 BMI2 FMA) - ggml_add_cpu_backend_variant(skylakex SSE42 AVX F16C AVX2 BMI2 FMA AVX512) - ggml_add_cpu_backend_variant(icelake SSE42 AVX F16C AVX2 BMI2 FMA AVX512 AVX512_VBMI AVX512_VNNI) - ggml_add_cpu_backend_variant(alderlake SSE42 AVX F16C AVX2 BMI2 FMA AVX_VNNI) + ggml_add_cpu_backend_variant(sse42 SSE42) + ggml_add_cpu_backend_variant(sandybridge SSE42 AVX) + if (NOT MSVC) + # __FMA__ and __F16C__ are not defined in MSVC, however they are implied with AVX2/AVX512 + ggml_add_cpu_backend_variant(ivybridge SSE42 AVX F16C) + ggml_add_cpu_backend_variant(piledriver SSE42 AVX F16C FMA) + endif() + ggml_add_cpu_backend_variant(haswell SSE42 AVX F16C FMA AVX2 BMI2) + ggml_add_cpu_backend_variant(skylakex SSE42 AVX F16C FMA AVX2 BMI2 AVX512) + ggml_add_cpu_backend_variant(cannonlake SSE42 AVX F16C FMA AVX2 BMI2 AVX512 AVX512_VBMI) + ggml_add_cpu_backend_variant(cascadelake SSE42 AVX F16C FMA AVX2 BMI2 AVX512 AVX512_VNNI) + ggml_add_cpu_backend_variant(icelake SSE42 AVX F16C FMA AVX2 BMI2 AVX512 AVX512_VBMI AVX512_VNNI) + if (NOT MSVC) + # MSVC 2022 doesn't support BF16 intrinsics without `/arch:AVX10.1` ?! 
+ # https://learn.microsoft.com/en-us/cpp/intrinsics/x64-amd64-intrinsics-list?view=msvc-170 + # https://learn.microsoft.com/en-us/cpp/build/reference/arch-x64?view=msvc-170 + ggml_add_cpu_backend_variant(cooperlake SSE42 AVX F16C FMA AVX2 BMI2 AVX512 AVX512_VNNI AVX512_BF16) + ggml_add_cpu_backend_variant(zen4 SSE42 AVX F16C FMA AVX2 BMI2 AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16) + endif() + ggml_add_cpu_backend_variant(alderlake SSE42 AVX F16C FMA AVX2 BMI2 AVX_VNNI) if (NOT MSVC) # MSVC doesn't support AMX - ggml_add_cpu_backend_variant(sapphirerapids SSE42 AVX F16C AVX2 BMI2 FMA AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16 AMX_TILE AMX_INT8) + ggml_add_cpu_backend_variant(sapphirerapids SSE42 AVX F16C FMA AVX2 BMI2 AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16 AMX_TILE AMX_INT8) endif() elseif(GGML_SYSTEM_ARCH STREQUAL "ARM") if (CMAKE_SYSTEM_NAME MATCHES "Linux") diff --git a/ggml/src/ggml-cpu/ggml-cpu-impl.h b/ggml/src/ggml-cpu/ggml-cpu-impl.h index 7597377cc2..0e8dd0ae05 100644 --- a/ggml/src/ggml-cpu/ggml-cpu-impl.h +++ b/ggml/src/ggml-cpu/ggml-cpu-impl.h @@ -328,7 +328,7 @@ inline static int32x4_t ggml_vdotq_s32(int32x4_t acc, int8x16_t a, int8x16_t b) #if defined(_MSC_VER) || defined(__MINGW32__) #include <intrin.h> -#elif defined(__AVX__) || defined(__AVX2__) || defined(__AVX512F__) || defined(__SSSE3__) || defined(__SSE3__) || defined(__SSE__) +#elif defined(__SSE__) || defined(__SSE3__) || defined(__SSSE3__) || defined(__AVX__) || defined(__F16C__) || defined(__AVX2__) || defined(__AVX512F__) || defined(__AVX512BF16__) #include <immintrin.h> #endif diff --git a/ggml/src/ggml-cpu/simd-mappings.h b/ggml/src/ggml-cpu/simd-mappings.h index 101a9c086b..a7a8272205 100644 --- a/ggml/src/ggml-cpu/simd-mappings.h +++ b/ggml/src/ggml-cpu/simd-mappings.h @@ -14,10 +14,6 @@ #include <arm_neon.h> #endif -#if defined(__F16C__) -#include <immintrin.h> -#endif - #if defined(__riscv_v_intrinsic) #include <riscv_vector.h> #endif diff --git a/ggml/src/ggml-impl.h b/ggml/src/ggml-impl.h index fe57d4c582..80e0fd2ff8 100644 --- 
a/ggml/src/ggml-impl.h +++ b/ggml/src/ggml-impl.h @@ -24,10 +24,6 @@ #include <arm_neon.h> #endif -#if defined(__F16C__) -#include <immintrin.h> -#endif - #ifdef __cplusplus extern "C" { #endif