From 2f7d0ac015705f0f5060a62ee832393ee255847b Mon Sep 17 00:00:00 2001 From: Daniel Bevenius Date: Mon, 15 Sep 2025 12:07:15 +0200 Subject: [PATCH] ggml : add CPU backend reference implementation This commit introduces a CPU reference implementation for GGML, designed primarily for testing and validation purposes. The motivation for this addition is to have a pure C CPU backend implementation that does not use any hardware-specific optimizations or intrinsics. This will allow for testing the CPU backend variants against the reference implementation to ensure correctness. Building: ```console $ cmake -B build \ -DGGML_CPU_REF_BACKEND=ON -DGGML_BACKEND_DL=ON \ -DGGML_CPU_ALL_VARIANTS=ON ``` List available CPU architectures/variants: ```console $ ./build/bin/test-backend-ops cpu-variants --list CPU variants: CPU-haswell - 12th Gen Intel(R) Core(TM) i7-1260P CPU-sse42 - 12th Gen Intel(R) Core(TM) i7-1260P CPU-x64 - 12th Gen Intel(R) Core(TM) i7-1260P CPU-alderlake - 12th Gen Intel(R) Core(TM) i7-1260P CPU-sandybridge - 12th Gen Intel(R) Core(TM) i7-1260P ``` Run tests: ```console $ ./build/bin/test-backend-ops cpu-variants --variant CPU-alderlake -o ADD CPU-ref features: SSE2 = 1 CPU-alderlake features: SSE2 = 1 SSE3 = 1 SSSE3 = 1 AVX = 1 AVX_VNNI = 1 AVX2 = 1 F16C = 1 FMA = 1 BMI2 = 1 LLAMAFILE = 1 OPENMP = 1 REPACK = 1 Testing CPU variant 'CPU-alderlake' against 'CPU-ref' backend... ADD(type=f16,ne=[1,1,8,1],nr=[1,1,1,1],nf=1): OK ADD(type=f16,ne=[1,1,1,1],nr=[32,1,1,1],nf=1): OK ... 
``` --- ggml/CMakeLists.txt | 9 ++- ggml/include/ggml-backend.h | 3 + ggml/include/ggml-cpu.h | 1 + ggml/src/CMakeLists.txt | 27 +++++++ ggml/src/ggml-backend-reg.cpp | 75 +++++++++++++++-- ggml/src/ggml-cpu/CMakeLists.txt | 6 ++ ggml/src/ggml-cpu/ggml-cpu.c | 8 ++ ggml/src/ggml-cpu/ggml-cpu.cpp | 7 +- tests/CMakeLists.txt | 3 + tests/test-backend-ops.cpp | 135 ++++++++++++++++++++++++++++++- 10 files changed, 261 insertions(+), 13 deletions(-) diff --git a/ggml/CMakeLists.txt b/ggml/CMakeLists.txt index 0176ca1ce9..0be54ba31e 100644 --- a/ggml/CMakeLists.txt +++ b/ggml/CMakeLists.txt @@ -263,8 +263,9 @@ option(GGML_ZENDNN "ggml: use ZenDNN" option(ZENDNN_ROOT "ggml: path to ZenDNN installation" "") # extra artifacts -option(GGML_BUILD_TESTS "ggml: build tests" ${GGML_STANDALONE}) -option(GGML_BUILD_EXAMPLES "ggml: build examples" ${GGML_STANDALONE}) +option(GGML_BUILD_TESTS "ggml: build tests" ${GGML_STANDALONE}) +option(GGML_CPU_REF_BACKEND "ggml: build reference CPU backend for testing" OFF) +option(GGML_BUILD_EXAMPLES "ggml: build examples" ${GGML_STANDALONE}) # # dependencies @@ -294,7 +295,9 @@ add_subdirectory(src) if (GGML_BUILD_TESTS) enable_testing() - add_subdirectory(tests) + if (EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/tests") + add_subdirectory(tests) + endif () endif () if (GGML_BUILD_EXAMPLES) diff --git a/ggml/include/ggml-backend.h b/ggml/include/ggml-backend.h index a9d1778641..2fb61c3347 100644 --- a/ggml/include/ggml-backend.h +++ b/ggml/include/ggml-backend.h @@ -245,6 +245,9 @@ extern "C" { // Load all known backends from dynamic libraries GGML_API void ggml_backend_load_all(void); GGML_API void ggml_backend_load_all_from_path(const char * dir_path); + // Load all variants for a backend and register them + GGML_API void ggml_backend_load_all_variants(const char * backend_name); + GGML_API void ggml_backend_load_variant(const char * backend_name, const char * variant); // // Backend scheduler diff --git a/ggml/include/ggml-cpu.h 
b/ggml/include/ggml-cpu.h index 4f3b99c8d0..df52218e2b 100644 --- a/ggml/include/ggml-cpu.h +++ b/ggml/include/ggml-cpu.h @@ -75,6 +75,7 @@ extern "C" { // // x86 + GGML_BACKEND_API int ggml_cpu_has_sse2 (void); GGML_BACKEND_API int ggml_cpu_has_sse3 (void); GGML_BACKEND_API int ggml_cpu_has_ssse3 (void); GGML_BACKEND_API int ggml_cpu_has_avx (void); diff --git a/ggml/src/CMakeLists.txt b/ggml/src/CMakeLists.txt index 6192a87046..b7348c8451 100644 --- a/ggml/src/CMakeLists.txt +++ b/ggml/src/CMakeLists.txt @@ -459,6 +459,33 @@ ggml_add_backend(OpenCL) ggml_add_backend(Hexagon) ggml_add_backend(ZenDNN) +if (GGML_CPU_REF_BACKEND) + if (NOT GGML_BACKEND_DL) + message(FATAL_ERROR "GGML_CPU_REF_BACKEND requires GGML_BACKEND_DL") + endif() + set(GGML_SYSTEM_ARCH "cpu-ref") + set(GGML_LLAMAFILE OFF) + set(GGML_CPU_HBM OFF) + set(GGML_CPU_REPACK OFF) + set(GGML_OPENMP OFF) + set(GGML_CPU_KLEIDIAI OFF) + set(GGML_ACCELERATE OFF) + + ggml_add_cpu_backend_variant(ref) + + if (GGML_SYSTEM_ARCH MATCHES "arm|aarch64|ARM|AARCH64") + target_compile_options(ggml-cpu-ref PRIVATE + -U__ARM_NEON + -U__ARM_FEATURE_FMA + -U__ARM_FEATURE_FP16_VECTOR_ARITHMETIC + -U__ARM_FEATURE_DOTPROD + -U__ARM_FEATURE_MATMUL_INT8 + -U__ARM_FEATURE_SVE + ) + endif() + target_compile_definitions(ggml PRIVATE GGML_USE_CPU_REF) +endif() + foreach (target ggml-base ggml) target_include_directories(${target} PUBLIC $ $) target_compile_features (${target} PRIVATE c_std_11 cxx_std_17) # don't bump diff --git a/ggml/src/ggml-backend-reg.cpp b/ggml/src/ggml-backend-reg.cpp index 4181a714ad..5b7967ea83 100644 --- a/ggml/src/ggml-backend-reg.cpp +++ b/ggml/src/ggml-backend-reg.cpp @@ -519,12 +519,11 @@ static fs::path backend_filename_extension() { #endif } -static ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent, const char * user_search_path) { - // enumerate all the files that match [lib]ggml-name-*.[so|dll] in the search paths - const fs::path name_path = fs::u8path(name); - const 
fs::path file_prefix = backend_filename_prefix().native() + name_path.native() + fs::u8path("-").native(); - const fs::path file_extension = backend_filename_extension(); +static fs::path backend_filename_prefix_with_name(const char * backend_name) { + return backend_filename_prefix().native() + fs::u8path(backend_name).native() + fs::u8path("-").native(); +} +static std::vector<fs::path> get_backend_search_paths(const char * user_search_path = nullptr) { std::vector<fs::path> search_paths; if (user_search_path == nullptr) { #ifdef GGML_BACKEND_DIR @@ -536,6 +535,16 @@ static ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent, } else { search_paths.push_back(fs::u8path(user_search_path)); } + return search_paths; +} + +static ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent, const char * user_search_path) { + // enumerate all the files that match [lib]ggml-name-*.[so|dll] in the search paths + const fs::path name_path = fs::u8path(name); + const fs::path file_prefix = backend_filename_prefix_with_name(name); + const fs::path file_extension = backend_filename_extension(); + + std::vector<fs::path> search_paths = get_backend_search_paths(user_search_path); int best_score = 0; fs::path best_path; @@ -629,4 +638,60 @@ void ggml_backend_load_all_from_path(const char * dir_path) { if (backend_path) { ggml_backend_load(backend_path); } +#ifdef GGML_USE_CPU_REF + ggml_backend_load_best("cpu-ref", silent, dir_path); +#endif +} + +void ggml_backend_load_all_variants(const char * backend_name) { + const fs::path file_prefix = backend_filename_prefix_with_name(backend_name); + const fs::path file_extension = backend_filename_extension(); + + std::vector<fs::path> search_paths = get_backend_search_paths(); + + // enumerate all the files that match [lib]ggml-name-*.[so|dll] in the search paths + for (const auto & search_path : search_paths) { + if (!fs::exists(search_path)) { + GGML_LOG_DEBUG("%s: search path %s does not exist\n", __func__, path_str(search_path).c_str()); 
+ continue; + } + + for (const auto & entry : fs::directory_iterator(search_path, fs::directory_options::skip_permission_denied)) { + if (entry.is_regular_file()) { + auto filename = entry.path().filename(); + auto ext = entry.path().extension(); + if (filename.native().find(file_prefix.native()) == 0 && ext == file_extension) { + fs::path path = search_path / filename; + ggml_backend_reg_t backend = get_reg().load_backend(path, false); + if (backend == nullptr) { + GGML_LOG_ERROR("%s: failed to load backend variant %s\n", __func__, path_str(entry.path()).c_str()); + } + } + } + } + } +} + +void ggml_backend_load_variant(const char * backend_name, const char * variant) { + const fs::path file_prefix = backend_filename_prefix_with_name(backend_name); + const fs::path target_filename = file_prefix.native() + fs::u8path(variant).native() + backend_filename_extension().native(); + + std::vector<fs::path> search_paths = get_backend_search_paths(); + + for (const auto & search_path : search_paths) { + if (!fs::exists(search_path)) { + GGML_LOG_DEBUG("%s: search path %s does not exist\n", __func__, path_str(search_path).c_str()); + continue; + } + + fs::path full_path = search_path / target_filename; + if (fs::exists(full_path) && fs::is_regular_file(full_path)) { + ggml_backend_reg_t backend = get_reg().load_backend(full_path, false); + if (backend == nullptr) { + GGML_LOG_ERROR("%s: failed to load backend variant %s\n", __func__, path_str(full_path).c_str()); + } else { + return; + } + } + } } diff --git a/ggml/src/ggml-cpu/CMakeLists.txt b/ggml/src/ggml-cpu/CMakeLists.txt index 7622d0bf49..9955a0a3e2 100644 --- a/ggml/src/ggml-cpu/CMakeLists.txt +++ b/ggml/src/ggml-cpu/CMakeLists.txt @@ -52,6 +52,12 @@ function(ggml_add_cpu_backend_variant_impl tag_name) target_compile_features(${GGML_CPU_NAME} PRIVATE c_std_11 cxx_std_17) target_include_directories(${GGML_CPU_NAME} PRIVATE . 
ggml-cpu) + if (tag_name) + target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_CPU_VARIANT_NAME="CPU-${tag_name}") + else() + target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_CPU_VARIANT_NAME="CPU") + endif() + if (APPLE AND GGML_ACCELERATE) find_library(ACCELERATE_FRAMEWORK Accelerate) if (ACCELERATE_FRAMEWORK) diff --git a/ggml/src/ggml-cpu/ggml-cpu.c b/ggml/src/ggml-cpu/ggml-cpu.c index f7ba1fe317..a85aa957ff 100644 --- a/ggml/src/ggml-cpu/ggml-cpu.c +++ b/ggml/src/ggml-cpu/ggml-cpu.c @@ -3558,6 +3558,14 @@ int ggml_cpu_has_llamafile(void) { #endif } +int ggml_cpu_has_sse2(void) { +#if defined(__SSE2__) + return 1; +#else + return 0; +#endif +} + int ggml_cpu_has_sse3(void) { #if defined(__SSE3__) return 1; diff --git a/ggml/src/ggml-cpu/ggml-cpu.cpp b/ggml/src/ggml-cpu/ggml-cpu.cpp index f4713a4218..00fc1e2743 100644 --- a/ggml/src/ggml-cpu/ggml-cpu.cpp +++ b/ggml/src/ggml-cpu/ggml-cpu.cpp @@ -108,7 +108,7 @@ struct ggml_backend_cpu_context { }; static const char * ggml_backend_cpu_get_name(ggml_backend_t backend) { - return "CPU"; + return GGML_CPU_VARIANT_NAME; GGML_UNUSED(backend); } @@ -337,7 +337,7 @@ struct ggml_backend_cpu_device_context { }; static const char * ggml_backend_cpu_device_get_name(ggml_backend_dev_t dev) { - return "CPU"; + return GGML_CPU_VARIANT_NAME; GGML_UNUSED(dev); } @@ -516,6 +516,9 @@ static ggml_backend_feature * ggml_backend_cpu_get_features(ggml_backend_reg_t r ggml_cpu_init(); std::vector features; + if (ggml_cpu_has_sse2()) { + features.push_back({ "SSE2", "1" }); + } if (ggml_cpu_has_sse3()) { features.push_back({ "SSE3", "1" }); } diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index c3d9f9c324..6f5a3f42f6 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -218,6 +218,9 @@ if (NOT LLAMA_SANITIZE_ADDRESS AND NOT GGML_SCHED_NO_REALLOC) endif() llama_build_and_test(test-gguf.cpp) llama_build_and_test(test-backend-ops.cpp) +target_sources(test-backend-ops PRIVATE 
${PROJECT_SOURCE_DIR}/ggml/src/ggml.c) +target_compile_definitions(test-backend-ops PRIVATE GGML_BUILD GGML_VERSION=\"${GGML_VERSION}\" GGML_COMMIT=\"${GGML_COMMIT}\") +target_include_directories(test-backend-ops PRIVATE ${PROJECT_SOURCE_DIR}/ggml/src) llama_build_and_test(test-model-load-cancel.cpp LABEL "model") llama_build_and_test(test-autorelease.cpp LABEL "model") diff --git a/tests/test-backend-ops.cpp b/tests/test-backend-ops.cpp index 6dedd8de58..76f0bf64c4 100644 --- a/tests/test-backend-ops.cpp +++ b/tests/test-backend-ops.cpp @@ -20,6 +20,8 @@ #include #include +#include "ggml-impl.h" + #include #include #include @@ -459,6 +461,7 @@ enum test_mode { MODE_PERF, MODE_GRAD, MODE_SUPPORT, + MODE_CPU_VARIANTS, }; // Output format support similar to llama-bench @@ -8555,18 +8558,120 @@ static void show_test_coverage() { printf(" Coverage: %.1f%%\n", (double)covered_ops.size() / all_ops.size() * 100.0); } +static void print_backend_features(ggml_backend_t backend) { + auto device = ggml_backend_get_device(backend); + auto reg = ggml_backend_dev_backend_reg(device); + auto name = ggml_backend_dev_name(device); + auto * get_features_fn = (ggml_backend_get_features_t) ggml_backend_reg_get_proc_address(reg, "ggml_backend_get_features"); + if (get_features_fn) { + ggml_backend_feature * features = get_features_fn(reg); + printf("%s features:\n", name); + if (features->name == nullptr) { + printf(" (no features reported)\n"); + } else { + for (; features->name; features++) { + printf(" %s = %s\n", features->name, features->value); + } + } + } +} + +static bool test_cpu_variant(const char * variant_name, const char * op_names_filter, + const char * params_filter, printer * output_printer) { + ggml_backend_load_variant("cpu", std::string(variant_name).substr(4).c_str()); + + std::string backend_ref_name = "CPU-ref"; + ggml_backend_load_variant("cpu", std::string(backend_ref_name).substr(4).c_str()); + + ggml_backend_t backend_ref = 
ggml_backend_init_by_name(backend_ref_name.c_str(), nullptr); + if (backend_ref == nullptr) { + printf("Error: CPU-ref backend not found. Make sure it's built and available.\n"); + return false; + } + print_backend_features(backend_ref); + + ggml_backend_t backend_variant = ggml_backend_init_by_name(variant_name, nullptr); + if (backend_variant == nullptr) { + printf("Error: CPU variant '%s' not found or failed to initialize.\n", variant_name); + printf("Use --list to see available variants.\n"); + ggml_backend_free(backend_ref); + return false; + } + print_backend_features(backend_variant); + + printf("Testing CPU variant '%s' against '%s' backend...\n\n", variant_name, backend_ref_name.c_str()); + + auto test_cases = make_test_cases_eval(); + + if (params_filter != nullptr) { + std::regex regex(params_filter); + test_cases.erase( + std::remove_if(test_cases.begin(), test_cases.end(), + [&regex](const auto & test_case) { + return !std::regex_search(test_case->vars(), regex); + }), + test_cases.end() + ); + } + + size_t n_ok = 0; + for (auto & test : test_cases) { + if (test->eval(backend_ref, backend_variant, op_names_filter, output_printer) != test_status_t::FAIL) { + n_ok++; + } + } + + output_printer->print_summary(test_summary_info(n_ok, test_cases.size(), false)); + + ggml_backend_free(backend_variant); + ggml_backend_free(backend_ref); + + return n_ok == test_cases.size(); +} + +static void list_cpu_variants() { + std::unordered_map<std::string, std::string> variant_names; + ggml_backend_load_all_variants("cpu"); + + for (size_t i = 0; i < ggml_backend_dev_count(); ++i) { + ggml_backend_dev_t dev = ggml_backend_dev_get(i); + if (ggml_backend_dev_type(dev) == GGML_BACKEND_DEVICE_TYPE_CPU) { + const char * name = ggml_backend_dev_name(dev); + if (strcmp(name, "CPU-ref") != 0) { + variant_names.emplace(name, ggml_backend_dev_description(dev)); + } + } + } + + if (variant_names.empty()) { + printf("No CPU backend variants found. 
To enable CPU variants, rebuild with:\n"); + printf(" cmake -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON\n"); + return; + } + + printf("CPU variants:\n"); + for (const auto & it : variant_names) { + printf(" %-15s - %s\n", it.first.c_str(), it.second.c_str()); + } +} + static void usage(char ** argv) { - printf("Usage: %s [mode] [-o ] [-b ] [-p ] [--output ] [--list-ops] [--show-coverage]\n", argv[0]); + printf("Usage: %s [mode] [-o ] [-b ] [-p ] [--output ] [--list-ops] [--list-cpu-variants] [--show-coverage]\n", argv[0]); printf(" valid modes:\n"); printf(" - test (default, compare with CPU backend for correctness)\n"); printf(" - grad (compare gradients from backpropagation with method of finite differences)\n"); printf(" - perf (performance evaluation)\n"); printf(" - support (probe backend operation support)\n"); + printf(" - cpu-variants (test CPU variants against cpu-ref backend)\n"); printf(" op names for -o are as given by ggml_op_desc() (e.g. ADD, MUL_MAT, etc),\n"); printf(" optionally including the full test case string (e.g. 
\"ADD(type=f16,ne=[1,1,8,1],nr=[1,1,1,1],nf=1)\")\n"); printf(" --output specifies output format (default: console, options: console, sql, csv)\n"); printf(" --list-ops lists all available GGML operations\n"); + printf(" --list-cpu-variants lists all available CPU backend variants\n"); printf(" --show-coverage shows test coverage\n"); + printf(" cpu-variants mode options:\n"); + printf(" --list lists available CPU variants on this system\n"); + printf(" --variant test specific CPU variant against cpu-ref backend\n"); } int main(int argc, char ** argv) { @@ -8575,6 +8680,7 @@ int main(int argc, char ** argv) { const char * op_names_filter = nullptr; const char * backend_filter = nullptr; const char * params_filter = nullptr; + const char * cpu_variant_name = nullptr; for (int i = 1; i < argc; i++) { if (strcmp(argv[i], "test") == 0) { @@ -8585,6 +8691,8 @@ int main(int argc, char ** argv) { mode = MODE_GRAD; } else if (strcmp(argv[i], "support") == 0) { mode = MODE_SUPPORT; + } else if (strcmp(argv[i], "cpu-variants") == 0) { + mode = MODE_CPU_VARIANTS; } else if (strcmp(argv[i], "-o") == 0) { if (i + 1 < argc) { op_names_filter = argv[++i]; @@ -8619,6 +8727,16 @@ int main(int argc, char ** argv) { } else if (strcmp(argv[i], "--list-ops") == 0) { list_all_ops(); return 0; + } else if (strcmp(argv[i], "--list") == 0) { + list_cpu_variants(); + return 0; + } else if (strcmp(argv[i], "--variant") == 0) { + if (i + 1 < argc) { + cpu_variant_name = argv[++i]; + } else { + usage(argv); + return 1; + } } else if (strcmp(argv[i], "--show-coverage") == 0) { show_test_coverage(); return 0; @@ -8628,8 +8746,6 @@ int main(int argc, char ** argv) { } } - // load and enumerate backends - ggml_backend_load_all(); // Create printer for output format std::unique_ptr output_printer = create_printer(output_format); @@ -8637,6 +8753,19 @@ int main(int argc, char ** argv) { output_printer->print_header(); } + if (mode == MODE_CPU_VARIANTS) { + if (cpu_variant_name == nullptr) { + 
printf("Error: cpu-variants mode requires --variant or --list\n"); + usage(argv); + return 1; + } + + return test_cpu_variant(cpu_variant_name, op_names_filter, params_filter, output_printer.get()) ? 0 : 1; + } + + // load and enumerate backends + ggml_backend_load_all(); + output_printer->print_testing_start(testing_start_info(ggml_backend_dev_count())); size_t n_ok = 0;