# gemma.cpp is a lightweight, standalone C++ inference engine for the Gemma
# foundation models from Google.

load("@rules_cc//cc:cc_binary.bzl", "cc_binary")
load("@rules_cc//cc:cc_library.bzl", "cc_library")
load("@rules_cc//cc:cc_test.bzl", "cc_test")
load("@rules_license//rules:license.bzl", "license")

# Package-wide defaults: every target below is publicly visible and is covered
# by the :license target unless it overrides these attributes.
package(
    default_applicable_licenses = [
        "//:license",  # Placeholder comment, do not modify
    ],
    # Placeholder for internal compatible_with
    default_visibility = ["//visibility:public"],
)

license(
    name = "license",
    package_name = "gemma_cpp",
)

# Dual-licensed Apache 2 and 3-clause BSD.
licenses(["notice"])

# Files that other packages may reference directly by label.
exports_files([
    "LICENSE",
    ".github/workflows/build.yml",
])

cc_library(
    name = "basics",
    srcs = ["util/basics.cc"],
    hdrs = ["util/basics.h"],
    deps = [
        "@highway//:hwy",
        "@highway//:timer",
        "@highway//hwy/contrib/sort:vqsort",
    ],
)

cc_test(
    name = "basics_test",
    srcs = ["util/basics_test.cc"],
    deps = [
        ":basics",
        "@googletest//:gtest_main",  # buildcleaner: keep
        "@highway//:hwy",
        "@highway//:hwy_test_util",
        "@highway//:timer",
    ],
)

# Header-only target (no srcs).
cc_library(
    name = "args",
    hdrs = ["util/args.h"],
    deps = [
        ":basics",
        "//io",  # Path
        "@highway//:hwy",
    ],
)

# Split from :threading to break a circular dependency with :allocator.
cc_library(
    name = "topology",
    srcs = ["util/topology.cc"],
    hdrs = ["util/topology.h"],
    deps = [
        "@highway//:hwy",
        "@highway//:topology",
    ],
)

cc_library(
    name = "allocator",
    srcs = ["util/allocator.cc"],
    hdrs = ["util/allocator.h"],
    deps = [
        ":basics",
        ":topology",
        "@highway//:hwy",
        "@highway//:thread_pool",
        "@highway//:topology",
    ],
)

cc_library(
    name = "threading",
    srcs = ["util/threading.cc"],
    hdrs = ["util/threading.h"],
    deps = [
        ":allocator",
        ":args",
        ":basics",
        ":topology",
        # Placeholder for container detection, do not remove
        "@highway//:hwy",
        "@highway//:thread_pool",
        "@highway//:topology",
    ],
)

cc_library(
    name = "threading_context",
    srcs = ["util/threading_context.cc"],
    hdrs = ["util/threading_context.h"],
    deps = [
        ":allocator",
        ":args",
        ":basics",
        ":threading",
        ":topology",
        ":zones",
        "@highway//:hwy",
        "@highway//:hwy_test_util",
        "@highway//:profiler",
        "@highway//:thread_pool",
    ],
)

cc_library(
    name = "zones",
    srcs = ["util/zones.cc"],
    hdrs = ["util/zones.h"],
    deps = [
        "@highway//:hwy",
        "@highway//:profiler",
        "@highway//:thread_pool",
    ],
)

cc_test(
    name = "flash_attention_test",
    srcs = ["gemma/flash_attention_test.cc"],
    deps = [
        ":configs",
        ":gemma_args",
        ":gemma_lib",
        ":kv_cache",
        ":mat",
        ":matmul",
        ":ops",
        ":threading_context",
        ":weights",
        "@googletest//:gtest_main",  # buildcleaner: keep
        "//compression:compress",
        "//compression:types",
        "@highway//:hwy",
        "@highway//:hwy_test_util",
    ],
)

cc_test(
    name = "threading_test",
    srcs = ["util/threading_test.cc"],
    deps = [
        ":basics",
        ":threading_context",
        "@googletest//:gtest_main",  # buildcleaner: keep
        "@highway//:auto_tune",
        "@highway//:hwy",
        "@highway//:hwy_test_util",
        "@highway//:nanobenchmark",
        "@highway//:robust_statistics",
        "@highway//:stats",
        "@highway//:thread_pool",
        "@highway//:timer",
    ],
)

# Header-only target (no srcs).
cc_library(
    name = "test_util",
    hdrs = ["util/test_util.h"],
    deps = [
        "@highway//:hwy",
        "@highway//:hwy_test_util",
        "@highway//:stats",
    ],
)

cc_library(
    name = "configs",
    srcs = ["gemma/configs.cc"],
    hdrs = ["gemma/configs.h"],
    deps = [
        ":basics",
        "//compression:types",
        "//io",
        "//io:fields",
        "@highway//:hwy",  # base.h
    ],
)

cc_test(
    name = "configs_test",
    srcs = ["gemma/configs_test.cc"],
    deps = [
        ":configs",
        "@googletest//:gtest_main",  # buildcleaner: keep
        "//compression:types",
        "//io:fields",
    ],
)

cc_library(
    name = "tensor_info",
    srcs = ["gemma/tensor_info.cc"],
    hdrs = ["gemma/tensor_info.h"],
    deps = [
        ":basics",
        ":configs",
        "//compression:types",
    ],
)

cc_library(
    name = "mat",
    srcs = ["util/mat.cc"],
    hdrs = ["util/mat.h"],
    deps = [
        ":allocator",
        ":basics",
        ":tensor_info",
        ":threading_context",
        "//compression:types",
        "//io:fields",
        "@highway//:hwy",
        "@highway//:profiler",
    ],
)

cc_library(
    name = "tokenizer",
    srcs = ["gemma/tokenizer.cc"],
    hdrs = ["gemma/tokenizer.h"],
    deps = [
        ":configs",
        "@highway//:hwy",
        "@highway//:profiler",
        "@com_google_sentencepiece//:sentencepiece_processor",
    ],
)

cc_library(
    name = "model_store",
    srcs = ["gemma/model_store.cc"],
    hdrs = ["gemma/model_store.h"],
    deps = [
        ":allocator",
        ":basics",
        ":configs",
        ":mat",
        ":tensor_info",
        ":threading_context",
        ":tokenizer",
        "//compression:types",
        "//io",
        "//io:blob_store",
        "//io:fields",
        "@highway//:hwy",
        "@highway//:profiler",
    ],
)

cc_library(
    name = "weights",
    srcs = ["gemma/weights.cc"],
    hdrs = ["gemma/weights.h"],
    deps = [
        ":configs",
        ":gemma_args",
        ":mat",
        ":model_store",
        ":tensor_info",
        ":threading_context",
        ":zones",
        "//compression:compress",
        "//io:blob_store",
        "@highway//:hwy",
        "@highway//:profiler",
    ],
)

cc_test(
    name = "tensor_info_test",
    srcs = ["gemma/tensor_info_test.cc"],
    deps = [
        ":configs",
        ":mat",
        ":tensor_info",
        ":weights",
        "@googletest//:gtest_main",  # buildcleaner: keep
        "//compression:compress",
        "@highway//:hwy",  # aligned_allocator.h
    ],
)

# For building all tests in one command, so we can test several.
test_suite(
    name = "ops_tests",
    tags = ["ops_tests"],
)

# Compiles ops/matmul.cc. The :matmul target below lists the same ops/matmul.h
# header and adds the textual -inl.h.
cc_library(
    name = "matmul_env",
    srcs = ["ops/matmul.cc"],
    hdrs = ["ops/matmul.h"],
    deps = [
        ":allocator",
        ":basics",
        ":mat",
        ":threading",
        ":threading_context",
        "@highway//:bit_set",
        "@highway//:hwy",
        "@highway//:nanobenchmark",
        "@highway//:profiler",
    ],
)

cc_library(
    name = "matmul",
    # allow depending only on this target, without also matmul_env.
    hdrs = ["ops/matmul.h"],
    textual_hdrs = ["ops/matmul-inl.h"],
    deps = [
        ":allocator",
        ":basics",
        ":mat",
        ":matmul_env",
        ":threading",
        ":threading_context",
        ":zones",
        "//compression:compress",
        "@highway//:bit_set",
        "@highway//:hwy",
        "@highway//:nanobenchmark",
        "@highway//:profiler",
    ],
)

cc_library(
    name = "matmul_static",
    srcs = [
        # single-file build time is ~30sec for msan, hence shard.
        "ops/matmul_static_bf16.cc",
        "ops/matmul_static_f32.cc",
        "ops/matmul_static_nuq.cc",
        "ops/matmul_static_sfp.cc",
        "ops/matmul_static_i8.cc",
    ],
    hdrs = [
        "ops/matmul_static.h",
    ],
    textual_hdrs = [
        "ops/matmul_static-inl.h",
        "ops/matmul-inl.h",
    ],
    deps = [
        ":allocator",
        ":basics",
        ":mat",
        ":matmul",
        ":matmul_env",
        ":threading_context",
        ":zones",
        "//compression:compress",
        "//compression:types",
        "@highway//:hwy",
        "@highway//:profiler",
        "@highway//:timer",
    ],
)

cc_library(
    name = "ops",
    hdrs = ["ops/ops.h"],
    textual_hdrs = [
        "ops/dot-inl.h",
        "ops/sum-inl.h",
        "ops/fp_arith-inl.h",
        "ops/ops-inl.h",
    ],
    deps = [
        ":allocator",
        ":basics",
        ":mat",
        ":matmul_env",  # MMOptions
        ":matmul_static",
        ":threading_context",
        ":zones",
        "//compression:compress",
        "@highway//:algo",
        "@highway//:bit_set",
        "@highway//:hwy",
        "@highway//:math",
        "@highway//:profiler",
        "@highway//hwy/contrib/sort:vqsort",
    ],
)

cc_test(
    name = "dot_test",
    size = "small",
    timeout = "long",
    srcs = ["ops/dot_test.cc"],
    linkstatic = True,
    local_defines = ["HWY_IS_TEST"],
    # for test_suite.
    tags = ["ops_tests"],
    deps = [
        ":allocator",
        ":ops",
        ":test_util",
        ":threading_context",
        "@googletest//:gtest_main",  # buildcleaner: keep
        "//compression:compress",
        "//compression:test_util",
        "@highway//:hwy",
        "@highway//:hwy_test_util",
        "@highway//:nanobenchmark",  #buildcleaner: keep
        "@highway//:profiler",
        "@highway//:stats",
    ],
)

cc_test(
    name = "ops_test",
    size = "small",
    timeout = "long",
    srcs = ["ops/ops_test.cc"],
    linkstatic = True,
    local_defines = ["HWY_IS_TEST"],
    # for test_suite.
    tags = ["ops_tests"],
    deps = [
        ":allocator",
        ":basics",
        ":configs",
        ":gemma_lib",
        ":mat",
        ":ops",
        ":test_util",
        ":threading_context",
        ":zones",
        "@googletest//:gtest_main",  # buildcleaner: keep
        "//compression:test_util",
        "//compression:types",
        "@highway//:hwy",
        "@highway//:hwy_test_util",
        "@highway//:nanobenchmark",  #buildcleaner: keep
        "@highway//:profiler",
    ],
)

cc_test(
    name = "matmul_test",
    size = "small",
    timeout = "long",
    srcs = ["ops/matmul_test.cc"],
    linkstatic = True,
    local_defines = ["HWY_IS_TEST"],
    # for test_suite.
    tags = ["ops_tests"],
    deps = [
        ":basics",
        ":mat",
        ":matmul_env",
        ":matmul_static",
        ":ops",
        ":threading_context",
        "@googletest//:gtest_main",  # buildcleaner: keep
        "//compression:compress",
        "//compression:test_util",
        "@highway//:hwy",
        "@highway//:hwy_test_util",
        "@highway//:nanobenchmark",
        "@highway//:thread_pool",
    ],
)

cc_test(
    name = "bench_matmul",
    size = "small",
    timeout = "long",
    srcs = ["ops/bench_matmul.cc"],
    linkstatic = True,
    local_defines = ["HWY_IS_TEST"],
    # NOTE(review): this target uses "notap" while gemma_test and
    # gemma_batch_bench use "no_tap" - confirm which spelling is intended.
    tags = [
        "manual",
        "notap",
        "ops_tests",  # for test_suite.
    ],
    deps = [
        ":basics",
        ":matmul_env",
        ":matmul_static",
        ":threading_context",
        "@googletest//:gtest_main",  # buildcleaner: keep
        "//compression:compress",
        "//compression:test_util",
        "@highway//:hwy",
        "@highway//:hwy_test_util",
        "@highway//:nanobenchmark",
        "@highway//:profiler",
    ],
)

cc_library(
    name = "kv_cache",
    srcs = ["gemma/kv_cache.cc"],
    hdrs = ["gemma/kv_cache.h"],
    deps = [
        ":basics",
        ":configs",
        ":gemma_args",
        ":mat",
        "@highway//:hwy",
    ],
)

# Header-only target (no srcs).
cc_library(
    name = "gemma_args",
    hdrs = ["gemma/gemma_args.h"],
    deps = [
        ":args",
        ":basics",
        ":mat",
        "//io",
        "@highway//:hwy",
        "@highway//:profiler",
    ],
)

cc_library(
    name = "gemma_lib",
    srcs = [
        "gemma/attention.cc",
        "gemma/flash_attention.cc",
        "gemma/gemma.cc",
        "gemma/vit.cc",
    ],
    hdrs = [
        "gemma/activations.h",
        "gemma/attention.h",
        "gemma/flash_attention.h",
        "gemma/gemma.h",
        "gemma/vit.h",
    ],
    exec_properties = {
        # Avoid linker OOMs when building with sanitizer instrumentation.
        "mem": "28g",
    },
    textual_hdrs = [
        "gemma/gemma-inl.h",
    ],
    deps = [
        ":basics",
        ":configs",
        ":gemma_args",
        ":kv_cache",
        ":mat",
        ":matmul",
        ":matmul_env",
        ":model_store",
        ":ops",
        ":threading",
        ":threading_context",
        ":weights",
        ":zones",
        "//compression:compress",
        "//compression:types",
        "//io",
        "//io:blob_store",
        "//paligemma:image",
        "@highway//:bit_set",
        "@highway//:hwy",
        "@highway//:nanobenchmark",  # timer
        "@highway//:profiler",
        "@highway//:thread_pool",
        "@highway//hwy/contrib/sort:vqsort",
    ],
)

cc_library(
    name = "cross_entropy",
    srcs = ["evals/cross_entropy.cc"],
    hdrs = ["evals/cross_entropy.h"],
    deps = [
        ":gemma_lib",
        ":ops",
        "//compression:types",
        "@highway//:hwy",
    ],
)

cc_library(
    name = "benchmark_helper",
    srcs = ["evals/benchmark_helper.cc"],
    hdrs = ["evals/benchmark_helper.h"],
    deps = [
        ":configs",
        ":cross_entropy",
        ":gemma_args",
        ":gemma_lib",
        ":matmul_env",
        ":ops",
        ":threading_context",
        ":tokenizer",
        "@google_benchmark//:benchmark",
        "//compression:compress",
        "@highway//:hwy",
        "@highway//:nanobenchmark",
        "@highway//:profiler",
    ],
)

# Library built from the sources under gemma/bindings/.
cc_library(
    name = "gemma_shared_lib",
    srcs = [
        "gemma/bindings/c_api.cc",
        "gemma/bindings/context.cc",
    ],
    hdrs = [
        "gemma/bindings/c_api.h",
        "gemma/bindings/context.h",
    ],
    exec_properties = {
        # Avoid linker OOMs when building with sanitizer instrumentation.
        "mem": "28g",
    },
    deps = [
        ":benchmark_helper",
        ":gemma_args",
        ":gemma_lib",
        ":kv_cache",
        ":matmul_env",
        ":threading",
        ":threading_context",
        ":tokenizer",
        "//paligemma:image",
        "@highway//:hwy",
        "@highway//:profiler",
        "@highway//:timer",
    ],
)

cc_test(
    name = "gemma_test",
    srcs = ["evals/gemma_test.cc"],
    linkstatic = True,
    # Requires model files
    tags = [
        "local",
        "manual",
        "no_tap",
    ],
    deps = [
        ":benchmark_helper",
        ":configs",
        ":gemma_lib",
        "@googletest//:gtest_main",  # buildcleaner: keep
        "//io",
        "@highway//:hwy",
        "@highway//:hwy_test_util",
    ],
)

cc_test(
    name = "gemma_batch_bench",
    srcs = ["evals/gemma_batch_bench.cc"],
    linkstatic = True,
    # Requires model files
    tags = [
        "local",
        "manual",
        "no_tap",
    ],
    deps = [
        ":benchmark_helper",
        ":gemma_lib",
        "@googletest//:gtest_main",  # buildcleaner: keep
        "@highway//:hwy",
        "@highway//:hwy_test_util",
        "@highway//:nanobenchmark",
        "@highway//:profiler",
    ],
)

cc_binary(
    name = "gemma",
    srcs = ["gemma/run.cc"],
    deps = [
        ":args",
        ":benchmark_helper",
        ":gemma_args",
        ":gemma_lib",
        ":matmul_env",
        ":tokenizer",
        "//compression:types",
        "//paligemma:image",
        "@highway//:hwy",
        "@highway//:profiler",
    ],
)

cc_binary(
    name = "single_benchmark",
    srcs = ["evals/benchmark.cc"],
    deps = [
        ":args",
        ":benchmark_helper",
        ":cross_entropy",
        ":gemma_lib",
        "//io",
        "@highway//:hwy",
        "@highway//:nanobenchmark",
        "@nlohmann_json//:json",
    ],
)

cc_binary(
    name = "benchmarks",
    srcs = [
        "evals/benchmarks.cc",
        "evals/prompts.h",
    ],
    deps = [
        ":benchmark_helper",
        "@google_benchmark//:benchmark",
        "@highway//:hwy",  # base.h
    ],
)

cc_binary(
    name = "debug_prompt",
    srcs = ["evals/debug_prompt.cc"],
    deps = [
        ":args",
        ":benchmark_helper",
        ":gemma_lib",
        "//io",
        "@highway//:hwy",
        "@nlohmann_json//:json",
    ],
)

cc_binary(
    name = "gemma_mmlu",
    srcs = ["evals/run_mmlu.cc"],
    deps = [
        ":args",
        ":benchmark_helper",
        ":gemma_lib",
        "//io",
        "@highway//:hwy",
        "@highway//:profiler",
        "@nlohmann_json//:json",
    ],
)