# gemma.cpp is a lightweight, standalone C++ inference engine for the Gemma
# foundation models from Google.

load("@rules_license//rules:license.bzl", "license")

package(
    default_applicable_licenses = [
        "//:license",  # Placeholder comment, do not modify
    ],
    default_visibility = ["//visibility:public"],
)

license(
    name = "license",
    package_name = "gemma_cpp",
)

# Dual-licensed Apache 2 and 3-clause BSD.
licenses(["notice"])

exports_files(["LICENSE"])

# ------------------------------------------------------------------------------
# util/: header-only helper libraries

# Exports util/allocator.h; no sources of its own.
cc_library(
    name = "allocator",
    hdrs = ["util/allocator.h"],
    deps = ["@hwy//:hwy"],
)

# Exports util/test_util.h and pulls in Highway's test utilities and stats.
cc_library(
    name = "test_util",
    hdrs = ["util/test_util.h"],
    deps = [
        "@hwy//:hwy",
        "@hwy//:hwy_test_util",
        "@hwy//:stats",
    ],
)

# Exports util/threading.h on top of Highway's thread pool and topology.
cc_library(
    name = "threading",
    hdrs = ["util/threading.h"],
    deps = [
        "@hwy//:hwy",
        "@hwy//:thread_pool",
        "@hwy//:topology",
    ],
)

# ------------------------------------------------------------------------------
# ops/: dot, matmul, matvec and related headers, plus their tests

# The *-inl.h files are textual_hdrs: they are #included by other translation
# units rather than being compiled as standalone headers.
cc_library(
    name = "ops",
    hdrs = ["ops/matmul.h"],
    textual_hdrs = [
        "ops/dot-inl.h",
        "ops/matmul-inl.h",
        "ops/matvec-inl.h",
        "ops/ops-inl.h",
    ],
    deps = [
        ":allocator",
        ":threading",
        "//compression:compress",
        "//compression:sfp",
        "@hwy//:algo",
        "@hwy//:dot",
        "@hwy//:hwy",
        "@hwy//:math",
        "@hwy//:matvec",
        "@hwy//:profiler",
        "@hwy//:thread_pool",
    ],
)

# The four ops tests below share the same settings: size "small" with the
# timeout raised to "long", HWY_IS_TEST defined locally, and the
# "hwy_ops_test" tag.
cc_test(
    name = "dot_test",
    size = "small",
    timeout = "long",
    srcs = ["ops/dot_test.cc"],
    local_defines = ["HWY_IS_TEST"],
    # for test_suite.
    tags = ["hwy_ops_test"],
    deps = [
        ":allocator",
        ":common",
        ":gemma_lib",
        ":ops",
        ":threading",
        "@googletest//:gtest_main",  # buildcleaner: keep
        "//compression:compress",
        "@hwy//:hwy",
        "@hwy//:hwy_test_util",
        "@hwy//:nanobenchmark",  #buildcleaner: keep
        "@hwy//:profiler",
        "@hwy//:stats",
    ],
)

cc_test(
    name = "ops_test",
    size = "small",
    timeout = "long",
    srcs = ["ops/ops_test.cc"],
    local_defines = ["HWY_IS_TEST"],
    # for test_suite.
    tags = ["hwy_ops_test"],
    deps = [
        ":allocator",
        ":common",
        ":gemma_lib",
        ":ops",
        ":test_util",
        "@googletest//:gtest_main",  # buildcleaner: keep
        "//compression:compress",
        "@hwy//:hwy",
        "@hwy//:hwy_test_util",
        "@hwy//:nanobenchmark",  #buildcleaner: keep
    ],
)

cc_test(
    name = "gemma_matvec_test",
    size = "small",
    timeout = "long",
    srcs = ["ops/gemma_matvec_test.cc"],
    local_defines = ["HWY_IS_TEST"],
    # for test_suite.
    tags = ["hwy_ops_test"],
    deps = [
        ":ops",
        "@googletest//:gtest_main",  # buildcleaner: keep
        "//compression:compress",
        "@hwy//:hwy",
        "@hwy//:hwy_test_util",
        "@hwy//:nanobenchmark",
        "@hwy//:thread_pool",
    ],
)

cc_test(
    name = "matmul_test",
    size = "small",
    timeout = "long",
    srcs = ["ops/matmul_test.cc"],
    local_defines = ["HWY_IS_TEST"],
    # for test_suite.
    tags = ["hwy_ops_test"],
    deps = [
        ":ops",
        ":threading",
        "@googletest//:gtest_main",  # buildcleaner: keep
        "//compression:compress",
        "@hwy//:hwy",
        "@hwy//:hwy_test_util",
        "@hwy//:nanobenchmark",
        "@hwy//:thread_pool",
    ],
)

# ------------------------------------------------------------------------------
# gemma/: model libraries

# gemma/common.cc with the common.h and configs.h headers.
cc_library(
    name = "common",
    srcs = ["gemma/common.cc"],
    hdrs = [
        "gemma/common.h",
        "gemma/configs.h",
    ],
    deps = [
        "//compression:compress",
        "@hwy//:hwy",  # base.h
        "@hwy//:thread_pool",
    ],
)

# gemma/weights.{h,cc}.
cc_library(
    name = "weights",
    srcs = ["gemma/weights.cc"],
    hdrs = ["gemma/weights.h"],
    deps = [
        ":allocator",
        ":common",
        "//compression:compress",
        "//compression:io",
        "@hwy//:hwy",
        "@hwy//:profiler",
        "@hwy//:stats",
        "@hwy//:thread_pool",
    ],
)

# gemma/tokenizer.{h,cc}; links against the SentencePiece processor.
cc_library(
    name = "tokenizer",
    srcs = ["gemma/tokenizer.cc"],
    hdrs = ["gemma/tokenizer.h"],
    deps = [
        ":common",
        "//compression:io",
        "@hwy//:hwy",
        "@hwy//:nanobenchmark",  # timer
        "@hwy//:profiler",
        "@com_google_sentencepiece//:sentencepiece_processor",
    ],
)

# gemma/kv_cache.{h,cc}.
cc_library(
    name = "kv_cache",
    srcs = ["gemma/kv_cache.cc"],
    hdrs = ["gemma/kv_cache.h"],
    deps = [
        ":common",
        "@hwy//:hwy",
    ],
)

# gemma/gemma.cc plus every source under gemma/instantiations/. Those files
# come in bf16/f32/sfp triples for each of 27b, 2b, 7b, 9b, tiny, gr2b and
# gemma2_2b.
cc_library(
    name = "gemma_lib",
    srcs = [
        "gemma/gemma.cc",
        "gemma/instantiations/27b_bf16.cc",
        "gemma/instantiations/27b_f32.cc",
        "gemma/instantiations/27b_sfp.cc",
        "gemma/instantiations/2b_bf16.cc",
        "gemma/instantiations/2b_f32.cc",
        "gemma/instantiations/2b_sfp.cc",
        "gemma/instantiations/7b_bf16.cc",
        "gemma/instantiations/7b_f32.cc",
        "gemma/instantiations/7b_sfp.cc",
        "gemma/instantiations/9b_bf16.cc",
        "gemma/instantiations/9b_f32.cc",
        "gemma/instantiations/9b_sfp.cc",
        "gemma/instantiations/tiny_bf16.cc",
        "gemma/instantiations/tiny_f32.cc",
        "gemma/instantiations/tiny_sfp.cc",
        "gemma/instantiations/gr2b_bf16.cc",
        "gemma/instantiations/gr2b_f32.cc",
        "gemma/instantiations/gr2b_sfp.cc",
        "gemma/instantiations/gemma2_2b_bf16.cc",
        "gemma/instantiations/gemma2_2b_f32.cc",
        "gemma/instantiations/gemma2_2b_sfp.cc",
    ],
    hdrs = [
        "gemma/activations.h",
        "gemma/gemma.h",
    ],
    exec_properties = {
        # Avoid linker OOMs when building with sanitizer instrumentation.
        "mem": "28g",
    },
    textual_hdrs = [
        "gemma/gemma-inl.h",
        # Placeholder for internal file2, do not remove,
    ],
    deps = [
        ":allocator",
        ":common",
        ":ops",
        ":tokenizer",
        ":kv_cache",
        ":weights",
        ":threading",
        "//compression:io",
        "@hwy//:hwy",
        "@hwy//:bit_set",
        "@hwy//:matvec",
        "@hwy//:nanobenchmark",  # timer
        "@hwy//:profiler",
        "@hwy//:thread_pool",
        "@hwy//:topology",
    ],
)

# ------------------------------------------------------------------------------
# evals/ and util/: evaluation helpers and application plumbing

# evals/cross_entropy.{h,cc}.
cc_library(
    name = "cross_entropy",
    srcs = ["evals/cross_entropy.cc"],
    hdrs = ["evals/cross_entropy.h"],
    deps = [
        ":common",
        ":gemma_lib",
        "@hwy//:hwy",
    ],
)

# Exports util/args.h; no sources of its own.
cc_library(
    name = "args",
    hdrs = ["util/args.h"],
    deps = [
        "//compression:io",
        "@hwy//:hwy",
    ],
)

# Exports util/app.h; no sources of its own.
cc_library(
    name = "app",
    hdrs = ["util/app.h"],
    deps = [
        ":args",
        ":common",
        ":gemma_lib",
        "//compression:io",
        "@hwy//:hwy",
        "@hwy//:thread_pool",
        "@hwy//:topology",
    ],
)

# evals/benchmark_helper.{h,cc}; a dependency of gemma_test and of every
# cc_binary in this file.
cc_library(
    name = "benchmark_helper",
    srcs = ["evals/benchmark_helper.cc"],
    hdrs = ["evals/benchmark_helper.h"],
    deps = [
        ":app",
        ":args",
        ":common",
        ":cross_entropy",
        ":gemma_lib",
        ":kv_cache",
        ":threading",
        # Placeholder for internal dep, do not remove.,
        "@benchmark//:benchmark",
        "//compression:compress",
        "@hwy//:hwy",
        "@hwy//:nanobenchmark",
        "@hwy//:thread_pool",
    ],
)

# Tagged "manual", so wildcard target patterns such as //... do not pick it
# up; it has to be requested explicitly.
cc_test(
    name = "gemma_test",
    srcs = ["evals/gemma_test.cc"],
    # Requires model files
    tags = [
        "local",
        "manual",
        "no_tap",
    ],
    deps = [
        ":benchmark_helper",
        ":common",
        ":gemma_lib",
        ":tokenizer",
        "@googletest//:gtest_main",
        "@hwy//:hwy",
        "@hwy//:hwy_test_util",
    ],
)

# ------------------------------------------------------------------------------
# Executables

# Binary built from gemma/run.cc.
cc_binary(
    name = "gemma",
    srcs = ["gemma/run.cc"],
    deps = [
        ":app",
        ":args",
        ":benchmark_helper",
        ":common",
        ":gemma_lib",
        ":threading",
        # Placeholder for internal dep, do not remove.,
        "@hwy//:hwy",
        "@hwy//:profiler",
        "@hwy//:thread_pool",
    ],
)

# Binary built from evals/benchmark.cc.
cc_binary(
    name = "single_benchmark",
    srcs = ["evals/benchmark.cc"],
    deps = [
        ":app",
        ":args",
        ":benchmark_helper",
        ":common",
        ":cross_entropy",
        ":gemma_lib",
        "//compression:io",
        "@hwy//:hwy",
        "@hwy//:nanobenchmark",
        "@hwy//:thread_pool",
        "@nlohmann_json//:json",
    ],
)

# Binary built from evals/benchmarks.cc; evals/prompts.h is listed in srcs, so
# it is private to this target.
cc_binary(
    name = "benchmarks",
    srcs = [
        "evals/benchmarks.cc",
        "evals/prompts.h",
    ],
    deps = [
        ":benchmark_helper",
        "@benchmark//:benchmark",
    ],
)

# Binary built from evals/debug_prompt.cc.
cc_binary(
    name = "debug_prompt",
    srcs = ["evals/debug_prompt.cc"],
    deps = [
        ":app",
        ":args",
        ":benchmark_helper",
        ":gemma_lib",
        "//compression:io",
        "@hwy//:hwy",
        "@hwy//:thread_pool",
        "@nlohmann_json//:json",
    ],
)

# Binary built from evals/run_mmlu.cc.
cc_binary(
    name = "gemma_mmlu",
    srcs = ["evals/run_mmlu.cc"],
    deps = [
        ":app",
        ":args",
        ":benchmark_helper",
        ":gemma_lib",
        "//compression:io",
        "@hwy//:hwy",
        "@hwy//:profiler",
        "@hwy//:thread_pool",
        "@nlohmann_json//:json",
    ],
)

# ------------------------------------------------------------------------------
# backprop/: forward/backward passes, optimizer and their tests

# Exports backprop/prompt.h; declares no dependencies.
cc_library(
    name = "prompt",
    hdrs = ["backprop/prompt.h"],
    deps = [],
)

# Exports backprop/sampler.h.
cc_library(
    name = "sampler",
    hdrs = ["backprop/sampler.h"],
    deps = [":prompt"],
)

# backward.cc and forward.cc with their headers; the matching *-inl.h files
# are textual_hdrs.
# NOTE(review): backprop/activations.h is exported here and by
# :backprop_scalar below.
cc_library(
    name = "backprop",
    srcs = [
        "backprop/backward.cc",
        "backprop/forward.cc",
    ],
    hdrs = [
        "backprop/activations.h",
        "backprop/backward.h",
        "backprop/forward.h",
    ],
    textual_hdrs = [
        "backprop/backward-inl.h",
        "backprop/forward-inl.h",
    ],
    deps = [
        ":allocator",
        ":common",
        ":gemma_lib",
        ":ops",
        ":prompt",
        ":weights",
        "@hwy//:hwy",  # base.h
        "@hwy//:thread_pool",
    ],
)

# Header-only target exporting the *_scalar.h headers plus
# backprop/activations.h.
cc_library(
    name = "backprop_scalar",
    hdrs = [
        "backprop/activations.h",
        "backprop/backward_scalar.h",
        "backprop/common_scalar.h",
        "backprop/forward_scalar.h",
    ],
    deps = [
        ":allocator",
        ":common",
        ":gemma_lib",
        ":prompt",
        "//compression:weights_raw",
    ],
)

# backprop/test_util.h is listed in srcs of both backward tests rather than
# being exported by a library.
cc_test(
    name = "backward_scalar_test",
    size = "large",
    srcs = [
        "backprop/backward_scalar_test.cc",
        "backprop/test_util.h",
    ],
    deps = [
        ":backprop_scalar",
        ":common",
        ":gemma_lib",
        ":prompt",
        ":sampler",
        "@googletest//:gtest_main",
        "//compression:weights_raw",
    ],
)

cc_test(
    name = "backward_test",
    size = "large",
    srcs = [
        "backprop/backward_test.cc",
        "backprop/test_util.h",
    ],
    exec_properties = {
        # Avoid linker OOMs when building with sanitizer instrumentation.
        "mem": "28g",
    },
    deps = [
        ":backprop",
        ":backprop_scalar",
        ":common",
        ":gemma_lib",
        ":ops",
        ":prompt",
        ":sampler",
        "@googletest//:gtest_main",
        "//compression:weights_raw",
        "@hwy//:hwy",
        "@hwy//:hwy_test_util",
        "@hwy//:thread_pool",
    ],
)

# backprop/optimizer.{h,cc}.
cc_library(
    name = "optimizer",
    srcs = ["backprop/optimizer.cc"],
    hdrs = ["backprop/optimizer.h"],
    deps = [
        ":allocator",
        ":common",
        ":weights",
        "//compression:compress",
        "@hwy//:hwy",
        "@hwy//:thread_pool",
    ],
)

# No explicit size is set on this test, unlike the backward tests above.
cc_test(
    name = "optimize_test",
    srcs = ["backprop/optimize_test.cc"],
    exec_properties = {
        # Avoid linker OOMs when building with sanitizer instrumentation.
        "mem": "28g",
    },
    deps = [
        ":backprop",
        ":common",
        ":gemma_lib",
        ":optimizer",
        ":prompt",
        ":sampler",
        ":threading",
        ":weights",
        "@googletest//:gtest_main",
        "@hwy//:thread_pool",
    ],
)