diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 6fa432c..a0e9dc2 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -72,4 +72,4 @@ jobs: with: path: ~/.cache/bazel key: bazel-${{ runner.os }} - - run: bazel build --cxxopt=-std=c++20 //... + - run: bazel build -c opt --cxxopt=-std=c++20 //... \ No newline at end of file diff --git a/BUILD.bazel b/BUILD.bazel index 319421f..84b393c 100644 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -4,9 +4,7 @@ load("@rules_license//rules:license.bzl", "license") package( - default_applicable_licenses = [ - "//:license", # Placeholder comment, do not modify - ], + default_applicable_licenses = ["//third_party/gemma_cpp:license"], default_visibility = ["//visibility:public"], ) diff --git a/bazel/BUILD b/bazel/BUILD index 194a082..952624f 100644 --- a/bazel/BUILD +++ b/bazel/BUILD @@ -1,4 +1,3 @@ -# Required for referencing bazel:com_google_sentencepiece.patch package( default_applicable_licenses = ["//:license"], default_visibility = ["//:__subpackages__"], diff --git a/compression/BUILD b/compression/BUILD index cfbeb99..ddf30c5 100644 --- a/compression/BUILD +++ b/compression/BUILD @@ -1,12 +1,10 @@ # Weight compression, I/O and analysis package( - default_applicable_licenses = [ - "//:license", # Placeholder comment, do not modify - ], + default_applicable_licenses = ["//third_party/gemma_cpp:license"], default_visibility = [ # Placeholder for internal visibility, - "//:__subpackages__", # Placeholder, do not modify + "//third_party/gemma_cpp:__subpackages__", ], ) diff --git a/examples/hello_world/run.cc b/examples/hello_world/run.cc index a352250..a994f31 100644 --- a/examples/hello_world/run.cc +++ b/examples/hello_world/run.cc @@ -17,10 +17,13 @@ // copybara:import_next_line:gemma_cpp #include "gemma.h" -// copybara:import_next_line:gemma_cpp -#include "util/app.h" // LoaderArgs +// copybara:end // copybara:import_next_line:gemma_cpp #include "util/args.h" +// copybara:end +// copybara:import_next_line:gemma_cpp +#include "util/app.h" // LoaderArgs +// copybara:end #include "hwy/contrib/thread_pool/thread_pool.h" std::vector tokenize( diff --git a/gemma.cc b/gemma.cc index a230bea..b41bd9c 100644 --- a/gemma.cc +++ b/gemma.cc @@ -25,8 +25,6 @@ #include "compression/compress-inl.h" // copybara:import_next_line:gemma_cpp #include "ops.h" -// copybara:import_next_line:gemma_cpp -#include "util/args.h" // Path #include "hwy/contrib/matvec/matvec-inl.h" #include "hwy/highway.h" #include "hwy/profiler.h" @@ -52,7 +50,10 @@ #include #include +// copybara:strip_begin +// Required because sentencepiece uses Google I/O which requires InitGoogle. // Placeholder for internal header, do not modify. +// copybara:strip_end // copybara:import_next_line:gemma_cpp #include "compression/compress.h" @@ -817,9 +818,8 @@ void GemmaImpl::Generate( } Gemma::Gemma(const Path& tokenizer_path, const Path& compressed_weights_path, - const Path& weights_path, Model model_type, ModelTraining training, - hwy::ThreadPool& pool) - : model_training(training) { + const Path& weights_path, Model model_type, + hwy::ThreadPool& pool) { std::unique_ptr tokenizer; { PROFILER_ZONE("Startup.tokenizer"); @@ -844,6 +844,11 @@ Gemma::Gemma(const Path& tokenizer_path, const Path& compressed_weights_path, } } +Gemma::Gemma(const Path& tokenizer_path, const Path& compressed_weights_path, + Model model_type, hwy::ThreadPool& pool) + : Gemma(tokenizer_path, compressed_weights_path, Path{""}, model_type, + pool) {} + Gemma::~Gemma() = default; // after GemmaInterface is defined const sentencepiece::SentencePieceProcessor* Gemma::Tokenizer() const { diff --git a/gemma.h b/gemma.h index 8c4cab8..cdd4873 100644 --- a/gemma.h +++ b/gemma.h @@ -16,20 +16,29 @@ #ifndef THIRD_PARTY_GEMMA_CPP_GEMMA_H_ #define THIRD_PARTY_GEMMA_CPP_GEMMA_H_ +#include +#include #include #include #include +#include #include // copybara:import_next_line:gemma_cpp #include "compression/compress.h" // SfpStream/NuqStream +// copybara:end // copybara:import_next_line:gemma_cpp -#include "util/args.h" // Path +#include "configs.h" // kSeqLen +// copybara:end +// copybara:import_next_line:gemma_cpp +#include "util/args.h" // ArgsBase +// copybara:end #include "hwy/aligned_allocator.h" #include "hwy/base.h" // hwy::bfloat16_t #include "hwy/contrib/thread_pool/thread_pool.h" // copybara:import_next_line:sentencepiece #include "src/sentencepiece_processor.h" +// copybara:end namespace gcpp { @@ -66,8 +75,9 @@ struct GemmaInterface; struct Gemma { Gemma(const Path& tokenizer_path, const Path& compressed_weights_path, - const Path& weights_path, Model model_type, ModelTraining training, - hwy::ThreadPool& pool); + const Path& weights_path, Model model_type, hwy::ThreadPool& pool); + Gemma(const Path& tokenizer_path, const Path& compressed_weights_path, + Model model_type, hwy::ThreadPool& pool); ~Gemma(); // must be defined after GemmaInterface's dtor is defined. const sentencepiece::SentencePieceProcessor* Tokenizer() const; std::unique_ptr impl_; diff --git a/ops.h b/ops.h index 7aa7b62..481e1d7 100644 --- a/ops.h +++ b/ops.h @@ -341,21 +341,20 @@ static HWY_NOINLINE HWY_MAYBE_UNUSED float Dot(const float* HWY_RESTRICT a, static HWY_NOINLINE HWY_MAYBE_UNUSED float SquaredL2( const float* HWY_RESTRICT a, size_t size) { const hn::ScalableTag d; - using V = hn::Vec; const size_t N = hn::Lanes(d); HWY_DASSERT(size >= 2 * N); HWY_DASSERT(size % (2 * N) == 0); - V sum0 = hn::Zero(d); - V sum1 = hn::Zero(d); + auto sum0 = hn::Zero(d); + auto sum1 = hn::Zero(d); for (size_t i = 0; i <= size - 2 * N; i += 2 * N) { - const V a0 = hn::LoadU(d, a + i); - sum0 = hn::MulAdd(a0, a0, sum0); - const V a1 = hn::LoadU(d, a + i + N); - sum1 = hn::MulAdd(a1, a1, sum1); + const auto a0 = LoadU(d, a + i); + sum0 = MulAdd(a0, a0, sum0); + const auto a1 = LoadU(d, a + i + N); + sum1 = MulAdd(a1, a1, sum1); } - return hn::ReduceSum(d, hn::Add(sum0, sum1)); + return ReduceSum(d, Add(sum0, sum1)); } static HWY_NOINLINE HWY_MAYBE_UNUSED void RMSNorm( diff --git a/run.cc b/run.cc index 3f38031..45f6783 100644 --- a/run.cc +++ b/run.cc @@ -22,15 +22,19 @@ #include // NOLINT #include -// Placeholder for internal header, do not modify. +// Placeholder for internal header, do not modify. // copybara:strip // copybara:import_next_line:gemma_cpp #include "compression/compress.h" +// copybara:end // copybara:import_next_line:gemma_cpp #include "gemma.h" // Gemma +// copybara:end // copybara:import_next_line:gemma_cpp #include "util/app.h" +// copybara:end // copybara:import_next_line:gemma_cpp #include "util/args.h" // HasHelp +// copybara:end #include "hwy/base.h" #include "hwy/contrib/thread_pool/thread_pool.h" #include "hwy/highway.h" @@ -231,8 +235,8 @@ void Run(LoaderArgs& loader, InferenceArgs& inference, AppArgs& app) { [](uint64_t /*task*/, size_t thread) { PinThreadToCore(thread); }); } - gcpp::Gemma model(loader.tokenizer, loader.compressed_weights, loader.weights, - loader.ModelType(), loader.ModelTraining(), pool); + gcpp::Gemma model(loader.tokenizer, loader.compressed_weights, + loader.ModelType(), pool); auto kv_cache = CreateKVCache(loader.ModelType()); @@ -274,7 +278,9 @@ int main(int argc, char** argv) { { PROFILER_ZONE("Startup.misc"); + // copybara:strip_begin // Placeholder for internal init, do not modify. + // copybara:strip_end gcpp::LoaderArgs loader(argc, argv); gcpp::InferenceArgs inference(argc, argv); diff --git a/util/app.h b/util/app.h index ac37971..cd6cb6c 100644 --- a/util/app.h +++ b/util/app.h @@ -34,10 +34,15 @@ // copybara:import_next_line:gemma_cpp #include "configs.h" +// copybara:end + // copybara:import_next_line:gemma_cpp #include "gemma.h" +// copybara:end + // copybara:import_next_line:gemma_cpp #include "util/args.h" +// copybara:end #include "hwy/base.h" // HWY_ASSERT namespace gcpp {