mirror of https://github.com/google/gemma.cpp.git

libgemma refactor - review changes

commit 0fc80fad05 (parent cc5c24c4f8)
examples/hello_world/CMakeLists.txt

@@ -14,7 +14,6 @@
 
 cmake_minimum_required(VERSION 3.11)
 project(hello_world)
-set(CMAKE_CXX_STANDARD 17)
 set(CMAKE_CXX_STANDARD_REQUIRED ON)
 
 include(FetchContent)
@@ -31,18 +30,18 @@ if (NOT BUILD_MODE)
   set(BUILD_MODE "remote")
 endif()
 if (BUILD_MODE STREQUAL "local")
+  # Relative path to gemma.cpp from examples/hello_world/build/
   FetchContent_Declare(gemma SOURCE_DIR ../../..)
 else()
   FetchContent_Declare(gemma GIT_REPOSITORY https://github.com/google/gemma.cpp.git GIT_TAG 8c7b2cf61b9794b806de091685dc6739dd3db837)
 endif()
 FetchContent_MakeAvailable(gemma)
 
 if(NOT CMAKE_BUILD_TYPE)
   set(CMAKE_BUILD_TYPE "Release")
 endif()
 
 add_executable(hello_world run.cc)
-set_property(TARGET hello_world PROPERTY CXX_STANDARD 17)
 target_link_libraries(hello_world hwy hwy_contrib sentencepiece libgemma)
 FetchContent_GetProperties(sentencepiece)
 target_include_directories(hello_world PRIVATE ${sentencepiece_SOURCE_DIR})
examples/hello_world/README.md

@@ -1,23 +1,29 @@
 # Hello World Example
 
-This is a minimal/template project for using `gemma.cpp` as a library. Instead of an interactive interface, it sets up the model state and generates text for a single hard coded prompt.
+This is a minimal/template project for using `gemma.cpp` as a library. Instead
+of an interactive interface, it sets up the model state and generates text for a
+single hard coded prompt.
 
-Build steps are similar to the main `gemma` executable. From inside the top-level directory. For now only `cmake`/`make` is available for builds (PRs welcome for other build options).
+Build steps are similar to the main `gemma` executable. For now only
+`cmake`/`make` is available for builds (PRs welcome for other build options).
 
-First use `cmake` to configure the project, assuming you are in the `hello_world` example directory (`gemma.cpp/examples/hello_world`):
+First use `cmake` to configure the project, starting from the `hello_world`
+example directory (`gemma.cpp/examples/hello_world`):
 
 ```sh
 cmake -B build
 ```
 
-This sets up a build configuration in `gemma.cpp/examples/hello_world/build`. Note that this fetches `libgemma` from a git commit hash on github. Alternatively if you want to build using the local version of `gemma.cpp` use:
+This sets up a build configuration in `gemma.cpp/examples/hello_world/build`.
+Note that this fetches `libgemma` from a git commit hash on GitHub.
+Alternatively, if you want to build using the local version of `gemma.cpp`, use:
 
 ```sh
 cmake -B build -DBUILD_MODE=local
 ```
 
-Make sure you delete the contents of the build directory before changing configurations.
+Make sure you delete the contents of the build directory before changing
+configurations.
 
 Then use `make` to build the project:

@@ -26,9 +32,13 @@ cd build
 make hello_world
 ```
 
-As with the top-level `gemma.cpp` project you can use the `make` commands `-j` flag to use parallel threads for faster builds.
+As with the top-level `gemma.cpp` project, you can use the `make` command's `-j`
+flag to use parallel threads for faster builds.
 
-From inside the `gemma.cpp/examples/hello_world/build` directory, there should be a `hello_world` executable. You can run it with the same 3 model arguments as gemma.cpp specifying the tokenizer, compressed weights file, and model type, for example:
+From inside the `gemma.cpp/examples/hello_world/build` directory, there should
+be a `hello_world` executable. You can run it with the same 3 model arguments as
+gemma.cpp, specifying the tokenizer, compressed weights file, and model type,
+for example:
 
 ```sh
 ./hello_world --tokenizer tokenizer.spm --compressed_weights 2b-it-sfp.sbs --model 2b-it
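Review note: for readers using this example as a template, here is roughly how the README's three flags map onto the library types visible elsewhere in this diff. This is a hedged sketch, not the example's actual code: the `gemma.h` include name, the `Model::GEMMA_2B` enumerator, and the brace-initialization of `Path` are assumptions; only the `Gemma` constructor, `KVCache`, and `CreateKVCache` signatures appear in the diff below.

```cpp
#include <thread>

#include "gemma.h"  // assumed include; provides gcpp::Gemma, Path, Model, KVCache
#include "hwy/contrib/thread_pool/thread_pool.h"

int main() {
  hwy::ThreadPool pool(std::thread::hardware_concurrency());
  // The three README flags become the Path/Model constructor arguments:
  gcpp::Path tokenizer{"tokenizer.spm"};     // --tokenizer
  gcpp::Path compressed{"2b-it-sfp.sbs"};    // --compressed_weights
  gcpp::Path weights{};                      // raw weights; assumption: may be
                                             // empty when compressed weights exist
  gcpp::Model type = gcpp::Model::GEMMA_2B;  // --model 2b-it (assumed enumerator)
  gcpp::Gemma model(tokenizer, compressed, weights, type, pool);
  gcpp::KVCache kv_cache = gcpp::CreateKVCache(type);
  // ... tokenize a prompt and call the generation entry point ...
  return 0;
}
```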
examples/hello_world/run.cc

@@ -24,12 +24,12 @@
 #include "hwy/contrib/thread_pool/thread_pool.h"
 
 std::vector<int> tokenize(
-    std::string prompt_string,
+    const std::string& prompt_string,
     const sentencepiece::SentencePieceProcessor* tokenizer) {
-  prompt_string = "<start_of_turn>user\n" + prompt_string +
+  std::string formatted = "<start_of_turn>user\n" + prompt_string +
                   "<end_of_turn>\n<start_of_turn>model\n";
   std::vector<int> tokens;
-  HWY_ASSERT(tokenizer->Encode(prompt_string, &tokens).ok());
+  HWY_ASSERT(tokenizer->Encode(formatted, &tokens).ok());
   tokens.insert(tokens.begin(), 2);  // BOS token
   return tokens;
 }
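Review note: `tokenize` now takes the prompt by `const` reference and builds the turn-marker wrapper in a local `formatted` string instead of mutating a by-value parameter. A minimal usage sketch, assuming a SentencePiece model file on disk (the path and `main` scaffolding are illustrative, not part of the diff):

```cpp
#include <string>
#include <vector>

#include "sentencepiece_processor.h"

// As defined in examples/hello_world/run.cc above.
std::vector<int> tokenize(const std::string& prompt_string,
                          const sentencepiece::SentencePieceProcessor* tokenizer);

int main() {
  sentencepiece::SentencePieceProcessor sp;
  if (!sp.Load("tokenizer.spm").ok()) return 1;  // hypothetical path
  // Result: BOS id (2), then the tokens of
  // "<start_of_turn>user\n<prompt><end_of_turn>\n<start_of_turn>model\n".
  std::vector<int> ids = tokenize("Hello, world!", &sp);
  return ids.empty() ? 1 : 0;
}
```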
gemma.cc

@@ -261,8 +261,7 @@ KVCache CreateKVCache(Model type) {
 
 template <class Config>
 struct GemmaImpl : public GemmaInterface {
-  GemmaImpl(  // const LoaderArgs& args,
-      std::unique_ptr<sentencepiece::SentencePieceProcessor>& tokenizer,
+  GemmaImpl(std::unique_ptr<sentencepiece::SentencePieceProcessor>& tokenizer,
             hwy::AlignedFreeUniquePtr<uint8_t[]>& compressed_weights,
             hwy::ThreadPool& pool);

@@ -767,16 +766,10 @@ GemmaImpl<Config>::GemmaImpl(
     std::unique_ptr<sentencepiece::SentencePieceProcessor>& tokenizer,
     hwy::AlignedFreeUniquePtr<uint8_t[]>& compressed_weights,
     hwy::ThreadPool& pool)
-    // GemmaImpl<Config>::GemmaImpl(const LoaderArgs& args, hwy::ThreadPool&
-    // pool)
     : compressed_weights(std::move(compressed_weights)),
-      // HWY_DYNAMIC_DISPATCH(GetCompressedWeightsT)(args, pool)),
       prefill(hwy::MakeUniqueAligned<Activations<Config, kPrefillBatchSize>>()),
       state(hwy::MakeUniqueAligned<Activations<Config, 1>>()),
-      tokenizer(std::move(tokenizer)) {
-  // PROFILER_ZONE("Startup.tokenizer");
-  // HWY_ASSERT(tokenizer.Load(args.tokenizer.path).ok());
-}
+      tokenizer(std::move(tokenizer)) {}
 
 template <>
 void GemmaImpl<ConfigGemma2B>::Generate(
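Review note: with the commented-out `LoaderArgs` path deleted, the constructor body collapses to `{}`, and both resources arrive by non-const reference and are `std::move`d into members, leaving the caller's handles empty. A generic, self-contained sketch of that ownership-transfer shape (`Holder` is illustrative, not a library type):

```cpp
#include <cassert>
#include <memory>
#include <utility>

// Same shape as GemmaImpl's constructor: accept a unique_ptr by lvalue
// reference, then move from it in the member-initializer list.
struct Holder {
  explicit Holder(std::unique_ptr<int>& p) : owned(std::move(p)) {}
  std::unique_ptr<int> owned;
};

int main() {
  auto p = std::make_unique<int>(42);
  Holder h(p);
  assert(p == nullptr);    // caller's pointer was moved from
  assert(*h.owned == 42);  // ownership now lives in the Holder
  return 0;
}
```

Taking the `unique_ptr` by value would make the transfer visible at call sites via an explicit `std::move`; the reference form used here trades that visibility for unchanged callers.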
@@ -804,10 +797,14 @@ void GemmaImpl<ConfigGemma7B>::Generate(
 Gemma::Gemma(const Path& tokenizer_path, const Path& compressed_weights_path,
              const Path& weights_path, Model model_type,
              hwy::ThreadPool& pool) {
+  {
     PROFILER_ZONE("Startup.tokenizer");
     std::unique_ptr<sentencepiece::SentencePieceProcessor> tokenizer =
         std::make_unique<sentencepiece::SentencePieceProcessor>();
-    HWY_ASSERT(tokenizer->Load(tokenizer_path.path).ok());
+    if (!tokenizer->Load(tokenizer_path.path).ok()) {
+      HWY_ABORT("Failed to load the tokenizer file.");
+    }
+  }
   auto compressed_weights = HWY_DYNAMIC_DISPATCH(GetCompressedWeightsT)(
       model_type, weights_path, compressed_weights_path, pool);
   switch (model_type) {
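Review note: two changes here. First, the `HWY_ASSERT` on the `Load()` result becomes an explicit check with `HWY_ABORT`, so a missing or corrupt tokenizer file reports a readable message rather than a stringified assertion. Second, the new braces put the tokenizer setup in its own block, so the RAII `PROFILER_ZONE` stops timing before the weight loading below it. A self-contained sketch of that scoping effect, using a stand-in timer rather than hwy's profiler:

```cpp
#include <chrono>
#include <cstdio>

// Stand-in for PROFILER_ZONE: an RAII timer that reports when its
// enclosing scope ends.
struct ScopedZone {
  explicit ScopedZone(const char* n) : name(n) {}
  ~ScopedZone() {
    auto us = std::chrono::duration_cast<std::chrono::microseconds>(
                  std::chrono::steady_clock::now() - start)
                  .count();
    std::printf("%s: %lld us\n", name, static_cast<long long>(us));
  }
  const char* name;
  std::chrono::steady_clock::time_point start = std::chrono::steady_clock::now();
};

int main() {
  {
    ScopedZone zone("Startup.tokenizer");
    // ... tokenizer loading happens here ...
  }  // zone reports here, before the next phase starts
  // ... weight loading is not attributed to "Startup.tokenizer" ...
  return 0;
}
```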
run.cc

@@ -190,7 +190,6 @@ void ReplGemma(gcpp::Gemma& model, gcpp::KVCache& kv_cache,
     }
   }
 
-  // HWY_ASSERT(model.Tokenizer().Encode(prompt_string, &prompt).ok());
   HWY_ASSERT(model.Tokenizer()->Encode(prompt_string, &prompt).ok());
 
   // For both pre-trained and instruction-tuned models: prepend "<bos>" token