Simplified interface class and example for Gemma.cpp usage.

PiperOrigin-RevId: 720591037
2025-01-28 08:47:55 -08:00 · 2025-01-28 08:47:55 -08:00 · 23dac72463
parent 7af2e70321
commit 23dac72463
7 changed files with 328 additions and 2 deletions
--- a/examples/simplified_gemma/BUILD.bazel
+++ b/examples/simplified_gemma/BUILD.bazel
@ -0,0 +1,39 @@
 # Hello World example frontend to gemma.cpp.
 # Package-wide defaults: every target in this BUILD file uses the repository
 # license target and is publicly visible.
 package(
    default_applicable_licenses = [
        "//:license",  # Placeholder comment, do not modify
    ],
    default_visibility = ["//visibility:public"],
 )
 # Header-only library exposing gemma.hpp. There are no srcs; the deps listed
 # here are the gemma.cpp and Highway targets made available to anything that
 # depends on this library.
 cc_library(
    name = "gemma",
    hdrs = ["gemma.hpp"],
    deps = [
        "//:app",
        "//:args",
        "//:common",
        "//:gemma_lib",
        "//:threading",
        "//:tokenizer",
        "@highway//:hwy",
        "@highway//:thread_pool",
    ],
 )
 # Example executable built from run.cc on top of the ":gemma" header library
 # declared in this package. The remaining deps repeat the gemma.cpp and Highway
 # targets that ":gemma" already lists.
 cc_binary(
    name = "simplified_gemma",
    srcs = ["run.cc"],
    deps = [
        ":gemma",
        # Placeholder for internal dep, do not remove.,
        "//:app",
        "//:args",
        "//:common",
        "//:gemma_lib",
        "//:threading",
        "//:tokenizer",
        "@highway//:hwy",
        "@highway//:thread_pool",
    ],
 )
--- a/examples/simplified_gemma/CMakeLists.txt
+++ b/examples/simplified_gemma/CMakeLists.txt
@ -0,0 +1,49 @@
 # Copyright 2019 Google LLC
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #      http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 cmake_minimum_required(VERSION 3.11)
 project(simplified_gemma)
 # NOTE(review): CMAKE_CXX_STANDARD itself is not set anywhere in this file, so
 # this line alone does not select a language standard. Confirm that the
 # standard the example sources need is supplied elsewhere.
 set(CMAKE_CXX_STANDARD_REQUIRED ON)
 include(FetchContent)
 # Third-party dependencies, each pinned to a specific git commit.
 FetchContent_Declare(highway GIT_REPOSITORY https://github.com/google/highway.git GIT_TAG f2209b911c74019e85d0b7a7a2833c9a2e1b7995)
 FetchContent_MakeAvailable(highway)
 FetchContent_Declare(sentencepiece GIT_REPOSITORY https://github.com/google/sentencepiece GIT_TAG 53de76561cfc149d3c01037f0595669ad32a5e7c)
 FetchContent_MakeAvailable(sentencepiece)
 # Allow for both local and remote builds of gemma.cpp. Pass
 # -DBUILD_MODE=local to use the checkout that contains this example; any other
 # value (including the "remote" default set below) fetches a pinned commit.
 # NOTE(review): option() declares a boolean cache entry, while BUILD_MODE is
 # compared as a string below; a CACHE STRING variable may express the intent
 # more directly.
 option(BUILD_MODE "'local' or 'remote' git fetch for builds")
 if (NOT BUILD_MODE)
  set(BUILD_MODE "remote")
 endif()
 if (BUILD_MODE STREQUAL "local")
  # Relative path to gemma.cpp from examples/simplified_gemma/build/
  FetchContent_Declare(gemma SOURCE_DIR ../../..) 
 else()
  FetchContent_Declare(gemma GIT_REPOSITORY https://github.com/google/gemma.cpp.git GIT_TAG a9aa63fd2ea6b786ed0706d619588bfe2d43370e)
 endif()
 FetchContent_MakeAvailable(gemma)
 # Default to a Release build when the user did not choose a build type.
 if(NOT CMAKE_BUILD_TYPE)
    set(CMAKE_BUILD_TYPE "Release")
 endif()
 # The example executable and the libraries it links against.
 add_executable(simplified_gemma run.cc)
 target_link_libraries(simplified_gemma hwy hwy_contrib sentencepiece libgemma)
 # Add the fetched SentencePiece source directory to the include path.
 FetchContent_GetProperties(sentencepiece)
 target_include_directories(simplified_gemma PRIVATE ${sentencepiece_SOURCE_DIR})
 # The generator expressions below apply these settings on Windows only.
 target_compile_definitions(simplified_gemma PRIVATE $<$<PLATFORM_ID:Windows>:_CRT_SECURE_NO_WARNINGS NOMINMAX>)
 target_compile_options(simplified_gemma PRIVATE $<$<PLATFORM_ID:Windows>:-Wno-deprecated-declarations>)
--- a/examples/simplified_gemma/README.md
+++ b/examples/simplified_gemma/README.md
@ -0,0 +1,60 @@
 # Simplified Gemma.cpp Example
 This is a minimal/template project for using `gemma.cpp` as a library. Instead
 of an interactive interface, it sets up the model state and generates text for a
 single hard-coded prompt.
 Build steps are similar to the main `gemma` executable. For now only
 `cmake`/`make` is available for builds (PRs welcome for other build options).
 First use `cmake` to configure the project, starting from the `simplified_gemma`
 example directory (`gemma.cpp/examples/simplified_gemma`):
 ```sh
 cmake -B build
 ```
 This sets up a build configuration in `gemma.cpp/examples/simplified_gemma/build`.
 Note that this fetches `libgemma` from a git commit hash on github.
 Alternatively if you want to build using the local version of `gemma.cpp` use:
 ```sh
 cmake -B build -DBUILD_MODE=local
 ```
 Make sure you delete the contents of the build directory before changing
 configurations.
 Then use `make` to build the project:
 ```sh
 cd build
 make simplified_gemma
 ```
 As with the top-level `gemma.cpp` project, you can use the `make` command's `-j`
 flag to use parallel threads for faster builds.
 From inside the `gemma.cpp/examples/simplified_gemma/build` directory, there should
 be a `simplified_gemma` executable. You can run it with the same 3 model arguments as
 gemma.cpp specifying the tokenizer, compressed weights file, and model type, for
 example:
 ```sh
 ./simplified_gemma --tokenizer tokenizer.spm --compressed_weights 2b-it-sfp.sbs --model 2b-it
 ```
 It should print a greeting to the terminal:
 ```
 "Hello, world! It's a pleasure to greet you all. May your day be filled with joy, peace, and all the things that make your heart soar.
 ```
 For a demonstration of constrained decoding, add the `--reject` flag followed by
 a list of token IDs (note that it must be the last flag, since it consumes every
 subsequent argument). For example, to reject variations of the word "greeting",
 run:
 ```sh
 ./simplified_gemma [...] --reject 32338 42360 78107 106837 132832 143859 154230 190205
 ```
--- a/examples/simplified_gemma/build/.gitignore
+++ b/examples/simplified_gemma/build/.gitignore
@ -0,0 +1,2 @@
 *
 !.gitignore
--- a/examples/simplified_gemma/gemma.hpp
+++ b/examples/simplified_gemma/gemma.hpp
@ -0,0 +1,114 @@
 // Copyright 2024 Google LLC
 // SPDX-License-Identifier: Apache-2.0
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     https://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 #include <stddef.h>
 #include <cstdlib>
 #include <cstring>
 #include <iostream>
 #include <random>
 #include <set>
 #include <string>
 #include <vector>
 #include "third_party/gemma_cpp/gemma/gemma.h"
 #include "third_party/gemma_cpp/gemma/tokenizer.h"
 #include "third_party/gemma_cpp/util/app.h"  // LoaderArgs
 #include "third_party/gemma_cpp/util/threading.h"
 #include "third_party/highway/hwy/base.h"
 #include "third_party/highway/hwy/contrib/thread_pool/thread_pool.h"
 class SimplifiedGemma {
 public:
  SimplifiedGemma(const gcpp::LoaderArgs& loader,
                  const gcpp::InferenceArgs& inference = gcpp::InferenceArgs(),
                  const gcpp::AppArgs& app = gcpp::AppArgs())
      : loader_(loader),
        inference_(inference),
        app_(app),
        pools_(gcpp::CreatePools(app_)),
        model_(gcpp::CreateGemma(loader_, pools_)) {
    Init();
  }
  SimplifiedGemma(int argc, char** argv)
      : loader_(argc, argv, /*validate=*/true),
        inference_(argc, argv),
        app_(argc, argv),
        pools_(gcpp::CreatePools(app_)),
        model_(gcpp::CreateGemma(loader_, pools_)) {
    Init();
  }
  void Init() {
    gcpp::Allocator::Init(pools_.Topology());
    // Instantiate model and KV Cache
    kv_cache_ = gcpp::KVCache::Create(model_.GetModelConfig(),
                                      inference_.prefill_tbatch_size);
    // Initialize random number generator
    std::random_device rd;
    gen_.seed(rd());
  }
  void Generate(std::string& prompt, size_t max_generated_tokens = 1024,
                float temperature = 0.7,
                const std::set<int>& reject_tokens = {}) {
    size_t generated = 0;
    const std::vector<int> tokens = gcpp::WrapAndTokenize(
        model_.Tokenizer(), loader_.Info(), generated, prompt);
    const size_t prompt_size = tokens.size();
    // This callback function gets invoked every time a token is generated
    auto stream_token = [&generated, &prompt_size, this](int token, float) {
      ++generated;
      if (generated < prompt_size) {
        // print feedback
      } else if (token != gcpp::EOS_ID) {
        std::string token_text;
        HWY_ASSERT(this->model_.Tokenizer().Decode({token}, &token_text));
        std::cout << token_text << std::flush;
      }
      return true;
    };
    gcpp::TimingInfo timing_info;
    gcpp::RuntimeConfig runtime_config = {
        .max_generated_tokens = max_generated_tokens,
        .temperature = temperature,
        .gen = &gen_,
        .verbosity = 0,
        .stream_token = stream_token,
        .accept_token =
            [&](int token, float /* prob */) {
              return !reject_tokens.contains(token);
            },
    };
    model_.Generate(runtime_config, tokens, 0, kv_cache_, timing_info);
  }
  ~SimplifiedGemma() = default;
 private:
  gcpp::LoaderArgs loader_;
  gcpp::InferenceArgs inference_;
  gcpp::AppArgs app_;
  gcpp::NestedPools pools_;
  gcpp::Gemma model_;
  gcpp::KVCache kv_cache_;
  std::mt19937 gen_;
  std::string validation_error_;
 };
--- a/examples/simplified_gemma/run.cc
+++ b/examples/simplified_gemma/run.cc
@ -0,0 +1,50 @@
 // Copyright 2024 Google LLC
 // SPDX-License-Identifier: Apache-2.0
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     https://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 #include <stddef.h>
 #include <string>
 // Placeholder for internal header, do not modify.
 #include "third_party/gemma_cpp/examples/simplified_gemma/gemma.hpp"
 #include "util/app.h"  // LoaderArgs
 int main(int argc, char** argv) {
  {
    // Placeholder for internal init, do not modify.
  }
  // Standard usage: LoaderArgs takes argc and argv as input, then parses
  // necessary flags.
  gcpp::LoaderArgs loader(argc, argv, /*validate=*/true);
  // Optional: LoaderArgs can also take tokenizer and weights paths directly.
  //
  // gcpp::LoaderArgs loader("/path/to/tokenizer", "/path/to/weights",
  // "model_identifier");
  // Optional: InferenceArgs and AppArgs can be passed in as well. If not
  // specified, default values will be used.
  //
  // gcpp::InferenceArgs inference(argc, argv);
  // gcpp::AppArgs app(argc, argv);
  // SimplifiedGemma gemma(loader, inference, app);
  SimplifiedGemma gemma(loader);
  std::string prompt = "Write a greeting to the world.";
  gemma.Generate(prompt, 256, 0.6);
  return 0;
 }
--- a/util/app.h
+++ b/util/app.h
@ -126,15 +126,27 @@ static inline NestedPools CreatePools(const AppArgs& app) {
 }
 struct LoaderArgs : public ArgsBase<LoaderArgs> {
-  LoaderArgs(int argc, char* argv[]) {
+  LoaderArgs(int argc, char* argv[], bool validate = true) {
    InitAndParse(argc, argv);
    if (validate) {
      if (const char* error = Validate()) {
        HWY_ABORT("Invalid args: %s", error);
      }
    }
  }
  LoaderArgs(const std::string& tokenizer_path, const std::string& weights_path,
-             const std::string& model) {
+             const std::string& model, bool validate = true) {
    Init();  // Init sets to defaults, so assignments must come after Init().
    tokenizer.path = tokenizer_path;
    weights.path = weights_path;
    model_type_str = model;
    if (validate) {
      if (const char* error = Validate()) {
        HWY_ABORT("Invalid args: %s", error);
      }
    }
  };
  // Returns error string or nullptr if OK.