Gemma CPP: move PaliGemma tests' helper to a separate class

This helps to be able to use PaliGemma functionalities directly for inference by just providing tokenizer and weight paths. Added @mukundagg to allowed authors list. PiperOrigin-RevId: 772705238
2025-06-17 18:36:52 -07:00 · 2025-06-17 18:36:52 -07:00 · 606e22155a
parent f2adbfbcab
commit 606e22155a
4 changed files with 118 additions and 57 deletions
--- a/paligemma/BUILD.bazel
+++ b/paligemma/BUILD.bazel
@ -29,6 +29,24 @@ cc_test(
    ],
 )

+cc_library(
+    name = "paligemma_helper",
+    srcs = ["paligemma_helper.cc"],
+    hdrs = ["paligemma_helper.h"],
+    deps = [
+        ":image",
+        "//:allocator",
+        "//:benchmark_helper",
+        "//:configs",
+        "//:gemma_args",
+        "//:gemma_lib",
+        "//compression:types",
+        "//io",
+        "@highway//:hwy",
+        "@highway//:profiler",
+    ],
+)
+
 cc_test(
    name = "paligemma_test",
    srcs = ["paligemma_test.cc"],
@ -39,6 +57,8 @@ cc_test(
        "no_tap",
    ],
    deps = [
+        ":paligemma_helper",
+        "//devtools/build/runtime:get_runfiles_dir",
        "@googletest//:gtest_main",  # buildcleaner: keep
        "//:allocator",
        "//:benchmark_helper",
--- a/paligemma/paligemma_helper.cc
+++ b/paligemma/paligemma_helper.cc
@ -0,0 +1,68 @@
+#include "paligemma/paligemma_helper.h"
+#include <cstddef>
+#include <memory>
+#include <string>
+#include <vector>
+#include "compression/types.h"
+#include "evals/benchmark_helper.h"
+#include "gemma/configs.h"
+#include "gemma/gemma.h"
+#include "util/allocator.h"
+#include "hwy/base.h"
+
+
+namespace gcpp {
+
+void PaliGemmaHelper::InitVit(const std::string& path) {
+  HWY_ASSERT(env_->GetGemma() != nullptr);
+  const Gemma& gemma = *(env_->GetGemma());
+  const ModelConfig& config = gemma.GetModelConfig();
+  HWY_ASSERT(config.wrapping == PromptWrapping::PALIGEMMA);
+
+  image_tokens_ = std::make_unique<ImageTokens>(
+      "image", Extents2D(config.vit_config.seq_len, config.model_dim),
+      MatPadding::kPacked);
+  image_tokens_->AllocateAndAttachRowPtrs(env_->Env().row_ptrs);
+  Image image;
+  HWY_ASSERT(image.ReadPPM(path));
+  const size_t image_size = config.vit_config.image_size;
+  image.Resize(image_size, image_size);
+  RuntimeConfig runtime_config = {.gen = &env_->MutableGen(),
+                                  .verbosity = 0};
+  gemma.GenerateImageTokens(runtime_config, env_->MutableKVCache().SeqLen(),
+                            image, *image_tokens_);
+}
+
+std::string PaliGemmaHelper::GemmaReply(const std::string& prompt_text) const {
+  const Gemma& model = *(env_->GetGemma());
+    env_->MutableGen().seed(0x12345678);
+
+    std::string response;
+    auto stream_token = [&](int token, float) {
+      std::string token_text;
+      HWY_ASSERT(
+          model.Tokenizer().Decode(std::vector<int>{token}, &token_text));
+      response += token_text;
+      return true;
+    };
+
+    std::string mutable_prompt = prompt_text;
+    std::vector<int> tokens = env_->WrapAndTokenize(mutable_prompt);
+    tokens.insert(tokens.begin(), image_tokens_->Rows(), 0);
+
+    RuntimeConfig runtime_config = {.max_generated_tokens = 512,
+                                    // PrefixLM sees/attends to all tokens.
+                                    .prefill_tbatch_size = tokens.size(),
+                                    .gen = &env_->MutableGen(),
+                                    .verbosity = 0,
+                                    .stream_token = stream_token,
+                                    .image_tokens = image_tokens_.get()};
+
+    const size_t prefix_end = tokens.size();
+    TimingInfo timing_info = {.verbosity = 0};
+    model.Generate(runtime_config, tokens, /*pos=*/0, prefix_end,
+                   env_->MutableKVCache(), timing_info);
+    return response;
+}
+
+}  // namespace gcpp
--- a/paligemma/paligemma_helper.h
+++ b/paligemma/paligemma_helper.h
@ -0,0 +1,25 @@
+#ifndef THIRD_PARTY_GEMMA_CPP_PALIGEMMA_PALIGEMMA_HELPER_H_
+#define THIRD_PARTY_GEMMA_CPP_PALIGEMMA_PALIGEMMA_HELPER_H_
+
+#include <memory>
+#include <string>
+#include "evals/benchmark_helper.h"
+#include "gemma/gemma_args.h"
+
+namespace gcpp {
+
+class PaliGemmaHelper {
+ public:
+  explicit PaliGemmaHelper(GemmaEnv* env) : env_(env) {};
+
+  void InitVit(const std::string& path);
+  std::string GemmaReply(const std::string& prompt_text) const;
+
+ private:
+  std::unique_ptr<ImageTokens> image_tokens_;
+  GemmaEnv* env_;
+};
+
+}  // namespace gcpp
+
+#endif  // THIRD_PARTY_GEMMA_CPP_PALIGEMMA_PALIGEMMA_HELPER_H_
--- a/paligemma/paligemma_test.cc
+++ b/paligemma/paligemma_test.cc
@ -17,16 +17,14 @@

 #include <memory>
 #include <string>
-#include <vector>

-#include "compression/types.h"
 #include "evals/benchmark_helper.h"
 #include "gemma/configs.h"
 #include "gemma/gemma.h"
 #include "io/io.h"
 #include "util/allocator.h"
-#include "hwy/base.h"
 #include "hwy/tests/hwy_gtest.h"
+#include "paligemma/paligemma_helper.h"

 // This test can be run manually with the downloaded PaliGemma weights.
 // It should pass for `paligemma-3b-mix-224` and `paligemma2-3b-pt-448`.
@ -41,63 +39,13 @@ GemmaEnv* s_env = nullptr;

 class PaliGemmaTest : public ::testing::Test {
 protected:
-  void InitVit(const std::string& path) {
-    ASSERT_NE(s_env->GetGemma(), nullptr);
-    const Gemma& gemma = *(s_env->GetGemma());
-    const ModelConfig& config = gemma.GetModelConfig();
-    HWY_ASSERT(config.wrapping == PromptWrapping::PALIGEMMA);
-
-    image_tokens_ = std::make_unique<ImageTokens>(
-        "image", Extents2D(config.vit_config.seq_len, config.model_dim),
-        MatPadding::kPacked);
-    image_tokens_->AllocateAndAttachRowPtrs(s_env->Env().row_ptrs);
-    Image image;
-    HWY_ASSERT(image.ReadPPM(path));
-    const size_t image_size = config.vit_config.image_size;
-    image.Resize(image_size, image_size);
-    RuntimeConfig runtime_config = {.gen = &s_env->MutableGen(),
-                                    .verbosity = 0};
-    gemma.GenerateImageTokens(runtime_config, s_env->MutableKVCache().SeqLen(),
-                              image, *image_tokens_);
-  }
-
-  std::string GemmaReply(const std::string& prompt_text) const {
-    const Gemma& model = *(s_env->GetGemma());
-    s_env->MutableGen().seed(0x12345678);
-
-    std::string response;
-    auto stream_token = [&](int token, float) {
-      std::string token_text;
-      HWY_ASSERT(
-          model.Tokenizer().Decode(std::vector<int>{token}, &token_text));
-      response += token_text;
-      return true;
-    };
-
-    std::string mutable_prompt = prompt_text;
-    std::vector<int> tokens = s_env->WrapAndTokenize(mutable_prompt);
-    tokens.insert(tokens.begin(), image_tokens_->Rows(), 0);
-
-    RuntimeConfig runtime_config = {.max_generated_tokens = 512,
-                                    // PrefixLM sees/attends to all tokens.
-                                    .prefill_tbatch_size = tokens.size(),
-                                    .gen = &s_env->MutableGen(),
-                                    .verbosity = 0,
-                                    .stream_token = stream_token,
-                                    .image_tokens = image_tokens_.get()};
-
-    const size_t prefix_end = tokens.size();
-    TimingInfo timing_info = {.verbosity = 0};
-    model.Generate(runtime_config, tokens, /*pos=*/0, prefix_end,
-                   s_env->MutableKVCache(), timing_info);
-    return response;
-  }
-
  void TestQuestion(const char* question, const char* expected_substring) {
    ASSERT_NE(s_env->GetGemma(), nullptr);
    std::string path = "paligemma/testdata/image.ppm";
-    InitVit(path);
-    const std::string reply = GemmaReply(question);
+
+    PaliGemmaHelper paligemma_helper(s_env);
+    paligemma_helper.InitVit(path);
+    const std::string reply = paligemma_helper.GemmaReply(question);
    fprintf(stderr, "'%s'\n\n", reply.c_str());
    EXPECT_TRUE(reply.find(expected_substring) != std::string::npos);  // NOLINT
  }