From b3239bf509e2ade75d92724a7bd1877af2782e50 Mon Sep 17 00:00:00 2001
From: Krzysztof Ostrowski <ostrowski@google.com>
Date: Wed, 2 Oct 2024 11:32:07 -0700
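Subject: [PATCH] Define kNumTensorScales as 4 * kLayers in each model config (replacing the ConfigNoSSM default of 0) and raise the ops_test timeout to eternal.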

PiperOrigin-RevId: 681530185
---
 BUILD.bazel                      | 2 +-
 backprop/backward_scalar_test.cc | 1 +
 backprop/backward_test.cc        | 1 +
 gemma/configs.h                  | 9 +++++++--
 4 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/BUILD.bazel b/BUILD.bazel
index 558c4e7..f38fc0d 100644
--- a/BUILD.bazel
+++ b/BUILD.bazel
@@ -105,7 +105,7 @@ cc_test(
 cc_test(
     name = "ops_test",
     size = "small",
-    timeout = "long",
+    timeout = "eternal",
     srcs = ["ops/ops_test.cc"],
     local_defines = ["HWY_IS_TEST"],
     # for test_suite.
diff --git a/backprop/backward_scalar_test.cc b/backprop/backward_scalar_test.cc
index 121d6a8..15aa876 100644
--- a/backprop/backward_scalar_test.cc
+++ b/backprop/backward_scalar_test.cc
@@ -387,6 +387,7 @@ struct TestConfig : ConfigBaseGemmaV2 {
   static constexpr std::array<LayerAttentionType, 2> kLayerConfig =
       FixedLayerConfig<2>(LayerAttentionType::kGemma);
   static constexpr int kLayers = kLayerConfig.size();
+  static constexpr int kNumTensorScales = 4 * kLayers;
   static constexpr bool kAbsolutePE = false;
   static constexpr PostNormType kPostNorm = PostNormType::None;
 
diff --git a/backprop/backward_test.cc b/backprop/backward_test.cc
index b6c0780..decec20 100644
--- a/backprop/backward_test.cc
+++ b/backprop/backward_test.cc
@@ -195,6 +195,7 @@ struct TestConfig : public ConfigBaseGemmaV2 {
   static constexpr std::array<LayerAttentionType, 2> kLayerConfig =
       FixedLayerConfig<2>(LayerAttentionType::kGemma);
   static constexpr int kLayers = kLayerConfig.size();
+  static constexpr int kNumTensorScales = 4 * kLayers;
   static constexpr bool kAbsolutePE = false;
   static constexpr PostNormType kPostNorm = PostNormType::None;
 
diff --git a/gemma/configs.h b/gemma/configs.h
index c78ab87..b8eb4ea 100644
--- a/gemma/configs.h
+++ b/gemma/configs.h
@@ -164,8 +164,6 @@ struct ConfigNoSSM : ConfigNoVit {
   static constexpr bool kUseHalfRope = false;
   static constexpr bool kUseLocalAttention = false;
   static constexpr bool kInterleaveQKV = true;
-  static constexpr int kNumTensorScales = 0;
-
   static constexpr PostQKType kPostQK = PostQKType::Rope;
   static constexpr ActivationType kActivation = ActivationType::Gelu;
   static constexpr ResidualType kResidual = ResidualType::Add;
@@ -195,6 +193,7 @@ struct ConfigGemma2_27B : public ConfigBaseGemmaV2 {
   static constexpr std::array<size_t, 46> kAttentionWindowSizes =
       RepeatedAttentionWindowSizes<46, 2>({4096, kSeqLen});
   static constexpr int kLayers = kLayerConfig.size();
+  static constexpr int kNumTensorScales = 4 * kLayers;
   static constexpr int kGemmaLayers = kLayers;
   static constexpr int kModelDim = 4608;
   static constexpr int kFFHiddenDim = 16 * 4608 / 2;  // = 36864
@@ -218,6 +217,7 @@ struct ConfigGemma2_9B : public ConfigBaseGemmaV2 {
   static constexpr std::array<size_t, 42> kAttentionWindowSizes =
       RepeatedAttentionWindowSizes<42, 2>({4096, kSeqLen});
   static constexpr int kLayers = kLayerConfig.size();
+  static constexpr int kNumTensorScales = 4 * kLayers;
   static constexpr int kGemmaLayers = kLayers;
   static constexpr int kModelDim = 3584;
   static constexpr int kFFHiddenDim = 8 * 3584 / 2;  // = 14336
@@ -240,6 +240,7 @@ struct ConfigGemma7B : public ConfigBaseGemmaV1 {
   static constexpr std::array<size_t, 28> kAttentionWindowSizes =
       FixedAttentionWindowSizes<28>(kSeqLen);
   static constexpr int kLayers = kLayerConfig.size();
+  static constexpr int kNumTensorScales = 4 * kLayers;
   static constexpr int kGemmaLayers = kLayers;
   static constexpr int kModelDim = 3072;
   static constexpr int kFFHiddenDim = 16 * 3072 / 2;  // = 24576
@@ -261,6 +262,7 @@ struct ConfigGemma2B : public ConfigBaseGemmaV1 {
   static constexpr std::array<size_t, 18> kAttentionWindowSizes =
       FixedAttentionWindowSizes<18>(kSeqLen);
   static constexpr int kLayers = kLayerConfig.size();
+  static constexpr int kNumTensorScales = 4 * kLayers;
   static constexpr int kGemmaLayers = kLayers;
   static constexpr int kModelDim = 2048;
   static constexpr int kFFHiddenDim = 16 * 2048 / 2;  // = 16384
@@ -285,6 +287,7 @@ struct ConfigPaliGemma_224 : public ConfigGemma2B<TWeight> {
     static constexpr std::array<LayerAttentionType, 27> kLayerConfig =
         FixedLayerConfig<27>(LayerAttentionType::kVit);
     static constexpr int kLayers = kLayerConfig.size();
+    static constexpr int kNumTensorScales = 4 * kLayers;
     static constexpr int kModelDim = 1152;
     static constexpr int kFFHiddenDim = 4304;
     static constexpr int kHeads = 16;
@@ -313,6 +316,7 @@ struct ConfigGemma2_2B : public ConfigBaseGemmaV2 {
   static constexpr std::array<size_t, 26> kAttentionWindowSizes =
       RepeatedAttentionWindowSizes<26, 2>({4096, kSeqLen});
   static constexpr int kLayers = kLayerConfig.size();
+  static constexpr int kNumTensorScales = 4 * kLayers;
   static constexpr int kGemmaLayers = kLayers;
   static constexpr int kModelDim = 2304;
   static constexpr int kFFHiddenDim = 8 * 2304 / 2;  // = 9216
@@ -335,6 +339,7 @@ struct ConfigGemmaTiny : public ConfigNoSSM {
   static constexpr std::array<size_t, 3> kAttentionWindowSizes =
       FixedAttentionWindowSizes<3>(kSeqLen);
   static constexpr int kLayers = kLayerConfig.size();
+  static constexpr int kNumTensorScales = 4 * kLayers;
   static constexpr int kGemmaLayers = kLayers;
   static constexpr int kModelDim = 128;
   static constexpr int kFFHiddenDim = 256;