Move kGriffinLayers into ConfigNoSSM, set kGemmaLayers directly

For regular (non-SSM) Gemma models, kGriffinLayers is by definition always zero
and kGemmaLayers is just the number of layers.

PiperOrigin-RevId: 644384531
This commit is contained in:
Paul Chang 2024-06-18 07:52:16 -07:00 committed by Copybara-Service
parent 70506b0a62
commit d7d9d14f0e
1 changed file with 5 additions and 16 deletions

View File

@ -74,6 +74,8 @@ constexpr size_t NumLayersOfTypeBefore(
}
struct ConfigNoSSM {
static constexpr int kGriffinLayers = 0;
static constexpr int kConv1dWidth = 0;
static constexpr bool kFFBiases = false;
static constexpr bool kSoftmaxAttnOutputBiases = false;
@ -92,12 +94,7 @@ struct ConfigGemma7B : public ConfigNoSSM {
static constexpr std::array<LayerAttentionType, 28> kLayerConfig =
FixedLayerConfig<28>(LayerAttentionType::kGemma);
static constexpr int kLayers = kLayerConfig.size();
static constexpr int kGemmaLayers =
NumLayersOfTypeBefore(kLayerConfig, LayerAttentionType::kGemma, kLayers);
static constexpr int kGriffinLayers =
NumLayersOfTypeBefore(kLayerConfig,
LayerAttentionType::kGriffinRecurrentBlock,
kLayers);
static constexpr int kGemmaLayers = kLayers;
static constexpr int kModelDim = 3072;
static constexpr int kFFHiddenDim = 16 * 3072 / 2; // = 24576
static constexpr int kHeads = 16;
@ -117,10 +114,7 @@ struct ConfigGemma2B : public ConfigNoSSM {
static constexpr std::array<LayerAttentionType, 18> kLayerConfig =
FixedLayerConfig<18>(LayerAttentionType::kGemma);
static constexpr int kLayers = kLayerConfig.size();
static constexpr int kGemmaLayers =
NumLayersOfTypeBefore(kLayerConfig, LayerAttentionType::kGemma, kLayers);
static constexpr int kGriffinLayers = NumLayersOfTypeBefore(
kLayerConfig, LayerAttentionType::kGriffinRecurrentBlock, kLayers);
static constexpr int kGemmaLayers = kLayers;
static constexpr int kModelDim = 2048;
static constexpr int kFFHiddenDim = 16 * 2048 / 2; // = 16384
static constexpr int kHeads = 8;
@ -140,12 +134,7 @@ struct ConfigGemmaTiny : public ConfigNoSSM {
static constexpr std::array<LayerAttentionType, 3> kLayerConfig =
FixedLayerConfig<3>(LayerAttentionType::kGemma);
static constexpr int kLayers = kLayerConfig.size();
static constexpr int kGemmaLayers =
NumLayersOfTypeBefore(kLayerConfig, LayerAttentionType::kGemma, kLayers);
static constexpr int kGriffinLayers =
NumLayersOfTypeBefore(kLayerConfig,
LayerAttentionType::kGriffinRecurrentBlock,
kLayers);
static constexpr int kGemmaLayers = kLayers;
static constexpr int kModelDim = 128;
static constexpr int kFFHiddenDim = 256;
static constexpr int kHeads = 4;