mirror of https://github.com/google/gemma.cpp.git
Move kGriffinLayers into ConfigNoSSM, set kGemmaLayers directly
For regular (non-SSM) Gemma models, kGriffinLayers is by definition always zero, and kGemmaLayers is simply the total number of layers.

PiperOrigin-RevId: 644384531
This commit is contained in:
parent
70506b0a62
commit
d7d9d14f0e
|
|
@ -74,6 +74,8 @@ constexpr size_t NumLayersOfTypeBefore(
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ConfigNoSSM {
|
struct ConfigNoSSM {
|
||||||
|
static constexpr int kGriffinLayers = 0;
|
||||||
|
|
||||||
static constexpr int kConv1dWidth = 0;
|
static constexpr int kConv1dWidth = 0;
|
||||||
static constexpr bool kFFBiases = false;
|
static constexpr bool kFFBiases = false;
|
||||||
static constexpr bool kSoftmaxAttnOutputBiases = false;
|
static constexpr bool kSoftmaxAttnOutputBiases = false;
|
||||||
|
|
@ -92,12 +94,7 @@ struct ConfigGemma7B : public ConfigNoSSM {
|
||||||
static constexpr std::array<LayerAttentionType, 28> kLayerConfig =
|
static constexpr std::array<LayerAttentionType, 28> kLayerConfig =
|
||||||
FixedLayerConfig<28>(LayerAttentionType::kGemma);
|
FixedLayerConfig<28>(LayerAttentionType::kGemma);
|
||||||
static constexpr int kLayers = kLayerConfig.size();
|
static constexpr int kLayers = kLayerConfig.size();
|
||||||
static constexpr int kGemmaLayers =
|
static constexpr int kGemmaLayers = kLayers;
|
||||||
NumLayersOfTypeBefore(kLayerConfig, LayerAttentionType::kGemma, kLayers);
|
|
||||||
static constexpr int kGriffinLayers =
|
|
||||||
NumLayersOfTypeBefore(kLayerConfig,
|
|
||||||
LayerAttentionType::kGriffinRecurrentBlock,
|
|
||||||
kLayers);
|
|
||||||
static constexpr int kModelDim = 3072;
|
static constexpr int kModelDim = 3072;
|
||||||
static constexpr int kFFHiddenDim = 16 * 3072 / 2; // = 24576
|
static constexpr int kFFHiddenDim = 16 * 3072 / 2; // = 24576
|
||||||
static constexpr int kHeads = 16;
|
static constexpr int kHeads = 16;
|
||||||
|
|
@ -117,10 +114,7 @@ struct ConfigGemma2B : public ConfigNoSSM {
|
||||||
static constexpr std::array<LayerAttentionType, 18> kLayerConfig =
|
static constexpr std::array<LayerAttentionType, 18> kLayerConfig =
|
||||||
FixedLayerConfig<18>(LayerAttentionType::kGemma);
|
FixedLayerConfig<18>(LayerAttentionType::kGemma);
|
||||||
static constexpr int kLayers = kLayerConfig.size();
|
static constexpr int kLayers = kLayerConfig.size();
|
||||||
static constexpr int kGemmaLayers =
|
static constexpr int kGemmaLayers = kLayers;
|
||||||
NumLayersOfTypeBefore(kLayerConfig, LayerAttentionType::kGemma, kLayers);
|
|
||||||
static constexpr int kGriffinLayers = NumLayersOfTypeBefore(
|
|
||||||
kLayerConfig, LayerAttentionType::kGriffinRecurrentBlock, kLayers);
|
|
||||||
static constexpr int kModelDim = 2048;
|
static constexpr int kModelDim = 2048;
|
||||||
static constexpr int kFFHiddenDim = 16 * 2048 / 2; // = 16384
|
static constexpr int kFFHiddenDim = 16 * 2048 / 2; // = 16384
|
||||||
static constexpr int kHeads = 8;
|
static constexpr int kHeads = 8;
|
||||||
|
|
@ -140,12 +134,7 @@ struct ConfigGemmaTiny : public ConfigNoSSM {
|
||||||
static constexpr std::array<LayerAttentionType, 3> kLayerConfig =
|
static constexpr std::array<LayerAttentionType, 3> kLayerConfig =
|
||||||
FixedLayerConfig<3>(LayerAttentionType::kGemma);
|
FixedLayerConfig<3>(LayerAttentionType::kGemma);
|
||||||
static constexpr int kLayers = kLayerConfig.size();
|
static constexpr int kLayers = kLayerConfig.size();
|
||||||
static constexpr int kGemmaLayers =
|
static constexpr int kGemmaLayers = kLayers;
|
||||||
NumLayersOfTypeBefore(kLayerConfig, LayerAttentionType::kGemma, kLayers);
|
|
||||||
static constexpr int kGriffinLayers =
|
|
||||||
NumLayersOfTypeBefore(kLayerConfig,
|
|
||||||
LayerAttentionType::kGriffinRecurrentBlock,
|
|
||||||
kLayers);
|
|
||||||
static constexpr int kModelDim = 128;
|
static constexpr int kModelDim = 128;
|
||||||
static constexpr int kFFHiddenDim = 256;
|
static constexpr int kFFHiddenDim = 256;
|
||||||
static constexpr int kHeads = 4;
|
static constexpr int kHeads = 4;
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue