From 12291e1ac06bde7e5c20cf8a5c717d3352d196f9 Mon Sep 17 00:00:00 2001 From: Krzysztof Ostrowski Date: Wed, 2 Oct 2024 14:02:48 -0700 Subject: [PATCH] Internal change. PiperOrigin-RevId: 681583569 --- gemma/configs.h | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/gemma/configs.h b/gemma/configs.h index b8eb4ea..51df334 100644 --- a/gemma/configs.h +++ b/gemma/configs.h @@ -38,6 +38,7 @@ namespace gcpp { static constexpr size_t kSeqLen = GEMMA_MAX_SEQLEN; static constexpr size_t kTopK = GEMMA_TOPK; +static constexpr size_t kVocabSize = 256000; using EmbedderInputT = hwy::bfloat16_t; @@ -187,7 +188,7 @@ struct ConfigGemma2_27B : public ConfigBaseGemmaV2 { using Weight = TWeight; // make accessible where we only have a TConfig static constexpr int kSeqLen = 8192; - static constexpr int kVocabSize = 256000; + static constexpr int kVocabSize = gcpp::kVocabSize; static constexpr std::array kLayerConfig = FixedLayerConfig<46>(LayerAttentionType::kGemma); static constexpr std::array kAttentionWindowSizes = @@ -211,7 +212,7 @@ struct ConfigGemma2_9B : public ConfigBaseGemmaV2 { using Weight = TWeight; // make accessible where we only have a TConfig static constexpr int kSeqLen = 8192; - static constexpr int kVocabSize = 256000; + static constexpr int kVocabSize = gcpp::kVocabSize; static constexpr std::array kLayerConfig = FixedLayerConfig<42>(LayerAttentionType::kGemma); static constexpr std::array kAttentionWindowSizes = @@ -234,7 +235,7 @@ struct ConfigGemma7B : public ConfigBaseGemmaV1 { using Weight = TWeight; // make accessible where we only have a TConfig static constexpr int kSeqLen = gcpp::kSeqLen; - static constexpr int kVocabSize = 256000; + static constexpr int kVocabSize = gcpp::kVocabSize; static constexpr std::array kLayerConfig = FixedLayerConfig<28>(LayerAttentionType::kGemma); static constexpr std::array kAttentionWindowSizes = @@ -256,7 +257,7 @@ struct ConfigGemma2B : public ConfigBaseGemmaV1 { using Weight = TWeight; // make accessible where we only have a TConfig static constexpr int kSeqLen = gcpp::kSeqLen; - static constexpr int kVocabSize = 256000; + static constexpr int kVocabSize = gcpp::kVocabSize; static constexpr std::array kLayerConfig = FixedLayerConfig<18>(LayerAttentionType::kGemma); static constexpr std::array kAttentionWindowSizes = @@ -310,7 +311,7 @@ struct ConfigGemma2_2B : public ConfigBaseGemmaV2 { using Weight = TWeight; // make accessible where we only have a TConfig static constexpr int kSeqLen = 8192; - static constexpr int kVocabSize = 256000; + static constexpr int kVocabSize = gcpp::kVocabSize; static constexpr std::array kLayerConfig = FixedLayerConfig<26>(LayerAttentionType::kGemma); static constexpr std::array kAttentionWindowSizes = @@ -363,7 +364,7 @@ struct ConfigGriffin2B : ConfigNoVit { // Griffin uses local attention, so kSeqLen is actually the local attention // window. static constexpr int kSeqLen = 2048; - static constexpr int kVocabSize = 256000; + static constexpr int kVocabSize = gcpp::kVocabSize; static constexpr std::array kLayerConfig = { LayerAttentionType::kGriffinRecurrentBlock, LayerAttentionType::kGriffinRecurrentBlock,