diff --git a/gemma/configs.cc b/gemma/configs.cc index d980b3b..276c8f9 100644 --- a/gemma/configs.cc +++ b/gemma/configs.cc @@ -35,6 +35,8 @@ static ModelConfig ConfigBaseGemmaV2() { ModelConfig config = ConfigNoSSM(); config.att_cap = 50.0f; config.final_cap = 30.0f; + config.eos_id = 1; + config.secondary_eos_id = 107; return config; } diff --git a/gemma/run.cc b/gemma/run.cc index dab48a1..254d13f 100644 --- a/gemma/run.cc +++ b/gemma/run.cc @@ -85,7 +85,6 @@ void ReplGemma(Gemma& model, KVCache& kv_cache, const AppArgs& app, size_t abs_pos = 0; // across turns size_t tokens_generated_this_turn = 0; // differentiates prefill from reply size_t prompt_size = 0; - bool end_of_turn_seen = false; std::mt19937 gen; InitGenerator(args, gen); @@ -140,13 +139,6 @@ void ReplGemma(Gemma& model, KVCache& kv_cache, const AppArgs& app, std::cout << "\n\n"; } } - if (token_text == "") { - // We don't want to show the token to the user. - // We also need to remember that we've seen it, so that we can rewind - // abs_pos appropriately. We expect EOS as the next token. - end_of_turn_seen = true; - return true; - } std::cout << token_text << std::flush; return true; }; @@ -232,13 +224,6 @@ void ReplGemma(Gemma& model, KVCache& kv_cache, const AppArgs& app, HWY_ASSERT(abs_pos > 0); abs_pos--; } - if (end_of_turn_seen && abs_pos > 0) { - // If we have seen an end_of_turn token, we need to rewind abs_pos by one - // more, because we will prepend it again to the prompt in - // WrapAndTokenize. - abs_pos--; - } - end_of_turn_seen = false; } }