From 73f1140dca92b42a5a5cf620ad3b6d9c0c35155e Mon Sep 17 00:00:00 2001
From: Rhett Stucki
Date: Wed, 20 Aug 2025 22:59:24 -0700
Subject: [PATCH] Fix an off-by-one error after StreamAndUpdateEOS() to remove
 the MSAN warning about reading an uninitialized variable in the kv_cache.

The logic for choosing whether or not to attend to the last token during
prefill wasn't completely consistent with StreamAndUpdateEOS(), causing an
off-by-one error that prevented the kv_cache from being fully populated.

PiperOrigin-RevId: 797614310
---
 gemma/gemma.cc | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/gemma/gemma.cc b/gemma/gemma.cc
index a7b8423..95f52b8 100644
--- a/gemma/gemma.cc
+++ b/gemma/gemma.cc
@@ -520,6 +520,12 @@ static void GenerateT(const ModelConfig& config,
     const size_t last_pos_in_prompt = qbatch.Pos(qi) - qbatch.InitialPos(qi);
     StreamAndUpdateEOS(qi, qbatch.Prompt(qi)[last_pos_in_prompt], 0.0f, config,
                        runtime_config, qbatch, non_eos);
+    // StreamAndUpdateEOS() sets the stream position one token too far in
+    // autoregressive mode.
+    const bool attend_to_last_token = (qbatch.Pos(qi) < qbatch.PrefixEnd(qi));
+    if (!attend_to_last_token) {
+      qbatch.MutablePos(qi) -= 1;
+    }
   }
 
   size_t max_gen_steps = runtime_config.max_generated_tokens;