Skip the last RMSNormInplaceBatched in the Prefill phase.

That call only modifies activations.x, but Prefill is invoked with prefill_activations, which is not used after the Prefill call returns, so the final normalization is wasted work. PiperOrigin-RevId: 645391387
2024-06-21 08:03:46 -07:00 · 2024-06-21 08:03:46 -07:00 · 80b1347393
parent 82f16087ba
commit 80b1347393
1 changed file with 0 additions and 3 deletions
--- a/gemma/gemma.cc
+++ b/gemma/gemma.cc
@@ -640,9 +640,6 @@ HWY_NOINLINE void Prefill(const int* tokens, size_t num_tokens, size_t pos,
    TransformerLayer(num_tokens, pos, layer, layer_weights, activations,
                     kv_cache, pool);
  }
-  RMSNormInplaceBatched<kBatchSize>(num_tokens, weights.final_norm_scale.data(),
-                                    activations.x.data(), TConfig::kModelDim);
 }
 // Compute the transformer for a batch of input tokens. During generation,