From 80b1347393763b0322dd7124888a4344f37b5910 Mon Sep 17 00:00:00 2001
From: "The gemma.cpp Authors" <no-reply@google.com>
Date: Fri, 21 Jun 2024 08:03:46 -0700
Subject: [PATCH] Skip the last RMSNormInplaceBatched in the Prefill phase.
 The call only modifies activations.x, but it operates on prefill_activations,
 which are not used after the Prefill call.

PiperOrigin-RevId: 645391387
---
 gemma/gemma.cc | 3 ---
 1 file changed, 3 deletions(-)
diff --git a/gemma/gemma.cc b/gemma/gemma.cc
index 1c07c9e..c035947 100644
--- a/gemma/gemma.cc
+++ b/gemma/gemma.cc
@@ -640,9 +640,6 @@ HWY_NOINLINE void Prefill(const int* tokens, size_t num_tokens, size_t pos,
     TransformerLayer(num_tokens, pos, layer, layer_weights, activations,
                      kv_cache, pool);
   }
-
-  RMSNormInplaceBatched<kBatchSize>(num_tokens, weights.final_norm_scale.data(),
-                                    activations.x.data(), TConfig::kModelDim);
 }
 
 // Compute the transformer for a batch of input tokens. During generation,