From 80b1347393763b0322dd7124888a4344f37b5910 Mon Sep 17 00:00:00 2001 From: "The gemma.cpp Authors" Date: Fri, 21 Jun 2024 08:03:46 -0700 Subject: [PATCH] Skip the last RMSNormInplaceBatched in the Prefill phase. That call only modifies activations.x, but in Prefill it is called with prefill_activations, which are not used after the Prefill call. PiperOrigin-RevId: 645391387 --- gemma/gemma.cc | 3 --- 1 file changed, 3 deletions(-) diff --git a/gemma/gemma.cc b/gemma/gemma.cc index 1c07c9e..c035947 100644 --- a/gemma/gemma.cc +++ b/gemma/gemma.cc @@ -640,9 +640,6 @@ HWY_NOINLINE void Prefill(const int* tokens, size_t num_tokens, size_t pos, TransformerLayer(num_tokens, pos, layer, layer_weights, activations, kv_cache, pool); } - - RMSNormInplaceBatched(num_tokens, weights.final_norm_scale.data(), - activations.x.data(), TConfig::kModelDim); } // Compute the transformer for a batch of input tokens. During generation,