Skip the last RMSNormInplaceBatched in the Prefill phase.

That call only modifies activations.x, but Prefill is invoked with prefill_activations, which are not used after the Prefill call returns.

PiperOrigin-RevId: 645391387
This commit is contained in:
The gemma.cpp Authors 2024-06-21 08:03:46 -07:00 committed by Copybara-Service
parent 82f16087ba
commit 80b1347393
1 changed file with 0 additions and 3 deletions

View File

@@ -640,9 +640,6 @@ HWY_NOINLINE void Prefill(const int* tokens, size_t num_tokens, size_t pos,
TransformerLayer(num_tokens, pos, layer, layer_weights, activations,
kv_cache, pool);
}
RMSNormInplaceBatched<kBatchSize>(num_tokens, weights.final_norm_scale.data(),
activations.x.data(), TConfig::kModelDim);
}
// Compute the transformer for a batch of input tokens. During generation,