mirror of https://github.com/google/gemma.cpp.git
Skip the last RMSNormInplaceBatched in the Prefill phase.
That call only modifies activations.x, but Prefill is called with prefill_activations, which are not used after the Prefill call. (PiperOrigin-RevId: 645391387)
This commit is contained in:
parent
82f16087ba
commit
80b1347393
|
|
@@ -640,9 +640,6 @@ HWY_NOINLINE void Prefill(const int* tokens, size_t num_tokens, size_t pos,
|
||||||
TransformerLayer(num_tokens, pos, layer, layer_weights, activations,
|
TransformerLayer(num_tokens, pos, layer, layer_weights, activations,
|
||||||
kv_cache, pool);
|
kv_cache, pool);
|
||||||
}
|
}
|
||||||
|
|
||||||
RMSNormInplaceBatched<kBatchSize>(num_tokens, weights.final_norm_scale.data(),
|
|
||||||
activations.x.data(), TConfig::kModelDim);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Compute the transformer for a batch of input tokens. During generation,
|
// Compute the transformer for a batch of input tokens. During generation,
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue