diff --git a/gemma/activations.h b/gemma/activations.h index 6f14900..60986fe 100644 --- a/gemma/activations.h +++ b/gemma/activations.h @@ -98,7 +98,10 @@ struct AttentionActivations { div_seq_len(static_cast(seq_len)), query_scale(ChooseQueryScale(config)) { - HWY_ASSERT(batch_size != 0); + if (batch_size == 0) { + HWY_WARN("Creating mostly empty activations with a batch_size of 0."); + return; + } // For MatMul outputs, precompute their row pointers. // If we forget any MatMul outputs here, debug builds print a warning but