diff --git a/gemma/activations.h b/gemma/activations.h index 49fc6d6..9db3dee 100644 --- a/gemma/activations.h +++ b/gemma/activations.h @@ -19,6 +19,7 @@ #include // sqrtf #include +#include #include #include "gemma/configs.h" // ModelConfig @@ -99,8 +100,12 @@ struct AttentionActivations { div_seq_len(static_cast(seq_len)), query_scale(ChooseQueryScale(config)) { + // Batch size can be 0 in experimental code so do not assert. if (batch_size == 0) { - HWY_WARN("Creating mostly empty activations with a batch_size of 0."); + static std::atomic_flag warned = ATOMIC_FLAG_INIT; + if (!warned.test_and_set()) { + HWY_WARN("Creating mostly empty activations with a batch_size of 0."); + } return; }