From cd530374b36302aff59b1e73a41d9cbcc4734d2c Mon Sep 17 00:00:00 2001
From: Jan Wassenberg
Date: Mon, 15 Jul 2024 07:25:25 -0700
Subject: [PATCH] Further 1.02x prefill speedup from batch 64->512

Measured on SKX. Larger speedup expected for Zen4/SPR.

PiperOrigin-RevId: 652472928
---
 gemma/common.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gemma/common.h b/gemma/common.h
index 0ca7e5c..663b2ca 100644
--- a/gemma/common.h
+++ b/gemma/common.h
@@ -36,7 +36,7 @@ ByteStorageT AllocateSizeof() {
   return hwy::AllocateAligned(sizeof(T));
 }
 
-constexpr size_t kPrefillBatchSize = 64;
+constexpr size_t kPrefillBatchSize = 512;
 constexpr size_t kDecodeBatchSize = 1;
 constexpr size_t kBatchedQueryBatchSize = 16;
 constexpr size_t kMinAdjustedPrefillBatchSize =