mirror of https://github.com/google/gemma.cpp.git
Further 1.02x prefill speedup from batch 64->512
Measured on SKX. Larger speedup expected for Zen4/SPR. PiperOrigin-RevId: 652472928
This commit is contained in:
parent
aaee666a1d
commit
cd530374b3
|
|
@@ -36,7 +36,7 @@ ByteStorageT AllocateSizeof() {
|
|||
return hwy::AllocateAligned<uint8_t>(sizeof(T));
|
||||
}
|
||||
|
||||
-constexpr size_t kPrefillBatchSize = 64;
|
||||
+constexpr size_t kPrefillBatchSize = 512;
|
||||
constexpr size_t kDecodeBatchSize = 1;
|
||||
constexpr size_t kBatchedQueryBatchSize = 16;
|
||||
constexpr size_t kMinAdjustedPrefillBatchSize =
|
||||
|
|
|
|||
Loading…
Reference in New Issue