kv-cache : do not quantize SWA KV cache (#21277)

This commit is contained in:
Georgi Gerganov 2026-04-02 11:54:05 +03:00 committed by GitHub
parent d6dac92bfd
commit 17193cce34
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed file with 2 additions and 1 deletion

View File

@ -66,8 +66,9 @@ llama_kv_cache_iswa::llama_kv_cache_iswa(
LLAMA_LOG_INFO("%s: creating SWA KV cache, size = %u cells\n", __func__, size_swa);
// note: the SWA cache is never quantized because it is relatively small
kv_swa = std::make_unique<llama_kv_cache>(
model, type_k, type_v,
model, GGML_TYPE_F16, GGML_TYPE_F16,
v_trans, offload, unified, size_swa, n_seq_max, n_pad,
hparams.n_swa, hparams.swa_type, filter_swa, reuse);
}