From 17193cce34036a6488b092ca79313d4ee1f895f5 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Thu, 2 Apr 2026 11:54:05 +0300 Subject: [PATCH] kv-cache : do not quantize SWA KV cache (#21277) --- src/llama-kv-cache-iswa.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/llama-kv-cache-iswa.cpp b/src/llama-kv-cache-iswa.cpp index 26e2cb4270..15b3fe16e8 100644 --- a/src/llama-kv-cache-iswa.cpp +++ b/src/llama-kv-cache-iswa.cpp @@ -66,8 +66,9 @@ llama_kv_cache_iswa::llama_kv_cache_iswa( LLAMA_LOG_INFO("%s: creating SWA KV cache, size = %u cells\n", __func__, size_swa); + // note: the SWA cache is never quantized because it is relatively small kv_swa = std::make_unique<llama_kv_cache>( - model, type_k, type_v, + model, GGML_TYPE_F16, GGML_TYPE_F16, v_trans, offload, unified, size_swa, n_seq_max, n_pad, hparams.n_swa, hparams.swa_type, filter_swa, reuse); }