(revert) kv-cache : do not quantize SWA KV cache (#21332)

This reverts commit 17193cce34.
This commit is contained in:
Georgi Gerganov 2026-04-03 09:07:01 +03:00 committed by GitHub
parent f49e917876
commit 39b27f0da0
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed file with 1 addition and 2 deletions

View File

@ -66,9 +66,8 @@ llama_kv_cache_iswa::llama_kv_cache_iswa(
LLAMA_LOG_INFO("%s: creating SWA KV cache, size = %u cells\n", __func__, size_swa);
// note: the SWA cache is never quantized because it is relatively small
kv_swa = std::make_unique<llama_kv_cache>(
model, GGML_TYPE_F16, GGML_TYPE_F16,
model, type_k, type_v,
v_trans, offload, unified, size_swa, n_seq_max, n_pad,
hparams.n_swa, hparams.swa_type, filter_swa, reuse);
}