From 2aef9767d982beb7ab61b3ae359366f627eb74dd Mon Sep 17 00:00:00 2001
From: John <78893154+cmp-nct@users.noreply.github.com>
Date: Sat, 31 Jan 2026 05:15:49 +0100
Subject: [PATCH] Switch to base cache for position tracking

Update position tracking to use the base cache instead of the SWA cache
to avoid KV cache token losses.
---
 src/llama-kv-cache-iswa.cpp | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/llama-kv-cache-iswa.cpp b/src/llama-kv-cache-iswa.cpp
index 3a34102a23..3033e9891f 100644
--- a/src/llama-kv-cache-iswa.cpp
+++ b/src/llama-kv-cache-iswa.cpp
@@ -107,12 +107,14 @@ void llama_kv_cache_iswa::seq_div(llama_seq_id seq_id, llama_pos p0, llama_pos p
 }
 
 llama_pos llama_kv_cache_iswa::seq_pos_min(llama_seq_id seq_id) const {
-    // the base cache is a superset of the SWA cache, so we can just check the SWA cache
-    return kv_swa->seq_pos_min(seq_id);
+    // base cache preserves all positions (no SWA cell reuse), making it the ground truth for position tracking
+    // SWA cache can lose older positions when cells are reused for new tokens outside the SWA window
+    return kv_base->seq_pos_min(seq_id);
 }
 
 llama_pos llama_kv_cache_iswa::seq_pos_max(llama_seq_id seq_id) const {
-    return kv_swa->seq_pos_max(seq_id);
+    // base cache preserves all positions (no SWA cell reuse), making it the ground truth for position tracking
+    return kv_base->seq_pos_max(seq_id);
 }
 
 std::map<ggml_backend_buffer_type_t, size_t> llama_kv_cache_iswa::memory_breakdown() const {