Switch to base cache for position tracking

Updated position tracking to use the base cache instead of the SWA cache, to avoid losing KV cache tokens.
This commit is contained in:
John 2026-01-31 05:15:49 +01:00 committed by GitHub
parent 3dd95914d0
commit 2aef9767d9
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed file with 5 additions and 3 deletions

View File

@@ -107,12 +107,14 @@ void llama_kv_cache_iswa::seq_div(llama_seq_id seq_id, llama_pos p0, llama_pos p
}
// Forwards the seq_pos_min query for `seq_id` to the base cache and returns its result.
llama_pos llama_kv_cache_iswa::seq_pos_min(llama_seq_id seq_id) const {
    // base cache preserves all positions (no SWA cell reuse), making it the ground truth for position tracking
    // SWA cache can lose older positions when cells are reused for new tokens outside the SWA window
    return kv_base->seq_pos_min(seq_id);
}
// Forwards the seq_pos_max query for `seq_id` to the base cache and returns its result.
llama_pos llama_kv_cache_iswa::seq_pos_max(llama_seq_id seq_id) const {
    // base cache preserves all positions (no SWA cell reuse), making it the ground truth for position tracking
    return kv_base->seq_pos_max(seq_id);
}
std::map<ggml_backend_buffer_type_t, size_t> llama_kv_cache_iswa::memory_breakdown() const {