From 4e908332c4b47ae86796c35d4c1cdfa23799eb9e Mon Sep 17 00:00:00 2001 From: itigges22 Date: Fri, 20 Mar 2026 17:12:49 -0400 Subject: [PATCH] fix: eviction logic for checkpoint creation during generation The eviction was inside the is_empty() check, so it never ran when all cells were occupied. Moved eviction outside to always try freeing old checkpoints for new ones. Results: - 48 successful rollbacks (44% of rejections properly restored) - 34.4 tok/s on short, ~20 tok/s on long generation - 65% acceptance rate with proper rollback Remaining: 56% of rejections still can't find checkpoints because the checkpoint was evicted before seq_rm ran. Need to either keep checkpoints longer or increase rs_size further. --- src/llama-memory-recurrent.cpp | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/src/llama-memory-recurrent.cpp b/src/llama-memory-recurrent.cpp index fdcd54a292..c5e8eb5af7 100644 --- a/src/llama-memory-recurrent.cpp +++ b/src/llama-memory-recurrent.cpp @@ -707,9 +707,10 @@ bool llama_memory_recurrent::find_slot(const llama_ubatch & ubatch) { fprintf(stderr, "[MTP-FINDSLOT] checkpoint branch: seq_id=%d, cur_tail=%d, next_empty=%d\n", (int)seq_id, cur_tail, (int)next_empty_cell); fflush(stderr); - if (next_empty_cell < size && cells[next_empty_cell].is_empty()) { - bool can_checkpoint = (get_cell_count(seq_id) < 8 && used < size * 0.9); - if (!can_checkpoint) { + { + bool can_checkpoint = (next_empty_cell < size && cells[next_empty_cell].is_empty() && + get_cell_count(seq_id) < 8 && used < size * 0.9); + if (!can_checkpoint && get_cell_count(seq_id) >= 2) { // Try to evict the oldest checkpoint to make room int32_t oldest = -1; llama_pos min_pos = std::numeric_limits<llama_pos>::max(); @@ -719,13 +720,19 @@ bool llama_memory_recurrent::find_slot(const llama_ubatch & ubatch) { oldest = j; } } + fprintf(stderr, "[MTP-FINDSLOT] eviction: oldest=%d (pos=%d), cur_tail=%d\n", + oldest, (oldest >= 0 ? 
(int)cells[oldest].pos : -1), cur_tail); + fflush(stderr); if (oldest >= 0) { cells[oldest].seq_id.erase(seq_id); if (cells[oldest].is_empty()) { cells[oldest].pos = -1; cells[oldest].src = -1; used--; - next_empty_cell = oldest; // reuse the freed cell + next_empty_cell = oldest; + fprintf(stderr, "[MTP-FINDSLOT] evicted cell %d, next_empty=%d, used=%d\n", + oldest, (int)next_empty_cell, (int)used); + fflush(stderr); } can_checkpoint = true; }