fix: eviction logic for checkpoint creation during generation

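The eviction logic was nested inside the
`next_empty_cell < size && cells[next_empty_cell].is_empty()` check, so it
never ran when all cells were occupied, which is exactly when eviction is
needed. Eviction now runs outside that check: whenever a checkpoint cannot
be created and the sequence already holds at least 2 cells, the oldest
checkpoint is evicted to make room for the new one.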

Results:
- 48 successful rollbacks (44% of rejections properly restored)
- 34.4 tok/s on short generation, ~20 tok/s on long generation
- 65% acceptance rate with proper rollback

Remaining: 56% of rejections still can't find their checkpoint because
it was evicted before seq_rm ran. Need to either keep checkpoints
longer or increase rs_size further.
This commit is contained in:
itigges22 2026-03-20 17:12:49 -04:00
parent 8ec2e6007e
commit 4e908332c4
1 changed file with 11 additions and 4 deletions

View File

@ -707,9 +707,10 @@ bool llama_memory_recurrent::find_slot(const llama_ubatch & ubatch) {
fprintf(stderr, "[MTP-FINDSLOT] checkpoint branch: seq_id=%d, cur_tail=%d, next_empty=%d\n",
(int)seq_id, cur_tail, (int)next_empty_cell);
fflush(stderr);
if (next_empty_cell < size && cells[next_empty_cell].is_empty()) {
bool can_checkpoint = (get_cell_count(seq_id) < 8 && used < size * 0.9);
if (!can_checkpoint) {
{
bool can_checkpoint = (next_empty_cell < size && cells[next_empty_cell].is_empty() &&
get_cell_count(seq_id) < 8 && used < size * 0.9);
if (!can_checkpoint && get_cell_count(seq_id) >= 2) {
// Try to evict the oldest checkpoint to make room
int32_t oldest = -1;
llama_pos min_pos = std::numeric_limits<llama_pos>::max();
@ -719,13 +720,19 @@ bool llama_memory_recurrent::find_slot(const llama_ubatch & ubatch) {
oldest = j;
}
}
fprintf(stderr, "[MTP-FINDSLOT] eviction: oldest=%d (pos=%d), cur_tail=%d\n",
oldest, (oldest >= 0 ? (int)cells[oldest].pos : -1), cur_tail);
fflush(stderr);
if (oldest >= 0) {
cells[oldest].seq_id.erase(seq_id);
if (cells[oldest].is_empty()) {
cells[oldest].pos = -1;
cells[oldest].src = -1;
used--;
next_empty_cell = oldest; // reuse the freed cell
next_empty_cell = oldest;
fprintf(stderr, "[MTP-FINDSLOT] evicted cell %d, next_empty=%d, used=%d\n",
oldest, (int)next_empty_cell, (int)used);
fflush(stderr);
}
can_checkpoint = true;
}