diff --git a/src/llama-memory-recurrent.cpp b/src/llama-memory-recurrent.cpp index fdcd54a292..c5e8eb5af7 100644 --- a/src/llama-memory-recurrent.cpp +++ b/src/llama-memory-recurrent.cpp @@ -707,9 +707,10 @@ bool llama_memory_recurrent::find_slot(const llama_ubatch & ubatch) { fprintf(stderr, "[MTP-FINDSLOT] checkpoint branch: seq_id=%d, cur_tail=%d, next_empty=%d\n", (int)seq_id, cur_tail, (int)next_empty_cell); fflush(stderr); - if (next_empty_cell < size && cells[next_empty_cell].is_empty()) { - bool can_checkpoint = (get_cell_count(seq_id) < 8 && used < size * 0.9); - if (!can_checkpoint) { + { + bool can_checkpoint = (next_empty_cell < size && cells[next_empty_cell].is_empty() && + get_cell_count(seq_id) < 8 && used < size * 0.9); + if (!can_checkpoint && get_cell_count(seq_id) >= 2) { // Try to evict the oldest checkpoint to make room int32_t oldest = -1; llama_pos min_pos = std::numeric_limits::max(); @@ -719,13 +720,19 @@ bool llama_memory_recurrent::find_slot(const llama_ubatch & ubatch) { oldest = j; } } + fprintf(stderr, "[MTP-FINDSLOT] eviction: oldest=%d (pos=%d), cur_tail=%d\n", + oldest, (oldest >= 0 ? (int)cells[oldest].pos : -1), cur_tail); + fflush(stderr); if (oldest >= 0) { cells[oldest].seq_id.erase(seq_id); if (cells[oldest].is_empty()) { cells[oldest].pos = -1; cells[oldest].src = -1; used--; - next_empty_cell = oldest; // reuse the freed cell + next_empty_cell = oldest; + fprintf(stderr, "[MTP-FINDSLOT] evicted cell %d, next_empty=%d, used=%d\n", + oldest, (int)next_empty_cell, (int)used); + fflush(stderr); } can_checkpoint = true; }