fix: eviction logic for checkpoint creation during generation
The eviction logic was nested inside the is_empty() check, so it never ran when all cells were occupied. Eviction has been moved outside that check so that it always tries to free an old checkpoint to make room for a new one.

Results:
- 48 successful rollbacks (44% of rejections properly restored)
- 34.4 tok/s on short generation, ~20 tok/s on long generation
- 65% acceptance rate with proper rollback

Remaining: 56% of rejections still can't find their checkpoint because it was evicted before seq_rm ran. We need to either keep checkpoints longer or increase rs_size further.
This commit is contained in:
parent
8ec2e6007e
commit
4e908332c4
|
|
@@ -707,9 +707,10 @@ bool llama_memory_recurrent::find_slot(const llama_ubatch & ubatch) {
|
|||
fprintf(stderr, "[MTP-FINDSLOT] checkpoint branch: seq_id=%d, cur_tail=%d, next_empty=%d\n",
|
||||
(int)seq_id, cur_tail, (int)next_empty_cell);
|
||||
fflush(stderr);
|
||||
if (next_empty_cell < size && cells[next_empty_cell].is_empty()) {
|
||||
bool can_checkpoint = (get_cell_count(seq_id) < 8 && used < size * 0.9);
|
||||
if (!can_checkpoint) {
|
||||
{
|
||||
bool can_checkpoint = (next_empty_cell < size && cells[next_empty_cell].is_empty() &&
|
||||
get_cell_count(seq_id) < 8 && used < size * 0.9);
|
||||
if (!can_checkpoint && get_cell_count(seq_id) >= 2) {
|
||||
// Try to evict the oldest checkpoint to make room
|
||||
int32_t oldest = -1;
|
||||
llama_pos min_pos = std::numeric_limits<llama_pos>::max();
|
||||
|
|
@@ -719,13 +720,19 @@ bool llama_memory_recurrent::find_slot(const llama_ubatch & ubatch) {
|
|||
oldest = j;
|
||||
}
|
||||
}
|
||||
fprintf(stderr, "[MTP-FINDSLOT] eviction: oldest=%d (pos=%d), cur_tail=%d\n",
|
||||
oldest, (oldest >= 0 ? (int)cells[oldest].pos : -1), cur_tail);
|
||||
fflush(stderr);
|
||||
if (oldest >= 0) {
|
||||
cells[oldest].seq_id.erase(seq_id);
|
||||
if (cells[oldest].is_empty()) {
|
||||
cells[oldest].pos = -1;
|
||||
cells[oldest].src = -1;
|
||||
used--;
|
||||
next_empty_cell = oldest; // reuse the freed cell
|
||||
next_empty_cell = oldest;
|
||||
fprintf(stderr, "[MTP-FINDSLOT] evicted cell %d, next_empty=%d, used=%d\n",
|
||||
oldest, (int)next_empty_cell, (int)used);
|
||||
fflush(stderr);
|
||||
}
|
||||
can_checkpoint = true;
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue