server : improve context checkpoint logic (#19408)

This commit is contained in:
Georgi Gerganov 2026-02-08 09:40:04 +02:00 committed by GitHub
parent 5999b50eb0
commit eb449cdfa4
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed file with 2 additions and 1 deletion

View File

@@ -2507,7 +2507,8 @@ private:
slot.n_prompt_tokens_processed++;
// process the last few tokens of the prompt separately in order to allow for a checkpoint to be created.
-if (do_checkpoint && slot.task->n_tokens() - slot.prompt.n_tokens() == 64) {
+const int n_last = std::min(n_batch, 512);
+if (do_checkpoint && slot.task->n_tokens() == slot.prompt.n_tokens() + n_last) {
break;
}
}