server : improve context checkpoint logic (#19408)
This commit is contained in:
parent
5999b50eb0
commit
eb449cdfa4
|
|
@@ -2507,7 +2507,8 @@ private:
|
||||||
slot.n_prompt_tokens_processed++;
|
slot.n_prompt_tokens_processed++;
|
||||||
|
|
||||||
// process the last few tokens of the prompt separately in order to allow for a checkpoint to be created.
|
// process the last few tokens of the prompt separately in order to allow for a checkpoint to be created.
|
||||||
if (do_checkpoint && slot.task->n_tokens() - slot.prompt.n_tokens() == 64) {
|
const int n_last = std::min(n_batch, 512);
|
||||||
|
if (do_checkpoint && slot.task->n_tokens() == slot.prompt.n_tokens() + n_last) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue