From 96cfc4992c625984d99f3529d30bedab1e1ecf93 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Mon, 9 Mar 2026 16:47:06 +0200 Subject: [PATCH] server : fix checkpoints n_tokens calculation (#20287) --- tools/server/server-context.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tools/server/server-context.cpp b/tools/server/server-context.cpp index 67ac5112eb..3541d910d8 100644 --- a/tools/server/server-context.cpp +++ b/tools/server/server-context.cpp @@ -2141,6 +2141,9 @@ private: if (slot.state == SLOT_STATE_PROCESSING_PROMPT || slot.state == SLOT_STATE_STARTED) { const auto & input_tokens = slot.task->tokens; + // used to determine the number of tokens added to the batch for the current slot + const auto n_tokens_prev = batch.n_tokens; + // TODO: maybe move branch to outside of this loop in the future if (slot.state == SLOT_STATE_STARTED) { slot.t_start_process_prompt = ggml_time_us(); @@ -2533,6 +2536,9 @@ private: } } + // the number of tokens added to the batch for the current slot + const auto n_tokens_cur = batch.n_tokens - n_tokens_prev; + // entire prompt has been processed if (slot.prompt.n_tokens() == slot.task->n_tokens()) { slot.state = SLOT_STATE_DONE_PROMPT; @@ -2593,7 +2599,7 @@ private: auto & cur = slot.prompt.checkpoints.emplace_back(server_prompt_checkpoint{ /*.pos_min = */ pos_min, /*.pos_max = */ pos_max, - /*.n_tokens = */ slot.prompt.n_tokens() - batch.n_tokens, + /*.n_tokens = */ slot.prompt.n_tokens() - n_tokens_cur, /*.data = */ std::vector<uint8_t>(checkpoint_size), });