diff --git a/tools/server/server-context.cpp b/tools/server/server-context.cpp index ceafcac179..8aab0d4c1b 100644 --- a/tools/server/server-context.cpp +++ b/tools/server/server-context.cpp @@ -77,6 +77,7 @@ struct server_slot { size_t last_nl_pos = 0; std::string generated_text; + std::string debug_generated_text; llama_tokens generated_tokens; // idx of draft tokens in the main batch @@ -425,7 +426,7 @@ struct server_slot { if (!only_metrics) { res["prompt"] = ptask->tokens.detokenize(ctx, true); - res["generated"] = generated_text; + res["generated"] = generated_text.empty() ? debug_generated_text : generated_text; } } @@ -1442,6 +1443,12 @@ private: res->id_slot = slot.id; res->index = slot.task->index; + + // keep copy of last generated text for debugging purposes + if (slots_debug) { + slot.debug_generated_text = slot.generated_text; + } + // in stream mode, content and tokens are already in last partial chunk if (slot.task->params.stream) { res->content = "";