diff --git a/tools/server/server-context.cpp b/tools/server/server-context.cpp index ceafcac179..bfd0ab747c 100644 --- a/tools/server/server-context.cpp +++ b/tools/server/server-context.cpp @@ -77,6 +77,7 @@ struct server_slot { size_t last_nl_pos = 0; std::string generated_text; + std::string debug_generated_text; llama_tokens generated_tokens; // idx of draft tokens in the main batch @@ -425,7 +426,7 @@ struct server_slot { if (!only_metrics) { res["prompt"] = ptask->tokens.detokenize(ctx, true); - res["generated"] = generated_text; + res["generated"] = generated_text.empty()?debug_generated_text:generated_text; } } @@ -1442,6 +1443,10 @@ private: res->id_slot = slot.id; res->index = slot.task->index; + + // keep copy of last generated text for debugging purposes + slot.debug_generated_text = slot.generated_text; + // in stream mode, content and tokens are already in last partial chunk if (slot.task->params.stream) { res->content = "";