From 35f62f9eb3a8337b6dc946ce7f24421bc7716465 Mon Sep 17 00:00:00 2001 From: Christopher Albert Date: Mon, 30 Mar 2026 18:24:39 +0200 Subject: [PATCH] server: fix streaming event bugs and tighten test assertions Code fixes: - build_oai_resp_metadata accepts status param; completed_at is null when status is in_progress (was always set to timestamp) - response.created/in_progress events use zeroed usage (was passing actual prompt tokens before response was logically started) - Function call item IDs are now generated once per tool call in update() and reused consistently across output_item.added, function_call_arguments.delta, and output_item.done events (was generating independent random IDs in each path) - Clean up commented-out status checks in server-common.cpp Test fixes: - Assert sequence_number on every event unconditionally (was using weak "if present" guard) - Check actual values not just key presence in streaming created event test (completed_at is None, usage tokens are 0, etc.) Refs: ggml-org/llama.cpp#21174 (patrick review) --- tools/server/server-common.cpp | 8 ++--- tools/server/server-task.cpp | 30 +++++++++++-------- tools/server/server-task.h | 11 +++++-- .../tests/unit/test_compat_oai_responses.py | 16 +++++----- 4 files changed, 36 insertions(+), 29 deletions(-) diff --git a/tools/server/server-common.cpp b/tools/server/server-common.cpp index 58db4934fe..ae45f24f74 100644 --- a/tools/server/server-common.cpp +++ b/tools/server/server-common.cpp @@ -1280,12 +1280,8 @@ json convert_responses_to_chatcmpl(const json & response_body) { } else if (exists_and_is_array(item, "content") && exists_and_is_string(item, "role") && item.at("role") == "assistant" && - // exists_and_is_string(item, "status") && - // (item.at("status") == "in_progress" || - // item.at("status") == "completed" || - // item.at("status") == "incomplete") && - // item["status"] not sent by codex-cli - // item["type"] == "message" for OutputMessage, absent for EasyInputMessage + // status not checked (not always present, e.g. codex-cli omits it) + // type == "message" for OutputMessage, absent for EasyInputMessage (!item.contains("type") || item.at("type") == "message") ) { // #responses_create-input-input_item_list-item-output_message diff --git a/tools/server/server-task.cpp b/tools/server/server-task.cpp index 5d63e6b697..b2de62d86f 100644 --- a/tools/server/server-task.cpp +++ b/tools/server/server-task.cpp @@ -937,17 +937,18 @@ static json build_oai_resp_metadata(const std::string & oai_resp_id, const std::string & output_text, int n_prompt_tokens, int n_decoded, - int n_prompt_tokens_cache) { + int n_prompt_tokens_cache, + const std::string & status = "completed") { std::time_t t = std::time(0); return json { - {"completed_at", t}, + {"completed_at", status == "completed" ? json(t) : json(nullptr)}, {"created_at", t}, {"id", oai_resp_id}, {"model", oaicompat_model}, {"object", "response"}, {"output", output}, {"output_text", output_text}, - {"status", "completed"}, + {"status", status}, {"usage", json { {"input_tokens", n_prompt_tokens}, {"output_tokens", n_decoded}, @@ -1122,10 +1123,14 @@ json server_task_result_cmpl_final::to_json_oaicompat_resp_stream() { output_idx++; } - for (const common_chat_tool_call & tool_call : oaicompat_msg.tool_calls) { + for (size_t tc_idx = 0; tc_idx < oaicompat_msg.tool_calls.size(); tc_idx++) { + const common_chat_tool_call & tool_call = oaicompat_msg.tool_calls[tc_idx]; + const std::string fc_id = tc_idx < oai_resp_fc_item_ids.size() + ? oai_resp_fc_item_ids[tc_idx] + : "fc_" + random_string(); // fallback for non-streaming path const json output_item = { {"type", "function_call"}, - {"id", "fc_" + random_string()}, + {"id", fc_id}, {"call_id", tool_call.id}, {"name", tool_call.name}, {"arguments", tool_call.arguments}, @@ -1429,7 +1434,7 @@ void server_task_result_cmpl_partial::update(task_result_state & state) { oai_resp_reasoning_id = state.oai_resp_reasoning_id; oai_resp_message_id = state.oai_resp_message_id; oai_resp_fc_id = state.oai_resp_fc_id; - // seq_num/output_idx: read from state (may have been advanced by previous to_json call) + oai_resp_fc_item_id = state.oai_resp_fc_item_id; oai_resp_seq_num = state.oai_resp_seq_num; oai_resp_output_idx = state.oai_resp_output_idx; @@ -1460,6 +1465,8 @@ void server_task_result_cmpl_partial::update(task_result_state & state) { } if (!diff.tool_call_delta.name.empty()) { state.oai_resp_fc_id = diff.tool_call_delta.id; + state.oai_resp_fc_item_id = "fc_" + random_string(); + state.oai_resp_fc_item_ids.push_back(state.oai_resp_fc_item_id); state.oai_resp_seq_num++; // output_item.added state.oai_resp_output_idx++; } @@ -1610,12 +1617,10 @@ json server_task_result_cmpl_partial::to_json_oaicompat_resp() { int & output_idx = oai_resp_output_idx; if (n_decoded == 1) { - // Build initial response object with all required fields but empty output + // Build initial response object with all required fields but empty output and zeroed usage json initial_resp = build_oai_resp_metadata( oai_resp_id, oaicompat_model, {}, "", - n_prompt_tokens, 0, n_prompt_tokens_cache); - initial_resp["status"] = "in_progress"; - initial_resp["completed_at"] = nullptr; + 0, 0, 0, "in_progress"); events.push_back(json { {"event", "response.created"}, @@ -1723,7 +1728,7 @@ json server_task_result_cmpl_partial::to_json_oaicompat_resp() { {"sequence_number", seq_num++}, {"output_index", output_idx++}, {"item", json { - {"id", "fc_" + random_string()}, + {"id", oai_resp_fc_item_id}, {"arguments", ""}, {"call_id", diff.tool_call_delta.id}, {"name", diff.tool_call_delta.name}, @@ -1732,7 +1737,6 @@ json server_task_result_cmpl_partial::to_json_oaicompat_resp() { }}, }}, }); - oai_resp_fc_id = diff.tool_call_delta.id; } if (!diff.tool_call_delta.arguments.empty()) { @@ -1743,7 +1747,7 @@ json server_task_result_cmpl_partial::to_json_oaicompat_resp() { {"sequence_number", seq_num++}, {"output_index", output_idx - 1}, {"delta", diff.tool_call_delta.arguments}, - {"item_id", "fc_" + oai_resp_fc_id}, + {"item_id", oai_resp_fc_item_id}, }}, }); } diff --git a/tools/server/server-task.h b/tools/server/server-task.h index a4ce0449a3..49040445d3 100644 --- a/tools/server/server-task.h +++ b/tools/server/server-task.h @@ -109,9 +109,11 @@ struct task_result_state { const std::string oai_resp_id; const std::string oai_resp_reasoning_id; const std::string oai_resp_message_id; - std::string oai_resp_fc_id; // function call ID for current args delta - int oai_resp_seq_num = 0; // monotonically increasing per-stream - int oai_resp_output_idx = 0; // tracks current output item index + std::string oai_resp_fc_id; // model's tool_call ID for current function call + std::string oai_resp_fc_item_id; // our generated fc_ item ID for current function call + std::vector oai_resp_fc_item_ids; // all generated fc_ IDs, in order of tool call appearance + int oai_resp_seq_num = 0; // monotonically increasing per-stream + int oai_resp_output_idx = 0; // tracks current output item index task_result_state(const common_chat_parser_params & chat_parser_params) : chat_parser_params(chat_parser_params) @@ -372,6 +374,7 @@ struct server_task_result_cmpl_final : server_task_result { std::string oai_resp_id; std::string oai_resp_reasoning_id; std::string oai_resp_message_id; + std::vector oai_resp_fc_item_ids; int oai_resp_seq_num = 0; virtual bool is_stop() override { @@ -387,6 +390,7 @@ struct server_task_result_cmpl_final : server_task_result { oai_resp_id = state.oai_resp_id; oai_resp_reasoning_id = state.oai_resp_reasoning_id; oai_resp_message_id = state.oai_resp_message_id; + oai_resp_fc_item_ids = state.oai_resp_fc_item_ids; oai_resp_seq_num = state.oai_resp_seq_num; } @@ -440,6 +444,7 @@ struct server_task_result_cmpl_partial : server_task_result { std::string oai_resp_reasoning_id; std::string oai_resp_message_id; std::string oai_resp_fc_id; + std::string oai_resp_fc_item_id; int oai_resp_seq_num = 0; int oai_resp_output_idx = 0; diff --git a/tools/server/tests/unit/test_compat_oai_responses.py b/tools/server/tests/unit/test_compat_oai_responses.py index 2f720f0809..fac6310214 100644 --- a/tools/server/tests/unit/test_compat_oai_responses.py +++ b/tools/server/tests/unit/test_compat_oai_responses.py @@ -136,8 +136,8 @@ def test_responses_stream_schema_fields(): saw_output_item_done = False completed_response = None for data in res: - if "sequence_number" in data: - seen_seq_nums.append(data["sequence_number"]) + assert "sequence_number" in data, f"missing sequence_number in {data.get('type')}" + seen_seq_nums.append(data["sequence_number"]) if data.get("type") == "response.output_text.done": saw_output_text_done = True assert "content_index" in data @@ -435,11 +435,13 @@ def test_responses_stream_created_event_has_full_response(): assert resp["id"].startswith("resp_") assert resp["object"] == "response" assert resp["model"] is not None - assert "metadata" in resp - assert "store" in resp - assert "truncation" in resp - assert "tools" in resp - assert "usage" in resp + assert resp["completed_at"] is None + assert resp["metadata"] == {} + assert resp["store"] == False + assert resp["truncation"] == "disabled" + assert resp["tools"] == [] + assert resp["usage"]["input_tokens"] == 0 + assert resp["usage"]["output_tokens"] == 0 assert resp["output"] == [] assert resp["output_text"] == ""