server: fix streaming event bugs and tighten test assertions

Code fixes:
- build_oai_resp_metadata accepts a status param; completed_at is null
  whenever status is not "completed" (was always set to the current
  timestamp regardless of status)
- response.created/in_progress events use zeroed usage (was passing
  actual prompt tokens before response was logically started)
- Function call item IDs are now generated once per tool call in
  update() and reused consistently across output_item.added,
  function_call_arguments.delta, and output_item.done events
  (was generating independent random IDs in each path)
- Clean up commented-out status checks in server-common.cpp

Test fixes:
- Assert sequence_number on every event unconditionally (was using
  weak "if present" guard)
- Check actual values, not just key presence, in the streaming
  response.created event test (completed_at is None, usage tokens are 0, etc.)

Refs: ggml-org/llama.cpp#21174 (patrick review)
This commit is contained in:
Christopher Albert 2026-03-30 18:24:39 +02:00
parent 5d51bbef1c
commit 35f62f9eb3
4 changed files with 36 additions and 29 deletions

View File

@ -1280,12 +1280,8 @@ json convert_responses_to_chatcmpl(const json & response_body) {
} else if (exists_and_is_array(item, "content") &&
exists_and_is_string(item, "role") &&
item.at("role") == "assistant" &&
// exists_and_is_string(item, "status") &&
// (item.at("status") == "in_progress" ||
// item.at("status") == "completed" ||
// item.at("status") == "incomplete") &&
// item["status"] not sent by codex-cli
// item["type"] == "message" for OutputMessage, absent for EasyInputMessage
// status not checked (not always present, e.g. codex-cli omits it)
// type == "message" for OutputMessage, absent for EasyInputMessage
(!item.contains("type") || item.at("type") == "message")
) {
// #responses_create-input-input_item_list-item-output_message

View File

@ -937,17 +937,18 @@ static json build_oai_resp_metadata(const std::string & oai_resp_id,
const std::string & output_text,
int n_prompt_tokens,
int n_decoded,
int n_prompt_tokens_cache) {
int n_prompt_tokens_cache,
const std::string & status = "completed") {
std::time_t t = std::time(0);
return json {
{"completed_at", t},
{"completed_at", status == "completed" ? json(t) : json(nullptr)},
{"created_at", t},
{"id", oai_resp_id},
{"model", oaicompat_model},
{"object", "response"},
{"output", output},
{"output_text", output_text},
{"status", "completed"},
{"status", status},
{"usage", json {
{"input_tokens", n_prompt_tokens},
{"output_tokens", n_decoded},
@ -1122,10 +1123,14 @@ json server_task_result_cmpl_final::to_json_oaicompat_resp_stream() {
output_idx++;
}
for (const common_chat_tool_call & tool_call : oaicompat_msg.tool_calls) {
for (size_t tc_idx = 0; tc_idx < oaicompat_msg.tool_calls.size(); tc_idx++) {
const common_chat_tool_call & tool_call = oaicompat_msg.tool_calls[tc_idx];
const std::string fc_id = tc_idx < oai_resp_fc_item_ids.size()
? oai_resp_fc_item_ids[tc_idx]
: "fc_" + random_string(); // fallback for non-streaming path
const json output_item = {
{"type", "function_call"},
{"id", "fc_" + random_string()},
{"id", fc_id},
{"call_id", tool_call.id},
{"name", tool_call.name},
{"arguments", tool_call.arguments},
@ -1429,7 +1434,7 @@ void server_task_result_cmpl_partial::update(task_result_state & state) {
oai_resp_reasoning_id = state.oai_resp_reasoning_id;
oai_resp_message_id = state.oai_resp_message_id;
oai_resp_fc_id = state.oai_resp_fc_id;
// seq_num/output_idx: read from state (may have been advanced by previous to_json call)
oai_resp_fc_item_id = state.oai_resp_fc_item_id;
oai_resp_seq_num = state.oai_resp_seq_num;
oai_resp_output_idx = state.oai_resp_output_idx;
@ -1460,6 +1465,8 @@ void server_task_result_cmpl_partial::update(task_result_state & state) {
}
if (!diff.tool_call_delta.name.empty()) {
state.oai_resp_fc_id = diff.tool_call_delta.id;
state.oai_resp_fc_item_id = "fc_" + random_string();
state.oai_resp_fc_item_ids.push_back(state.oai_resp_fc_item_id);
state.oai_resp_seq_num++; // output_item.added
state.oai_resp_output_idx++;
}
@ -1610,12 +1617,10 @@ json server_task_result_cmpl_partial::to_json_oaicompat_resp() {
int & output_idx = oai_resp_output_idx;
if (n_decoded == 1) {
// Build initial response object with all required fields but empty output
// Build initial response object with all required fields but empty output and zeroed usage
json initial_resp = build_oai_resp_metadata(
oai_resp_id, oaicompat_model, {}, "",
n_prompt_tokens, 0, n_prompt_tokens_cache);
initial_resp["status"] = "in_progress";
initial_resp["completed_at"] = nullptr;
0, 0, 0, "in_progress");
events.push_back(json {
{"event", "response.created"},
@ -1723,7 +1728,7 @@ json server_task_result_cmpl_partial::to_json_oaicompat_resp() {
{"sequence_number", seq_num++},
{"output_index", output_idx++},
{"item", json {
{"id", "fc_" + random_string()},
{"id", oai_resp_fc_item_id},
{"arguments", ""},
{"call_id", diff.tool_call_delta.id},
{"name", diff.tool_call_delta.name},
@ -1732,7 +1737,6 @@ json server_task_result_cmpl_partial::to_json_oaicompat_resp() {
}},
}},
});
oai_resp_fc_id = diff.tool_call_delta.id;
}
if (!diff.tool_call_delta.arguments.empty()) {
@ -1743,7 +1747,7 @@ json server_task_result_cmpl_partial::to_json_oaicompat_resp() {
{"sequence_number", seq_num++},
{"output_index", output_idx - 1},
{"delta", diff.tool_call_delta.arguments},
{"item_id", "fc_" + oai_resp_fc_id},
{"item_id", oai_resp_fc_item_id},
}},
});
}

View File

@ -109,9 +109,11 @@ struct task_result_state {
const std::string oai_resp_id;
const std::string oai_resp_reasoning_id;
const std::string oai_resp_message_id;
std::string oai_resp_fc_id; // function call ID for current args delta
int oai_resp_seq_num = 0; // monotonically increasing per-stream
int oai_resp_output_idx = 0; // tracks current output item index
std::string oai_resp_fc_id; // model's tool_call ID for current function call
std::string oai_resp_fc_item_id; // our generated fc_ item ID for current function call
std::vector<std::string> oai_resp_fc_item_ids; // all generated fc_ IDs, in order of tool call appearance
int oai_resp_seq_num = 0; // monotonically increasing per-stream
int oai_resp_output_idx = 0; // tracks current output item index
task_result_state(const common_chat_parser_params & chat_parser_params)
: chat_parser_params(chat_parser_params)
@ -372,6 +374,7 @@ struct server_task_result_cmpl_final : server_task_result {
std::string oai_resp_id;
std::string oai_resp_reasoning_id;
std::string oai_resp_message_id;
std::vector<std::string> oai_resp_fc_item_ids;
int oai_resp_seq_num = 0;
virtual bool is_stop() override {
@ -387,6 +390,7 @@ struct server_task_result_cmpl_final : server_task_result {
oai_resp_id = state.oai_resp_id;
oai_resp_reasoning_id = state.oai_resp_reasoning_id;
oai_resp_message_id = state.oai_resp_message_id;
oai_resp_fc_item_ids = state.oai_resp_fc_item_ids;
oai_resp_seq_num = state.oai_resp_seq_num;
}
@ -440,6 +444,7 @@ struct server_task_result_cmpl_partial : server_task_result {
std::string oai_resp_reasoning_id;
std::string oai_resp_message_id;
std::string oai_resp_fc_id;
std::string oai_resp_fc_item_id;
int oai_resp_seq_num = 0;
int oai_resp_output_idx = 0;

View File

@ -136,8 +136,8 @@ def test_responses_stream_schema_fields():
saw_output_item_done = False
completed_response = None
for data in res:
if "sequence_number" in data:
seen_seq_nums.append(data["sequence_number"])
assert "sequence_number" in data, f"missing sequence_number in {data.get('type')}"
seen_seq_nums.append(data["sequence_number"])
if data.get("type") == "response.output_text.done":
saw_output_text_done = True
assert "content_index" in data
@ -435,11 +435,13 @@ def test_responses_stream_created_event_has_full_response():
assert resp["id"].startswith("resp_")
assert resp["object"] == "response"
assert resp["model"] is not None
assert "metadata" in resp
assert "store" in resp
assert "truncation" in resp
assert "tools" in resp
assert "usage" in resp
assert resp["completed_at"] is None
assert resp["metadata"] == {}
assert resp["store"] == False
assert resp["truncation"] == "disabled"
assert resp["tools"] == []
assert resp["usage"]["input_tokens"] == 0
assert resp["usage"]["output_tokens"] == 0
assert resp["output"] == []
assert resp["output_text"] == ""