From 35f62f9eb3a8337b6dc946ce7f24421bc7716465 Mon Sep 17 00:00:00 2001
From: Christopher Albert <albert@tugraz.at>
Date: Mon, 30 Mar 2026 18:24:39 +0200
Subject: [PATCH] server: fix streaming event bugs and tighten test assertions

Code fixes:
- build_oai_resp_metadata accepts status param; completed_at is null
  when status is in_progress (was always set to timestamp)
- response.created/in_progress events use zeroed usage (was passing
  actual prompt tokens before response was logically started)
- Function call item IDs are now generated once per tool call in
  update() and reused consistently across output_item.added,
  function_call_arguments.delta, and output_item.done events
  (was generating independent random IDs in each path)
- Clean up commented-out status checks in server-common.cpp

Test fixes:
- Assert sequence_number on every event unconditionally (was using
  weak "if present" guard)
- Check actual values not just key presence in streaming created
  event test (completed_at is None, usage tokens are 0, etc.)

Refs: ggml-org/llama.cpp#21174 (patrick review)
---
 tools/server/server-common.cpp                |  8 ++---
 tools/server/server-task.cpp                  | 30 +++++++++++--------
 tools/server/server-task.h                    | 11 +++++--
 .../tests/unit/test_compat_oai_responses.py   | 16 +++++-----
 4 files changed, 36 insertions(+), 29 deletions(-)

diff --git a/tools/server/server-common.cpp b/tools/server/server-common.cpp
index 58db4934fe..ae45f24f74 100644
--- a/tools/server/server-common.cpp
+++ b/tools/server/server-common.cpp
@@ -1280,12 +1280,8 @@ json convert_responses_to_chatcmpl(const json & response_body) {
             } else if (exists_and_is_array(item, "content") &&
                 exists_and_is_string(item, "role") &&
                 item.at("role") == "assistant" &&
-                // exists_and_is_string(item, "status") &&
-                // (item.at("status") == "in_progress" ||
-                //     item.at("status") == "completed" ||
-                //     item.at("status") == "incomplete") &&
-                // item["status"] not sent by codex-cli
-                // item["type"] == "message" for OutputMessage, absent for EasyInputMessage
+                // status not checked (not always present, e.g. codex-cli omits it)
+                // type == "message" for OutputMessage, absent for EasyInputMessage
                 (!item.contains("type") || item.at("type") == "message")
             ) {
                 // #responses_create-input-input_item_list-item-output_message
diff --git a/tools/server/server-task.cpp b/tools/server/server-task.cpp
index 5d63e6b697..b2de62d86f 100644
--- a/tools/server/server-task.cpp
+++ b/tools/server/server-task.cpp
@@ -937,17 +937,18 @@ static json build_oai_resp_metadata(const std::string & oai_resp_id,
                                     const std::string & output_text,
                                     int n_prompt_tokens,
                                     int n_decoded,
-                                    int n_prompt_tokens_cache) {
+                                    int n_prompt_tokens_cache,
+                                    const std::string & status = "completed") {
     std::time_t t = std::time(0);
     return json {
-        {"completed_at",         t},
+        {"completed_at",         status == "completed" ? json(t) : json(nullptr)},
         {"created_at",           t},
         {"id",                   oai_resp_id},
         {"model",                oaicompat_model},
         {"object",               "response"},
         {"output",               output},
         {"output_text",          output_text},
-        {"status",               "completed"},
+        {"status",               status},
         {"usage",                json {
             {"input_tokens",          n_prompt_tokens},
             {"output_tokens",         n_decoded},
@@ -1122,10 +1123,14 @@ json server_task_result_cmpl_final::to_json_oaicompat_resp_stream() {
         output_idx++;
     }
 
-    for (const common_chat_tool_call & tool_call : oaicompat_msg.tool_calls) {
+    for (size_t tc_idx = 0; tc_idx < oaicompat_msg.tool_calls.size(); tc_idx++) {
+        const common_chat_tool_call & tool_call = oaicompat_msg.tool_calls[tc_idx];
+        const std::string fc_id = tc_idx < oai_resp_fc_item_ids.size()
+            ? oai_resp_fc_item_ids[tc_idx]
+            : "fc_" + random_string(); // fallback for non-streaming path
         const json output_item = {
             {"type",      "function_call"},
-            {"id",        "fc_" + random_string()},
+            {"id",        fc_id},
             {"call_id",   tool_call.id},
             {"name",      tool_call.name},
             {"arguments", tool_call.arguments},
@@ -1429,7 +1434,7 @@ void server_task_result_cmpl_partial::update(task_result_state & state) {
     oai_resp_reasoning_id  = state.oai_resp_reasoning_id;
     oai_resp_message_id    = state.oai_resp_message_id;
     oai_resp_fc_id         = state.oai_resp_fc_id;
-    // seq_num/output_idx: read from state (may have been advanced by previous to_json call)
+    oai_resp_fc_item_id    = state.oai_resp_fc_item_id;
     oai_resp_seq_num       = state.oai_resp_seq_num;
     oai_resp_output_idx    = state.oai_resp_output_idx;
 
@@ -1460,6 +1465,8 @@ void server_task_result_cmpl_partial::update(task_result_state & state) {
         }
         if (!diff.tool_call_delta.name.empty()) {
             state.oai_resp_fc_id = diff.tool_call_delta.id;
+            state.oai_resp_fc_item_id = "fc_" + random_string();
+            state.oai_resp_fc_item_ids.push_back(state.oai_resp_fc_item_id);
             state.oai_resp_seq_num++;    // output_item.added
             state.oai_resp_output_idx++;
         }
@@ -1610,12 +1617,10 @@ json server_task_result_cmpl_partial::to_json_oaicompat_resp() {
     int & output_idx = oai_resp_output_idx;
 
     if (n_decoded == 1) {
-        // Build initial response object with all required fields but empty output
+        // Build initial response object with all required fields but empty output and zeroed usage
         json initial_resp = build_oai_resp_metadata(
             oai_resp_id, oaicompat_model, {}, "",
-            n_prompt_tokens, 0, n_prompt_tokens_cache);
-        initial_resp["status"] = "in_progress";
-        initial_resp["completed_at"] = nullptr;
+            0, 0, 0, "in_progress");
 
         events.push_back(json {
             {"event", "response.created"},
@@ -1723,7 +1728,7 @@ json server_task_result_cmpl_partial::to_json_oaicompat_resp() {
                     {"sequence_number", seq_num++},
                     {"output_index",    output_idx++},
                     {"item", json {
-                        {"id",        "fc_" + random_string()},
+                        {"id",        oai_resp_fc_item_id},
                         {"arguments", ""},
                         {"call_id",   diff.tool_call_delta.id},
                         {"name",      diff.tool_call_delta.name},
@@ -1732,7 +1737,6 @@ json server_task_result_cmpl_partial::to_json_oaicompat_resp() {
                     }},
                 }},
             });
-            oai_resp_fc_id = diff.tool_call_delta.id;
         }
 
         if (!diff.tool_call_delta.arguments.empty()) {
@@ -1743,7 +1747,7 @@ json server_task_result_cmpl_partial::to_json_oaicompat_resp() {
                     {"sequence_number", seq_num++},
                     {"output_index",    output_idx - 1},
                     {"delta",           diff.tool_call_delta.arguments},
-                    {"item_id",         "fc_" + oai_resp_fc_id},
+                    {"item_id",         oai_resp_fc_item_id},
                 }},
             });
         }
diff --git a/tools/server/server-task.h b/tools/server/server-task.h
index a4ce0449a3..49040445d3 100644
--- a/tools/server/server-task.h
+++ b/tools/server/server-task.h
@@ -109,9 +109,11 @@ struct task_result_state {
     const std::string oai_resp_id;
     const std::string oai_resp_reasoning_id;
     const std::string oai_resp_message_id;
-    std::string oai_resp_fc_id; // function call ID for current args delta
-    int oai_resp_seq_num    = 0; // monotonically increasing per-stream
-    int oai_resp_output_idx = 0; // tracks current output item index
+    std::string oai_resp_fc_id;      // model's tool_call ID for current function call
+    std::string oai_resp_fc_item_id; // our generated fc_ item ID for current function call
+    std::vector<std::string> oai_resp_fc_item_ids; // all generated fc_ IDs, in order of tool call appearance
+    int oai_resp_seq_num    = 0;     // monotonically increasing per-stream
+    int oai_resp_output_idx = 0;     // tracks current output item index
 
     task_result_state(const common_chat_parser_params & chat_parser_params)
         : chat_parser_params(chat_parser_params)
@@ -372,6 +374,7 @@ struct server_task_result_cmpl_final : server_task_result {
     std::string oai_resp_id;
     std::string oai_resp_reasoning_id;
     std::string oai_resp_message_id;
+    std::vector<std::string> oai_resp_fc_item_ids;
     int         oai_resp_seq_num = 0;
 
     virtual bool is_stop() override {
@@ -387,6 +390,7 @@ struct server_task_result_cmpl_final : server_task_result {
         oai_resp_id = state.oai_resp_id;
         oai_resp_reasoning_id = state.oai_resp_reasoning_id;
         oai_resp_message_id = state.oai_resp_message_id;
+        oai_resp_fc_item_ids = state.oai_resp_fc_item_ids;
         oai_resp_seq_num = state.oai_resp_seq_num;
     }
 
@@ -440,6 +444,7 @@ struct server_task_result_cmpl_partial : server_task_result {
     std::string oai_resp_reasoning_id;
     std::string oai_resp_message_id;
     std::string oai_resp_fc_id;
+    std::string oai_resp_fc_item_id;
     int         oai_resp_seq_num    = 0;
     int         oai_resp_output_idx = 0;
 
diff --git a/tools/server/tests/unit/test_compat_oai_responses.py b/tools/server/tests/unit/test_compat_oai_responses.py
index 2f720f0809..fac6310214 100644
--- a/tools/server/tests/unit/test_compat_oai_responses.py
+++ b/tools/server/tests/unit/test_compat_oai_responses.py
@@ -136,8 +136,8 @@ def test_responses_stream_schema_fields():
     saw_output_item_done = False
     completed_response = None
     for data in res:
-        if "sequence_number" in data:
-            seen_seq_nums.append(data["sequence_number"])
+        assert "sequence_number" in data, f"missing sequence_number in {data.get('type')}"
+        seen_seq_nums.append(data["sequence_number"])
         if data.get("type") == "response.output_text.done":
             saw_output_text_done = True
             assert "content_index" in data
@@ -435,11 +435,13 @@ def test_responses_stream_created_event_has_full_response():
         assert resp["id"].startswith("resp_")
         assert resp["object"] == "response"
         assert resp["model"] is not None
-        assert "metadata" in resp
-        assert "store" in resp
-        assert "truncation" in resp
-        assert "tools" in resp
-        assert "usage" in resp
+        assert resp["completed_at"] is None
+        assert resp["metadata"] == {}
+        assert resp["store"] == False
+        assert resp["truncation"] == "disabled"
+        assert resp["tools"] == []
+        assert resp["usage"]["input_tokens"] == 0
+        assert resp["usage"]["output_tokens"] == 0
         assert resp["output"] == []
         assert resp["output_text"] == ""