server: add full streaming compliance for Responses API events

- Add sequence_number to ALL streaming events (created, in_progress, output_item.added, content_part.added, all delta events)
- Add output_index to all events referencing output items
- Add content_index to content-related events
- Populate the full response object in response.created and response.in_progress events (was only {id, object, status})
- Add id field to function_call output_item.added events
- Add status: completed to reasoning output_item.done events
- Persist counter state across streaming chunks via task_result_state

Fixes: spec-compliant client libraries (e.g. async-openai) that require these fields can now parse all streaming events without error.

Refs: ggml-org/llama.cpp#21174 (fumlig review comment)
2026-03-30 18:13:20 +02:00 · 2026-03-30 18:13:20 +02:00 · 987340767c
parent 467266ba4c
commit 987340767c
2 changed files with 80 additions and 31 deletions
--- a/tools/server/server-task.cpp
+++ b/tools/server/server-task.cpp
@ -1053,6 +1053,7 @@ json server_task_result_cmpl_final::to_json_oaicompat_resp_stream() {
                {"type", "reasoning_text"},
            }})},
            {"encrypted_content", ""},
+            {"status",            "completed"},
        };

        server_sent_events.push_back(json {
@ -1428,20 +1429,42 @@ void server_task_result_cmpl_partial::update(task_result_state & state) {
    oai_resp_reasoning_id  = state.oai_resp_reasoning_id;
    oai_resp_message_id    = state.oai_resp_message_id;
    oai_resp_fc_id         = state.oai_resp_fc_id;
+    // seq_num/output_idx: snapshot from state (already advanced by update() for earlier chunks)
+    oai_resp_seq_num       = state.oai_resp_seq_num;
+    oai_resp_output_idx    = state.oai_resp_output_idx;

    // track if the accumulated message has any reasoning content
    anthropic_has_reasoning = !state.chat_msg.reasoning_content.empty();

    // Pre-compute state updates based on diffs (for next chunk)
+    // Also advance seq_num/output_idx to match events that to_json_oaicompat_resp() will emit
+    if (n_decoded == 1) {
+        state.oai_resp_seq_num += 2; // response.created + response.in_progress
+    }
    for (const common_chat_msg_diff & diff : oaicompat_msg_diffs) {
-        if (!diff.reasoning_content_delta.empty() && !state.thinking_block_started) {
-            state.thinking_block_started = true;
+        if (!diff.reasoning_content_delta.empty()) {
+            if (!state.thinking_block_started) {
+                state.thinking_block_started = true;
+                state.oai_resp_seq_num++;    // output_item.added
+                state.oai_resp_output_idx++;
+            }
+            state.oai_resp_seq_num++; // reasoning_text.delta
        }
-        if (!diff.content_delta.empty() && !state.text_block_started) {
-            state.text_block_started = true;
+        if (!diff.content_delta.empty()) {
+            if (!state.text_block_started) {
+                state.text_block_started = true;
+                state.oai_resp_seq_num += 2; // output_item.added + content_part.added
+                state.oai_resp_output_idx++;
+            }
+            state.oai_resp_seq_num++; // output_text.delta
        }
        if (!diff.tool_call_delta.name.empty()) {
            state.oai_resp_fc_id = diff.tool_call_delta.id;
+            state.oai_resp_seq_num++;    // output_item.added
+            state.oai_resp_output_idx++;
+        }
+        if (!diff.tool_call_delta.arguments.empty()) {
+            state.oai_resp_seq_num++; // function_call_arguments.delta
        }
    }
 }
@ -1583,28 +1606,31 @@ json server_task_result_cmpl_partial::to_json_oaicompat_chat() {

 json server_task_result_cmpl_partial::to_json_oaicompat_resp() {
    std::vector<json> events;
+    int & seq_num    = oai_resp_seq_num;
+    int & output_idx = oai_resp_output_idx;

    if (n_decoded == 1) {
+        // Build initial response object with all required fields but empty output
+        json initial_resp = build_oai_resp_metadata(
+            oai_resp_id, oaicompat_model, {}, "",
+            n_prompt_tokens, 0, n_prompt_tokens_cache);
+        initial_resp["status"] = "in_progress";
+        initial_resp["completed_at"] = nullptr;
+
        events.push_back(json {
            {"event", "response.created"},
            {"data", json {
-                {"type", "response.created"},
-                {"response", json {
-                    {"id",     oai_resp_id},
-                    {"object", "response"},
-                    {"status", "in_progress"},
-                }},
+                {"type",            "response.created"},
+                {"sequence_number", seq_num++},
+                {"response",        initial_resp},
            }},
        });
        events.push_back(json {
            {"event", "response.in_progress"},
            {"data", json {
-                {"type", "response.in_progress"},
-                {"response", json {
-                    {"id",     oai_resp_id},
-                    {"object", "response"},
-                    {"status", "in_progress"},
-                }},
+                {"type",            "response.in_progress"},
+                {"sequence_number", seq_num++},
+                {"response",        initial_resp},
            }},
        });
    }
@ -1615,7 +1641,9 @@ json server_task_result_cmpl_partial::to_json_oaicompat_resp() {
                events.push_back(json {
                    {"event", "response.output_item.added"},
                    {"data", json {
-                        {"type", "response.output_item.added"},
+                        {"type",            "response.output_item.added"},
+                        {"sequence_number", seq_num++},
+                        {"output_index",    output_idx++},
                        {"item", json {
                            {"id",                oai_resp_reasoning_id},
                            {"summary",           json::array()},
@ -1631,9 +1659,12 @@ json server_task_result_cmpl_partial::to_json_oaicompat_resp() {
            events.push_back(json {
                {"event", "response.reasoning_text.delta"},
                {"data", json {
-                    {"type",    "response.reasoning_text.delta"},
-                    {"delta",   diff.reasoning_content_delta},
-                    {"item_id", oai_resp_reasoning_id},
+                    {"type",            "response.reasoning_text.delta"},
+                    {"sequence_number", seq_num++},
+                    {"output_index",    output_idx - 1},
+                    {"content_index",   0},
+                    {"delta",           diff.reasoning_content_delta},
+                    {"item_id",         oai_resp_reasoning_id},
                }},
            });
        }
@ -1643,7 +1674,9 @@ json server_task_result_cmpl_partial::to_json_oaicompat_resp() {
                events.push_back(json {
                    {"event", "response.output_item.added"},
                    {"data", json {
-                        {"type", "response.output_item.added"},
+                        {"type",            "response.output_item.added"},
+                        {"sequence_number", seq_num++},
+                        {"output_index",    output_idx++},
                        {"item", json {
                            {"content", json::array()},
                            {"id",      oai_resp_message_id},
@ -1656,8 +1689,11 @@ json server_task_result_cmpl_partial::to_json_oaicompat_resp() {
                events.push_back(json {
                    {"event", "response.content_part.added"},
                    {"data", json {
-                        {"type",    "response.content_part.added"},
-                        {"item_id", oai_resp_message_id},
+                        {"type",            "response.content_part.added"},
+                        {"sequence_number", seq_num++},
+                        {"output_index",    output_idx - 1},
+                        {"content_index",   0},
+                        {"item_id",         oai_resp_message_id},
                        {"part", json {
                            {"type", "output_text"},
                            {"text", ""},
@ -1669,9 +1705,12 @@ json server_task_result_cmpl_partial::to_json_oaicompat_resp() {
            events.push_back(json {
                {"event", "response.output_text.delta"},
                {"data", json {
-                    {"type",    "response.output_text.delta"},
-                    {"item_id", oai_resp_message_id},
-                    {"delta",   diff.content_delta},
+                    {"type",            "response.output_text.delta"},
+                    {"sequence_number", seq_num++},
+                    {"output_index",    output_idx - 1},
+                    {"content_index",   0},
+                    {"item_id",         oai_resp_message_id},
+                    {"delta",           diff.content_delta},
                }},
            });
        }
@ -1680,10 +1719,13 @@ json server_task_result_cmpl_partial::to_json_oaicompat_resp() {
            events.push_back(json {
                {"event", "response.output_item.added"},
                {"data", json {
-                    {"type",  "response.output_item.added"},
+                    {"type",            "response.output_item.added"},
+                    {"sequence_number", seq_num++},
+                    {"output_index",    output_idx++},
                    {"item", json {
+                        {"id",        "fc_" + random_string()},
                        {"arguments", ""},
-                        {"call_id",   "fc_" + diff.tool_call_delta.id},
+                        {"call_id",   diff.tool_call_delta.id},
                        {"name",      diff.tool_call_delta.name},
                        {"type",      "function_call"},
                        {"status",    "in_progress"},
@ -1697,9 +1739,11 @@ json server_task_result_cmpl_partial::to_json_oaicompat_resp() {
            events.push_back(json {
                {"event", "response.function_call_arguments.delta"},
                {"data", json {
-                    {"type",    "response.function_call_arguments.delta"},
-                    {"delta",   diff.tool_call_delta.arguments},
-                    {"item_id", "fc_" + oai_resp_fc_id},
+                    {"type",            "response.function_call_arguments.delta"},
+                    {"sequence_number", seq_num++},
+                    {"output_index",    output_idx - 1},
+                    {"delta",           diff.tool_call_delta.arguments},
+                    {"item_id",         "fc_" + oai_resp_fc_id},
                }},
            });
        }
--- a/tools/server/server-task.h
+++ b/tools/server/server-task.h
@ -110,6 +110,8 @@ struct task_result_state {
    const std::string oai_resp_reasoning_id;
    const std::string oai_resp_message_id;
    std::string oai_resp_fc_id; // function call ID for current args delta
+    int oai_resp_seq_num    = 0; // monotonically increasing per-stream
+    int oai_resp_output_idx = 0; // tracks current output item index

    task_result_state(const common_chat_parser_params & chat_parser_params)
        : chat_parser_params(chat_parser_params)
@ -385,6 +387,7 @@ struct server_task_result_cmpl_final : server_task_result {
        oai_resp_id = state.oai_resp_id;
        oai_resp_reasoning_id = state.oai_resp_reasoning_id;
        oai_resp_message_id = state.oai_resp_message_id;
+        oai_resp_seq_num = state.oai_resp_seq_num;
    }

    json to_json_non_oaicompat();
@ -437,6 +440,8 @@ struct server_task_result_cmpl_partial : server_task_result {
    std::string oai_resp_reasoning_id;
    std::string oai_resp_message_id;
    std::string oai_resp_fc_id;
+    int         oai_resp_seq_num    = 0;
+    int         oai_resp_output_idx = 0;

    // for Anthropic API: track if any reasoning content has been generated
    bool anthropic_has_reasoning = false;