This commit is contained in:
Christopher Albert 2026-04-01 14:05:51 +03:00 committed by GitHub
commit 53ecea86b5
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 800 additions and 121 deletions

View File

@ -1255,45 +1255,59 @@ json convert_responses_to_chatcmpl(const json & response_body) {
if (item.contains("status")) {
item.erase("status");
}
// Merge system/developer messages into the first system message.
// Many model templates (e.g. Qwen) require all system content at
// position 0 and reject system messages elsewhere in the conversation.
if (item.at("role") == "system" || item.at("role") == "developer") {
if (!chatcmpl_messages.empty() && chatcmpl_messages[0].value("role", "") == "system") {
auto & first_msg = chatcmpl_messages[0];
// Convert string content to array format if needed
if (first_msg["content"].is_string()) {
std::string old_text = first_msg["content"].get<std::string>();
first_msg["content"] = json::array({json{{"text", old_text}, {"type", "text"}}});
}
auto & first_content = first_msg["content"];
for (const auto & part : chatcmpl_content) {
first_content.push_back(part);
}
continue; // merged, don't push a separate message
}
item["role"] = "system";
}
item["content"] = chatcmpl_content;
chatcmpl_messages.push_back(item);
} else if (exists_and_is_array(item, "content") &&
exists_and_is_string(item, "role") &&
item.at("role") == "assistant" &&
// exists_and_is_string(item, "status") &&
// (item.at("status") == "in_progress" ||
// item.at("status") == "completed" ||
// item.at("status") == "incomplete") &&
// item["status"] not sent by codex-cli
exists_and_is_string(item, "type") &&
item.at("type") == "message"
// status not checked (not always present, e.g. codex-cli omits it)
// type == "message" for OutputMessage, absent for EasyInputMessage
(!item.contains("type") || item.at("type") == "message")
) {
// #responses_create-input-input_item_list-item-output_message
auto chatcmpl_content = json::array();
// Also handles AssistantMessageItemParam / EasyInputMessage with role "assistant"
std::vector<json> chatcmpl_content;
for (const auto & output_text : item.at("content")) {
const std::string type = json_value(output_text, "type", std::string());
if (type == "output_text") {
if (type == "output_text" || type == "input_text") {
if (!exists_and_is_string(output_text, "text")) {
throw std::invalid_argument("'Output text' requires 'text'");
// Ignore annotations and logprobs for now
chatcmpl_content.push_back({
{"text", output_text.at("text")},
{"type", "text"},
});
}
chatcmpl_content.push_back({
{"text", output_text.at("text")},
{"type", "text"},
});
} else if (type == "refusal") {
if (!exists_and_is_string(output_text, "refusal")) {
throw std::invalid_argument("'Refusal' requires 'refusal'");
// Ignore annotations and logprobs for now
chatcmpl_content.push_back({
{"refusal", output_text.at("refusal")},
{"type", "refusal"},
});
}
chatcmpl_content.push_back({
{"refusal", output_text.at("refusal")},
{"type", "refusal"},
});
} else {
throw std::invalid_argument("'type' must be one of 'output_text' or 'refusal'");
throw std::invalid_argument("'type' must be 'output_text', 'input_text', or 'refusal'");
}
}
@ -1303,7 +1317,9 @@ json convert_responses_to_chatcmpl(const json & response_body) {
prev_msg["content"] = json::array();
}
auto & prev_content = prev_msg["content"];
prev_content.insert(prev_content.end(), chatcmpl_content.begin(), chatcmpl_content.end());
for (const auto & part : chatcmpl_content) {
prev_content.push_back(part);
}
} else {
item.erase("status");
item.erase("type");
@ -1371,24 +1387,30 @@ json convert_responses_to_chatcmpl(const json & response_body) {
item.at("type") == "reasoning") {
// #responses_create-input-input_item_list-item-reasoning
if (!exists_and_is_array(item, "content")) {
throw std::invalid_argument("item['content'] is not an array");
}
if (item.at("content").empty()) {
throw std::invalid_argument("item['content'] is empty");
}
if (!exists_and_is_string(item.at("content")[0], "text")) {
throw std::invalid_argument("item['content']['text'] is not a string");
// content can be: null, omitted, a string, or array of {type, text} objects.
// Codex may send content:null or omit it entirely (issue openai/codex#11834).
// OpenCode may send content as a plain string.
// The spec uses array format: [{"type":"reasoning_text","text":"..."}].
// encrypted_content (opaque string) is accepted but ignored for local models.
std::string reasoning_text;
if (!item.contains("content") || item.at("content").is_null()) {
// null or missing content — skip (encrypted_content only, or empty reasoning)
} else if (item.at("content").is_string()) {
reasoning_text = item.at("content").get<std::string>();
} else if (item.at("content").is_array() && !item.at("content").empty()
&& exists_and_is_string(item.at("content")[0], "text")) {
reasoning_text = item.at("content")[0].at("text").get<std::string>();
}
// else: empty array or unrecognized format — treat as empty reasoning
if (merge_prev) {
auto & prev_msg = chatcmpl_messages.back();
prev_msg["reasoning_content"] = item.at("content")[0].at("text");
prev_msg["reasoning_content"] = reasoning_text;
} else {
chatcmpl_messages.push_back(json {
{"role", "assistant"},
{"content", json::array()},
{"reasoning_content", item.at("content")[0].at("text")},
{"reasoning_content", reasoning_text},
});
}
} else {
@ -1407,11 +1429,17 @@ json convert_responses_to_chatcmpl(const json & response_body) {
}
std::vector<json> chatcmpl_tools;
for (json resp_tool : response_body.at("tools")) {
json chatcmpl_tool;
const std::string tool_type = json_value(resp_tool, "type", std::string());
if (json_value(resp_tool, "type", std::string()) != "function") {
throw std::invalid_argument("'type' of tool must be 'function'");
// Skip non-function tools (e.g. web_search, code_interpreter)
// sent by clients like Codex CLI — these are provider-specific
// and cannot be converted to chat completions function tools
if (tool_type != "function") {
SRV_WRN("skipping unsupported tool type '%s' in Responses conversion\n", tool_type.c_str());
continue;
}
json chatcmpl_tool;
resp_tool.erase("type");
chatcmpl_tool["type"] = "function";
@ -1422,7 +1450,9 @@ json convert_responses_to_chatcmpl(const json & response_body) {
chatcmpl_tools.push_back(chatcmpl_tool);
}
chatcmpl_body.erase("tools");
chatcmpl_body["tools"] = chatcmpl_tools;
if (!chatcmpl_tools.empty()) {
chatcmpl_body["tools"] = chatcmpl_tools;
}
}
if (response_body.contains("max_output_tokens")) {
@ -1430,6 +1460,15 @@ json convert_responses_to_chatcmpl(const json & response_body) {
chatcmpl_body["max_tokens"] = response_body["max_output_tokens"];
}
// Strip Responses-only keys that have no chat completions equivalent
// (e.g. Codex CLI sends store, include, prompt_cache_key, web_search)
for (const char * key : {
"store", "include", "prompt_cache_key", "web_search",
"text", "truncation", "metadata",
}) {
chatcmpl_body.erase(key);
}
return chatcmpl_body;
}

View File

@ -917,6 +917,71 @@ json server_task_result_cmpl_final::to_json_oaicompat_chat_stream() {
return deltas;
}
// Concatenate the text of every "output_text" content part across all
// "message" items in `output`. This mirrors the OpenAI Responses API
// convenience field `output_text` (SDK-computed upstream, server-side here).
// Non-message items (function_call, reasoning) contribute nothing.
static std::string build_output_text(const std::vector<json> & output) {
    std::string result;
    for (const auto & item : output) {
        // only message items carry text content
        if (json_value(item, "type", std::string()) != "message") {
            continue;
        }
        // defensive: skip malformed items without a content array instead
        // of letting json::at() throw (original code assumed presence)
        if (!item.contains("content") || !item.at("content").is_array()) {
            continue;
        }
        for (const auto & part : item.at("content")) {
            if (json_value(part, "type", std::string()) == "output_text" &&
                part.contains("text") && part.at("text").is_string()) {
                result += part.at("text").get<std::string>();
            }
        }
    }
    return result;
}
// Build a complete OpenAI Responses API "response" object.
// Fields the server does not track are filled with the spec's documented
// defaults (e.g. top_p = 1.0, service_tier = "default", tools = []) so
// that strict clients validating the Responses schema accept the payload.
// Used for the final non-streaming response as well as the
// response.created / response.in_progress / response.completed events.
// NOTE(review): temperature/top_p etc. are fixed defaults and do not echo
// the request's actual sampling settings — confirm this is intended.
static json build_oai_resp_metadata(const std::string & oai_resp_id,
const std::string & oaicompat_model,
const std::vector<json> & output,
const std::string & output_text,
int n_prompt_tokens,
int n_decoded,
int n_prompt_tokens_cache,
const std::string & status = "completed") {
std::time_t t = std::time(0);
return json {
// completed_at is null while the response is still in progress
{"completed_at", status == "completed" ? json(t) : json(nullptr)},
{"created_at", t},
{"id", oai_resp_id},
{"model", oaicompat_model},
{"object", "response"},
{"output", output},
// convenience concatenation of all output_text parts (see build_output_text)
{"output_text", output_text},
{"status", status},
{"usage", json {
{"input_tokens", n_prompt_tokens},
{"output_tokens", n_decoded},
{"total_tokens", n_decoded + n_prompt_tokens},
{"input_tokens_details", json{{"cached_tokens", n_prompt_tokens_cache}}},
// reasoning token accounting is not tracked separately yet
{"output_tokens_details", json{{"reasoning_tokens", 0}}},
}},
// schema-required fields below carry spec defaults; the server does not
// implement the corresponding features (storage, truncation, metadata, ...)
{"incomplete_details", nullptr},
{"previous_response_id", nullptr},
{"instructions", nullptr},
{"error", nullptr},
{"tools", json::array()},
{"tool_choice", "auto"},
{"truncation", "disabled"},
{"parallel_tool_calls", false},
{"text", json{{"format", json{{"type", "text"}}}}},
{"top_p", 1.0},
{"presence_penalty", 0.0},
{"frequency_penalty", 0.0},
{"top_logprobs", 0},
{"temperature", 1.0},
{"reasoning", nullptr},
{"max_output_tokens", nullptr},
{"max_tool_calls", nullptr},
{"store", false},
{"background", false},
{"service_tier", "default"},
{"safety_identifier", nullptr},
{"prompt_cache_key", nullptr},
{"metadata", json::object()},
};
}
json server_task_result_cmpl_final::to_json_oaicompat_resp() {
common_chat_msg msg;
if (!oaicompat_msg.empty()) {
@ -960,36 +1025,24 @@ json server_task_result_cmpl_final::to_json_oaicompat_resp() {
for (const common_chat_tool_call & tool_call : oaicompat_msg.tool_calls) {
output.push_back(json {
{"type", "function_call"},
{"status", "completed"},
{"arguments", tool_call.arguments},
{"call_id", "fc_" + tool_call.id},
{"id", "fc_" + random_string()},
{"call_id", tool_call.id},
{"name", tool_call.name},
{"arguments", tool_call.arguments},
{"status", "completed"},
});
}
std::time_t t = std::time(0);
json res = {
{"completed_at", t},
{"created_at", t},
{"id", oai_resp_id},
{"model", oaicompat_model},
{"object", "response"},
{"output", output},
{"status", "completed"},
{"usage", json {
{"input_tokens", n_prompt_tokens},
{"output_tokens", n_decoded},
{"total_tokens", n_decoded + n_prompt_tokens},
{"input_tokens_details", json { {"cached_tokens", n_prompt_tokens_cache} }},
}},
};
return res;
std::string output_text = build_output_text(output);
return build_oai_resp_metadata(oai_resp_id, oaicompat_model, output, output_text,
n_prompt_tokens, n_decoded, n_prompt_tokens_cache);
}
json server_task_result_cmpl_final::to_json_oaicompat_resp_stream() {
std::vector<json> server_sent_events;
std::vector<json> output;
int & seq_num = oai_resp_seq_num;
int output_idx = 0;
if (oaicompat_msg.reasoning_content != "") {
const json output_item = json {
@ -1001,25 +1054,33 @@ json server_task_result_cmpl_final::to_json_oaicompat_resp_stream() {
{"type", "reasoning_text"},
}})},
{"encrypted_content", ""},
{"status", "completed"},
};
server_sent_events.push_back(json {
{"event", "response.output_item.done"},
{"data", json {
{"type", "response.output_item.done"},
{"item", output_item}
{"type", "response.output_item.done"},
{"sequence_number", seq_num++},
{"output_index", output_idx},
{"item", output_item},
}}
});
output.push_back(output_item);
output_idx++;
}
if (oaicompat_msg.content != "") {
server_sent_events.push_back(json {
{"event", "response.output_text.done"},
{"data", json {
{"type", "response.output_text.done"},
{"item_id", oai_resp_message_id},
{"text", oaicompat_msg.content}
{"type", "response.output_text.done"},
{"sequence_number", seq_num++},
{"output_index", output_idx},
{"content_index", 0},
{"item_id", oai_resp_message_id},
{"text", oaicompat_msg.content},
{"logprobs", json::array()},
}}
});
@ -1033,9 +1094,12 @@ json server_task_result_cmpl_final::to_json_oaicompat_resp_stream() {
server_sent_events.push_back(json {
{"event", "response.content_part.done"},
{"data", json {
{"type", "response.content_part.done"},
{"item_id", oai_resp_message_id},
{"part", content_part}
{"type", "response.content_part.done"},
{"sequence_number", seq_num++},
{"output_index", output_idx},
{"content_index", 0},
{"item_id", oai_resp_message_id},
{"part", content_part},
}}
});
const json output_item = {
@ -1049,50 +1113,52 @@ json server_task_result_cmpl_final::to_json_oaicompat_resp_stream() {
server_sent_events.push_back(json {
{"event", "response.output_item.done"},
{"data", json {
{"type", "response.output_item.done"},
{"item", output_item}
{"type", "response.output_item.done"},
{"sequence_number", seq_num++},
{"output_index", output_idx},
{"item", output_item},
}}
});
output.push_back(output_item);
output_idx++;
}
for (const common_chat_tool_call & tool_call : oaicompat_msg.tool_calls) {
for (size_t tc_idx = 0; tc_idx < oaicompat_msg.tool_calls.size(); tc_idx++) {
const common_chat_tool_call & tool_call = oaicompat_msg.tool_calls[tc_idx];
const std::string fc_id = tc_idx < oai_resp_fc_item_ids.size()
? oai_resp_fc_item_ids[tc_idx]
: "fc_" + random_string(); // fallback for non-streaming path
const json output_item = {
{"type", "function_call"},
{"status", "completed"},
{"id", fc_id},
{"call_id", tool_call.id},
{"name", tool_call.name},
{"arguments", tool_call.arguments},
{"call_id", "fc_" + tool_call.id},
{"name", tool_call.name}
{"status", "completed"},
};
server_sent_events.push_back(json {
{"event", "response.output_item.done"},
{"data", json {
{"type", "response.output_item.done"},
{"item", output_item}
{"type", "response.output_item.done"},
{"sequence_number", seq_num++},
{"output_index", output_idx},
{"item", output_item},
}}
});
output.push_back(output_item);
output_idx++;
}
std::time_t t = std::time(0);
std::string output_text = build_output_text(output);
json resp = build_oai_resp_metadata(oai_resp_id, oaicompat_model, output, output_text,
n_prompt_tokens, n_decoded, n_prompt_tokens_cache);
server_sent_events.push_back(json {
{"event", "response.completed"},
{"data", json {
{"type", "response.completed"},
{"response", json {
{"id", oai_resp_id},
{"object", "response"},
{"created_at", t},
{"status", "completed"},
{"model", oaicompat_model},
{"output", output},
{"usage", json {
{"input_tokens", n_prompt_tokens},
{"output_tokens", n_decoded},
{"total_tokens", n_decoded + n_prompt_tokens},
{"input_tokens_details", json { {"cached_tokens", n_prompt_tokens_cache} }},
}}
}},
{"type", "response.completed"},
{"sequence_number", seq_num++},
{"response", resp},
}}
});
@ -1368,20 +1434,44 @@ void server_task_result_cmpl_partial::update(task_result_state & state) {
oai_resp_reasoning_id = state.oai_resp_reasoning_id;
oai_resp_message_id = state.oai_resp_message_id;
oai_resp_fc_id = state.oai_resp_fc_id;
oai_resp_fc_item_id = state.oai_resp_fc_item_id;
oai_resp_seq_num = state.oai_resp_seq_num;
oai_resp_output_idx = state.oai_resp_output_idx;
// track if the accumulated message has any reasoning content
anthropic_has_reasoning = !state.chat_msg.reasoning_content.empty();
// Pre-compute state updates based on diffs (for next chunk)
// Also advance seq_num/output_idx to match events that to_json_oaicompat_resp() will emit
if (n_decoded == 1) {
state.oai_resp_seq_num += 2; // response.created + response.in_progress
}
for (const common_chat_msg_diff & diff : oaicompat_msg_diffs) {
if (!diff.reasoning_content_delta.empty() && !state.thinking_block_started) {
state.thinking_block_started = true;
if (!diff.reasoning_content_delta.empty()) {
if (!state.thinking_block_started) {
state.thinking_block_started = true;
state.oai_resp_seq_num++; // output_item.added
state.oai_resp_output_idx++;
}
state.oai_resp_seq_num++; // reasoning_text.delta
}
if (!diff.content_delta.empty() && !state.text_block_started) {
state.text_block_started = true;
if (!diff.content_delta.empty()) {
if (!state.text_block_started) {
state.text_block_started = true;
state.oai_resp_seq_num += 2; // output_item.added + content_part.added
state.oai_resp_output_idx++;
}
state.oai_resp_seq_num++; // output_text.delta
}
if (!diff.tool_call_delta.name.empty()) {
state.oai_resp_fc_id = diff.tool_call_delta.id;
state.oai_resp_fc_item_id = "fc_" + random_string();
state.oai_resp_fc_item_ids.push_back(state.oai_resp_fc_item_id);
state.oai_resp_seq_num++; // output_item.added
state.oai_resp_output_idx++;
}
if (!diff.tool_call_delta.arguments.empty()) {
state.oai_resp_seq_num++; // function_call_arguments.delta
}
}
}
@ -1523,28 +1613,29 @@ json server_task_result_cmpl_partial::to_json_oaicompat_chat() {
json server_task_result_cmpl_partial::to_json_oaicompat_resp() {
std::vector<json> events;
int & seq_num = oai_resp_seq_num;
int & output_idx = oai_resp_output_idx;
if (n_decoded == 1) {
// Build initial response object with all required fields but empty output and zeroed usage
json initial_resp = build_oai_resp_metadata(
oai_resp_id, oaicompat_model, {}, "",
0, 0, 0, "in_progress");
events.push_back(json {
{"event", "response.created"},
{"data", json {
{"type", "response.created"},
{"response", json {
{"id", oai_resp_id},
{"object", "response"},
{"status", "in_progress"},
}},
{"type", "response.created"},
{"sequence_number", seq_num++},
{"response", initial_resp},
}},
});
events.push_back(json {
{"event", "response.in_progress"},
{"data", json {
{"type", "response.in_progress"},
{"response", json {
{"id", oai_resp_id},
{"object", "response"},
{"status", "in_progress"},
}},
{"type", "response.in_progress"},
{"sequence_number", seq_num++},
{"response", initial_resp},
}},
});
}
@ -1555,7 +1646,9 @@ json server_task_result_cmpl_partial::to_json_oaicompat_resp() {
events.push_back(json {
{"event", "response.output_item.added"},
{"data", json {
{"type", "response.output_item.added"},
{"type", "response.output_item.added"},
{"sequence_number", seq_num++},
{"output_index", output_idx++},
{"item", json {
{"id", oai_resp_reasoning_id},
{"summary", json::array()},
@ -1571,9 +1664,12 @@ json server_task_result_cmpl_partial::to_json_oaicompat_resp() {
events.push_back(json {
{"event", "response.reasoning_text.delta"},
{"data", json {
{"type", "response.reasoning_text.delta"},
{"delta", diff.reasoning_content_delta},
{"item_id", oai_resp_reasoning_id},
{"type", "response.reasoning_text.delta"},
{"sequence_number", seq_num++},
{"output_index", output_idx - 1},
{"content_index", 0},
{"delta", diff.reasoning_content_delta},
{"item_id", oai_resp_reasoning_id},
}},
});
}
@ -1583,7 +1679,9 @@ json server_task_result_cmpl_partial::to_json_oaicompat_resp() {
events.push_back(json {
{"event", "response.output_item.added"},
{"data", json {
{"type", "response.output_item.added"},
{"type", "response.output_item.added"},
{"sequence_number", seq_num++},
{"output_index", output_idx++},
{"item", json {
{"content", json::array()},
{"id", oai_resp_message_id},
@ -1596,8 +1694,11 @@ json server_task_result_cmpl_partial::to_json_oaicompat_resp() {
events.push_back(json {
{"event", "response.content_part.added"},
{"data", json {
{"type", "response.content_part.added"},
{"item_id", oai_resp_message_id},
{"type", "response.content_part.added"},
{"sequence_number", seq_num++},
{"output_index", output_idx - 1},
{"content_index", 0},
{"item_id", oai_resp_message_id},
{"part", json {
{"type", "output_text"},
{"text", ""},
@ -1609,9 +1710,12 @@ json server_task_result_cmpl_partial::to_json_oaicompat_resp() {
events.push_back(json {
{"event", "response.output_text.delta"},
{"data", json {
{"type", "response.output_text.delta"},
{"item_id", oai_resp_message_id},
{"delta", diff.content_delta},
{"type", "response.output_text.delta"},
{"sequence_number", seq_num++},
{"output_index", output_idx - 1},
{"content_index", 0},
{"item_id", oai_resp_message_id},
{"delta", diff.content_delta},
}},
});
}
@ -1620,26 +1724,30 @@ json server_task_result_cmpl_partial::to_json_oaicompat_resp() {
events.push_back(json {
{"event", "response.output_item.added"},
{"data", json {
{"type", "response.output_item.added"},
{"type", "response.output_item.added"},
{"sequence_number", seq_num++},
{"output_index", output_idx++},
{"item", json {
{"id", oai_resp_fc_item_id},
{"arguments", ""},
{"call_id", "fc_" + diff.tool_call_delta.id},
{"call_id", diff.tool_call_delta.id},
{"name", diff.tool_call_delta.name},
{"type", "function_call"},
{"status", "in_progress"},
}},
}},
});
oai_resp_fc_id = diff.tool_call_delta.id;
}
if (!diff.tool_call_delta.arguments.empty()) {
events.push_back(json {
{"event", "response.function_call_arguments.delta"},
{"data", json {
{"type", "response.function_call_arguments.delta"},
{"delta", diff.tool_call_delta.arguments},
{"item_id", "fc_" + oai_resp_fc_id},
{"type", "response.function_call_arguments.delta"},
{"sequence_number", seq_num++},
{"output_index", output_idx - 1},
{"delta", diff.tool_call_delta.arguments},
{"item_id", oai_resp_fc_item_id},
}},
});
}

View File

@ -109,7 +109,11 @@ struct task_result_state {
const std::string oai_resp_id;
const std::string oai_resp_reasoning_id;
const std::string oai_resp_message_id;
std::string oai_resp_fc_id; // function call ID for current args delta
std::string oai_resp_fc_id; // model's tool_call ID for current function call
std::string oai_resp_fc_item_id; // our generated fc_ item ID for current function call
std::vector<std::string> oai_resp_fc_item_ids; // all generated fc_ IDs, in order of tool call appearance
int oai_resp_seq_num = 0; // monotonically increasing per-stream
int oai_resp_output_idx = 0; // tracks current output item index
task_result_state(const common_chat_parser_params & chat_parser_params)
: chat_parser_params(chat_parser_params)
@ -370,6 +374,8 @@ struct server_task_result_cmpl_final : server_task_result {
std::string oai_resp_id;
std::string oai_resp_reasoning_id;
std::string oai_resp_message_id;
std::vector<std::string> oai_resp_fc_item_ids;
int oai_resp_seq_num = 0;
virtual bool is_stop() override {
return true; // in stream mode, final responses are considered stop
@ -384,6 +390,8 @@ struct server_task_result_cmpl_final : server_task_result {
oai_resp_id = state.oai_resp_id;
oai_resp_reasoning_id = state.oai_resp_reasoning_id;
oai_resp_message_id = state.oai_resp_message_id;
oai_resp_fc_item_ids = state.oai_resp_fc_item_ids;
oai_resp_seq_num = state.oai_resp_seq_num;
}
json to_json_non_oaicompat();
@ -436,6 +444,9 @@ struct server_task_result_cmpl_partial : server_task_result {
std::string oai_resp_reasoning_id;
std::string oai_resp_message_id;
std::string oai_resp_fc_id;
std::string oai_resp_fc_item_id;
int oai_resp_seq_num = 0;
int oai_resp_output_idx = 0;
// for Anthropic API: track if any reasoning content has been generated
bool anthropic_has_reasoning = false;

View File

@ -71,3 +71,524 @@ def test_responses_stream_with_openai_library():
assert r.response.output[0].id.startswith("msg_")
assert gathered_text == r.response.output_text
assert match_regex("(Suddenly)+", r.response.output_text)
def test_responses_schema_fields():
    """Every Response-object field required by the OpenAI Responses API
    schema must be present with its expected default value, and the usage
    sub-objects must carry integer token details."""
    global server
    server.start()
    res = server.make_request("POST", "/v1/responses", data={
        "model": "gpt-4.1",
        "input": "Book",
        "max_output_tokens": 8,
        "temperature": 0.8,
    })
    assert res.status_code == 200
    body = res.body
    # token-detail sub-fields must be integers
    usage = body["usage"]
    assert isinstance(usage["input_tokens_details"]["cached_tokens"], int)
    assert isinstance(usage["output_tokens_details"]["reasoning_tokens"], int)
    # the 24 schema-required fields and their expected default values
    expected_defaults = {
        "incomplete_details": None,
        "previous_response_id": None,
        "instructions": None,
        "error": None,
        "tools": [],
        "tool_choice": "auto",
        "truncation": "disabled",
        "parallel_tool_calls": False,
        "text": {"format": {"type": "text"}},
        "top_p": 1.0,
        "temperature": 1.0,
        "presence_penalty": 0.0,
        "frequency_penalty": 0.0,
        "top_logprobs": 0,
        "reasoning": None,
        "max_output_tokens": None,
        "store": False,
        "service_tier": "default",
        "metadata": {},
        "background": False,
        "safety_identifier": None,
        "prompt_cache_key": None,
        "max_tool_calls": None,
    }
    for field, expected in expected_defaults.items():
        assert body[field] == expected, f"unexpected value for {field!r}"
def test_responses_stream_schema_fields():
    """Streaming done-events must carry the sequence_number, output_index
    and content_index fields, and the response.completed payload must
    include the full Response schema fields."""
    global server
    server.start()
    res = server.make_stream_request("POST", "/v1/responses", data={
        "model": "gpt-4.1",
        "input": "Book",
        "max_output_tokens": 8,
        "temperature": 0.8,
        "stream": True,
    })
    seq_nums = []
    seen_types = set()
    completed_response = None
    for data in res:
        event_type = data.get("type")
        assert "sequence_number" in data, f"missing sequence_number in {event_type}"
        seq_nums.append(data["sequence_number"])
        seen_types.add(event_type)
        if event_type == "response.output_text.done":
            assert "content_index" in data
            assert "output_index" in data
            assert "logprobs" in data
            assert isinstance(data["logprobs"], list)
        elif event_type == "response.content_part.done":
            assert "content_index" in data
            assert "output_index" in data
        elif event_type == "response.output_item.done":
            assert "output_index" in data
        elif event_type == "response.completed":
            completed_response = data["response"]
    # every done-event variety must have been observed
    for required in ("response.output_text.done",
                     "response.content_part.done",
                     "response.output_item.done"):
        assert required in seen_types, f"never received {required}"
    # sequence numbers must be strictly increasing across the stream
    assert len(seq_nums) >= 4, f"expected >= 4 sequenced events, got {len(seq_nums)}"
    assert seq_nums == sorted(set(seq_nums)), "sequence_numbers not strictly increasing"
    # the completed response must carry the new schema fields
    assert completed_response is not None
    assert completed_response["metadata"] == {}
    assert completed_response["store"] == False
    assert completed_response["truncation"] == "disabled"
    assert completed_response["usage"]["output_tokens_details"]["reasoning_tokens"] == 0
def test_responses_non_function_tool_skipped():
    """Tools with a non-function type (web_search, code_interpreter, ...)
    are silently dropped during conversion: the request must return 200
    and produce normal text output, exactly as if no tools were supplied.
    (Upstream OpenAI rejects such types with a 400.)"""
    global server
    server.start()
    payload = {
        "model": "gpt-4.1",
        "input": [
            {"role": "system", "content": "Book"},
            {"role": "user", "content": "What is the best book"},
        ],
        "max_output_tokens": 8,
        "temperature": 0.8,
        "tools": [
            {"type": "web_search"},
            {"type": "code_interpreter"},
        ],
    }
    res = server.make_request("POST", "/v1/responses", data=payload)
    assert res.status_code == 200
    assert res.body["status"] == "completed"
    # generation must proceed as if no tools were given
    assert len(res.body["output"]) > 0
    assert len(res.body["output_text"]) > 0
def test_responses_only_non_function_tools_same_as_no_tools():
    """When every supplied tool has a non-function type, all of them are
    filtered out and the request must be indistinguishable from one sent
    without a tools field. Identical prompt token counts prove the tools
    field was truly empty when the prompt was built."""
    global server
    server.start()
    base_payload = {
        "model": "gpt-4.1",
        "input": [
            {"role": "system", "content": "Book"},
            {"role": "user", "content": "What is the best book"},
        ],
        "max_output_tokens": 8,
        "temperature": 0.8,
    }
    no_tools = server.make_request("POST", "/v1/responses", data=base_payload)
    # same request, but with only skippable (non-function) tools attached
    tool_payload = dict(base_payload)
    tool_payload["tools"] = [
        {"type": "web_search"},
        {"type": "code_interpreter"},
        {"type": "file_search"},
    ]
    with_skipped_tools = server.make_request("POST", "/v1/responses", data=tool_payload)
    assert no_tools.status_code == 200
    assert with_skipped_tools.status_code == 200
    # a truly-stripped tools list cannot change the prompt
    assert with_skipped_tools.body["usage"]["input_tokens"] == no_tools.body["usage"]["input_tokens"]
def test_responses_extra_keys_stripped():
    """Responses-only request keys (store, include, prompt_cache_key, ...)
    are removed before the request reaches the chat completions handler,
    so their presence must not change the outcome or the token usage."""
    global server
    server.start()
    common = {
        "model": "gpt-4.1",
        "input": [
            {"role": "system", "content": "Book"},
            {"role": "user", "content": "What is the best book"},
        ],
        "max_output_tokens": 8,
        "temperature": 0.8,
    }
    # baseline request without any extra keys
    baseline = server.make_request("POST", "/v1/responses", data=common)
    assert baseline.status_code == 200
    # identical request with every Responses-only key attached
    extra = dict(common)
    extra.update({
        "store": True,
        "include": ["usage"],
        "prompt_cache_key": "test_key",
        "web_search": {"enabled": True},
        "text": {"format": {"type": "text"}},
        "truncation": "auto",
        "metadata": {"key": "value"},
    })
    res = server.make_request("POST", "/v1/responses", data=extra)
    assert res.status_code == 200
    assert res.body["status"] == "completed"
    # stripped keys must leave the prompt untouched
    assert res.body["usage"]["input_tokens"] == baseline.body["usage"]["input_tokens"]
def test_responses_developer_role_merging():
    """A developer-role message must be folded into the system message at
    position 0, so templates that only accept a single leading system
    message never see developer content as its own turn. Equal prompt
    token counts against a pre-combined system message prove the merge."""
    global server
    server.start()
    def text_part(text):
        # shorthand for an input_text content part
        return {"type": "input_text", "text": text}
    # single system message carrying both pieces of content
    combined = server.make_request("POST", "/v1/responses", data={
        "model": "gpt-4.1",
        "input": [
            {"role": "system", "content": [text_part("Book"), text_part("Keep it short")]},
            {"role": "user", "content": [text_part("What is the best book")]},
        ],
        "max_output_tokens": 8,
        "temperature": 0.8,
    })
    assert combined.status_code == 200
    # same content split across a system message and a later developer message
    split = server.make_request("POST", "/v1/responses", data={
        "model": "gpt-4.1",
        "input": [
            {"role": "system", "content": [text_part("Book")]},
            {"role": "user", "content": [text_part("What is the best book")]},
            {"role": "developer", "content": [text_part("Keep it short")]},
        ],
        "max_output_tokens": 8,
        "temperature": 0.8,
    })
    assert split.status_code == 200
    assert split.body["status"] == "completed"
    # merged prompt must cost exactly as many input tokens as the combined one
    assert split.body["usage"]["input_tokens"] == combined.body["usage"]["input_tokens"]
def test_responses_input_text_type_multi_turn():
    """Assistant messages may use input_text parts (EasyInputMessage) and
    may omit type:'message' (AssistantMessageItemParam); both must be
    accepted and the full conversation must reach the model. A higher
    prompt token count than a single-turn request proves the context
    was preserved."""
    global server
    server.start()
    multi_turn = server.make_request("POST", "/v1/responses", data={
        "model": "gpt-4.1",
        "input": [
            {"role": "user", "content": [{"type": "input_text", "text": "Hello"}]},
            {
                "role": "assistant",
                "content": [{"type": "input_text", "text": "Hi there"}],
            },
            {"role": "user", "content": [{"type": "input_text", "text": "How are you"}]},
        ],
        "max_output_tokens": 8,
        "temperature": 0.8,
    })
    assert multi_turn.status_code == 200
    assert multi_turn.body["status"] == "completed"
    # single-turn baseline for the token-count comparison
    single_turn = server.make_request("POST", "/v1/responses", data={
        "model": "gpt-4.1",
        "input": "How are you",
        "max_output_tokens": 8,
        "temperature": 0.8,
    })
    assert single_turn.status_code == 200
    # a three-message history must consume more prompt tokens than one message
    assert multi_turn.body["usage"]["input_tokens"] > single_turn.body["usage"]["input_tokens"]
def test_responses_output_text_matches_content():
    """output_text must equal the concatenation of all output_text content
    parts of the response's message items (non-streaming path)."""
    global server
    server.start()
    resp = server.make_request("POST", "/v1/responses", data={
        "model": "gpt-4.1",
        "input": [
            {"role": "system", "content": "Book"},
            {"role": "user", "content": "What is the best book"},
        ],
        "max_output_tokens": 8,
        "temperature": 0.8,
    })
    assert resp.status_code == 200
    # Rebuild the text from the individual content parts and compare.
    rebuilt = "".join(
        part["text"]
        for item in resp.body["output"]
        if item.get("type") == "message"
        for part in item["content"]
        if part.get("type") == "output_text"
    )
    assert resp.body["output_text"] == rebuilt
    assert len(rebuilt) > 0
def test_responses_stream_output_text_consistency():
    """Text accumulated from response.output_text.delta events must equal the
    output_text carried by the final response.completed event."""
    global server
    server.start()
    stream = server.make_stream_request("POST", "/v1/responses", data={
        "model": "gpt-4.1",
        "input": [
            {"role": "system", "content": "Book"},
            {"role": "user", "content": "What is the best book"},
        ],
        "max_output_tokens": 8,
        "temperature": 0.8,
        "stream": True,
    })
    collected = ""
    final_text = None
    for event in stream:
        event_type = event.get("type")
        if event_type == "response.output_text.delta":
            collected += event["delta"]
        elif event_type == "response.completed":
            final_text = event["response"]["output_text"]
            # The completed response's content parts must match as well.
            for item in event["response"]["output"]:
                if item.get("type") != "message":
                    continue
                for part in item["content"]:
                    if part.get("type") == "output_text":
                        assert part["text"] == collected
    assert final_text is not None
    assert collected == final_text
    assert len(collected) > 0
def test_responses_stream_created_event_has_full_response():
    """response.created and response.in_progress must contain the full response
    object with all required fields, not just {id, object, status}. Strict
    client libraries such as async-openai reject a minimal stub."""
    global server
    server.start()
    res = server.make_stream_request("POST", "/v1/responses", data={
        "model": "gpt-4.1",
        "input": [
            {"role": "system", "content": "Book"},
            {"role": "user", "content": "What is the best book"},
        ],
        "max_output_tokens": 8,
        "temperature": 0.8,
        "stream": True,
    })
    created_resp = None
    in_progress_resp = None
    for data in res:
        if data.get("type") == "response.created":
            created_resp = data["response"]
        if data.get("type") == "response.in_progress":
            in_progress_resp = data["response"]
    assert created_resp is not None, "never received response.created"
    assert in_progress_resp is not None, "never received response.in_progress"
    # Both must have the full response object, not just minimal fields
    for resp in [created_resp, in_progress_resp]:
        assert resp["status"] == "in_progress"
        assert resp["id"].startswith("resp_")
        assert resp["object"] == "response"
        assert resp["model"] is not None
        assert resp["completed_at"] is None
        assert resp["metadata"] == {}
        # `is False` (not `== False`): JSON false parses to the Python False
        # singleton, and an identity check also rejects a bogus 0 (PEP 8 / E712).
        assert resp["store"] is False
        assert resp["truncation"] == "disabled"
        assert resp["tools"] == []
        assert resp["usage"]["input_tokens"] == 0
        assert resp["usage"]["output_tokens"] == 0
        assert resp["output"] == []
        assert resp["output_text"] == ""
def test_responses_stream_all_events_have_sequence_number():
    """Every streamed event must carry a sequence_number, and the numbers must
    be strictly increasing over the whole stream."""
    global server
    server.start()
    stream = server.make_stream_request("POST", "/v1/responses", data={
        "model": "gpt-4.1",
        "input": [
            {"role": "system", "content": "Book"},
            {"role": "user", "content": "What is the best book"},
        ],
        "max_output_tokens": 8,
        "temperature": 0.8,
        "stream": True,
    })
    seq_nums = []
    event_types = []
    for data in stream:
        assert "sequence_number" in data, f"missing sequence_number in event type {data.get('type')}"
        seq_nums.append(data["sequence_number"])
        event_types.append(data.get("type", "unknown"))
    # The stream must produce a reasonable number of events.
    assert len(seq_nums) >= 6, f"expected >= 6 events, got {len(seq_nums)}: {event_types}"
    # Pairwise comparison enforces strict monotonic growth.
    for i, (prev, cur) in enumerate(zip(seq_nums, seq_nums[1:]), start=1):
        assert cur > prev, \
            f"sequence_number not strictly increasing at index {i}: {prev} -> {cur} (events: {event_types[i-1]} -> {event_types[i]})"
def test_responses_stream_delta_events_have_indices():
    """Added/delta events must carry output_index; content-scoped events must
    additionally carry content_index."""
    global server
    server.start()
    stream = server.make_stream_request("POST", "/v1/responses", data={
        "model": "gpt-4.1",
        "input": [
            {"role": "system", "content": "Book"},
            {"role": "user", "content": "What is the best book"},
        ],
        "max_output_tokens": 8,
        "temperature": 0.8,
        "stream": True,
    })
    seen = {"item_added": False, "part_added": False, "text_delta": False}
    for event in stream:
        kind = event.get("type", "")
        if kind == "response.output_item.added":
            seen["item_added"] = True
            assert "output_index" in event, "output_item.added missing output_index"
        elif kind == "response.content_part.added":
            seen["part_added"] = True
            assert "output_index" in event, "content_part.added missing output_index"
            assert "content_index" in event, "content_part.added missing content_index"
        elif kind == "response.output_text.delta":
            seen["text_delta"] = True
            assert "output_index" in event, "output_text.delta missing output_index"
            assert "content_index" in event, "output_text.delta missing content_index"
    assert seen["item_added"], "never received response.output_item.added"
    assert seen["part_added"], "never received response.content_part.added"
    assert seen["text_delta"], "never received response.output_text.delta"
def test_responses_reasoning_content_array():
    """A reasoning item whose content is an array of reasoning_text parts
    (the spec format) must be accepted."""
    global server
    server.start()
    resp = server.make_request("POST", "/v1/responses", data={
        "model": "gpt-4.1",
        "input": [
            {"role": "user", "content": [{"type": "input_text", "text": "Hi"}]},
            {"type": "reasoning", "summary": [],
             "content": [{"type": "reasoning_text", "text": "thinking"}]},
            {"role": "assistant", "type": "message",
             "content": [{"type": "output_text", "text": "Hello"}]},
            {"role": "user", "content": [{"type": "input_text", "text": "How are you"}]},
        ],
        "max_output_tokens": 8,
        "temperature": 0.8,
    })
    assert resp.status_code == 200
    assert resp.body["status"] == "completed"
def test_responses_reasoning_content_string():
    """A reasoning item whose content is a plain string (the OpenCode format)
    must be accepted."""
    global server
    server.start()
    resp = server.make_request("POST", "/v1/responses", data={
        "model": "gpt-4.1",
        "input": [
            {"role": "user", "content": [{"type": "input_text", "text": "Hi"}]},
            {"type": "reasoning", "summary": [], "content": "thinking about it"},
            {"role": "assistant", "type": "message",
             "content": [{"type": "output_text", "text": "Hello"}]},
            {"role": "user", "content": [{"type": "input_text", "text": "How are you"}]},
        ],
        "max_output_tokens": 8,
        "temperature": 0.8,
    })
    assert resp.status_code == 200
    assert resp.body["status"] == "completed"
def test_responses_reasoning_content_null():
    """A reasoning item with content:null (Codex format, issue openai/codex#11834)
    must be accepted — content may be null when encrypted_content is present."""
    global server
    server.start()
    resp = server.make_request("POST", "/v1/responses", data={
        "model": "gpt-4.1",
        "input": [
            {"role": "user", "content": [{"type": "input_text", "text": "Hi"}]},
            {"type": "reasoning", "summary": [], "content": None,
             "encrypted_content": "opaque_data_here"},
            {"role": "assistant", "type": "message",
             "content": [{"type": "output_text", "text": "Hello"}]},
            {"role": "user", "content": [{"type": "input_text", "text": "How are you"}]},
        ],
        "max_output_tokens": 8,
        "temperature": 0.8,
    })
    assert resp.status_code == 200
    assert resp.body["status"] == "completed"
def test_responses_reasoning_content_omitted():
    """A reasoning item with no content field at all must be accepted."""
    global server
    server.start()
    resp = server.make_request("POST", "/v1/responses", data={
        "model": "gpt-4.1",
        "input": [
            {"role": "user", "content": [{"type": "input_text", "text": "Hi"}]},
            {"type": "reasoning", "summary": []},
            {"role": "assistant", "type": "message",
             "content": [{"type": "output_text", "text": "Hello"}]},
            {"role": "user", "content": [{"type": "input_text", "text": "How are you"}]},
        ],
        "max_output_tokens": 8,
        "temperature": 0.8,
    })
    assert resp.status_code == 200
    assert resp.body["status"] == "completed"