diff --git a/tools/server/server-common.cpp b/tools/server/server-common.cpp index ed5e306fc5..bde76e8392 100644 --- a/tools/server/server-common.cpp +++ b/tools/server/server-common.cpp @@ -1255,45 +1255,59 @@ json convert_responses_to_chatcmpl(const json & response_body) { if (item.contains("status")) { item.erase("status"); } + // Merge system/developer messages into the first system message. + // Many model templates (e.g. Qwen) require all system content at + // position 0 and reject system messages elsewhere in the conversation. + if (item.at("role") == "system" || item.at("role") == "developer") { + if (!chatcmpl_messages.empty() && chatcmpl_messages[0].value("role", "") == "system") { + auto & first_msg = chatcmpl_messages[0]; + // Convert string content to array format if needed + if (first_msg["content"].is_string()) { + std::string old_text = first_msg["content"].get<std::string>(); + first_msg["content"] = json::array({json{{"text", old_text}, {"type", "text"}}}); + } + auto & first_content = first_msg["content"]; + for (const auto & part : chatcmpl_content) { + first_content.push_back(part); + } + continue; // merged, don't push a separate message + } + item["role"] = "system"; + } item["content"] = chatcmpl_content; chatcmpl_messages.push_back(item); } else if (exists_and_is_array(item, "content") && exists_and_is_string(item, "role") && item.at("role") == "assistant" && - // exists_and_is_string(item, "status") && - // (item.at("status") == "in_progress" || - // item.at("status") == "completed" || - // item.at("status") == "incomplete") && - // item["status"] not sent by codex-cli - exists_and_is_string(item, "type") && - item.at("type") == "message" + // status not checked (not always present, e.g. 
codex-cli omits it) + // type == "message" for OutputMessage, absent for EasyInputMessage + (!item.contains("type") || item.at("type") == "message") ) { // #responses_create-input-input_item_list-item-output_message - auto chatcmpl_content = json::array(); + // Also handles AssistantMessageItemParam / EasyInputMessage with role "assistant" + std::vector<json> chatcmpl_content; for (const auto & output_text : item.at("content")) { const std::string type = json_value(output_text, "type", std::string()); - if (type == "output_text") { + if (type == "output_text" || type == "input_text") { if (!exists_and_is_string(output_text, "text")) { throw std::invalid_argument("'Output text' requires 'text'"); - // Ignore annotations and logprobs for now - chatcmpl_content.push_back({ - {"text", output_text.at("text")}, - {"type", "text"}, - }); } + chatcmpl_content.push_back({ + {"text", output_text.at("text")}, + {"type", "text"}, + }); } else if (type == "refusal") { if (!exists_and_is_string(output_text, "refusal")) { throw std::invalid_argument("'Refusal' requires 'refusal'"); - // Ignore annotations and logprobs for now - chatcmpl_content.push_back({ - {"refusal", output_text.at("refusal")}, - {"type", "refusal"}, - }); } + chatcmpl_content.push_back({ + {"refusal", output_text.at("refusal")}, + {"type", "refusal"}, + }); } else { - throw std::invalid_argument("'type' must be one of 'output_text' or 'refusal'"); + throw std::invalid_argument("'type' must be 'output_text', 'input_text', or 'refusal'"); } } @@ -1303,7 +1317,9 @@ json convert_responses_to_chatcmpl(const json & response_body) { prev_msg["content"] = json::array(); } auto & prev_content = prev_msg["content"]; - prev_content.insert(prev_content.end(), chatcmpl_content.begin(), chatcmpl_content.end()); + for (const auto & part : chatcmpl_content) { + prev_content.push_back(part); + } } else { item.erase("status"); item.erase("type"); @@ -1371,24 +1387,30 @@ json convert_responses_to_chatcmpl(const json & response_body) 
{ item.at("type") == "reasoning") { // #responses_create-input-input_item_list-item-reasoning - if (!exists_and_is_array(item, "content")) { - throw std::invalid_argument("item['content'] is not an array"); - } - if (item.at("content").empty()) { - throw std::invalid_argument("item['content'] is empty"); - } - if (!exists_and_is_string(item.at("content")[0], "text")) { - throw std::invalid_argument("item['content']['text'] is not a string"); + // content can be: null, omitted, a string, or array of {type, text} objects. + // Codex may send content:null or omit it entirely (issue openai/codex#11834). + // OpenCode may send content as a plain string. + // The spec uses array format: [{"type":"reasoning_text","text":"..."}]. + // encrypted_content (opaque string) is accepted but ignored for local models. + std::string reasoning_text; + if (!item.contains("content") || item.at("content").is_null()) { + // null or missing content — skip (encrypted_content only, or empty reasoning) + } else if (item.at("content").is_string()) { + reasoning_text = item.at("content").get<std::string>(); + } else if (item.at("content").is_array() && !item.at("content").empty() + && exists_and_is_string(item.at("content")[0], "text")) { + reasoning_text = item.at("content")[0].at("text").get<std::string>(); } + // else: empty array or unrecognized format — treat as empty reasoning if (merge_prev) { auto & prev_msg = chatcmpl_messages.back(); - prev_msg["reasoning_content"] = item.at("content")[0].at("text"); + prev_msg["reasoning_content"] = reasoning_text; } else { chatcmpl_messages.push_back(json { {"role", "assistant"}, {"content", json::array()}, - {"reasoning_content", item.at("content")[0].at("text")}, + {"reasoning_content", reasoning_text}, }); } } else { @@ -1407,11 +1429,17 @@ json convert_responses_to_chatcmpl(const json & response_body) { } std::vector<json> chatcmpl_tools; for (json resp_tool : response_body.at("tools")) { - json chatcmpl_tool; + const std::string tool_type = json_value(resp_tool, "type", 
std::string()); - if (json_value(resp_tool, "type", std::string()) != "function") { - throw std::invalid_argument("'type' of tool must be 'function'"); + // Skip non-function tools (e.g. web_search, code_interpreter) + // sent by clients like Codex CLI — these are provider-specific + // and cannot be converted to chat completions function tools + if (tool_type != "function") { + SRV_WRN("skipping unsupported tool type '%s' in Responses conversion\n", tool_type.c_str()); + continue; } + + json chatcmpl_tool; resp_tool.erase("type"); chatcmpl_tool["type"] = "function"; @@ -1422,7 +1450,9 @@ json convert_responses_to_chatcmpl(const json & response_body) { chatcmpl_tools.push_back(chatcmpl_tool); } chatcmpl_body.erase("tools"); - chatcmpl_body["tools"] = chatcmpl_tools; + if (!chatcmpl_tools.empty()) { + chatcmpl_body["tools"] = chatcmpl_tools; + } } if (response_body.contains("max_output_tokens")) { @@ -1430,6 +1460,15 @@ json convert_responses_to_chatcmpl(const json & response_body) { chatcmpl_body["max_tokens"] = response_body["max_output_tokens"]; } + // Strip Responses-only keys that have no chat completions equivalent + // (e.g. 
Codex CLI sends store, include, prompt_cache_key, web_search) + for (const char * key : { + "store", "include", "prompt_cache_key", "web_search", + "text", "truncation", "metadata", + }) { + chatcmpl_body.erase(key); + } + return chatcmpl_body; } diff --git a/tools/server/server-task.cpp b/tools/server/server-task.cpp index 3018ac90f8..b2de62d86f 100644 --- a/tools/server/server-task.cpp +++ b/tools/server/server-task.cpp @@ -917,6 +917,71 @@ json server_task_result_cmpl_final::to_json_oaicompat_chat_stream() { return deltas; } +static std::string build_output_text(const std::vector<json> & output) { + std::string result; + for (const auto & item : output) { + if (json_value(item, "type", std::string()) == "message") { + for (const auto & part : item.at("content")) { + if (json_value(part, "type", std::string()) == "output_text") { + result += part.at("text").get<std::string>(); + } + } + } + } + return result; +} + +static json build_oai_resp_metadata(const std::string & oai_resp_id, + const std::string & oaicompat_model, + const std::vector<json> & output, + const std::string & output_text, + int n_prompt_tokens, + int n_decoded, + int n_prompt_tokens_cache, + const std::string & status = "completed") { + std::time_t t = std::time(0); + return json { + {"completed_at", status == "completed" ? 
json(t) : json(nullptr)}, + {"created_at", t}, + {"id", oai_resp_id}, + {"model", oaicompat_model}, + {"object", "response"}, + {"output", output}, + {"output_text", output_text}, + {"status", status}, + {"usage", json { + {"input_tokens", n_prompt_tokens}, + {"output_tokens", n_decoded}, + {"total_tokens", n_decoded + n_prompt_tokens}, + {"input_tokens_details", json{{"cached_tokens", n_prompt_tokens_cache}}}, + {"output_tokens_details", json{{"reasoning_tokens", 0}}}, + }}, + {"incomplete_details", nullptr}, + {"previous_response_id", nullptr}, + {"instructions", nullptr}, + {"error", nullptr}, + {"tools", json::array()}, + {"tool_choice", "auto"}, + {"truncation", "disabled"}, + {"parallel_tool_calls", false}, + {"text", json{{"format", json{{"type", "text"}}}}}, + {"top_p", 1.0}, + {"presence_penalty", 0.0}, + {"frequency_penalty", 0.0}, + {"top_logprobs", 0}, + {"temperature", 1.0}, + {"reasoning", nullptr}, + {"max_output_tokens", nullptr}, + {"max_tool_calls", nullptr}, + {"store", false}, + {"background", false}, + {"service_tier", "default"}, + {"safety_identifier", nullptr}, + {"prompt_cache_key", nullptr}, + {"metadata", json::object()}, + }; +} + json server_task_result_cmpl_final::to_json_oaicompat_resp() { common_chat_msg msg; if (!oaicompat_msg.empty()) { @@ -960,36 +1025,24 @@ json server_task_result_cmpl_final::to_json_oaicompat_resp() { for (const common_chat_tool_call & tool_call : oaicompat_msg.tool_calls) { output.push_back(json { {"type", "function_call"}, - {"status", "completed"}, - {"arguments", tool_call.arguments}, - {"call_id", "fc_" + tool_call.id}, + {"id", "fc_" + random_string()}, + {"call_id", tool_call.id}, {"name", tool_call.name}, + {"arguments", tool_call.arguments}, + {"status", "completed"}, }); } - std::time_t t = std::time(0); - json res = { - {"completed_at", t}, - {"created_at", t}, - {"id", oai_resp_id}, - {"model", oaicompat_model}, - {"object", "response"}, - {"output", output}, - {"status", "completed"}, - {"usage", 
json { - {"input_tokens", n_prompt_tokens}, - {"output_tokens", n_decoded}, - {"total_tokens", n_decoded + n_prompt_tokens}, - {"input_tokens_details", json { {"cached_tokens", n_prompt_tokens_cache} }}, - }}, - }; - - return res; + std::string output_text = build_output_text(output); + return build_oai_resp_metadata(oai_resp_id, oaicompat_model, output, output_text, + n_prompt_tokens, n_decoded, n_prompt_tokens_cache); } json server_task_result_cmpl_final::to_json_oaicompat_resp_stream() { std::vector<json> server_sent_events; std::vector<json> output; + int & seq_num = oai_resp_seq_num; + int output_idx = 0; if (oaicompat_msg.reasoning_content != "") { const json output_item = json { @@ -1001,25 +1054,33 @@ json server_task_result_cmpl_final::to_json_oaicompat_resp_stream() { {"type", "reasoning_text"}, }})}, {"encrypted_content", ""}, + {"status", "completed"}, }; server_sent_events.push_back(json { {"event", "response.output_item.done"}, {"data", json { - {"type", "response.output_item.done"}, - {"item", output_item} + {"type", "response.output_item.done"}, + {"sequence_number", seq_num++}, + {"output_index", output_idx}, + {"item", output_item}, }} }); output.push_back(output_item); + output_idx++; } if (oaicompat_msg.content != "") { server_sent_events.push_back(json { {"event", "response.output_text.done"}, {"data", json { - {"type", "response.output_text.done"}, - {"item_id", oai_resp_message_id}, - {"text", oaicompat_msg.content} + {"type", "response.output_text.done"}, + {"sequence_number", seq_num++}, + {"output_index", output_idx}, + {"content_index", 0}, + {"item_id", oai_resp_message_id}, + {"text", oaicompat_msg.content}, + {"logprobs", json::array()}, }} }); @@ -1033,9 +1094,12 @@ json server_task_result_cmpl_final::to_json_oaicompat_resp_stream() { server_sent_events.push_back(json { {"event", "response.content_part.done"}, {"data", json { - {"type", "response.content_part.done"}, - {"item_id", oai_resp_message_id}, - {"part", content_part} + {"type", 
"response.content_part.done"}, + {"sequence_number", seq_num++}, + {"output_index", output_idx}, + {"content_index", 0}, + {"item_id", oai_resp_message_id}, + {"part", content_part}, }} }); const json output_item = { @@ -1049,50 +1113,52 @@ json server_task_result_cmpl_final::to_json_oaicompat_resp_stream() { server_sent_events.push_back(json { {"event", "response.output_item.done"}, {"data", json { - {"type", "response.output_item.done"}, - {"item", output_item} + {"type", "response.output_item.done"}, + {"sequence_number", seq_num++}, + {"output_index", output_idx}, + {"item", output_item}, }} }); output.push_back(output_item); + output_idx++; } - for (const common_chat_tool_call & tool_call : oaicompat_msg.tool_calls) { + for (size_t tc_idx = 0; tc_idx < oaicompat_msg.tool_calls.size(); tc_idx++) { + const common_chat_tool_call & tool_call = oaicompat_msg.tool_calls[tc_idx]; + const std::string fc_id = tc_idx < oai_resp_fc_item_ids.size() + ? oai_resp_fc_item_ids[tc_idx] + : "fc_" + random_string(); // fallback for non-streaming path const json output_item = { {"type", "function_call"}, - {"status", "completed"}, + {"id", fc_id}, + {"call_id", tool_call.id}, + {"name", tool_call.name}, {"arguments", tool_call.arguments}, - {"call_id", "fc_" + tool_call.id}, - {"name", tool_call.name} + {"status", "completed"}, }; server_sent_events.push_back(json { {"event", "response.output_item.done"}, {"data", json { - {"type", "response.output_item.done"}, - {"item", output_item} + {"type", "response.output_item.done"}, + {"sequence_number", seq_num++}, + {"output_index", output_idx}, + {"item", output_item}, }} }); output.push_back(output_item); + output_idx++; } - std::time_t t = std::time(0); + std::string output_text = build_output_text(output); + json resp = build_oai_resp_metadata(oai_resp_id, oaicompat_model, output, output_text, + n_prompt_tokens, n_decoded, n_prompt_tokens_cache); + server_sent_events.push_back(json { {"event", "response.completed"}, {"data", json { 
- {"type", "response.completed"}, - {"response", json { - {"id", oai_resp_id}, - {"object", "response"}, - {"created_at", t}, - {"status", "completed"}, - {"model", oaicompat_model}, - {"output", output}, - {"usage", json { - {"input_tokens", n_prompt_tokens}, - {"output_tokens", n_decoded}, - {"total_tokens", n_decoded + n_prompt_tokens}, - {"input_tokens_details", json { {"cached_tokens", n_prompt_tokens_cache} }}, - }} - }}, + {"type", "response.completed"}, + {"sequence_number", seq_num++}, + {"response", resp}, }} }); @@ -1368,20 +1434,44 @@ void server_task_result_cmpl_partial::update(task_result_state & state) { oai_resp_reasoning_id = state.oai_resp_reasoning_id; oai_resp_message_id = state.oai_resp_message_id; oai_resp_fc_id = state.oai_resp_fc_id; + oai_resp_fc_item_id = state.oai_resp_fc_item_id; + oai_resp_seq_num = state.oai_resp_seq_num; + oai_resp_output_idx = state.oai_resp_output_idx; // track if the accumulated message has any reasoning content anthropic_has_reasoning = !state.chat_msg.reasoning_content.empty(); // Pre-compute state updates based on diffs (for next chunk) + // Also advance seq_num/output_idx to match events that to_json_oaicompat_resp() will emit + if (n_decoded == 1) { + state.oai_resp_seq_num += 2; // response.created + response.in_progress + } for (const common_chat_msg_diff & diff : oaicompat_msg_diffs) { - if (!diff.reasoning_content_delta.empty() && !state.thinking_block_started) { - state.thinking_block_started = true; + if (!diff.reasoning_content_delta.empty()) { + if (!state.thinking_block_started) { + state.thinking_block_started = true; + state.oai_resp_seq_num++; // output_item.added + state.oai_resp_output_idx++; + } + state.oai_resp_seq_num++; // reasoning_text.delta } - if (!diff.content_delta.empty() && !state.text_block_started) { - state.text_block_started = true; + if (!diff.content_delta.empty()) { + if (!state.text_block_started) { + state.text_block_started = true; + state.oai_resp_seq_num += 2; // 
output_item.added + content_part.added + state.oai_resp_output_idx++; + } + state.oai_resp_seq_num++; // output_text.delta } if (!diff.tool_call_delta.name.empty()) { state.oai_resp_fc_id = diff.tool_call_delta.id; + state.oai_resp_fc_item_id = "fc_" + random_string(); + state.oai_resp_fc_item_ids.push_back(state.oai_resp_fc_item_id); + state.oai_resp_seq_num++; // output_item.added + state.oai_resp_output_idx++; + } + if (!diff.tool_call_delta.arguments.empty()) { + state.oai_resp_seq_num++; // function_call_arguments.delta } } } @@ -1523,28 +1613,29 @@ json server_task_result_cmpl_partial::to_json_oaicompat_chat() { json server_task_result_cmpl_partial::to_json_oaicompat_resp() { std::vector<json> events; + int & seq_num = oai_resp_seq_num; + int & output_idx = oai_resp_output_idx; if (n_decoded == 1) { + // Build initial response object with all required fields but empty output and zeroed usage + json initial_resp = build_oai_resp_metadata( + oai_resp_id, oaicompat_model, {}, "", + 0, 0, 0, "in_progress"); + events.push_back(json { {"event", "response.created"}, {"data", json { - {"type", "response.created"}, - {"response", json { - {"id", oai_resp_id}, - {"object", "response"}, - {"status", "in_progress"}, - }}, + {"type", "response.created"}, + {"sequence_number", seq_num++}, + {"response", initial_resp}, }}, }); events.push_back(json { {"event", "response.in_progress"}, {"data", json { - {"type", "response.in_progress"}, - {"response", json { - {"id", oai_resp_id}, - {"object", "response"}, - {"status", "in_progress"}, - }}, + {"type", "response.in_progress"}, + {"sequence_number", seq_num++}, + {"response", initial_resp}, }}, }); } @@ -1555,7 +1646,9 @@ json server_task_result_cmpl_partial::to_json_oaicompat_resp() { events.push_back(json { {"event", "response.output_item.added"}, {"data", json { - {"type", "response.output_item.added"}, + {"type", "response.output_item.added"}, + {"sequence_number", seq_num++}, + {"output_index", output_idx++}, {"item", json { 
{"id", oai_resp_reasoning_id}, {"summary", json::array()}, @@ -1571,9 +1664,12 @@ json server_task_result_cmpl_partial::to_json_oaicompat_resp() { events.push_back(json { {"event", "response.reasoning_text.delta"}, {"data", json { - {"type", "response.reasoning_text.delta"}, - {"delta", diff.reasoning_content_delta}, - {"item_id", oai_resp_reasoning_id}, + {"type", "response.reasoning_text.delta"}, + {"sequence_number", seq_num++}, + {"output_index", output_idx - 1}, + {"content_index", 0}, + {"delta", diff.reasoning_content_delta}, + {"item_id", oai_resp_reasoning_id}, }}, }); } @@ -1583,7 +1679,9 @@ json server_task_result_cmpl_partial::to_json_oaicompat_resp() { events.push_back(json { {"event", "response.output_item.added"}, {"data", json { - {"type", "response.output_item.added"}, + {"type", "response.output_item.added"}, + {"sequence_number", seq_num++}, + {"output_index", output_idx++}, {"item", json { {"content", json::array()}, {"id", oai_resp_message_id}, @@ -1596,8 +1694,11 @@ json server_task_result_cmpl_partial::to_json_oaicompat_resp() { events.push_back(json { {"event", "response.content_part.added"}, {"data", json { - {"type", "response.content_part.added"}, - {"item_id", oai_resp_message_id}, + {"type", "response.content_part.added"}, + {"sequence_number", seq_num++}, + {"output_index", output_idx - 1}, + {"content_index", 0}, + {"item_id", oai_resp_message_id}, {"part", json { {"type", "output_text"}, {"text", ""}, @@ -1609,9 +1710,12 @@ json server_task_result_cmpl_partial::to_json_oaicompat_resp() { events.push_back(json { {"event", "response.output_text.delta"}, {"data", json { - {"type", "response.output_text.delta"}, - {"item_id", oai_resp_message_id}, - {"delta", diff.content_delta}, + {"type", "response.output_text.delta"}, + {"sequence_number", seq_num++}, + {"output_index", output_idx - 1}, + {"content_index", 0}, + {"item_id", oai_resp_message_id}, + {"delta", diff.content_delta}, }}, }); } @@ -1620,26 +1724,30 @@ json 
server_task_result_cmpl_partial::to_json_oaicompat_resp() { events.push_back(json { {"event", "response.output_item.added"}, {"data", json { - {"type", "response.output_item.added"}, + {"type", "response.output_item.added"}, + {"sequence_number", seq_num++}, + {"output_index", output_idx++}, {"item", json { + {"id", oai_resp_fc_item_id}, {"arguments", ""}, - {"call_id", "fc_" + diff.tool_call_delta.id}, + {"call_id", diff.tool_call_delta.id}, {"name", diff.tool_call_delta.name}, {"type", "function_call"}, {"status", "in_progress"}, }}, }}, }); - oai_resp_fc_id = diff.tool_call_delta.id; } if (!diff.tool_call_delta.arguments.empty()) { events.push_back(json { {"event", "response.function_call_arguments.delta"}, {"data", json { - {"type", "response.function_call_arguments.delta"}, - {"delta", diff.tool_call_delta.arguments}, - {"item_id", "fc_" + oai_resp_fc_id}, + {"type", "response.function_call_arguments.delta"}, + {"sequence_number", seq_num++}, + {"output_index", output_idx - 1}, + {"delta", diff.tool_call_delta.arguments}, + {"item_id", oai_resp_fc_item_id}, }}, }); } diff --git a/tools/server/server-task.h b/tools/server/server-task.h index a49ddb594b..49040445d3 100644 --- a/tools/server/server-task.h +++ b/tools/server/server-task.h @@ -109,7 +109,11 @@ struct task_result_state { const std::string oai_resp_id; const std::string oai_resp_reasoning_id; const std::string oai_resp_message_id; - std::string oai_resp_fc_id; // function call ID for current args delta + std::string oai_resp_fc_id; // model's tool_call ID for current function call + std::string oai_resp_fc_item_id; // our generated fc_ item ID for current function call + std::vector<std::string> oai_resp_fc_item_ids; // all generated fc_ IDs, in order of tool call appearance + int oai_resp_seq_num = 0; // monotonically increasing per-stream + int oai_resp_output_idx = 0; // tracks current output item index task_result_state(const common_chat_parser_params & chat_parser_params) : 
chat_parser_params(chat_parser_params) @@ -370,6 +374,8 @@ struct server_task_result_cmpl_final : server_task_result { std::string oai_resp_id; std::string oai_resp_reasoning_id; std::string oai_resp_message_id; + std::vector<std::string> oai_resp_fc_item_ids; + int oai_resp_seq_num = 0; virtual bool is_stop() override { return true; // in stream mode, final responses are considered stop } @@ -384,6 +390,8 @@ struct server_task_result_cmpl_final : server_task_result { oai_resp_id = state.oai_resp_id; oai_resp_reasoning_id = state.oai_resp_reasoning_id; oai_resp_message_id = state.oai_resp_message_id; + oai_resp_fc_item_ids = state.oai_resp_fc_item_ids; + oai_resp_seq_num = state.oai_resp_seq_num; } json to_json_non_oaicompat(); @@ -436,6 +444,9 @@ struct server_task_result_cmpl_partial : server_task_result { std::string oai_resp_reasoning_id; std::string oai_resp_message_id; std::string oai_resp_fc_id; + std::string oai_resp_fc_item_id; + int oai_resp_seq_num = 0; + int oai_resp_output_idx = 0; // for Anthropic API: track if any reasoning content has been generated bool anthropic_has_reasoning = false; diff --git a/tools/server/tests/unit/test_compat_oai_responses.py b/tools/server/tests/unit/test_compat_oai_responses.py index 7aab4a8ba6..0bd2989755 100644 --- a/tools/server/tests/unit/test_compat_oai_responses.py +++ b/tools/server/tests/unit/test_compat_oai_responses.py @@ -71,3 +71,524 @@ def test_responses_stream_with_openai_library(): assert r.response.output[0].id.startswith("msg_") assert gathered_text == r.response.output_text assert match_regex("(Suddenly)+", r.response.output_text) + + +def test_responses_schema_fields(): + """Verify the 24 Response object fields added by this PR are present + with correct types and default values. 
These fields are required by + the OpenAI Responses API spec but were missing before this change.""" + global server + server.start() + res = server.make_request("POST", "/v1/responses", data={ + "model": "gpt-4.1", + "input": "Book", + "max_output_tokens": 8, + "temperature": 0.8, + }) + assert res.status_code == 200 + body = res.body + # Usage sub-fields added by this PR + usage = body["usage"] + assert isinstance(usage["input_tokens_details"]["cached_tokens"], int) + assert isinstance(usage["output_tokens_details"]["reasoning_tokens"], int) + # All 24 fields added by this PR must be present with correct defaults + assert body["incomplete_details"] is None + assert body["previous_response_id"] is None + assert body["instructions"] is None + assert body["error"] is None + assert body["tools"] == [] + assert body["tool_choice"] == "auto" + assert body["truncation"] == "disabled" + assert body["parallel_tool_calls"] == False + assert body["text"] == {"format": {"type": "text"}} + assert body["top_p"] == 1.0 + assert body["temperature"] == 1.0 + assert body["presence_penalty"] == 0.0 + assert body["frequency_penalty"] == 0.0 + assert body["top_logprobs"] == 0 + assert body["reasoning"] is None + assert body["max_output_tokens"] is None + assert body["store"] == False + assert body["service_tier"] == "default" + assert body["metadata"] == {} + assert body["background"] == False + assert body["safety_identifier"] is None + assert body["prompt_cache_key"] is None + assert body["max_tool_calls"] is None + + +def test_responses_stream_schema_fields(): + """Verify streaming done-events have the sequence_number, output_index, + and content_index fields added by this PR. 
Also verify the completed + response includes the 24 new schema fields.""" + global server + server.start() + res = server.make_stream_request("POST", "/v1/responses", data={ + "model": "gpt-4.1", + "input": "Book", + "max_output_tokens": 8, + "temperature": 0.8, + "stream": True, + }) + seen_seq_nums = [] + saw_output_text_done = False + saw_content_part_done = False + saw_output_item_done = False + completed_response = None + for data in res: + assert "sequence_number" in data, f"missing sequence_number in {data.get('type')}" + seen_seq_nums.append(data["sequence_number"]) + if data.get("type") == "response.output_text.done": + saw_output_text_done = True + assert "content_index" in data + assert "output_index" in data + assert "logprobs" in data + assert isinstance(data["logprobs"], list) + if data.get("type") == "response.content_part.done": + saw_content_part_done = True + assert "content_index" in data + assert "output_index" in data + if data.get("type") == "response.output_item.done": + saw_output_item_done = True + assert "output_index" in data + if data.get("type") == "response.completed": + completed_response = data["response"] + # Must have seen all done-event types + assert saw_output_text_done, "never received response.output_text.done" + assert saw_content_part_done, "never received response.content_part.done" + assert saw_output_item_done, "never received response.output_item.done" + # sequence_number must be present on done events and monotonically increasing + assert len(seen_seq_nums) >= 4, f"expected >= 4 sequenced events, got {len(seen_seq_nums)}" + assert all(a < b for a, b in zip(seen_seq_nums, seen_seq_nums[1:])), "sequence_numbers not strictly increasing" + # completed response must have the new schema fields with correct values + assert completed_response is not None + assert completed_response["metadata"] == {} + assert completed_response["store"] == False + assert completed_response["truncation"] == "disabled" + assert 
completed_response["usage"]["output_tokens_details"]["reasoning_tokens"] == 0 + + +def test_responses_non_function_tool_skipped(): + """Non-function tool types must be silently skipped, producing a valid + completion with no tools field in the converted chat request. Upstream + rejects non-function types with 400; our code must return 200 and + generate output as if no tools were provided.""" + global server + server.start() + res = server.make_request("POST", "/v1/responses", data={ + "model": "gpt-4.1", + "input": [ + {"role": "system", "content": "Book"}, + {"role": "user", "content": "What is the best book"}, + ], + "max_output_tokens": 8, + "temperature": 0.8, + "tools": [ + {"type": "web_search"}, + {"type": "code_interpreter"}, + ], + }) + assert res.status_code == 200 + assert res.body["status"] == "completed" + # With all tools skipped, the model must still produce text output + assert len(res.body["output"]) > 0 + assert len(res.body["output_text"]) > 0 + + +def test_responses_only_non_function_tools_same_as_no_tools(): + """When ALL tools are non-function types, they should all be filtered out + and the result should be identical to a request with no tools at all. 
+ Compare token counts to confirm the tools field was truly empty.""" + global server + server.start() + no_tools = server.make_request("POST", "/v1/responses", data={ + "model": "gpt-4.1", + "input": [ + {"role": "system", "content": "Book"}, + {"role": "user", "content": "What is the best book"}, + ], + "max_output_tokens": 8, + "temperature": 0.8, + }) + with_skipped_tools = server.make_request("POST", "/v1/responses", data={ + "model": "gpt-4.1", + "input": [ + {"role": "system", "content": "Book"}, + {"role": "user", "content": "What is the best book"}, + ], + "max_output_tokens": 8, + "temperature": 0.8, + "tools": [ + {"type": "web_search"}, + {"type": "code_interpreter"}, + {"type": "file_search"}, + ], + }) + assert no_tools.status_code == 200 + assert with_skipped_tools.status_code == 200 + # If tools were truly stripped, prompt token count must be identical + assert with_skipped_tools.body["usage"]["input_tokens"] == no_tools.body["usage"]["input_tokens"] + + +def test_responses_extra_keys_stripped(): + """Responses-only request keys (store, include, prompt_cache_key, etc.) + must be stripped before forwarding to the chat completions handler. 
+ The completion must succeed and produce the same output as a request + without those keys.""" + global server + server.start() + # Baseline without extra keys + baseline = server.make_request("POST", "/v1/responses", data={ + "model": "gpt-4.1", + "input": [ + {"role": "system", "content": "Book"}, + {"role": "user", "content": "What is the best book"}, + ], + "max_output_tokens": 8, + "temperature": 0.8, + }) + assert baseline.status_code == 200 + # Same request with extra Responses-only keys + res = server.make_request("POST", "/v1/responses", data={ + "model": "gpt-4.1", + "input": [ + {"role": "system", "content": "Book"}, + {"role": "user", "content": "What is the best book"}, + ], + "max_output_tokens": 8, + "temperature": 0.8, + "store": True, + "include": ["usage"], + "prompt_cache_key": "test_key", + "web_search": {"enabled": True}, + "text": {"format": {"type": "text"}}, + "truncation": "auto", + "metadata": {"key": "value"}, + }) + assert res.status_code == 200 + assert res.body["status"] == "completed" + # Extra keys must not affect token consumption + assert res.body["usage"]["input_tokens"] == baseline.body["usage"]["input_tokens"] + + +def test_responses_developer_role_merging(): + """Developer role messages must be merged into the first system message + at position 0. This ensures templates that require a single system + message don't see developer content as a separate turn. 
+ + We verify by comparing token counts: system + developer merged should + consume the same prompt tokens as a single system message with the + combined content.""" + global server + server.start() + # Single combined system message + combined = server.make_request("POST", "/v1/responses", data={ + "model": "gpt-4.1", + "input": [ + {"role": "system", "content": [ + {"type": "input_text", "text": "Book"}, + {"type": "input_text", "text": "Keep it short"}, + ]}, + {"role": "user", "content": [{"type": "input_text", "text": "What is the best book"}]}, + ], + "max_output_tokens": 8, + "temperature": 0.8, + }) + assert combined.status_code == 200 + # Split system + developer (should be merged to same prompt) + split = server.make_request("POST", "/v1/responses", data={ + "model": "gpt-4.1", + "input": [ + {"role": "system", "content": [{"type": "input_text", "text": "Book"}]}, + {"role": "user", "content": [{"type": "input_text", "text": "What is the best book"}]}, + {"role": "developer", "content": [{"type": "input_text", "text": "Keep it short"}]}, + ], + "max_output_tokens": 8, + "temperature": 0.8, + }) + assert split.status_code == 200 + assert split.body["status"] == "completed" + # Merged prompt should consume same number of input tokens + assert split.body["usage"]["input_tokens"] == combined.body["usage"]["input_tokens"] + + +def test_responses_input_text_type_multi_turn(): + """input_text type must be accepted for assistant messages (EasyInputMessage). + An assistant message without explicit type:'message' must also be accepted + (AssistantMessageItemParam). 
Verify the multi-turn context is preserved + by checking the model sees the full conversation.""" + global server + server.start() + res = server.make_request("POST", "/v1/responses", data={ + "model": "gpt-4.1", + "input": [ + {"role": "user", "content": [{"type": "input_text", "text": "Hello"}]}, + { + "role": "assistant", + "content": [{"type": "input_text", "text": "Hi there"}], + }, + {"role": "user", "content": [{"type": "input_text", "text": "How are you"}]}, + ], + "max_output_tokens": 8, + "temperature": 0.8, + }) + assert res.status_code == 200 + assert res.body["status"] == "completed" + # Multi-turn input should result in more prompt tokens than single-turn + single = server.make_request("POST", "/v1/responses", data={ + "model": "gpt-4.1", + "input": "How are you", + "max_output_tokens": 8, + "temperature": 0.8, + }) + assert single.status_code == 200 + assert res.body["usage"]["input_tokens"] > single.body["usage"]["input_tokens"] + + +def test_responses_output_text_matches_content(): + """output_text must be the concatenation of all output_text content parts. 
+ Verify this for both streaming and non-streaming responses.""" + global server + server.start() + # Non-streaming + res = server.make_request("POST", "/v1/responses", data={ + "model": "gpt-4.1", + "input": [ + {"role": "system", "content": "Book"}, + {"role": "user", "content": "What is the best book"}, + ], + "max_output_tokens": 8, + "temperature": 0.8, + }) + assert res.status_code == 200 + # Manually reconstruct output_text from content parts + reconstructed = "" + for item in res.body["output"]: + if item.get("type") == "message": + for part in item["content"]: + if part.get("type") == "output_text": + reconstructed += part["text"] + assert res.body["output_text"] == reconstructed + assert len(reconstructed) > 0 + + +def test_responses_stream_output_text_consistency(): + """Streaming gathered text must match the output_text in response.completed.""" + global server + server.start() + res = server.make_stream_request("POST", "/v1/responses", data={ + "model": "gpt-4.1", + "input": [ + {"role": "system", "content": "Book"}, + {"role": "user", "content": "What is the best book"}, + ], + "max_output_tokens": 8, + "temperature": 0.8, + "stream": True, + }) + gathered_text = "" + completed_output_text = None + for data in res: + if data.get("type") == "response.output_text.delta": + gathered_text += data["delta"] + if data.get("type") == "response.completed": + completed_output_text = data["response"]["output_text"] + # Also verify content parts match + for item in data["response"]["output"]: + if item.get("type") == "message": + for part in item["content"]: + if part.get("type") == "output_text": + assert part["text"] == gathered_text + assert completed_output_text is not None + assert gathered_text == completed_output_text + assert len(gathered_text) > 0 + + +def test_responses_stream_created_event_has_full_response(): + """response.created must contain the full response object with all required + fields, not just {id, object, status}. 
This is needed by strict client + libraries like async-openai.""" + global server + server.start() + res = server.make_stream_request("POST", "/v1/responses", data={ + "model": "gpt-4.1", + "input": [ + {"role": "system", "content": "Book"}, + {"role": "user", "content": "What is the best book"}, + ], + "max_output_tokens": 8, + "temperature": 0.8, + "stream": True, + }) + created_resp = None + in_progress_resp = None + for data in res: + if data.get("type") == "response.created": + created_resp = data["response"] + if data.get("type") == "response.in_progress": + in_progress_resp = data["response"] + assert created_resp is not None, "never received response.created" + assert in_progress_resp is not None, "never received response.in_progress" + # Both must have the full response object, not just minimal fields + for resp in [created_resp, in_progress_resp]: + assert resp["status"] == "in_progress" + assert resp["id"].startswith("resp_") + assert resp["object"] == "response" + assert resp["model"] is not None + assert resp["completed_at"] is None + assert resp["metadata"] == {} + assert resp["store"] == False + assert resp["truncation"] == "disabled" + assert resp["tools"] == [] + assert resp["usage"]["input_tokens"] == 0 + assert resp["usage"]["output_tokens"] == 0 + assert resp["output"] == [] + assert resp["output_text"] == "" + + +def test_responses_stream_all_events_have_sequence_number(): + """Every streaming event must have a sequence_number field and they must + be strictly increasing across the entire stream.""" + global server + server.start() + res = server.make_stream_request("POST", "/v1/responses", data={ + "model": "gpt-4.1", + "input": [ + {"role": "system", "content": "Book"}, + {"role": "user", "content": "What is the best book"}, + ], + "max_output_tokens": 8, + "temperature": 0.8, + "stream": True, + }) + all_seq_nums = [] + event_types = [] + for data in res: + assert "sequence_number" in data, f"missing sequence_number in event type 
{data.get('type')}" + all_seq_nums.append(data["sequence_number"]) + event_types.append(data.get("type", "unknown")) + # Must have received multiple events + assert len(all_seq_nums) >= 6, f"expected >= 6 events, got {len(all_seq_nums)}: {event_types}" + # Must be strictly increasing + for i in range(1, len(all_seq_nums)): + assert all_seq_nums[i] > all_seq_nums[i-1], \ + f"sequence_number not strictly increasing at index {i}: {all_seq_nums[i-1]} -> {all_seq_nums[i]} (events: {event_types[i-1]} -> {event_types[i]})" + + +def test_responses_stream_delta_events_have_indices(): + """Delta and added events must have output_index. Content-related events + must also have content_index.""" + global server + server.start() + res = server.make_stream_request("POST", "/v1/responses", data={ + "model": "gpt-4.1", + "input": [ + {"role": "system", "content": "Book"}, + {"role": "user", "content": "What is the best book"}, + ], + "max_output_tokens": 8, + "temperature": 0.8, + "stream": True, + }) + saw_output_item_added = False + saw_content_part_added = False + saw_output_text_delta = False + for data in res: + evt = data.get("type", "") + if evt == "response.output_item.added": + saw_output_item_added = True + assert "output_index" in data, "output_item.added missing output_index" + if evt == "response.content_part.added": + saw_content_part_added = True + assert "output_index" in data, "content_part.added missing output_index" + assert "content_index" in data, "content_part.added missing content_index" + if evt == "response.output_text.delta": + saw_output_text_delta = True + assert "output_index" in data, "output_text.delta missing output_index" + assert "content_index" in data, "output_text.delta missing content_index" + assert saw_output_item_added, "never received response.output_item.added" + assert saw_content_part_added, "never received response.content_part.added" + assert saw_output_text_delta, "never received response.output_text.delta" + + +def 
test_responses_reasoning_content_array(): + """Reasoning items with content as array (spec format) must be accepted.""" + global server + server.start() + res = server.make_request("POST", "/v1/responses", data={ + "model": "gpt-4.1", + "input": [ + {"role": "user", "content": [{"type": "input_text", "text": "Hi"}]}, + {"type": "reasoning", "summary": [], + "content": [{"type": "reasoning_text", "text": "thinking"}]}, + {"role": "assistant", "type": "message", + "content": [{"type": "output_text", "text": "Hello"}]}, + {"role": "user", "content": [{"type": "input_text", "text": "How are you"}]}, + ], + "max_output_tokens": 8, + "temperature": 0.8, + }) + assert res.status_code == 200 + assert res.body["status"] == "completed" + + +def test_responses_reasoning_content_string(): + """Reasoning items with content as plain string (OpenCode format) must be accepted.""" + global server + server.start() + res = server.make_request("POST", "/v1/responses", data={ + "model": "gpt-4.1", + "input": [ + {"role": "user", "content": [{"type": "input_text", "text": "Hi"}]}, + {"type": "reasoning", "summary": [], "content": "thinking about it"}, + {"role": "assistant", "type": "message", + "content": [{"type": "output_text", "text": "Hello"}]}, + {"role": "user", "content": [{"type": "input_text", "text": "How are you"}]}, + ], + "max_output_tokens": 8, + "temperature": 0.8, + }) + assert res.status_code == 200 + assert res.body["status"] == "completed" + + +def test_responses_reasoning_content_null(): + """Reasoning items with content:null (Codex format, issue openai/codex#11834) + must be accepted — content may be null when encrypted_content is present.""" + global server + server.start() + res = server.make_request("POST", "/v1/responses", data={ + "model": "gpt-4.1", + "input": [ + {"role": "user", "content": [{"type": "input_text", "text": "Hi"}]}, + {"type": "reasoning", "summary": [], "content": None, + "encrypted_content": "opaque_data_here"}, + {"role": "assistant", 
"type": "message", + "content": [{"type": "output_text", "text": "Hello"}]}, + {"role": "user", "content": [{"type": "input_text", "text": "How are you"}]}, + ], + "max_output_tokens": 8, + "temperature": 0.8, + }) + assert res.status_code == 200 + assert res.body["status"] == "completed" + + +def test_responses_reasoning_content_omitted(): + """Reasoning items with content omitted entirely must be accepted.""" + global server + server.start() + res = server.make_request("POST", "/v1/responses", data={ + "model": "gpt-4.1", + "input": [ + {"role": "user", "content": [{"type": "input_text", "text": "Hi"}]}, + {"type": "reasoning", "summary": []}, + {"role": "assistant", "type": "message", + "content": [{"type": "output_text", "text": "Hello"}]}, + {"role": "user", "content": [{"type": "input_text", "text": "How are you"}]}, + ], + "max_output_tokens": 8, + "temperature": 0.8, + }) + assert res.status_code == 200 + assert res.body["status"] == "completed"