This commit is contained in:
Christopher Albert 2026-04-01 14:05:51 +03:00 committed by GitHub
commit 53ecea86b5
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 800 additions and 121 deletions

View File

@ -1255,45 +1255,59 @@ json convert_responses_to_chatcmpl(const json & response_body) {
if (item.contains("status")) {
item.erase("status");
}
// Merge system/developer messages into the first system message.
// Many model templates (e.g. Qwen) require all system content at
// position 0 and reject system messages elsewhere in the conversation.
if (item.at("role") == "system" || item.at("role") == "developer") {
if (!chatcmpl_messages.empty() && chatcmpl_messages[0].value("role", "") == "system") {
auto & first_msg = chatcmpl_messages[0];
// Convert string content to array format if needed
if (first_msg["content"].is_string()) {
std::string old_text = first_msg["content"].get<std::string>();
first_msg["content"] = json::array({json{{"text", old_text}, {"type", "text"}}});
}
auto & first_content = first_msg["content"];
for (const auto & part : chatcmpl_content) {
first_content.push_back(part);
}
continue; // merged, don't push a separate message
}
item["role"] = "system";
}
item["content"] = chatcmpl_content;
chatcmpl_messages.push_back(item);
} else if (exists_and_is_array(item, "content") &&
exists_and_is_string(item, "role") &&
item.at("role") == "assistant" &&
// exists_and_is_string(item, "status") &&
// (item.at("status") == "in_progress" ||
// item.at("status") == "completed" ||
// item.at("status") == "incomplete") &&
// item["status"] not sent by codex-cli
exists_and_is_string(item, "type") &&
item.at("type") == "message"
// status not checked (not always present, e.g. codex-cli omits it)
// type == "message" for OutputMessage, absent for EasyInputMessage
(!item.contains("type") || item.at("type") == "message")
) {
// #responses_create-input-input_item_list-item-output_message
auto chatcmpl_content = json::array();
// Also handles AssistantMessageItemParam / EasyInputMessage with role "assistant"
std::vector<json> chatcmpl_content;
for (const auto & output_text : item.at("content")) {
const std::string type = json_value(output_text, "type", std::string());
if (type == "output_text") {
if (type == "output_text" || type == "input_text") {
if (!exists_and_is_string(output_text, "text")) {
throw std::invalid_argument("'Output text' requires 'text'");
// Ignore annotations and logprobs for now
chatcmpl_content.push_back({
{"text", output_text.at("text")},
{"type", "text"},
});
}
chatcmpl_content.push_back({
{"text", output_text.at("text")},
{"type", "text"},
});
} else if (type == "refusal") {
if (!exists_and_is_string(output_text, "refusal")) {
throw std::invalid_argument("'Refusal' requires 'refusal'");
// Ignore annotations and logprobs for now
chatcmpl_content.push_back({
{"refusal", output_text.at("refusal")},
{"type", "refusal"},
});
}
chatcmpl_content.push_back({
{"refusal", output_text.at("refusal")},
{"type", "refusal"},
});
} else {
throw std::invalid_argument("'type' must be one of 'output_text' or 'refusal'");
throw std::invalid_argument("'type' must be 'output_text', 'input_text', or 'refusal'");
}
}
@ -1303,7 +1317,9 @@ json convert_responses_to_chatcmpl(const json & response_body) {
prev_msg["content"] = json::array();
}
auto & prev_content = prev_msg["content"];
prev_content.insert(prev_content.end(), chatcmpl_content.begin(), chatcmpl_content.end());
for (const auto & part : chatcmpl_content) {
prev_content.push_back(part);
}
} else {
item.erase("status");
item.erase("type");
@ -1371,24 +1387,30 @@ json convert_responses_to_chatcmpl(const json & response_body) {
item.at("type") == "reasoning") {
// #responses_create-input-input_item_list-item-reasoning
if (!exists_and_is_array(item, "content")) {
throw std::invalid_argument("item['content'] is not an array");
}
if (item.at("content").empty()) {
throw std::invalid_argument("item['content'] is empty");
}
if (!exists_and_is_string(item.at("content")[0], "text")) {
throw std::invalid_argument("item['content']['text'] is not a string");
// content can be: null, omitted, a string, or array of {type, text} objects.
// Codex may send content:null or omit it entirely (issue openai/codex#11834).
// OpenCode may send content as a plain string.
// The spec uses array format: [{"type":"reasoning_text","text":"..."}].
// encrypted_content (opaque string) is accepted but ignored for local models.
std::string reasoning_text;
if (!item.contains("content") || item.at("content").is_null()) {
// null or missing content — skip (encrypted_content only, or empty reasoning)
} else if (item.at("content").is_string()) {
reasoning_text = item.at("content").get<std::string>();
} else if (item.at("content").is_array() && !item.at("content").empty()
&& exists_and_is_string(item.at("content")[0], "text")) {
reasoning_text = item.at("content")[0].at("text").get<std::string>();
}
// else: empty array or unrecognized format — treat as empty reasoning
if (merge_prev) {
auto & prev_msg = chatcmpl_messages.back();
prev_msg["reasoning_content"] = item.at("content")[0].at("text");
prev_msg["reasoning_content"] = reasoning_text;
} else {
chatcmpl_messages.push_back(json {
{"role", "assistant"},
{"content", json::array()},
{"reasoning_content", item.at("content")[0].at("text")},
{"reasoning_content", reasoning_text},
});
}
} else {
@ -1407,11 +1429,17 @@ json convert_responses_to_chatcmpl(const json & response_body) {
}
std::vector<json> chatcmpl_tools;
for (json resp_tool : response_body.at("tools")) {
json chatcmpl_tool;
const std::string tool_type = json_value(resp_tool, "type", std::string());
if (json_value(resp_tool, "type", std::string()) != "function") {
throw std::invalid_argument("'type' of tool must be 'function'");
// Skip non-function tools (e.g. web_search, code_interpreter)
// sent by clients like Codex CLI — these are provider-specific
// and cannot be converted to chat completions function tools
if (tool_type != "function") {
SRV_WRN("skipping unsupported tool type '%s' in Responses conversion\n", tool_type.c_str());
continue;
}
json chatcmpl_tool;
resp_tool.erase("type");
chatcmpl_tool["type"] = "function";
@ -1422,7 +1450,9 @@ json convert_responses_to_chatcmpl(const json & response_body) {
chatcmpl_tools.push_back(chatcmpl_tool);
}
chatcmpl_body.erase("tools");
chatcmpl_body["tools"] = chatcmpl_tools;
if (!chatcmpl_tools.empty()) {
chatcmpl_body["tools"] = chatcmpl_tools;
}
}
if (response_body.contains("max_output_tokens")) {
@ -1430,6 +1460,15 @@ json convert_responses_to_chatcmpl(const json & response_body) {
chatcmpl_body["max_tokens"] = response_body["max_output_tokens"];
}
// Strip Responses-only keys that have no chat completions equivalent
// (e.g. Codex CLI sends store, include, prompt_cache_key, web_search)
for (const char * key : {
"store", "include", "prompt_cache_key", "web_search",
"text", "truncation", "metadata",
}) {
chatcmpl_body.erase(key);
}
return chatcmpl_body;
}

View File

@ -917,6 +917,71 @@ json server_task_result_cmpl_final::to_json_oaicompat_chat_stream() {
return deltas;
}
// Concatenate the text of every "output_text" content part across all
// "message" items in `output`. This mirrors the OpenAI Responses API
// convenience field `output_text` (SDK-computed upstream, server-side here).
// Non-message items (function_call, reasoning) contribute nothing.
static std::string build_output_text(const std::vector<json> & output) {
    std::string result;
    for (const auto & item : output) {
        // only message items carry text content
        if (json_value(item, "type", std::string()) != "message") {
            continue;
        }
        // defensive: skip malformed items without a content array instead
        // of letting json::at() throw (original code assumed presence)
        if (!item.contains("content") || !item.at("content").is_array()) {
            continue;
        }
        for (const auto & part : item.at("content")) {
            if (json_value(part, "type", std::string()) == "output_text" &&
                part.contains("text") && part.at("text").is_string()) {
                result += part.at("text").get<std::string>();
            }
        }
    }
    return result;
}
// Build a complete OpenAI Responses API "response" object.
// Fields the server does not track are filled with the spec's documented
// defaults (e.g. top_p = 1.0, service_tier = "default", tools = []) so
// that strict clients validating the Responses schema accept the payload.
// Used for the final non-streaming response as well as the
// response.created / response.in_progress / response.completed events.
// NOTE(review): temperature/top_p etc. are fixed defaults and do not echo
// the request's actual sampling settings — confirm this is intended.
static json build_oai_resp_metadata(const std::string & oai_resp_id,
const std::string & oaicompat_model,
const std::vector<json> & output,
const std::string & output_text,
int n_prompt_tokens,
int n_decoded,
int n_prompt_tokens_cache,
const std::string & status = "completed") {
std::time_t t = std::time(0);
return json {
// completed_at is null while the response is still in progress
{"completed_at", status == "completed" ? json(t) : json(nullptr)},
{"created_at", t},
{"id", oai_resp_id},
{"model", oaicompat_model},
{"object", "response"},
{"output", output},
// convenience concatenation of all output_text parts (see build_output_text)
{"output_text", output_text},
{"status", status},
{"usage", json {
{"input_tokens", n_prompt_tokens},
{"output_tokens", n_decoded},
{"total_tokens", n_decoded + n_prompt_tokens},
{"input_tokens_details", json{{"cached_tokens", n_prompt_tokens_cache}}},
// reasoning token accounting is not tracked separately yet
{"output_tokens_details", json{{"reasoning_tokens", 0}}},
}},
// schema-required fields below carry spec defaults; the server does not
// implement the corresponding features (storage, truncation, metadata, ...)
{"incomplete_details", nullptr},
{"previous_response_id", nullptr},
{"instructions", nullptr},
{"error", nullptr},
{"tools", json::array()},
{"tool_choice", "auto"},
{"truncation", "disabled"},
{"parallel_tool_calls", false},
{"text", json{{"format", json{{"type", "text"}}}}},
{"top_p", 1.0},
{"presence_penalty", 0.0},
{"frequency_penalty", 0.0},
{"top_logprobs", 0},
{"temperature", 1.0},
{"reasoning", nullptr},
{"max_output_tokens", nullptr},
{"max_tool_calls", nullptr},
{"store", false},
{"background", false},
{"service_tier", "default"},
{"safety_identifier", nullptr},
{"prompt_cache_key", nullptr},
{"metadata", json::object()},
};
}
json server_task_result_cmpl_final::to_json_oaicompat_resp() {
common_chat_msg msg;
if (!oaicompat_msg.empty()) {
@ -960,36 +1025,24 @@ json server_task_result_cmpl_final::to_json_oaicompat_resp() {
for (const common_chat_tool_call & tool_call : oaicompat_msg.tool_calls) {
output.push_back(json {
{"type", "function_call"},
{"status", "completed"},
{"arguments", tool_call.arguments},
{"call_id", "fc_" + tool_call.id},
{"id", "fc_" + random_string()},
{"call_id", tool_call.id},
{"name", tool_call.name},
{"arguments", tool_call.arguments},
{"status", "completed"},
});
}
std::time_t t = std::time(0);
json res = {
{"completed_at", t},
{"created_at", t},
{"id", oai_resp_id},
{"model", oaicompat_model},
{"object", "response"},
{"output", output},
{"status", "completed"},
{"usage", json {
{"input_tokens", n_prompt_tokens},
{"output_tokens", n_decoded},
{"total_tokens", n_decoded + n_prompt_tokens},
{"input_tokens_details", json { {"cached_tokens", n_prompt_tokens_cache} }},
}},
};
return res;
std::string output_text = build_output_text(output);
return build_oai_resp_metadata(oai_resp_id, oaicompat_model, output, output_text,
n_prompt_tokens, n_decoded, n_prompt_tokens_cache);
}
json server_task_result_cmpl_final::to_json_oaicompat_resp_stream() {
std::vector<json> server_sent_events;
std::vector<json> output;
int & seq_num = oai_resp_seq_num;
int output_idx = 0;
if (oaicompat_msg.reasoning_content != "") {
const json output_item = json {
@ -1001,25 +1054,33 @@ json server_task_result_cmpl_final::to_json_oaicompat_resp_stream() {
{"type", "reasoning_text"},
}})},
{"encrypted_content", ""},
{"status", "completed"},
};
server_sent_events.push_back(json {
{"event", "response.output_item.done"},
{"data", json {
{"type", "response.output_item.done"},
{"item", output_item}
{"type", "response.output_item.done"},
{"sequence_number", seq_num++},
{"output_index", output_idx},
{"item", output_item},
}}
});
output.push_back(output_item);
output_idx++;
}
if (oaicompat_msg.content != "") {
server_sent_events.push_back(json {
{"event", "response.output_text.done"},
{"data", json {
{"type", "response.output_text.done"},
{"item_id", oai_resp_message_id},
{"text", oaicompat_msg.content}
{"type", "response.output_text.done"},
{"sequence_number", seq_num++},
{"output_index", output_idx},
{"content_index", 0},
{"item_id", oai_resp_message_id},
{"text", oaicompat_msg.content},
{"logprobs", json::array()},
}}
});
@ -1033,9 +1094,12 @@ json server_task_result_cmpl_final::to_json_oaicompat_resp_stream() {
server_sent_events.push_back(json {
{"event", "response.content_part.done"},
{"data", json {
{"type", "response.content_part.done"},
{"item_id", oai_resp_message_id},
{"part", content_part}
{"type", "response.content_part.done"},
{"sequence_number", seq_num++},
{"output_index", output_idx},
{"content_index", 0},
{"item_id", oai_resp_message_id},
{"part", content_part},
}}
});
const json output_item = {
@ -1049,50 +1113,52 @@ json server_task_result_cmpl_final::to_json_oaicompat_resp_stream() {
server_sent_events.push_back(json {
{"event", "response.output_item.done"},
{"data", json {
{"type", "response.output_item.done"},
{"item", output_item}
{"type", "response.output_item.done"},
{"sequence_number", seq_num++},
{"output_index", output_idx},
{"item", output_item},
}}
});
output.push_back(output_item);
output_idx++;
}
for (const common_chat_tool_call & tool_call : oaicompat_msg.tool_calls) {
for (size_t tc_idx = 0; tc_idx < oaicompat_msg.tool_calls.size(); tc_idx++) {
const common_chat_tool_call & tool_call = oaicompat_msg.tool_calls[tc_idx];
const std::string fc_id = tc_idx < oai_resp_fc_item_ids.size()
? oai_resp_fc_item_ids[tc_idx]
: "fc_" + random_string(); // fallback for non-streaming path
const json output_item = {
{"type", "function_call"},
{"status", "completed"},
{"id", fc_id},
{"call_id", tool_call.id},
{"name", tool_call.name},
{"arguments", tool_call.arguments},
{"call_id", "fc_" + tool_call.id},
{"name", tool_call.name}
{"status", "completed"},
};
server_sent_events.push_back(json {
{"event", "response.output_item.done"},
{"data", json {
{"type", "response.output_item.done"},
{"item", output_item}
{"type", "response.output_item.done"},
{"sequence_number", seq_num++},
{"output_index", output_idx},
{"item", output_item},
}}
});
output.push_back(output_item);
output_idx++;
}
std::time_t t = std::time(0);
std::string output_text = build_output_text(output);
json resp = build_oai_resp_metadata(oai_resp_id, oaicompat_model, output, output_text,
n_prompt_tokens, n_decoded, n_prompt_tokens_cache);
server_sent_events.push_back(json {
{"event", "response.completed"},
{"data", json {
{"type", "response.completed"},
{"response", json {
{"id", oai_resp_id},
{"object", "response"},
{"created_at", t},
{"status", "completed"},
{"model", oaicompat_model},
{"output", output},
{"usage", json {
{"input_tokens", n_prompt_tokens},
{"output_tokens", n_decoded},
{"total_tokens", n_decoded + n_prompt_tokens},
{"input_tokens_details", json { {"cached_tokens", n_prompt_tokens_cache} }},
}}
}},
{"type", "response.completed"},
{"sequence_number", seq_num++},
{"response", resp},
}}
});
@ -1368,20 +1434,44 @@ void server_task_result_cmpl_partial::update(task_result_state & state) {
oai_resp_reasoning_id = state.oai_resp_reasoning_id;
oai_resp_message_id = state.oai_resp_message_id;
oai_resp_fc_id = state.oai_resp_fc_id;
oai_resp_fc_item_id = state.oai_resp_fc_item_id;
oai_resp_seq_num = state.oai_resp_seq_num;
oai_resp_output_idx = state.oai_resp_output_idx;
// track if the accumulated message has any reasoning content
anthropic_has_reasoning = !state.chat_msg.reasoning_content.empty();
// Pre-compute state updates based on diffs (for next chunk)
// Also advance seq_num/output_idx to match events that to_json_oaicompat_resp() will emit
if (n_decoded == 1) {
state.oai_resp_seq_num += 2; // response.created + response.in_progress
}
for (const common_chat_msg_diff & diff : oaicompat_msg_diffs) {
if (!diff.reasoning_content_delta.empty() && !state.thinking_block_started) {
state.thinking_block_started = true;
if (!diff.reasoning_content_delta.empty()) {
if (!state.thinking_block_started) {
state.thinking_block_started = true;
state.oai_resp_seq_num++; // output_item.added
state.oai_resp_output_idx++;
}
state.oai_resp_seq_num++; // reasoning_text.delta
}
if (!diff.content_delta.empty() && !state.text_block_started) {
state.text_block_started = true;
if (!diff.content_delta.empty()) {
if (!state.text_block_started) {
state.text_block_started = true;
state.oai_resp_seq_num += 2; // output_item.added + content_part.added
state.oai_resp_output_idx++;
}
state.oai_resp_seq_num++; // output_text.delta
}
if (!diff.tool_call_delta.name.empty()) {
state.oai_resp_fc_id = diff.tool_call_delta.id;
state.oai_resp_fc_item_id = "fc_" + random_string();
state.oai_resp_fc_item_ids.push_back(state.oai_resp_fc_item_id);
state.oai_resp_seq_num++; // output_item.added
state.oai_resp_output_idx++;
}
if (!diff.tool_call_delta.arguments.empty()) {
state.oai_resp_seq_num++; // function_call_arguments.delta
}
}
}
@ -1523,28 +1613,29 @@ json server_task_result_cmpl_partial::to_json_oaicompat_chat() {
json server_task_result_cmpl_partial::to_json_oaicompat_resp() {
std::vector<json> events;
int & seq_num = oai_resp_seq_num;
int & output_idx = oai_resp_output_idx;
if (n_decoded == 1) {
// Build initial response object with all required fields but empty output and zeroed usage
json initial_resp = build_oai_resp_metadata(
oai_resp_id, oaicompat_model, {}, "",
0, 0, 0, "in_progress");
events.push_back(json {
{"event", "response.created"},
{"data", json {
{"type", "response.created"},
{"response", json {
{"id", oai_resp_id},
{"object", "response"},
{"status", "in_progress"},
}},
{"type", "response.created"},
{"sequence_number", seq_num++},
{"response", initial_resp},
}},
});
events.push_back(json {
{"event", "response.in_progress"},
{"data", json {
{"type", "response.in_progress"},
{"response", json {
{"id", oai_resp_id},
{"object", "response"},
{"status", "in_progress"},
}},
{"type", "response.in_progress"},
{"sequence_number", seq_num++},
{"response", initial_resp},
}},
});
}
@ -1555,7 +1646,9 @@ json server_task_result_cmpl_partial::to_json_oaicompat_resp() {
events.push_back(json {
{"event", "response.output_item.added"},
{"data", json {
{"type", "response.output_item.added"},
{"type", "response.output_item.added"},
{"sequence_number", seq_num++},
{"output_index", output_idx++},
{"item", json {
{"id", oai_resp_reasoning_id},
{"summary", json::array()},
@ -1571,9 +1664,12 @@ json server_task_result_cmpl_partial::to_json_oaicompat_resp() {
events.push_back(json {
{"event", "response.reasoning_text.delta"},
{"data", json {
{"type", "response.reasoning_text.delta"},
{"delta", diff.reasoning_content_delta},
{"item_id", oai_resp_reasoning_id},
{"type", "response.reasoning_text.delta"},
{"sequence_number", seq_num++},
{"output_index", output_idx - 1},
{"content_index", 0},
{"delta", diff.reasoning_content_delta},
{"item_id", oai_resp_reasoning_id},
}},
});
}
@ -1583,7 +1679,9 @@ json server_task_result_cmpl_partial::to_json_oaicompat_resp() {
events.push_back(json {
{"event", "response.output_item.added"},
{"data", json {
{"type", "response.output_item.added"},
{"type", "response.output_item.added"},
{"sequence_number", seq_num++},
{"output_index", output_idx++},
{"item", json {
{"content", json::array()},
{"id", oai_resp_message_id},
@ -1596,8 +1694,11 @@ json server_task_result_cmpl_partial::to_json_oaicompat_resp() {
events.push_back(json {
{"event", "response.content_part.added"},
{"data", json {
{"type", "response.content_part.added"},
{"item_id", oai_resp_message_id},
{"type", "response.content_part.added"},
{"sequence_number", seq_num++},
{"output_index", output_idx - 1},
{"content_index", 0},
{"item_id", oai_resp_message_id},
{"part", json {
{"type", "output_text"},
{"text", ""},
@ -1609,9 +1710,12 @@ json server_task_result_cmpl_partial::to_json_oaicompat_resp() {
events.push_back(json {
{"event", "response.output_text.delta"},
{"data", json {
{"type", "response.output_text.delta"},
{"item_id", oai_resp_message_id},
{"delta", diff.content_delta},
{"type", "response.output_text.delta"},
{"sequence_number", seq_num++},
{"output_index", output_idx - 1},
{"content_index", 0},
{"item_id", oai_resp_message_id},
{"delta", diff.content_delta},
}},
});
}
@ -1620,26 +1724,30 @@ json server_task_result_cmpl_partial::to_json_oaicompat_resp() {
events.push_back(json {
{"event", "response.output_item.added"},
{"data", json {
{"type", "response.output_item.added"},
{"type", "response.output_item.added"},
{"sequence_number", seq_num++},
{"output_index", output_idx++},
{"item", json {
{"id", oai_resp_fc_item_id},
{"arguments", ""},
{"call_id", "fc_" + diff.tool_call_delta.id},
{"call_id", diff.tool_call_delta.id},
{"name", diff.tool_call_delta.name},
{"type", "function_call"},
{"status", "in_progress"},
}},
}},
});
oai_resp_fc_id = diff.tool_call_delta.id;
}
if (!diff.tool_call_delta.arguments.empty()) {
events.push_back(json {
{"event", "response.function_call_arguments.delta"},
{"data", json {
{"type", "response.function_call_arguments.delta"},
{"delta", diff.tool_call_delta.arguments},
{"item_id", "fc_" + oai_resp_fc_id},
{"type", "response.function_call_arguments.delta"},
{"sequence_number", seq_num++},
{"output_index", output_idx - 1},
{"delta", diff.tool_call_delta.arguments},
{"item_id", oai_resp_fc_item_id},
}},
});
}

View File

@ -109,7 +109,11 @@ struct task_result_state {
const std::string oai_resp_id;
const std::string oai_resp_reasoning_id;
const std::string oai_resp_message_id;
std::string oai_resp_fc_id; // function call ID for current args delta
std::string oai_resp_fc_id; // model's tool_call ID for current function call
std::string oai_resp_fc_item_id; // our generated fc_ item ID for current function call
std::vector<std::string> oai_resp_fc_item_ids; // all generated fc_ IDs, in order of tool call appearance
int oai_resp_seq_num = 0; // monotonically increasing per-stream
int oai_resp_output_idx = 0; // tracks current output item index
task_result_state(const common_chat_parser_params & chat_parser_params)
: chat_parser_params(chat_parser_params)
@ -370,6 +374,8 @@ struct server_task_result_cmpl_final : server_task_result {
std::string oai_resp_id;
std::string oai_resp_reasoning_id;
std::string oai_resp_message_id;
std::vector<std::string> oai_resp_fc_item_ids;
int oai_resp_seq_num = 0;
virtual bool is_stop() override {
return true; // in stream mode, final responses are considered stop
@ -384,6 +390,8 @@ struct server_task_result_cmpl_final : server_task_result {
oai_resp_id = state.oai_resp_id;
oai_resp_reasoning_id = state.oai_resp_reasoning_id;
oai_resp_message_id = state.oai_resp_message_id;
oai_resp_fc_item_ids = state.oai_resp_fc_item_ids;
oai_resp_seq_num = state.oai_resp_seq_num;
}
json to_json_non_oaicompat();
@ -436,6 +444,9 @@ struct server_task_result_cmpl_partial : server_task_result {
std::string oai_resp_reasoning_id;
std::string oai_resp_message_id;
std::string oai_resp_fc_id;
std::string oai_resp_fc_item_id;
int oai_resp_seq_num = 0;
int oai_resp_output_idx = 0;
// for Anthropic API: track if any reasoning content has been generated
bool anthropic_has_reasoning = false;

View File

@ -71,3 +71,524 @@ def test_responses_stream_with_openai_library():
assert r.response.output[0].id.startswith("msg_")
assert gathered_text == r.response.output_text
assert match_regex("(Suddenly)+", r.response.output_text)
def test_responses_schema_fields():
    """Every Response-object field required by the OpenAI Responses API
    schema must be present with its expected default value, and the usage
    sub-objects must carry integer token details."""
    global server
    server.start()
    res = server.make_request("POST", "/v1/responses", data={
        "model": "gpt-4.1",
        "input": "Book",
        "max_output_tokens": 8,
        "temperature": 0.8,
    })
    assert res.status_code == 200
    body = res.body
    # token-detail sub-fields must be integers
    usage = body["usage"]
    assert isinstance(usage["input_tokens_details"]["cached_tokens"], int)
    assert isinstance(usage["output_tokens_details"]["reasoning_tokens"], int)
    # the 24 schema-required fields and their expected default values
    expected_defaults = {
        "incomplete_details": None,
        "previous_response_id": None,
        "instructions": None,
        "error": None,
        "tools": [],
        "tool_choice": "auto",
        "truncation": "disabled",
        "parallel_tool_calls": False,
        "text": {"format": {"type": "text"}},
        "top_p": 1.0,
        "temperature": 1.0,
        "presence_penalty": 0.0,
        "frequency_penalty": 0.0,
        "top_logprobs": 0,
        "reasoning": None,
        "max_output_tokens": None,
        "store": False,
        "service_tier": "default",
        "metadata": {},
        "background": False,
        "safety_identifier": None,
        "prompt_cache_key": None,
        "max_tool_calls": None,
    }
    for field, expected in expected_defaults.items():
        assert body[field] == expected, f"unexpected value for {field!r}"
def test_responses_stream_schema_fields():
    """Streaming done-events must carry the sequence_number, output_index
    and content_index fields, and the response.completed payload must
    include the full Response schema fields."""
    global server
    server.start()
    res = server.make_stream_request("POST", "/v1/responses", data={
        "model": "gpt-4.1",
        "input": "Book",
        "max_output_tokens": 8,
        "temperature": 0.8,
        "stream": True,
    })
    seq_nums = []
    seen_types = set()
    completed_response = None
    for data in res:
        event_type = data.get("type")
        assert "sequence_number" in data, f"missing sequence_number in {event_type}"
        seq_nums.append(data["sequence_number"])
        seen_types.add(event_type)
        if event_type == "response.output_text.done":
            assert "content_index" in data
            assert "output_index" in data
            assert "logprobs" in data
            assert isinstance(data["logprobs"], list)
        elif event_type == "response.content_part.done":
            assert "content_index" in data
            assert "output_index" in data
        elif event_type == "response.output_item.done":
            assert "output_index" in data
        elif event_type == "response.completed":
            completed_response = data["response"]
    # every done-event variety must have been observed
    for required in ("response.output_text.done",
                     "response.content_part.done",
                     "response.output_item.done"):
        assert required in seen_types, f"never received {required}"
    # sequence numbers must be strictly increasing across the stream
    assert len(seq_nums) >= 4, f"expected >= 4 sequenced events, got {len(seq_nums)}"
    assert seq_nums == sorted(set(seq_nums)), "sequence_numbers not strictly increasing"
    # the completed response must carry the new schema fields
    assert completed_response is not None
    assert completed_response["metadata"] == {}
    assert completed_response["store"] == False
    assert completed_response["truncation"] == "disabled"
    assert completed_response["usage"]["output_tokens_details"]["reasoning_tokens"] == 0
def test_responses_non_function_tool_skipped():
    """Tools with a non-function type (web_search, code_interpreter, ...)
    are silently dropped during conversion: the request must return 200
    and produce normal text output, exactly as if no tools were supplied.
    (Upstream OpenAI rejects such types with a 400.)"""
    global server
    server.start()
    payload = {
        "model": "gpt-4.1",
        "input": [
            {"role": "system", "content": "Book"},
            {"role": "user", "content": "What is the best book"},
        ],
        "max_output_tokens": 8,
        "temperature": 0.8,
        "tools": [
            {"type": "web_search"},
            {"type": "code_interpreter"},
        ],
    }
    res = server.make_request("POST", "/v1/responses", data=payload)
    assert res.status_code == 200
    assert res.body["status"] == "completed"
    # generation must proceed as if no tools were given
    assert len(res.body["output"]) > 0
    assert len(res.body["output_text"]) > 0
def test_responses_only_non_function_tools_same_as_no_tools():
    """When every supplied tool has a non-function type, all of them are
    filtered out and the request must be indistinguishable from one sent
    without a tools field. Identical prompt token counts prove the tools
    field was truly empty when the prompt was built."""
    global server
    server.start()
    base_payload = {
        "model": "gpt-4.1",
        "input": [
            {"role": "system", "content": "Book"},
            {"role": "user", "content": "What is the best book"},
        ],
        "max_output_tokens": 8,
        "temperature": 0.8,
    }
    no_tools = server.make_request("POST", "/v1/responses", data=base_payload)
    # same request, but with only skippable (non-function) tools attached
    tool_payload = dict(base_payload)
    tool_payload["tools"] = [
        {"type": "web_search"},
        {"type": "code_interpreter"},
        {"type": "file_search"},
    ]
    with_skipped_tools = server.make_request("POST", "/v1/responses", data=tool_payload)
    assert no_tools.status_code == 200
    assert with_skipped_tools.status_code == 200
    # a truly-stripped tools list cannot change the prompt
    assert with_skipped_tools.body["usage"]["input_tokens"] == no_tools.body["usage"]["input_tokens"]
def test_responses_extra_keys_stripped():
    """Responses-only request keys (store, include, prompt_cache_key, ...)
    are removed before the request reaches the chat completions handler,
    so their presence must not change the outcome or the token usage."""
    global server
    server.start()
    common = {
        "model": "gpt-4.1",
        "input": [
            {"role": "system", "content": "Book"},
            {"role": "user", "content": "What is the best book"},
        ],
        "max_output_tokens": 8,
        "temperature": 0.8,
    }
    # baseline request without any extra keys
    baseline = server.make_request("POST", "/v1/responses", data=common)
    assert baseline.status_code == 200
    # identical request with every Responses-only key attached
    extra = dict(common)
    extra.update({
        "store": True,
        "include": ["usage"],
        "prompt_cache_key": "test_key",
        "web_search": {"enabled": True},
        "text": {"format": {"type": "text"}},
        "truncation": "auto",
        "metadata": {"key": "value"},
    })
    res = server.make_request("POST", "/v1/responses", data=extra)
    assert res.status_code == 200
    assert res.body["status"] == "completed"
    # stripped keys must leave the prompt untouched
    assert res.body["usage"]["input_tokens"] == baseline.body["usage"]["input_tokens"]
def test_responses_developer_role_merging():
    """A developer-role message must be folded into the system message at
    position 0, so templates that only accept a single leading system
    message never see developer content as its own turn. Equal prompt
    token counts against a pre-combined system message prove the merge."""
    global server
    server.start()
    def text_part(text):
        # shorthand for an input_text content part
        return {"type": "input_text", "text": text}
    # single system message carrying both pieces of content
    combined = server.make_request("POST", "/v1/responses", data={
        "model": "gpt-4.1",
        "input": [
            {"role": "system", "content": [text_part("Book"), text_part("Keep it short")]},
            {"role": "user", "content": [text_part("What is the best book")]},
        ],
        "max_output_tokens": 8,
        "temperature": 0.8,
    })
    assert combined.status_code == 200
    # same content split across a system message and a later developer message
    split = server.make_request("POST", "/v1/responses", data={
        "model": "gpt-4.1",
        "input": [
            {"role": "system", "content": [text_part("Book")]},
            {"role": "user", "content": [text_part("What is the best book")]},
            {"role": "developer", "content": [text_part("Keep it short")]},
        ],
        "max_output_tokens": 8,
        "temperature": 0.8,
    })
    assert split.status_code == 200
    assert split.body["status"] == "completed"
    # merged prompt must cost exactly as many input tokens as the combined one
    assert split.body["usage"]["input_tokens"] == combined.body["usage"]["input_tokens"]
def test_responses_input_text_type_multi_turn():
    """Assistant messages may use input_text parts (EasyInputMessage) and
    may omit type:'message' (AssistantMessageItemParam); both must be
    accepted and the full conversation must reach the model. A higher
    prompt token count than a single-turn request proves the context
    was preserved."""
    global server
    server.start()
    multi_turn = server.make_request("POST", "/v1/responses", data={
        "model": "gpt-4.1",
        "input": [
            {"role": "user", "content": [{"type": "input_text", "text": "Hello"}]},
            {
                "role": "assistant",
                "content": [{"type": "input_text", "text": "Hi there"}],
            },
            {"role": "user", "content": [{"type": "input_text", "text": "How are you"}]},
        ],
        "max_output_tokens": 8,
        "temperature": 0.8,
    })
    assert multi_turn.status_code == 200
    assert multi_turn.body["status"] == "completed"
    # single-turn baseline for the token-count comparison
    single_turn = server.make_request("POST", "/v1/responses", data={
        "model": "gpt-4.1",
        "input": "How are you",
        "max_output_tokens": 8,
        "temperature": 0.8,
    })
    assert single_turn.status_code == 200
    # a three-message history must consume more prompt tokens than one message
    assert multi_turn.body["usage"]["input_tokens"] > single_turn.body["usage"]["input_tokens"]
def test_responses_output_text_matches_content():
    """output_text must equal the concatenation of all output_text content
    parts of the response's message items (non-streaming path)."""
    global server
    server.start()
    resp = server.make_request("POST", "/v1/responses", data={
        "model": "gpt-4.1",
        "input": [
            {"role": "system", "content": "Book"},
            {"role": "user", "content": "What is the best book"},
        ],
        "max_output_tokens": 8,
        "temperature": 0.8,
    })
    assert resp.status_code == 200
    # Rebuild the text from the individual content parts and compare.
    rebuilt = "".join(
        part["text"]
        for item in resp.body["output"]
        if item.get("type") == "message"
        for part in item["content"]
        if part.get("type") == "output_text"
    )
    assert resp.body["output_text"] == rebuilt
    assert len(rebuilt) > 0
def test_responses_stream_output_text_consistency():
    """Text accumulated from response.output_text.delta events must equal the
    output_text carried by the final response.completed event."""
    global server
    server.start()
    stream = server.make_stream_request("POST", "/v1/responses", data={
        "model": "gpt-4.1",
        "input": [
            {"role": "system", "content": "Book"},
            {"role": "user", "content": "What is the best book"},
        ],
        "max_output_tokens": 8,
        "temperature": 0.8,
        "stream": True,
    })
    collected = ""
    final_text = None
    for event in stream:
        event_type = event.get("type")
        if event_type == "response.output_text.delta":
            collected += event["delta"]
        elif event_type == "response.completed":
            final_text = event["response"]["output_text"]
            # The completed response's content parts must match as well.
            for item in event["response"]["output"]:
                if item.get("type") != "message":
                    continue
                for part in item["content"]:
                    if part.get("type") == "output_text":
                        assert part["text"] == collected
    assert final_text is not None
    assert collected == final_text
    assert len(collected) > 0
def test_responses_stream_created_event_has_full_response():
    """response.created and response.in_progress must contain the full response
    object with all required fields, not just {id, object, status}. Strict
    client libraries such as async-openai reject a minimal stub."""
    global server
    server.start()
    res = server.make_stream_request("POST", "/v1/responses", data={
        "model": "gpt-4.1",
        "input": [
            {"role": "system", "content": "Book"},
            {"role": "user", "content": "What is the best book"},
        ],
        "max_output_tokens": 8,
        "temperature": 0.8,
        "stream": True,
    })
    created_resp = None
    in_progress_resp = None
    for data in res:
        if data.get("type") == "response.created":
            created_resp = data["response"]
        if data.get("type") == "response.in_progress":
            in_progress_resp = data["response"]
    assert created_resp is not None, "never received response.created"
    assert in_progress_resp is not None, "never received response.in_progress"
    # Both must have the full response object, not just minimal fields
    for resp in [created_resp, in_progress_resp]:
        assert resp["status"] == "in_progress"
        assert resp["id"].startswith("resp_")
        assert resp["object"] == "response"
        assert resp["model"] is not None
        assert resp["completed_at"] is None
        assert resp["metadata"] == {}
        # `is False` (not `== False`): JSON false parses to the Python False
        # singleton, and an identity check also rejects a bogus 0 (PEP 8 / E712).
        assert resp["store"] is False
        assert resp["truncation"] == "disabled"
        assert resp["tools"] == []
        assert resp["usage"]["input_tokens"] == 0
        assert resp["usage"]["output_tokens"] == 0
        assert resp["output"] == []
        assert resp["output_text"] == ""
def test_responses_stream_all_events_have_sequence_number():
    """Every streamed event must carry a sequence_number, and the numbers must
    be strictly increasing over the whole stream."""
    global server
    server.start()
    stream = server.make_stream_request("POST", "/v1/responses", data={
        "model": "gpt-4.1",
        "input": [
            {"role": "system", "content": "Book"},
            {"role": "user", "content": "What is the best book"},
        ],
        "max_output_tokens": 8,
        "temperature": 0.8,
        "stream": True,
    })
    seq_nums = []
    event_types = []
    for data in stream:
        assert "sequence_number" in data, f"missing sequence_number in event type {data.get('type')}"
        seq_nums.append(data["sequence_number"])
        event_types.append(data.get("type", "unknown"))
    # The stream must produce a reasonable number of events.
    assert len(seq_nums) >= 6, f"expected >= 6 events, got {len(seq_nums)}: {event_types}"
    # Pairwise comparison enforces strict monotonic growth.
    for i, (prev, cur) in enumerate(zip(seq_nums, seq_nums[1:]), start=1):
        assert cur > prev, \
            f"sequence_number not strictly increasing at index {i}: {prev} -> {cur} (events: {event_types[i-1]} -> {event_types[i]})"
def test_responses_stream_delta_events_have_indices():
    """Added/delta events must carry output_index; content-scoped events must
    additionally carry content_index."""
    global server
    server.start()
    stream = server.make_stream_request("POST", "/v1/responses", data={
        "model": "gpt-4.1",
        "input": [
            {"role": "system", "content": "Book"},
            {"role": "user", "content": "What is the best book"},
        ],
        "max_output_tokens": 8,
        "temperature": 0.8,
        "stream": True,
    })
    seen = {"item_added": False, "part_added": False, "text_delta": False}
    for event in stream:
        kind = event.get("type", "")
        if kind == "response.output_item.added":
            seen["item_added"] = True
            assert "output_index" in event, "output_item.added missing output_index"
        elif kind == "response.content_part.added":
            seen["part_added"] = True
            assert "output_index" in event, "content_part.added missing output_index"
            assert "content_index" in event, "content_part.added missing content_index"
        elif kind == "response.output_text.delta":
            seen["text_delta"] = True
            assert "output_index" in event, "output_text.delta missing output_index"
            assert "content_index" in event, "output_text.delta missing content_index"
    assert seen["item_added"], "never received response.output_item.added"
    assert seen["part_added"], "never received response.content_part.added"
    assert seen["text_delta"], "never received response.output_text.delta"
def test_responses_reasoning_content_array():
    """A reasoning item whose content is an array of reasoning_text parts
    (the spec format) must be accepted."""
    global server
    server.start()
    resp = server.make_request("POST", "/v1/responses", data={
        "model": "gpt-4.1",
        "input": [
            {"role": "user", "content": [{"type": "input_text", "text": "Hi"}]},
            {"type": "reasoning", "summary": [],
             "content": [{"type": "reasoning_text", "text": "thinking"}]},
            {"role": "assistant", "type": "message",
             "content": [{"type": "output_text", "text": "Hello"}]},
            {"role": "user", "content": [{"type": "input_text", "text": "How are you"}]},
        ],
        "max_output_tokens": 8,
        "temperature": 0.8,
    })
    assert resp.status_code == 200
    assert resp.body["status"] == "completed"
def test_responses_reasoning_content_string():
    """A reasoning item whose content is a plain string (the OpenCode format)
    must be accepted."""
    global server
    server.start()
    resp = server.make_request("POST", "/v1/responses", data={
        "model": "gpt-4.1",
        "input": [
            {"role": "user", "content": [{"type": "input_text", "text": "Hi"}]},
            {"type": "reasoning", "summary": [], "content": "thinking about it"},
            {"role": "assistant", "type": "message",
             "content": [{"type": "output_text", "text": "Hello"}]},
            {"role": "user", "content": [{"type": "input_text", "text": "How are you"}]},
        ],
        "max_output_tokens": 8,
        "temperature": 0.8,
    })
    assert resp.status_code == 200
    assert resp.body["status"] == "completed"
def test_responses_reasoning_content_null():
    """A reasoning item with content:null (Codex format, issue openai/codex#11834)
    must be accepted — content may be null when encrypted_content is present."""
    global server
    server.start()
    resp = server.make_request("POST", "/v1/responses", data={
        "model": "gpt-4.1",
        "input": [
            {"role": "user", "content": [{"type": "input_text", "text": "Hi"}]},
            {"type": "reasoning", "summary": [], "content": None,
             "encrypted_content": "opaque_data_here"},
            {"role": "assistant", "type": "message",
             "content": [{"type": "output_text", "text": "Hello"}]},
            {"role": "user", "content": [{"type": "input_text", "text": "How are you"}]},
        ],
        "max_output_tokens": 8,
        "temperature": 0.8,
    })
    assert resp.status_code == 200
    assert resp.body["status"] == "completed"
def test_responses_reasoning_content_omitted():
    """A reasoning item with no content field at all must be accepted."""
    global server
    server.start()
    resp = server.make_request("POST", "/v1/responses", data={
        "model": "gpt-4.1",
        "input": [
            {"role": "user", "content": [{"type": "input_text", "text": "Hi"}]},
            {"type": "reasoning", "summary": []},
            {"role": "assistant", "type": "message",
             "content": [{"type": "output_text", "text": "Hello"}]},
            {"role": "user", "content": [{"type": "input_text", "text": "How are you"}]},
        ],
        "max_output_tokens": 8,
        "temperature": 0.8,
    })
    assert resp.status_code == 200
    assert resp.body["status"] == "completed"