Merge adef64cb9f into 6b949d1078
This commit is contained in:
commit
53ecea86b5
|
|
@ -1255,45 +1255,59 @@ json convert_responses_to_chatcmpl(const json & response_body) {
|
|||
if (item.contains("status")) {
|
||||
item.erase("status");
|
||||
}
|
||||
// Merge system/developer messages into the first system message.
|
||||
// Many model templates (e.g. Qwen) require all system content at
|
||||
// position 0 and reject system messages elsewhere in the conversation.
|
||||
if (item.at("role") == "system" || item.at("role") == "developer") {
|
||||
if (!chatcmpl_messages.empty() && chatcmpl_messages[0].value("role", "") == "system") {
|
||||
auto & first_msg = chatcmpl_messages[0];
|
||||
// Convert string content to array format if needed
|
||||
if (first_msg["content"].is_string()) {
|
||||
std::string old_text = first_msg["content"].get<std::string>();
|
||||
first_msg["content"] = json::array({json{{"text", old_text}, {"type", "text"}}});
|
||||
}
|
||||
auto & first_content = first_msg["content"];
|
||||
for (const auto & part : chatcmpl_content) {
|
||||
first_content.push_back(part);
|
||||
}
|
||||
continue; // merged, don't push a separate message
|
||||
}
|
||||
item["role"] = "system";
|
||||
}
|
||||
item["content"] = chatcmpl_content;
|
||||
|
||||
chatcmpl_messages.push_back(item);
|
||||
} else if (exists_and_is_array(item, "content") &&
|
||||
exists_and_is_string(item, "role") &&
|
||||
item.at("role") == "assistant" &&
|
||||
// exists_and_is_string(item, "status") &&
|
||||
// (item.at("status") == "in_progress" ||
|
||||
// item.at("status") == "completed" ||
|
||||
// item.at("status") == "incomplete") &&
|
||||
// item["status"] not sent by codex-cli
|
||||
exists_and_is_string(item, "type") &&
|
||||
item.at("type") == "message"
|
||||
// status not checked (not always present, e.g. codex-cli omits it)
|
||||
// type == "message" for OutputMessage, absent for EasyInputMessage
|
||||
(!item.contains("type") || item.at("type") == "message")
|
||||
) {
|
||||
// #responses_create-input-input_item_list-item-output_message
|
||||
auto chatcmpl_content = json::array();
|
||||
// Also handles AssistantMessageItemParam / EasyInputMessage with role "assistant"
|
||||
std::vector<json> chatcmpl_content;
|
||||
|
||||
for (const auto & output_text : item.at("content")) {
|
||||
const std::string type = json_value(output_text, "type", std::string());
|
||||
if (type == "output_text") {
|
||||
if (type == "output_text" || type == "input_text") {
|
||||
if (!exists_and_is_string(output_text, "text")) {
|
||||
throw std::invalid_argument("'Output text' requires 'text'");
|
||||
// Ignore annotations and logprobs for now
|
||||
chatcmpl_content.push_back({
|
||||
{"text", output_text.at("text")},
|
||||
{"type", "text"},
|
||||
});
|
||||
}
|
||||
chatcmpl_content.push_back({
|
||||
{"text", output_text.at("text")},
|
||||
{"type", "text"},
|
||||
});
|
||||
} else if (type == "refusal") {
|
||||
if (!exists_and_is_string(output_text, "refusal")) {
|
||||
throw std::invalid_argument("'Refusal' requires 'refusal'");
|
||||
// Ignore annotations and logprobs for now
|
||||
chatcmpl_content.push_back({
|
||||
{"refusal", output_text.at("refusal")},
|
||||
{"type", "refusal"},
|
||||
});
|
||||
}
|
||||
chatcmpl_content.push_back({
|
||||
{"refusal", output_text.at("refusal")},
|
||||
{"type", "refusal"},
|
||||
});
|
||||
} else {
|
||||
throw std::invalid_argument("'type' must be one of 'output_text' or 'refusal'");
|
||||
throw std::invalid_argument("'type' must be 'output_text', 'input_text', or 'refusal'");
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1303,7 +1317,9 @@ json convert_responses_to_chatcmpl(const json & response_body) {
|
|||
prev_msg["content"] = json::array();
|
||||
}
|
||||
auto & prev_content = prev_msg["content"];
|
||||
prev_content.insert(prev_content.end(), chatcmpl_content.begin(), chatcmpl_content.end());
|
||||
for (const auto & part : chatcmpl_content) {
|
||||
prev_content.push_back(part);
|
||||
}
|
||||
} else {
|
||||
item.erase("status");
|
||||
item.erase("type");
|
||||
|
|
@ -1371,24 +1387,30 @@ json convert_responses_to_chatcmpl(const json & response_body) {
|
|||
item.at("type") == "reasoning") {
|
||||
// #responses_create-input-input_item_list-item-reasoning
|
||||
|
||||
if (!exists_and_is_array(item, "content")) {
|
||||
throw std::invalid_argument("item['content'] is not an array");
|
||||
}
|
||||
if (item.at("content").empty()) {
|
||||
throw std::invalid_argument("item['content'] is empty");
|
||||
}
|
||||
if (!exists_and_is_string(item.at("content")[0], "text")) {
|
||||
throw std::invalid_argument("item['content']['text'] is not a string");
|
||||
// content can be: null, omitted, a string, or array of {type, text} objects.
|
||||
// Codex may send content:null or omit it entirely (issue openai/codex#11834).
|
||||
// OpenCode may send content as a plain string.
|
||||
// The spec uses array format: [{"type":"reasoning_text","text":"..."}].
|
||||
// encrypted_content (opaque string) is accepted but ignored for local models.
|
||||
std::string reasoning_text;
|
||||
if (!item.contains("content") || item.at("content").is_null()) {
|
||||
// null or missing content — skip (encrypted_content only, or empty reasoning)
|
||||
} else if (item.at("content").is_string()) {
|
||||
reasoning_text = item.at("content").get<std::string>();
|
||||
} else if (item.at("content").is_array() && !item.at("content").empty()
|
||||
&& exists_and_is_string(item.at("content")[0], "text")) {
|
||||
reasoning_text = item.at("content")[0].at("text").get<std::string>();
|
||||
}
|
||||
// else: empty array or unrecognized format — treat as empty reasoning
|
||||
|
||||
if (merge_prev) {
|
||||
auto & prev_msg = chatcmpl_messages.back();
|
||||
prev_msg["reasoning_content"] = item.at("content")[0].at("text");
|
||||
prev_msg["reasoning_content"] = reasoning_text;
|
||||
} else {
|
||||
chatcmpl_messages.push_back(json {
|
||||
{"role", "assistant"},
|
||||
{"content", json::array()},
|
||||
{"reasoning_content", item.at("content")[0].at("text")},
|
||||
{"reasoning_content", reasoning_text},
|
||||
});
|
||||
}
|
||||
} else {
|
||||
|
|
@ -1407,11 +1429,17 @@ json convert_responses_to_chatcmpl(const json & response_body) {
|
|||
}
|
||||
std::vector<json> chatcmpl_tools;
|
||||
for (json resp_tool : response_body.at("tools")) {
|
||||
json chatcmpl_tool;
|
||||
const std::string tool_type = json_value(resp_tool, "type", std::string());
|
||||
|
||||
if (json_value(resp_tool, "type", std::string()) != "function") {
|
||||
throw std::invalid_argument("'type' of tool must be 'function'");
|
||||
// Skip non-function tools (e.g. web_search, code_interpreter)
|
||||
// sent by clients like Codex CLI — these are provider-specific
|
||||
// and cannot be converted to chat completions function tools
|
||||
if (tool_type != "function") {
|
||||
SRV_WRN("skipping unsupported tool type '%s' in Responses conversion\n", tool_type.c_str());
|
||||
continue;
|
||||
}
|
||||
|
||||
json chatcmpl_tool;
|
||||
resp_tool.erase("type");
|
||||
chatcmpl_tool["type"] = "function";
|
||||
|
||||
|
|
@ -1422,7 +1450,9 @@ json convert_responses_to_chatcmpl(const json & response_body) {
|
|||
chatcmpl_tools.push_back(chatcmpl_tool);
|
||||
}
|
||||
chatcmpl_body.erase("tools");
|
||||
chatcmpl_body["tools"] = chatcmpl_tools;
|
||||
if (!chatcmpl_tools.empty()) {
|
||||
chatcmpl_body["tools"] = chatcmpl_tools;
|
||||
}
|
||||
}
|
||||
|
||||
if (response_body.contains("max_output_tokens")) {
|
||||
|
|
@ -1430,6 +1460,15 @@ json convert_responses_to_chatcmpl(const json & response_body) {
|
|||
chatcmpl_body["max_tokens"] = response_body["max_output_tokens"];
|
||||
}
|
||||
|
||||
// Strip Responses-only keys that have no chat completions equivalent
|
||||
// (e.g. Codex CLI sends store, include, prompt_cache_key, web_search)
|
||||
for (const char * key : {
|
||||
"store", "include", "prompt_cache_key", "web_search",
|
||||
"text", "truncation", "metadata",
|
||||
}) {
|
||||
chatcmpl_body.erase(key);
|
||||
}
|
||||
|
||||
return chatcmpl_body;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -917,6 +917,71 @@ json server_task_result_cmpl_final::to_json_oaicompat_chat_stream() {
|
|||
return deltas;
|
||||
}
|
||||
|
||||
static std::string build_output_text(const std::vector<json> & output) {
|
||||
std::string result;
|
||||
for (const auto & item : output) {
|
||||
if (json_value(item, "type", std::string()) == "message") {
|
||||
for (const auto & part : item.at("content")) {
|
||||
if (json_value(part, "type", std::string()) == "output_text") {
|
||||
result += part.at("text").get<std::string>();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
static json build_oai_resp_metadata(const std::string & oai_resp_id,
|
||||
const std::string & oaicompat_model,
|
||||
const std::vector<json> & output,
|
||||
const std::string & output_text,
|
||||
int n_prompt_tokens,
|
||||
int n_decoded,
|
||||
int n_prompt_tokens_cache,
|
||||
const std::string & status = "completed") {
|
||||
std::time_t t = std::time(0);
|
||||
return json {
|
||||
{"completed_at", status == "completed" ? json(t) : json(nullptr)},
|
||||
{"created_at", t},
|
||||
{"id", oai_resp_id},
|
||||
{"model", oaicompat_model},
|
||||
{"object", "response"},
|
||||
{"output", output},
|
||||
{"output_text", output_text},
|
||||
{"status", status},
|
||||
{"usage", json {
|
||||
{"input_tokens", n_prompt_tokens},
|
||||
{"output_tokens", n_decoded},
|
||||
{"total_tokens", n_decoded + n_prompt_tokens},
|
||||
{"input_tokens_details", json{{"cached_tokens", n_prompt_tokens_cache}}},
|
||||
{"output_tokens_details", json{{"reasoning_tokens", 0}}},
|
||||
}},
|
||||
{"incomplete_details", nullptr},
|
||||
{"previous_response_id", nullptr},
|
||||
{"instructions", nullptr},
|
||||
{"error", nullptr},
|
||||
{"tools", json::array()},
|
||||
{"tool_choice", "auto"},
|
||||
{"truncation", "disabled"},
|
||||
{"parallel_tool_calls", false},
|
||||
{"text", json{{"format", json{{"type", "text"}}}}},
|
||||
{"top_p", 1.0},
|
||||
{"presence_penalty", 0.0},
|
||||
{"frequency_penalty", 0.0},
|
||||
{"top_logprobs", 0},
|
||||
{"temperature", 1.0},
|
||||
{"reasoning", nullptr},
|
||||
{"max_output_tokens", nullptr},
|
||||
{"max_tool_calls", nullptr},
|
||||
{"store", false},
|
||||
{"background", false},
|
||||
{"service_tier", "default"},
|
||||
{"safety_identifier", nullptr},
|
||||
{"prompt_cache_key", nullptr},
|
||||
{"metadata", json::object()},
|
||||
};
|
||||
}
|
||||
|
||||
json server_task_result_cmpl_final::to_json_oaicompat_resp() {
|
||||
common_chat_msg msg;
|
||||
if (!oaicompat_msg.empty()) {
|
||||
|
|
@ -960,36 +1025,24 @@ json server_task_result_cmpl_final::to_json_oaicompat_resp() {
|
|||
for (const common_chat_tool_call & tool_call : oaicompat_msg.tool_calls) {
|
||||
output.push_back(json {
|
||||
{"type", "function_call"},
|
||||
{"status", "completed"},
|
||||
{"arguments", tool_call.arguments},
|
||||
{"call_id", "fc_" + tool_call.id},
|
||||
{"id", "fc_" + random_string()},
|
||||
{"call_id", tool_call.id},
|
||||
{"name", tool_call.name},
|
||||
{"arguments", tool_call.arguments},
|
||||
{"status", "completed"},
|
||||
});
|
||||
}
|
||||
|
||||
std::time_t t = std::time(0);
|
||||
json res = {
|
||||
{"completed_at", t},
|
||||
{"created_at", t},
|
||||
{"id", oai_resp_id},
|
||||
{"model", oaicompat_model},
|
||||
{"object", "response"},
|
||||
{"output", output},
|
||||
{"status", "completed"},
|
||||
{"usage", json {
|
||||
{"input_tokens", n_prompt_tokens},
|
||||
{"output_tokens", n_decoded},
|
||||
{"total_tokens", n_decoded + n_prompt_tokens},
|
||||
{"input_tokens_details", json { {"cached_tokens", n_prompt_tokens_cache} }},
|
||||
}},
|
||||
};
|
||||
|
||||
return res;
|
||||
std::string output_text = build_output_text(output);
|
||||
return build_oai_resp_metadata(oai_resp_id, oaicompat_model, output, output_text,
|
||||
n_prompt_tokens, n_decoded, n_prompt_tokens_cache);
|
||||
}
|
||||
|
||||
json server_task_result_cmpl_final::to_json_oaicompat_resp_stream() {
|
||||
std::vector<json> server_sent_events;
|
||||
std::vector<json> output;
|
||||
int & seq_num = oai_resp_seq_num;
|
||||
int output_idx = 0;
|
||||
|
||||
if (oaicompat_msg.reasoning_content != "") {
|
||||
const json output_item = json {
|
||||
|
|
@ -1001,25 +1054,33 @@ json server_task_result_cmpl_final::to_json_oaicompat_resp_stream() {
|
|||
{"type", "reasoning_text"},
|
||||
}})},
|
||||
{"encrypted_content", ""},
|
||||
{"status", "completed"},
|
||||
};
|
||||
|
||||
server_sent_events.push_back(json {
|
||||
{"event", "response.output_item.done"},
|
||||
{"data", json {
|
||||
{"type", "response.output_item.done"},
|
||||
{"item", output_item}
|
||||
{"type", "response.output_item.done"},
|
||||
{"sequence_number", seq_num++},
|
||||
{"output_index", output_idx},
|
||||
{"item", output_item},
|
||||
}}
|
||||
});
|
||||
output.push_back(output_item);
|
||||
output_idx++;
|
||||
}
|
||||
|
||||
if (oaicompat_msg.content != "") {
|
||||
server_sent_events.push_back(json {
|
||||
{"event", "response.output_text.done"},
|
||||
{"data", json {
|
||||
{"type", "response.output_text.done"},
|
||||
{"item_id", oai_resp_message_id},
|
||||
{"text", oaicompat_msg.content}
|
||||
{"type", "response.output_text.done"},
|
||||
{"sequence_number", seq_num++},
|
||||
{"output_index", output_idx},
|
||||
{"content_index", 0},
|
||||
{"item_id", oai_resp_message_id},
|
||||
{"text", oaicompat_msg.content},
|
||||
{"logprobs", json::array()},
|
||||
}}
|
||||
});
|
||||
|
||||
|
|
@ -1033,9 +1094,12 @@ json server_task_result_cmpl_final::to_json_oaicompat_resp_stream() {
|
|||
server_sent_events.push_back(json {
|
||||
{"event", "response.content_part.done"},
|
||||
{"data", json {
|
||||
{"type", "response.content_part.done"},
|
||||
{"item_id", oai_resp_message_id},
|
||||
{"part", content_part}
|
||||
{"type", "response.content_part.done"},
|
||||
{"sequence_number", seq_num++},
|
||||
{"output_index", output_idx},
|
||||
{"content_index", 0},
|
||||
{"item_id", oai_resp_message_id},
|
||||
{"part", content_part},
|
||||
}}
|
||||
});
|
||||
const json output_item = {
|
||||
|
|
@ -1049,50 +1113,52 @@ json server_task_result_cmpl_final::to_json_oaicompat_resp_stream() {
|
|||
server_sent_events.push_back(json {
|
||||
{"event", "response.output_item.done"},
|
||||
{"data", json {
|
||||
{"type", "response.output_item.done"},
|
||||
{"item", output_item}
|
||||
{"type", "response.output_item.done"},
|
||||
{"sequence_number", seq_num++},
|
||||
{"output_index", output_idx},
|
||||
{"item", output_item},
|
||||
}}
|
||||
});
|
||||
output.push_back(output_item);
|
||||
output_idx++;
|
||||
}
|
||||
|
||||
for (const common_chat_tool_call & tool_call : oaicompat_msg.tool_calls) {
|
||||
for (size_t tc_idx = 0; tc_idx < oaicompat_msg.tool_calls.size(); tc_idx++) {
|
||||
const common_chat_tool_call & tool_call = oaicompat_msg.tool_calls[tc_idx];
|
||||
const std::string fc_id = tc_idx < oai_resp_fc_item_ids.size()
|
||||
? oai_resp_fc_item_ids[tc_idx]
|
||||
: "fc_" + random_string(); // fallback for non-streaming path
|
||||
const json output_item = {
|
||||
{"type", "function_call"},
|
||||
{"status", "completed"},
|
||||
{"id", fc_id},
|
||||
{"call_id", tool_call.id},
|
||||
{"name", tool_call.name},
|
||||
{"arguments", tool_call.arguments},
|
||||
{"call_id", "fc_" + tool_call.id},
|
||||
{"name", tool_call.name}
|
||||
{"status", "completed"},
|
||||
};
|
||||
server_sent_events.push_back(json {
|
||||
{"event", "response.output_item.done"},
|
||||
{"data", json {
|
||||
{"type", "response.output_item.done"},
|
||||
{"item", output_item}
|
||||
{"type", "response.output_item.done"},
|
||||
{"sequence_number", seq_num++},
|
||||
{"output_index", output_idx},
|
||||
{"item", output_item},
|
||||
}}
|
||||
});
|
||||
output.push_back(output_item);
|
||||
output_idx++;
|
||||
}
|
||||
|
||||
std::time_t t = std::time(0);
|
||||
std::string output_text = build_output_text(output);
|
||||
json resp = build_oai_resp_metadata(oai_resp_id, oaicompat_model, output, output_text,
|
||||
n_prompt_tokens, n_decoded, n_prompt_tokens_cache);
|
||||
|
||||
server_sent_events.push_back(json {
|
||||
{"event", "response.completed"},
|
||||
{"data", json {
|
||||
{"type", "response.completed"},
|
||||
{"response", json {
|
||||
{"id", oai_resp_id},
|
||||
{"object", "response"},
|
||||
{"created_at", t},
|
||||
{"status", "completed"},
|
||||
{"model", oaicompat_model},
|
||||
{"output", output},
|
||||
{"usage", json {
|
||||
{"input_tokens", n_prompt_tokens},
|
||||
{"output_tokens", n_decoded},
|
||||
{"total_tokens", n_decoded + n_prompt_tokens},
|
||||
{"input_tokens_details", json { {"cached_tokens", n_prompt_tokens_cache} }},
|
||||
}}
|
||||
}},
|
||||
{"type", "response.completed"},
|
||||
{"sequence_number", seq_num++},
|
||||
{"response", resp},
|
||||
}}
|
||||
});
|
||||
|
||||
|
|
@ -1368,20 +1434,44 @@ void server_task_result_cmpl_partial::update(task_result_state & state) {
|
|||
oai_resp_reasoning_id = state.oai_resp_reasoning_id;
|
||||
oai_resp_message_id = state.oai_resp_message_id;
|
||||
oai_resp_fc_id = state.oai_resp_fc_id;
|
||||
oai_resp_fc_item_id = state.oai_resp_fc_item_id;
|
||||
oai_resp_seq_num = state.oai_resp_seq_num;
|
||||
oai_resp_output_idx = state.oai_resp_output_idx;
|
||||
|
||||
// track if the accumulated message has any reasoning content
|
||||
anthropic_has_reasoning = !state.chat_msg.reasoning_content.empty();
|
||||
|
||||
// Pre-compute state updates based on diffs (for next chunk)
|
||||
// Also advance seq_num/output_idx to match events that to_json_oaicompat_resp() will emit
|
||||
if (n_decoded == 1) {
|
||||
state.oai_resp_seq_num += 2; // response.created + response.in_progress
|
||||
}
|
||||
for (const common_chat_msg_diff & diff : oaicompat_msg_diffs) {
|
||||
if (!diff.reasoning_content_delta.empty() && !state.thinking_block_started) {
|
||||
state.thinking_block_started = true;
|
||||
if (!diff.reasoning_content_delta.empty()) {
|
||||
if (!state.thinking_block_started) {
|
||||
state.thinking_block_started = true;
|
||||
state.oai_resp_seq_num++; // output_item.added
|
||||
state.oai_resp_output_idx++;
|
||||
}
|
||||
state.oai_resp_seq_num++; // reasoning_text.delta
|
||||
}
|
||||
if (!diff.content_delta.empty() && !state.text_block_started) {
|
||||
state.text_block_started = true;
|
||||
if (!diff.content_delta.empty()) {
|
||||
if (!state.text_block_started) {
|
||||
state.text_block_started = true;
|
||||
state.oai_resp_seq_num += 2; // output_item.added + content_part.added
|
||||
state.oai_resp_output_idx++;
|
||||
}
|
||||
state.oai_resp_seq_num++; // output_text.delta
|
||||
}
|
||||
if (!diff.tool_call_delta.name.empty()) {
|
||||
state.oai_resp_fc_id = diff.tool_call_delta.id;
|
||||
state.oai_resp_fc_item_id = "fc_" + random_string();
|
||||
state.oai_resp_fc_item_ids.push_back(state.oai_resp_fc_item_id);
|
||||
state.oai_resp_seq_num++; // output_item.added
|
||||
state.oai_resp_output_idx++;
|
||||
}
|
||||
if (!diff.tool_call_delta.arguments.empty()) {
|
||||
state.oai_resp_seq_num++; // function_call_arguments.delta
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1523,28 +1613,29 @@ json server_task_result_cmpl_partial::to_json_oaicompat_chat() {
|
|||
|
||||
json server_task_result_cmpl_partial::to_json_oaicompat_resp() {
|
||||
std::vector<json> events;
|
||||
int & seq_num = oai_resp_seq_num;
|
||||
int & output_idx = oai_resp_output_idx;
|
||||
|
||||
if (n_decoded == 1) {
|
||||
// Build initial response object with all required fields but empty output and zeroed usage
|
||||
json initial_resp = build_oai_resp_metadata(
|
||||
oai_resp_id, oaicompat_model, {}, "",
|
||||
0, 0, 0, "in_progress");
|
||||
|
||||
events.push_back(json {
|
||||
{"event", "response.created"},
|
||||
{"data", json {
|
||||
{"type", "response.created"},
|
||||
{"response", json {
|
||||
{"id", oai_resp_id},
|
||||
{"object", "response"},
|
||||
{"status", "in_progress"},
|
||||
}},
|
||||
{"type", "response.created"},
|
||||
{"sequence_number", seq_num++},
|
||||
{"response", initial_resp},
|
||||
}},
|
||||
});
|
||||
events.push_back(json {
|
||||
{"event", "response.in_progress"},
|
||||
{"data", json {
|
||||
{"type", "response.in_progress"},
|
||||
{"response", json {
|
||||
{"id", oai_resp_id},
|
||||
{"object", "response"},
|
||||
{"status", "in_progress"},
|
||||
}},
|
||||
{"type", "response.in_progress"},
|
||||
{"sequence_number", seq_num++},
|
||||
{"response", initial_resp},
|
||||
}},
|
||||
});
|
||||
}
|
||||
|
|
@ -1555,7 +1646,9 @@ json server_task_result_cmpl_partial::to_json_oaicompat_resp() {
|
|||
events.push_back(json {
|
||||
{"event", "response.output_item.added"},
|
||||
{"data", json {
|
||||
{"type", "response.output_item.added"},
|
||||
{"type", "response.output_item.added"},
|
||||
{"sequence_number", seq_num++},
|
||||
{"output_index", output_idx++},
|
||||
{"item", json {
|
||||
{"id", oai_resp_reasoning_id},
|
||||
{"summary", json::array()},
|
||||
|
|
@ -1571,9 +1664,12 @@ json server_task_result_cmpl_partial::to_json_oaicompat_resp() {
|
|||
events.push_back(json {
|
||||
{"event", "response.reasoning_text.delta"},
|
||||
{"data", json {
|
||||
{"type", "response.reasoning_text.delta"},
|
||||
{"delta", diff.reasoning_content_delta},
|
||||
{"item_id", oai_resp_reasoning_id},
|
||||
{"type", "response.reasoning_text.delta"},
|
||||
{"sequence_number", seq_num++},
|
||||
{"output_index", output_idx - 1},
|
||||
{"content_index", 0},
|
||||
{"delta", diff.reasoning_content_delta},
|
||||
{"item_id", oai_resp_reasoning_id},
|
||||
}},
|
||||
});
|
||||
}
|
||||
|
|
@ -1583,7 +1679,9 @@ json server_task_result_cmpl_partial::to_json_oaicompat_resp() {
|
|||
events.push_back(json {
|
||||
{"event", "response.output_item.added"},
|
||||
{"data", json {
|
||||
{"type", "response.output_item.added"},
|
||||
{"type", "response.output_item.added"},
|
||||
{"sequence_number", seq_num++},
|
||||
{"output_index", output_idx++},
|
||||
{"item", json {
|
||||
{"content", json::array()},
|
||||
{"id", oai_resp_message_id},
|
||||
|
|
@ -1596,8 +1694,11 @@ json server_task_result_cmpl_partial::to_json_oaicompat_resp() {
|
|||
events.push_back(json {
|
||||
{"event", "response.content_part.added"},
|
||||
{"data", json {
|
||||
{"type", "response.content_part.added"},
|
||||
{"item_id", oai_resp_message_id},
|
||||
{"type", "response.content_part.added"},
|
||||
{"sequence_number", seq_num++},
|
||||
{"output_index", output_idx - 1},
|
||||
{"content_index", 0},
|
||||
{"item_id", oai_resp_message_id},
|
||||
{"part", json {
|
||||
{"type", "output_text"},
|
||||
{"text", ""},
|
||||
|
|
@ -1609,9 +1710,12 @@ json server_task_result_cmpl_partial::to_json_oaicompat_resp() {
|
|||
events.push_back(json {
|
||||
{"event", "response.output_text.delta"},
|
||||
{"data", json {
|
||||
{"type", "response.output_text.delta"},
|
||||
{"item_id", oai_resp_message_id},
|
||||
{"delta", diff.content_delta},
|
||||
{"type", "response.output_text.delta"},
|
||||
{"sequence_number", seq_num++},
|
||||
{"output_index", output_idx - 1},
|
||||
{"content_index", 0},
|
||||
{"item_id", oai_resp_message_id},
|
||||
{"delta", diff.content_delta},
|
||||
}},
|
||||
});
|
||||
}
|
||||
|
|
@ -1620,26 +1724,30 @@ json server_task_result_cmpl_partial::to_json_oaicompat_resp() {
|
|||
events.push_back(json {
|
||||
{"event", "response.output_item.added"},
|
||||
{"data", json {
|
||||
{"type", "response.output_item.added"},
|
||||
{"type", "response.output_item.added"},
|
||||
{"sequence_number", seq_num++},
|
||||
{"output_index", output_idx++},
|
||||
{"item", json {
|
||||
{"id", oai_resp_fc_item_id},
|
||||
{"arguments", ""},
|
||||
{"call_id", "fc_" + diff.tool_call_delta.id},
|
||||
{"call_id", diff.tool_call_delta.id},
|
||||
{"name", diff.tool_call_delta.name},
|
||||
{"type", "function_call"},
|
||||
{"status", "in_progress"},
|
||||
}},
|
||||
}},
|
||||
});
|
||||
oai_resp_fc_id = diff.tool_call_delta.id;
|
||||
}
|
||||
|
||||
if (!diff.tool_call_delta.arguments.empty()) {
|
||||
events.push_back(json {
|
||||
{"event", "response.function_call_arguments.delta"},
|
||||
{"data", json {
|
||||
{"type", "response.function_call_arguments.delta"},
|
||||
{"delta", diff.tool_call_delta.arguments},
|
||||
{"item_id", "fc_" + oai_resp_fc_id},
|
||||
{"type", "response.function_call_arguments.delta"},
|
||||
{"sequence_number", seq_num++},
|
||||
{"output_index", output_idx - 1},
|
||||
{"delta", diff.tool_call_delta.arguments},
|
||||
{"item_id", oai_resp_fc_item_id},
|
||||
}},
|
||||
});
|
||||
}
|
||||
|
|
|
|||
|
|
@ -109,7 +109,11 @@ struct task_result_state {
|
|||
const std::string oai_resp_id;
|
||||
const std::string oai_resp_reasoning_id;
|
||||
const std::string oai_resp_message_id;
|
||||
std::string oai_resp_fc_id; // function call ID for current args delta
|
||||
std::string oai_resp_fc_id; // model's tool_call ID for current function call
|
||||
std::string oai_resp_fc_item_id; // our generated fc_ item ID for current function call
|
||||
std::vector<std::string> oai_resp_fc_item_ids; // all generated fc_ IDs, in order of tool call appearance
|
||||
int oai_resp_seq_num = 0; // monotonically increasing per-stream
|
||||
int oai_resp_output_idx = 0; // tracks current output item index
|
||||
|
||||
task_result_state(const common_chat_parser_params & chat_parser_params)
|
||||
: chat_parser_params(chat_parser_params)
|
||||
|
|
@ -370,6 +374,8 @@ struct server_task_result_cmpl_final : server_task_result {
|
|||
std::string oai_resp_id;
|
||||
std::string oai_resp_reasoning_id;
|
||||
std::string oai_resp_message_id;
|
||||
std::vector<std::string> oai_resp_fc_item_ids;
|
||||
int oai_resp_seq_num = 0;
|
||||
|
||||
virtual bool is_stop() override {
|
||||
return true; // in stream mode, final responses are considered stop
|
||||
|
|
@ -384,6 +390,8 @@ struct server_task_result_cmpl_final : server_task_result {
|
|||
oai_resp_id = state.oai_resp_id;
|
||||
oai_resp_reasoning_id = state.oai_resp_reasoning_id;
|
||||
oai_resp_message_id = state.oai_resp_message_id;
|
||||
oai_resp_fc_item_ids = state.oai_resp_fc_item_ids;
|
||||
oai_resp_seq_num = state.oai_resp_seq_num;
|
||||
}
|
||||
|
||||
json to_json_non_oaicompat();
|
||||
|
|
@ -436,6 +444,9 @@ struct server_task_result_cmpl_partial : server_task_result {
|
|||
std::string oai_resp_reasoning_id;
|
||||
std::string oai_resp_message_id;
|
||||
std::string oai_resp_fc_id;
|
||||
std::string oai_resp_fc_item_id;
|
||||
int oai_resp_seq_num = 0;
|
||||
int oai_resp_output_idx = 0;
|
||||
|
||||
// for Anthropic API: track if any reasoning content has been generated
|
||||
bool anthropic_has_reasoning = false;
|
||||
|
|
|
|||
|
|
@ -71,3 +71,524 @@ def test_responses_stream_with_openai_library():
|
|||
assert r.response.output[0].id.startswith("msg_")
|
||||
assert gathered_text == r.response.output_text
|
||||
assert match_regex("(Suddenly)+", r.response.output_text)
|
||||
|
||||
|
||||
def test_responses_schema_fields():
|
||||
"""Verify the 24 Response object fields added by this PR are present
|
||||
with correct types and default values. These fields are required by
|
||||
the OpenAI Responses API spec but were missing before this change."""
|
||||
global server
|
||||
server.start()
|
||||
res = server.make_request("POST", "/v1/responses", data={
|
||||
"model": "gpt-4.1",
|
||||
"input": "Book",
|
||||
"max_output_tokens": 8,
|
||||
"temperature": 0.8,
|
||||
})
|
||||
assert res.status_code == 200
|
||||
body = res.body
|
||||
# Usage sub-fields added by this PR
|
||||
usage = body["usage"]
|
||||
assert isinstance(usage["input_tokens_details"]["cached_tokens"], int)
|
||||
assert isinstance(usage["output_tokens_details"]["reasoning_tokens"], int)
|
||||
# All 24 fields added by this PR must be present with correct defaults
|
||||
assert body["incomplete_details"] is None
|
||||
assert body["previous_response_id"] is None
|
||||
assert body["instructions"] is None
|
||||
assert body["error"] is None
|
||||
assert body["tools"] == []
|
||||
assert body["tool_choice"] == "auto"
|
||||
assert body["truncation"] == "disabled"
|
||||
assert body["parallel_tool_calls"] == False
|
||||
assert body["text"] == {"format": {"type": "text"}}
|
||||
assert body["top_p"] == 1.0
|
||||
assert body["temperature"] == 1.0
|
||||
assert body["presence_penalty"] == 0.0
|
||||
assert body["frequency_penalty"] == 0.0
|
||||
assert body["top_logprobs"] == 0
|
||||
assert body["reasoning"] is None
|
||||
assert body["max_output_tokens"] is None
|
||||
assert body["store"] == False
|
||||
assert body["service_tier"] == "default"
|
||||
assert body["metadata"] == {}
|
||||
assert body["background"] == False
|
||||
assert body["safety_identifier"] is None
|
||||
assert body["prompt_cache_key"] is None
|
||||
assert body["max_tool_calls"] is None
|
||||
|
||||
|
||||
def test_responses_stream_schema_fields():
|
||||
"""Verify streaming done-events have the sequence_number, output_index,
|
||||
and content_index fields added by this PR. Also verify the completed
|
||||
response includes the 24 new schema fields."""
|
||||
global server
|
||||
server.start()
|
||||
res = server.make_stream_request("POST", "/v1/responses", data={
|
||||
"model": "gpt-4.1",
|
||||
"input": "Book",
|
||||
"max_output_tokens": 8,
|
||||
"temperature": 0.8,
|
||||
"stream": True,
|
||||
})
|
||||
seen_seq_nums = []
|
||||
saw_output_text_done = False
|
||||
saw_content_part_done = False
|
||||
saw_output_item_done = False
|
||||
completed_response = None
|
||||
for data in res:
|
||||
assert "sequence_number" in data, f"missing sequence_number in {data.get('type')}"
|
||||
seen_seq_nums.append(data["sequence_number"])
|
||||
if data.get("type") == "response.output_text.done":
|
||||
saw_output_text_done = True
|
||||
assert "content_index" in data
|
||||
assert "output_index" in data
|
||||
assert "logprobs" in data
|
||||
assert isinstance(data["logprobs"], list)
|
||||
if data.get("type") == "response.content_part.done":
|
||||
saw_content_part_done = True
|
||||
assert "content_index" in data
|
||||
assert "output_index" in data
|
||||
if data.get("type") == "response.output_item.done":
|
||||
saw_output_item_done = True
|
||||
assert "output_index" in data
|
||||
if data.get("type") == "response.completed":
|
||||
completed_response = data["response"]
|
||||
# Must have seen all done-event types
|
||||
assert saw_output_text_done, "never received response.output_text.done"
|
||||
assert saw_content_part_done, "never received response.content_part.done"
|
||||
assert saw_output_item_done, "never received response.output_item.done"
|
||||
# sequence_number must be present on done events and monotonically increasing
|
||||
assert len(seen_seq_nums) >= 4, f"expected >= 4 sequenced events, got {len(seen_seq_nums)}"
|
||||
assert all(a < b for a, b in zip(seen_seq_nums, seen_seq_nums[1:])), "sequence_numbers not strictly increasing"
|
||||
# completed response must have the new schema fields with correct values
|
||||
assert completed_response is not None
|
||||
assert completed_response["metadata"] == {}
|
||||
assert completed_response["store"] == False
|
||||
assert completed_response["truncation"] == "disabled"
|
||||
assert completed_response["usage"]["output_tokens_details"]["reasoning_tokens"] == 0
|
||||
|
||||
|
||||
def test_responses_non_function_tool_skipped():
    """Non-function tool types must be silently skipped, yielding a valid
    completion with no tools field in the converted chat request. Upstream
    rejects non-function types with 400; our server must answer 200 and
    generate output exactly as if no tools had been supplied."""
    global server
    server.start()
    payload = {
        "model": "gpt-4.1",
        "input": [
            {"role": "system", "content": "Book"},
            {"role": "user", "content": "What is the best book"},
        ],
        "max_output_tokens": 8,
        "temperature": 0.8,
        "tools": [
            {"type": "web_search"},
            {"type": "code_interpreter"},
        ],
    }
    res = server.make_request("POST", "/v1/responses", data=payload)
    assert res.status_code == 200
    assert res.body["status"] == "completed"
    # Even with every tool dropped, the model must still produce text output
    assert len(res.body["output"]) > 0
    assert len(res.body["output_text"]) > 0
|
||||
|
||||
|
||||
def test_responses_only_non_function_tools_same_as_no_tools():
    """When ALL tools are non-function types they must all be filtered out,
    leaving a request identical to one sent without any tools at all.
    Prompt token counts are compared to prove the tools field was truly empty."""
    global server
    server.start()
    base_payload = {
        "model": "gpt-4.1",
        "input": [
            {"role": "system", "content": "Book"},
            {"role": "user", "content": "What is the best book"},
        ],
        "max_output_tokens": 8,
        "temperature": 0.8,
    }
    no_tools = server.make_request("POST", "/v1/responses", data=base_payload)
    # Same request again, but carrying only skippable (non-function) tools
    tool_payload = dict(base_payload)
    tool_payload["tools"] = [
        {"type": "web_search"},
        {"type": "code_interpreter"},
        {"type": "file_search"},
    ]
    with_skipped_tools = server.make_request("POST", "/v1/responses", data=tool_payload)
    assert no_tools.status_code == 200
    assert with_skipped_tools.status_code == 200
    # Identical prompt token counts prove the tools were fully stripped
    assert with_skipped_tools.body["usage"]["input_tokens"] == no_tools.body["usage"]["input_tokens"]
|
||||
|
||||
|
||||
def test_responses_extra_keys_stripped():
    """Responses-only request keys (store, include, prompt_cache_key, etc.)
    must be stripped before forwarding to the chat completions handler.
    The completion must succeed and produce the same output as a request
    without those keys."""
    global server
    server.start()
    common = {
        "model": "gpt-4.1",
        "input": [
            {"role": "system", "content": "Book"},
            {"role": "user", "content": "What is the best book"},
        ],
        "max_output_tokens": 8,
        "temperature": 0.8,
    }
    # Baseline request carrying no Responses-only keys
    baseline = server.make_request("POST", "/v1/responses", data=common)
    assert baseline.status_code == 200
    # Identical request plus keys that exist only in the Responses API
    extra = dict(common)
    extra.update({
        "store": True,
        "include": ["usage"],
        "prompt_cache_key": "test_key",
        "web_search": {"enabled": True},
        "text": {"format": {"type": "text"}},
        "truncation": "auto",
        "metadata": {"key": "value"},
    })
    res = server.make_request("POST", "/v1/responses", data=extra)
    assert res.status_code == 200
    assert res.body["status"] == "completed"
    # Stripped keys must leave prompt token consumption untouched
    assert res.body["usage"]["input_tokens"] == baseline.body["usage"]["input_tokens"]
|
||||
|
||||
|
||||
def test_responses_developer_role_merging():
    """Developer role messages must be merged into the first system message
    at position 0, so templates that require a single system message never
    see developer content as a separate turn.

    Verified via token counts: a split system + developer pair, once merged,
    must consume the same prompt tokens as a single system message carrying
    the combined content."""
    global server
    server.start()
    user_turn = {"role": "user", "content": [{"type": "input_text", "text": "What is the best book"}]}
    # One system message already holding both instruction parts
    combined = server.make_request("POST", "/v1/responses", data={
        "model": "gpt-4.1",
        "input": [
            {"role": "system", "content": [
                {"type": "input_text", "text": "Book"},
                {"type": "input_text", "text": "Keep it short"},
            ]},
            user_turn,
        ],
        "max_output_tokens": 8,
        "temperature": 0.8,
    })
    assert combined.status_code == 200
    # Separate system and developer turns — the server must merge them
    split = server.make_request("POST", "/v1/responses", data={
        "model": "gpt-4.1",
        "input": [
            {"role": "system", "content": [{"type": "input_text", "text": "Book"}]},
            user_turn,
            {"role": "developer", "content": [{"type": "input_text", "text": "Keep it short"}]},
        ],
        "max_output_tokens": 8,
        "temperature": 0.8,
    })
    assert split.status_code == 200
    assert split.body["status"] == "completed"
    # Equal token usage proves the developer turn merged into the system slot
    assert split.body["usage"]["input_tokens"] == combined.body["usage"]["input_tokens"]
|
||||
|
||||
|
||||
def test_responses_input_text_type_multi_turn():
    """input_text type must be accepted for assistant messages (EasyInputMessage).
    An assistant message without an explicit type:'message' must also be
    accepted (AssistantMessageItemParam). The multi-turn context must be
    preserved so the model sees the full conversation."""
    global server
    server.start()
    multi = server.make_request("POST", "/v1/responses", data={
        "model": "gpt-4.1",
        "input": [
            {"role": "user", "content": [{"type": "input_text", "text": "Hello"}]},
            {
                "role": "assistant",
                "content": [{"type": "input_text", "text": "Hi there"}],
            },
            {"role": "user", "content": [{"type": "input_text", "text": "How are you"}]},
        ],
        "max_output_tokens": 8,
        "temperature": 0.8,
    })
    assert multi.status_code == 200
    assert multi.body["status"] == "completed"
    # The same final question sent alone must yield a shorter prompt
    single = server.make_request("POST", "/v1/responses", data={
        "model": "gpt-4.1",
        "input": "How are you",
        "max_output_tokens": 8,
        "temperature": 0.8,
    })
    assert single.status_code == 200
    assert multi.body["usage"]["input_tokens"] > single.body["usage"]["input_tokens"]
|
||||
|
||||
|
||||
def test_responses_output_text_matches_content():
    """output_text must be the exact concatenation of all output_text content
    parts across the response's message items (non-streaming case)."""
    global server
    server.start()
    # Non-streaming request
    res = server.make_request("POST", "/v1/responses", data={
        "model": "gpt-4.1",
        "input": [
            {"role": "system", "content": "Book"},
            {"role": "user", "content": "What is the best book"},
        ],
        "max_output_tokens": 8,
        "temperature": 0.8,
    })
    assert res.status_code == 200
    # Rebuild output_text by hand from the message content parts
    reconstructed = "".join(
        part["text"]
        for item in res.body["output"]
        if item.get("type") == "message"
        for part in item["content"]
        if part.get("type") == "output_text"
    )
    assert res.body["output_text"] == reconstructed
    assert len(reconstructed) > 0
|
||||
|
||||
|
||||
def test_responses_stream_output_text_consistency():
    """Text gathered from streaming deltas must match the output_text carried
    by the final response.completed event."""
    global server
    server.start()
    stream = server.make_stream_request("POST", "/v1/responses", data={
        "model": "gpt-4.1",
        "input": [
            {"role": "system", "content": "Book"},
            {"role": "user", "content": "What is the best book"},
        ],
        "max_output_tokens": 8,
        "temperature": 0.8,
        "stream": True,
    })
    gathered_text = ""
    completed_output_text = None
    for event in stream:
        kind = event.get("type")
        if kind == "response.output_text.delta":
            gathered_text += event["delta"]
        elif kind == "response.completed":
            completed_output_text = event["response"]["output_text"]
            # The final content parts must also carry the gathered text
            for item in event["response"]["output"]:
                if item.get("type") != "message":
                    continue
                for part in item["content"]:
                    if part.get("type") == "output_text":
                        assert part["text"] == gathered_text
    assert completed_output_text is not None
    assert gathered_text == completed_output_text
    assert len(gathered_text) > 0
|
||||
|
||||
|
||||
def test_responses_stream_created_event_has_full_response():
    """response.created must contain the full response object with all required
    fields, not just {id, object, status}. This is needed by strict client
    libraries like async-openai.

    Both response.created and response.in_progress are checked: each must
    carry a complete in-progress snapshot with empty output and zero usage."""
    global server
    server.start()
    res = server.make_stream_request("POST", "/v1/responses", data={
        "model": "gpt-4.1",
        "input": [
            {"role": "system", "content": "Book"},
            {"role": "user", "content": "What is the best book"},
        ],
        "max_output_tokens": 8,
        "temperature": 0.8,
        "stream": True,
    })
    created_resp = None
    in_progress_resp = None
    for data in res:
        if data.get("type") == "response.created":
            created_resp = data["response"]
        if data.get("type") == "response.in_progress":
            in_progress_resp = data["response"]
    assert created_resp is not None, "never received response.created"
    assert in_progress_resp is not None, "never received response.in_progress"
    # Both must have the full response object, not just minimal fields
    for resp in [created_resp, in_progress_resp]:
        assert resp["status"] == "in_progress"
        assert resp["id"].startswith("resp_")
        assert resp["object"] == "response"
        assert resp["model"] is not None
        assert resp["completed_at"] is None
        assert resp["metadata"] == {}
        # `is False` (PEP 8): also rejects falsy non-bool values such as 0 or ""
        assert resp["store"] is False
        assert resp["truncation"] == "disabled"
        assert resp["tools"] == []
        assert resp["usage"]["input_tokens"] == 0
        assert resp["usage"]["output_tokens"] == 0
        assert resp["output"] == []
        assert resp["output_text"] == ""
|
||||
|
||||
|
||||
def test_responses_stream_all_events_have_sequence_number():
    """Every streaming event must carry a sequence_number field, and the
    numbers must be strictly increasing across the entire stream."""
    global server
    server.start()
    res = server.make_stream_request("POST", "/v1/responses", data={
        "model": "gpt-4.1",
        "input": [
            {"role": "system", "content": "Book"},
            {"role": "user", "content": "What is the best book"},
        ],
        "max_output_tokens": 8,
        "temperature": 0.8,
        "stream": True,
    })
    all_seq_nums = []
    event_types = []
    for data in res:
        assert "sequence_number" in data, f"missing sequence_number in event type {data.get('type')}"
        all_seq_nums.append(data["sequence_number"])
        event_types.append(data.get("type", "unknown"))
    # A healthy stream emits several events (created, deltas, done, completed...)
    assert len(all_seq_nums) >= 6, f"expected >= 6 events, got {len(all_seq_nums)}: {event_types}"
    # Pairwise check: each number strictly greater than its predecessor
    for i, (prev, cur) in enumerate(zip(all_seq_nums, all_seq_nums[1:]), start=1):
        assert cur > prev, \
            f"sequence_number not strictly increasing at index {i}: {all_seq_nums[i-1]} -> {all_seq_nums[i]} (events: {event_types[i-1]} -> {event_types[i]})"
|
||||
|
||||
|
||||
def test_responses_stream_delta_events_have_indices():
    """Delta and added events must carry output_index; content-related events
    must additionally carry content_index."""
    global server
    server.start()
    res = server.make_stream_request("POST", "/v1/responses", data={
        "model": "gpt-4.1",
        "input": [
            {"role": "system", "content": "Book"},
            {"role": "user", "content": "What is the best book"},
        ],
        "max_output_tokens": 8,
        "temperature": 0.8,
        "stream": True,
    })
    seen_types = set()
    for data in res:
        evt = data.get("type", "")
        seen_types.add(evt)
        if evt == "response.output_item.added":
            assert "output_index" in data, "output_item.added missing output_index"
        elif evt == "response.content_part.added":
            assert "output_index" in data, "content_part.added missing output_index"
            assert "content_index" in data, "content_part.added missing content_index"
        elif evt == "response.output_text.delta":
            assert "output_index" in data, "output_text.delta missing output_index"
            assert "content_index" in data, "output_text.delta missing content_index"
    # Each indexed event type must have appeared at least once
    assert "response.output_item.added" in seen_types, "never received response.output_item.added"
    assert "response.content_part.added" in seen_types, "never received response.content_part.added"
    assert "response.output_text.delta" in seen_types, "never received response.output_text.delta"
|
||||
|
||||
|
||||
def test_responses_reasoning_content_array():
    """Reasoning items whose content is an array (the spec format) must be
    accepted in a multi-turn conversation."""
    global server
    server.start()
    payload = {
        "model": "gpt-4.1",
        "input": [
            {"role": "user", "content": [{"type": "input_text", "text": "Hi"}]},
            {"type": "reasoning", "summary": [],
             "content": [{"type": "reasoning_text", "text": "thinking"}]},
            {"role": "assistant", "type": "message",
             "content": [{"type": "output_text", "text": "Hello"}]},
            {"role": "user", "content": [{"type": "input_text", "text": "How are you"}]},
        ],
        "max_output_tokens": 8,
        "temperature": 0.8,
    }
    reply = server.make_request("POST", "/v1/responses", data=payload)
    assert reply.status_code == 200
    assert reply.body["status"] == "completed"
|
||||
|
||||
|
||||
def test_responses_reasoning_content_string():
    """Reasoning items whose content is a plain string (the OpenCode format)
    must be accepted in a multi-turn conversation."""
    global server
    server.start()
    payload = {
        "model": "gpt-4.1",
        "input": [
            {"role": "user", "content": [{"type": "input_text", "text": "Hi"}]},
            {"type": "reasoning", "summary": [], "content": "thinking about it"},
            {"role": "assistant", "type": "message",
             "content": [{"type": "output_text", "text": "Hello"}]},
            {"role": "user", "content": [{"type": "input_text", "text": "How are you"}]},
        ],
        "max_output_tokens": 8,
        "temperature": 0.8,
    }
    reply = server.make_request("POST", "/v1/responses", data=payload)
    assert reply.status_code == 200
    assert reply.body["status"] == "completed"
|
||||
|
||||
|
||||
def test_responses_reasoning_content_null():
    """Reasoning items with content:null (Codex format, issue openai/codex#11834)
    must be accepted — content may be null when encrypted_content is present."""
    global server
    server.start()
    payload = {
        "model": "gpt-4.1",
        "input": [
            {"role": "user", "content": [{"type": "input_text", "text": "Hi"}]},
            {"type": "reasoning", "summary": [], "content": None,
             "encrypted_content": "opaque_data_here"},
            {"role": "assistant", "type": "message",
             "content": [{"type": "output_text", "text": "Hello"}]},
            {"role": "user", "content": [{"type": "input_text", "text": "How are you"}]},
        ],
        "max_output_tokens": 8,
        "temperature": 0.8,
    }
    reply = server.make_request("POST", "/v1/responses", data=payload)
    assert reply.status_code == 200
    assert reply.body["status"] == "completed"
|
||||
|
||||
|
||||
def test_responses_reasoning_content_omitted():
    """Reasoning items with the content key omitted entirely must be accepted."""
    global server
    server.start()
    payload = {
        "model": "gpt-4.1",
        "input": [
            {"role": "user", "content": [{"type": "input_text", "text": "Hi"}]},
            {"type": "reasoning", "summary": []},
            {"role": "assistant", "type": "message",
             "content": [{"type": "output_text", "text": "Hello"}]},
            {"role": "user", "content": [{"type": "input_text", "text": "How are you"}]},
        ],
        "max_output_tokens": 8,
        "temperature": 0.8,
    }
    reply = server.make_request("POST", "/v1/responses", data=payload)
    assert reply.status_code == 200
    assert reply.body["status"] == "completed"
|
||||
|
|
|
|||
Loading…
Reference in New Issue