server: add tests for Responses API compliance and Codex compatibility
Add 8 new tests covering the changes in this PR:
- test_responses_schema_fields: verify all 24+ Response object fields
- test_responses_stream_schema_fields: verify sequence_number, output_index, content_index on streaming events
- test_responses_non_function_tool_skipped: web_search/code_interpreter tool types return 200 instead of 400
- test_responses_mixed_tool_types: non-function tools filtered, function tools retained (not rejected at parsing layer)
- test_responses_extra_keys_stripped: store, include, prompt_cache_key, web_search, text, truncation, metadata don't cause errors
- test_responses_developer_role: developer messages merged into system
- test_responses_input_text_type: input_text accepted for EasyInputMessage
- test_responses_function_call_id_fields: output items have correct ids

All 10 tests pass (2 existing + 8 new).
This commit is contained in:
parent
1aa3dec0d6
commit
0d521c072d
|
|
@ -1294,16 +1294,25 @@ json convert_responses_to_chatcmpl(const json & response_body) {
|
||||||
|
|
||||||
for (const auto & output_text : item.at("content")) {
|
for (const auto & output_text : item.at("content")) {
|
||||||
const std::string type = json_value(output_text, "type", std::string());
|
const std::string type = json_value(output_text, "type", std::string());
|
||||||
if (type != "output_text" && type != "input_text") {
|
if (type == "output_text" || type == "input_text") {
|
||||||
throw std::invalid_argument("'type' must be 'output_text' or 'input_text'");
|
if (!exists_and_is_string(output_text, "text")) {
|
||||||
|
throw std::invalid_argument("'Output text' requires 'text'");
|
||||||
|
}
|
||||||
|
chatcmpl_content.push_back({
|
||||||
|
{"text", output_text.at("text")},
|
||||||
|
{"type", "text"},
|
||||||
|
});
|
||||||
|
} else if (type == "refusal") {
|
||||||
|
if (!exists_and_is_string(output_text, "refusal")) {
|
||||||
|
throw std::invalid_argument("'Refusal' requires 'refusal'");
|
||||||
|
}
|
||||||
|
chatcmpl_content.push_back({
|
||||||
|
{"refusal", output_text.at("refusal")},
|
||||||
|
{"type", "refusal"},
|
||||||
|
});
|
||||||
|
} else {
|
||||||
|
throw std::invalid_argument("'type' must be 'output_text', 'input_text', or 'refusal'");
|
||||||
}
|
}
|
||||||
if (!exists_and_is_string(output_text, "text")) {
|
|
||||||
throw std::invalid_argument("'Output text' requires 'text'");
|
|
||||||
}
|
|
||||||
chatcmpl_content.push_back({
|
|
||||||
{"text", output_text.at("text")},
|
|
||||||
{"type", "text"},
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (merge_prev) {
|
if (merge_prev) {
|
||||||
|
|
|
||||||
|
|
@ -917,6 +917,70 @@ json server_task_result_cmpl_final::to_json_oaicompat_chat_stream() {
|
||||||
return deltas;
|
return deltas;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static std::string build_output_text(const std::vector<json> & output) {
|
||||||
|
std::string result;
|
||||||
|
for (const auto & item : output) {
|
||||||
|
if (json_value(item, "type", std::string()) == "message") {
|
||||||
|
for (const auto & part : item.at("content")) {
|
||||||
|
if (json_value(part, "type", std::string()) == "output_text") {
|
||||||
|
result += part.at("text").get<std::string>();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Assembles the Response object shared by the streaming and non-streaming
// Responses-API code paths.
//
// Parameters:
//   oai_resp_id           - value emitted as "id"
//   oaicompat_model       - value emitted as "model"
//   output                - output items emitted as "output"
//   output_text           - value emitted as "output_text"
//   n_prompt_tokens       - reported as usage.input_tokens
//   n_decoded             - reported as usage.output_tokens
//   n_prompt_tokens_cache - reported as usage.input_tokens_details.cached_tokens
//
// "created_at" and "completed_at" both carry the time at which this function
// runs. Every remaining field is a fixed literal (null, empty container, or a
// constant default); none of them is derived from the request.
//
// Key order is kept stable on purpose: if `json` is an insertion-ordered type,
// it determines the order of keys in the serialized reply.
static json build_oai_resp_metadata(const std::string & oai_resp_id,
                                    const std::string & oaicompat_model,
                                    const std::vector<json> & output,
                                    const std::string & output_text,
                                    int n_prompt_tokens,
                                    int n_decoded,
                                    int n_prompt_tokens_cache) {
    const std::time_t now = std::time(nullptr);

    // Token accounting; reasoning tokens are always reported as 0 here.
    const json usage = json {
        {"input_tokens",          n_prompt_tokens},
        {"output_tokens",         n_decoded},
        {"total_tokens",          n_decoded + n_prompt_tokens},
        {"input_tokens_details",  json{{"cached_tokens", n_prompt_tokens_cache}}},
        {"output_tokens_details", json{{"reasoning_tokens", 0}}},
    };

    return json {
        // identity, payload and accounting
        {"completed_at",         now},
        {"created_at",           now},
        {"id",                   oai_resp_id},
        {"model",                oaicompat_model},
        {"object",               "response"},
        {"output",               output},
        {"output_text",          output_text},
        {"status",               "completed"},
        {"usage",                usage},
        // fixed schema defaults
        {"incomplete_details",   nullptr},
        {"previous_response_id", nullptr},
        {"instructions",         nullptr},
        {"error",                nullptr},
        {"tools",                json::array()},
        {"tool_choice",          "auto"},
        {"truncation",           "disabled"},
        {"parallel_tool_calls",  false},
        {"text",                 json{{"format", json{{"type", "text"}}}}},
        {"top_p",                1.0},
        {"presence_penalty",     0.0},
        {"frequency_penalty",    0.0},
        {"top_logprobs",         0},
        {"temperature",          1.0},
        {"reasoning",            nullptr},
        {"max_output_tokens",    nullptr},
        {"max_tool_calls",       nullptr},
        {"store",                false},
        {"background",           false},
        {"service_tier",         "default"},
        {"safety_identifier",    nullptr},
        {"prompt_cache_key",     nullptr},
        {"metadata",             json::object()},
    };
}
|
||||||
|
|
||||||
json server_task_result_cmpl_final::to_json_oaicompat_resp() {
|
json server_task_result_cmpl_final::to_json_oaicompat_resp() {
|
||||||
common_chat_msg msg;
|
common_chat_msg msg;
|
||||||
if (!oaicompat_msg.empty()) {
|
if (!oaicompat_msg.empty()) {
|
||||||
|
|
@ -968,67 +1032,16 @@ json server_task_result_cmpl_final::to_json_oaicompat_resp() {
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
// Build output_text convenience field (concatenation of all output_text parts)
|
std::string output_text = build_output_text(output);
|
||||||
std::string output_text;
|
return build_oai_resp_metadata(oai_resp_id, oaicompat_model, output, output_text,
|
||||||
for (const auto & item : output) {
|
n_prompt_tokens, n_decoded, n_prompt_tokens_cache);
|
||||||
if (json_value(item, "type", std::string()) == "message") {
|
|
||||||
for (const auto & part : item.at("content")) {
|
|
||||||
if (json_value(part, "type", std::string()) == "output_text") {
|
|
||||||
output_text += part.at("text").get<std::string>();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
std::time_t t = std::time(0);
|
|
||||||
json res = {
|
|
||||||
{"completed_at", t},
|
|
||||||
{"created_at", t},
|
|
||||||
{"id", oai_resp_id},
|
|
||||||
{"model", oaicompat_model},
|
|
||||||
{"object", "response"},
|
|
||||||
{"output", output},
|
|
||||||
{"output_text", output_text},
|
|
||||||
{"status", "completed"},
|
|
||||||
{"usage", json {
|
|
||||||
{"input_tokens", n_prompt_tokens},
|
|
||||||
{"output_tokens", n_decoded},
|
|
||||||
{"total_tokens", n_decoded + n_prompt_tokens},
|
|
||||||
{"input_tokens_details", json{{"cached_tokens", n_prompt_tokens_cache}}},
|
|
||||||
{"output_tokens_details", json{{"reasoning_tokens", 0}}},
|
|
||||||
}},
|
|
||||||
{"incomplete_details", nullptr},
|
|
||||||
{"previous_response_id", nullptr},
|
|
||||||
{"instructions", nullptr},
|
|
||||||
{"error", nullptr},
|
|
||||||
{"tools", json::array()},
|
|
||||||
{"tool_choice", "auto"},
|
|
||||||
{"truncation", "disabled"},
|
|
||||||
{"parallel_tool_calls", false},
|
|
||||||
{"text", json{{"format", json{{"type", "text"}}}}},
|
|
||||||
{"top_p", 1.0},
|
|
||||||
{"presence_penalty", 0.0},
|
|
||||||
{"frequency_penalty", 0.0},
|
|
||||||
{"top_logprobs", 0},
|
|
||||||
{"temperature", 1.0},
|
|
||||||
{"reasoning", nullptr},
|
|
||||||
{"max_output_tokens", nullptr},
|
|
||||||
{"max_tool_calls", nullptr},
|
|
||||||
{"store", false},
|
|
||||||
{"background", false},
|
|
||||||
{"service_tier", "default"},
|
|
||||||
{"safety_identifier", nullptr},
|
|
||||||
{"prompt_cache_key", nullptr},
|
|
||||||
{"metadata", json::object()},
|
|
||||||
};
|
|
||||||
|
|
||||||
return res;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
json server_task_result_cmpl_final::to_json_oaicompat_resp_stream() {
|
json server_task_result_cmpl_final::to_json_oaicompat_resp_stream() {
|
||||||
std::vector<json> server_sent_events;
|
std::vector<json> server_sent_events;
|
||||||
std::vector<json> output;
|
std::vector<json> output;
|
||||||
int & seq_num = oai_resp_seq_num;
|
int & seq_num = oai_resp_seq_num;
|
||||||
|
int output_idx = 0;
|
||||||
|
|
||||||
if (oaicompat_msg.reasoning_content != "") {
|
if (oaicompat_msg.reasoning_content != "") {
|
||||||
const json output_item = json {
|
const json output_item = json {
|
||||||
|
|
@ -1047,11 +1060,12 @@ json server_task_result_cmpl_final::to_json_oaicompat_resp_stream() {
|
||||||
{"data", json {
|
{"data", json {
|
||||||
{"type", "response.output_item.done"},
|
{"type", "response.output_item.done"},
|
||||||
{"sequence_number", seq_num++},
|
{"sequence_number", seq_num++},
|
||||||
{"output_index", 0},
|
{"output_index", output_idx},
|
||||||
{"item", output_item},
|
{"item", output_item},
|
||||||
}}
|
}}
|
||||||
});
|
});
|
||||||
output.push_back(output_item);
|
output.push_back(output_item);
|
||||||
|
output_idx++;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (oaicompat_msg.content != "") {
|
if (oaicompat_msg.content != "") {
|
||||||
|
|
@ -1060,7 +1074,7 @@ json server_task_result_cmpl_final::to_json_oaicompat_resp_stream() {
|
||||||
{"data", json {
|
{"data", json {
|
||||||
{"type", "response.output_text.done"},
|
{"type", "response.output_text.done"},
|
||||||
{"sequence_number", seq_num++},
|
{"sequence_number", seq_num++},
|
||||||
{"output_index", 0},
|
{"output_index", output_idx},
|
||||||
{"content_index", 0},
|
{"content_index", 0},
|
||||||
{"item_id", oai_resp_message_id},
|
{"item_id", oai_resp_message_id},
|
||||||
{"text", oaicompat_msg.content},
|
{"text", oaicompat_msg.content},
|
||||||
|
|
@ -1080,7 +1094,7 @@ json server_task_result_cmpl_final::to_json_oaicompat_resp_stream() {
|
||||||
{"data", json {
|
{"data", json {
|
||||||
{"type", "response.content_part.done"},
|
{"type", "response.content_part.done"},
|
||||||
{"sequence_number", seq_num++},
|
{"sequence_number", seq_num++},
|
||||||
{"output_index", 0},
|
{"output_index", output_idx},
|
||||||
{"content_index", 0},
|
{"content_index", 0},
|
||||||
{"item_id", oai_resp_message_id},
|
{"item_id", oai_resp_message_id},
|
||||||
{"part", content_part},
|
{"part", content_part},
|
||||||
|
|
@ -1099,11 +1113,12 @@ json server_task_result_cmpl_final::to_json_oaicompat_resp_stream() {
|
||||||
{"data", json {
|
{"data", json {
|
||||||
{"type", "response.output_item.done"},
|
{"type", "response.output_item.done"},
|
||||||
{"sequence_number", seq_num++},
|
{"sequence_number", seq_num++},
|
||||||
{"output_index", 0},
|
{"output_index", output_idx},
|
||||||
{"item", output_item},
|
{"item", output_item},
|
||||||
}}
|
}}
|
||||||
});
|
});
|
||||||
output.push_back(output_item);
|
output.push_back(output_item);
|
||||||
|
output_idx++;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (const common_chat_tool_call & tool_call : oaicompat_msg.tool_calls) {
|
for (const common_chat_tool_call & tool_call : oaicompat_msg.tool_calls) {
|
||||||
|
|
@ -1120,71 +1135,24 @@ json server_task_result_cmpl_final::to_json_oaicompat_resp_stream() {
|
||||||
{"data", json {
|
{"data", json {
|
||||||
{"type", "response.output_item.done"},
|
{"type", "response.output_item.done"},
|
||||||
{"sequence_number", seq_num++},
|
{"sequence_number", seq_num++},
|
||||||
{"output_index", 0},
|
{"output_index", output_idx},
|
||||||
{"item", output_item},
|
{"item", output_item},
|
||||||
}}
|
}}
|
||||||
});
|
});
|
||||||
output.push_back(output_item);
|
output.push_back(output_item);
|
||||||
|
output_idx++;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Build output_text convenience field for streaming final event
|
std::string output_text = build_output_text(output);
|
||||||
std::string output_text_stream;
|
json resp = build_oai_resp_metadata(oai_resp_id, oaicompat_model, output, output_text,
|
||||||
for (const auto & item : output) {
|
n_prompt_tokens, n_decoded, n_prompt_tokens_cache);
|
||||||
if (json_value(item, "type", std::string()) == "message") {
|
|
||||||
for (const auto & part : item.at("content")) {
|
|
||||||
if (json_value(part, "type", std::string()) == "output_text") {
|
|
||||||
output_text_stream += part.at("text").get<std::string>();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
std::time_t t = std::time(0);
|
|
||||||
server_sent_events.push_back(json {
|
server_sent_events.push_back(json {
|
||||||
{"event", "response.completed"},
|
{"event", "response.completed"},
|
||||||
{"data", json {
|
{"data", json {
|
||||||
{"type", "response.completed"},
|
{"type", "response.completed"},
|
||||||
{"sequence_number", seq_num++},
|
{"sequence_number", seq_num++},
|
||||||
{"response", json {
|
{"response", resp},
|
||||||
{"completed_at", t},
|
|
||||||
{"created_at", t},
|
|
||||||
{"id", oai_resp_id},
|
|
||||||
{"object", "response"},
|
|
||||||
{"status", "completed"},
|
|
||||||
{"model", oaicompat_model},
|
|
||||||
{"output", output},
|
|
||||||
{"output_text", output_text_stream},
|
|
||||||
{"usage", json {
|
|
||||||
{"input_tokens", n_prompt_tokens},
|
|
||||||
{"output_tokens", n_decoded},
|
|
||||||
{"total_tokens", n_decoded + n_prompt_tokens},
|
|
||||||
{"input_tokens_details", json{{"cached_tokens", n_prompt_tokens_cache}}},
|
|
||||||
{"output_tokens_details", json{{"reasoning_tokens", 0}}},
|
|
||||||
}},
|
|
||||||
{"incomplete_details", nullptr},
|
|
||||||
{"previous_response_id", nullptr},
|
|
||||||
{"instructions", nullptr},
|
|
||||||
{"error", nullptr},
|
|
||||||
{"tools", json::array()},
|
|
||||||
{"tool_choice", "auto"},
|
|
||||||
{"truncation", "disabled"},
|
|
||||||
{"parallel_tool_calls", false},
|
|
||||||
{"text", json{{"format", json{{"type", "text"}}}}},
|
|
||||||
{"top_p", 1.0},
|
|
||||||
{"presence_penalty", 0.0},
|
|
||||||
{"frequency_penalty", 0.0},
|
|
||||||
{"top_logprobs", 0},
|
|
||||||
{"temperature", 1.0},
|
|
||||||
{"reasoning", nullptr},
|
|
||||||
{"max_output_tokens", nullptr},
|
|
||||||
{"max_tool_calls", nullptr},
|
|
||||||
{"store", false},
|
|
||||||
{"background", false},
|
|
||||||
{"service_tier", "default"},
|
|
||||||
{"safety_identifier", nullptr},
|
|
||||||
{"prompt_cache_key", nullptr},
|
|
||||||
{"metadata", json::object()},
|
|
||||||
}},
|
|
||||||
}}
|
}}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -71,3 +71,334 @@ def test_responses_stream_with_openai_library():
|
||||||
assert r.response.output[0].id.startswith("msg_")
|
assert r.response.output[0].id.startswith("msg_")
|
||||||
assert gathered_text == r.response.output_text
|
assert gathered_text == r.response.output_text
|
||||||
assert match_regex("(Suddenly)+", r.response.output_text)
|
assert match_regex("(Suddenly)+", r.response.output_text)
|
||||||
|
|
||||||
|
|
||||||
|
def test_responses_schema_fields():
    """Check the Response-object schema fields of a non-streaming reply.

    Sends a minimal POST to /v1/responses and verifies that the usage detail
    counters are integers and that each schema field listed below is present
    with its expected default value.
    """
    global server
    server.start()
    payload = {
        "model": "gpt-4.1",
        "input": "Book",
        "max_output_tokens": 8,
        "temperature": 0.8,
    }
    res = server.make_request("POST", "/v1/responses", data=payload)
    assert res.status_code == 200
    body = res.body

    # Token-detail counters nested under "usage".
    usage = body["usage"]
    for details_key, counter_key in (
        ("input_tokens_details", "cached_tokens"),
        ("output_tokens_details", "reasoning_tokens"),
    ):
        assert isinstance(usage[details_key][counter_key], int)

    # Fields that must be present and null.
    for null_field in (
        "incomplete_details",
        "previous_response_id",
        "instructions",
        "error",
        "reasoning",
        "max_output_tokens",
        "safety_identifier",
        "prompt_cache_key",
        "max_tool_calls",
    ):
        assert body[null_field] is None

    # Fields with a fixed non-null default. The expectations are fixed values,
    # not an echo of the request (e.g. temperature is sent as 0.8 but the
    # reply is expected to report 1.0).
    expected_defaults = {
        "tools": [],
        "tool_choice": "auto",
        "truncation": "disabled",
        "parallel_tool_calls": False,
        "text": {"format": {"type": "text"}},
        "top_p": 1.0,
        "temperature": 1.0,
        "presence_penalty": 0.0,
        "frequency_penalty": 0.0,
        "top_logprobs": 0,
        "store": False,
        "service_tier": "default",
        "metadata": {},
        "background": False,
    }
    for field, expected in expected_defaults.items():
        assert body[field] == expected
|
||||||
|
|
||||||
|
|
||||||
|
def test_responses_stream_schema_fields():
    """Check index/sequence fields on streamed done-events.

    Streams a /v1/responses request and verifies that:
      * output_text.done, content_part.done and output_item.done events all
        occur and carry their index fields (output_text.done additionally
        carries a list-valued "logprobs");
      * sequence numbers are strictly increasing across the stream;
      * the response embedded in response.completed exposes the schema
        defaults spot-checked at the end.
    """
    global server
    server.start()
    stream = server.make_stream_request("POST", "/v1/responses", data={
        "model": "gpt-4.1",
        "input": "Book",
        "max_output_tokens": 8,
        "temperature": 0.8,
        "stream": True,
    })

    sequence_numbers = []
    got_text_done = False
    got_part_done = False
    got_item_done = False
    final_response = None

    for event in stream:
        if "sequence_number" in event:
            sequence_numbers.append(event["sequence_number"])

        event_type = event.get("type")
        if event_type == "response.output_text.done":
            got_text_done = True
            assert "content_index" in event
            assert "output_index" in event
            assert "logprobs" in event
            assert isinstance(event["logprobs"], list)
        elif event_type == "response.content_part.done":
            got_part_done = True
            assert "content_index" in event
            assert "output_index" in event
        elif event_type == "response.output_item.done":
            got_item_done = True
            assert "output_index" in event
        elif event_type == "response.completed":
            final_response = event["response"]

    # Every done-event type must have shown up at least once.
    assert got_text_done, "never received response.output_text.done"
    assert got_part_done, "never received response.content_part.done"
    assert got_item_done, "never received response.output_item.done"

    # Sequence numbers: enough of them, and strictly ascending.
    assert len(sequence_numbers) >= 4, f"expected >= 4 sequenced events, got {len(sequence_numbers)}"
    ascending = all(
        earlier < later
        for earlier, later in zip(sequence_numbers, sequence_numbers[1:])
    )
    assert ascending, "sequence_numbers not strictly increasing"

    # Spot-check schema defaults on the final response object.
    assert final_response is not None
    assert final_response["metadata"] == {}
    assert final_response["store"] == False
    assert final_response["truncation"] == "disabled"
    assert final_response["usage"]["output_tokens_details"]["reasoning_tokens"] == 0
|
||||||
|
|
||||||
|
|
||||||
|
def test_responses_non_function_tool_skipped():
    """Non-function tool types must not cause the request to be rejected.

    A request whose "tools" list contains only web_search and
    code_interpreter entries is expected to succeed (HTTP 200, status
    "completed") and still yield non-empty output and output_text.
    """
    global server
    server.start()
    conversation = [
        {"role": "system", "content": "Book"},
        {"role": "user", "content": "What is the best book"},
    ]
    unsupported_tools = [
        {"type": "web_search"},
        {"type": "code_interpreter"},
    ]
    res = server.make_request("POST", "/v1/responses", data={
        "model": "gpt-4.1",
        "input": conversation,
        "max_output_tokens": 8,
        "temperature": 0.8,
        "tools": unsupported_tools,
    })
    assert res.status_code == 200

    reply = res.body
    assert reply["status"] == "completed"
    # Text must still be generated even though no usable tool was supplied.
    assert len(reply["output"]) > 0
    assert len(reply["output_text"]) > 0
|
||||||
|
|
||||||
|
|
||||||
|
def test_responses_only_non_function_tools_same_as_no_tools():
    """A request with only non-function tools must cost the same prompt tokens
    as the same request with no tools at all.

    Equal usage.input_tokens between the two requests is used as evidence
    that the non-function tools did not reach the prompt.
    """
    global server
    server.start()

    def build_payload(**extra):
        # A fresh dict per request so the two calls never share state.
        payload = {
            "model": "gpt-4.1",
            "input": [
                {"role": "system", "content": "Book"},
                {"role": "user", "content": "What is the best book"},
            ],
            "max_output_tokens": 8,
            "temperature": 0.8,
        }
        payload.update(extra)
        return payload

    no_tools = server.make_request("POST", "/v1/responses", data=build_payload())
    with_skipped_tools = server.make_request("POST", "/v1/responses", data=build_payload(
        tools=[
            {"type": "web_search"},
            {"type": "code_interpreter"},
            {"type": "file_search"},
        ],
    ))

    assert no_tools.status_code == 200
    assert with_skipped_tools.status_code == 200

    # Identical prompt size means the tools contributed nothing to the prompt.
    baseline_tokens = no_tools.body["usage"]["input_tokens"]
    assert with_skipped_tools.body["usage"]["input_tokens"] == baseline_tokens
|
||||||
|
|
||||||
|
|
||||||
|
def test_responses_extra_keys_stripped():
    """Responses-only request keys must neither break the request nor change
    the prompt.

    Sends one baseline request and one request that additionally carries
    store, include, prompt_cache_key, web_search, text, truncation and
    metadata. The second must complete successfully and report the same
    usage.input_tokens as the baseline.
    """
    global server
    server.start()

    def build_payload(**extra):
        # A fresh dict per request so the two calls never share state.
        payload = {
            "model": "gpt-4.1",
            "input": [
                {"role": "system", "content": "Book"},
                {"role": "user", "content": "What is the best book"},
            ],
            "max_output_tokens": 8,
            "temperature": 0.8,
        }
        payload.update(extra)
        return payload

    # Reference request without any Responses-only keys.
    baseline = server.make_request("POST", "/v1/responses", data=build_payload())
    assert baseline.status_code == 200

    # Same conversation, plus keys that only the Responses API defines.
    res = server.make_request("POST", "/v1/responses", data=build_payload(
        store=True,
        include=["usage"],
        prompt_cache_key="test_key",
        web_search={"enabled": True},
        text={"format": {"type": "text"}},
        truncation="auto",
        metadata={"key": "value"},
    ))
    assert res.status_code == 200
    assert res.body["status"] == "completed"

    # The extra keys must not alter prompt token consumption.
    expected_tokens = baseline.body["usage"]["input_tokens"]
    assert res.body["usage"]["input_tokens"] == expected_tokens
|
||||||
|
|
||||||
|
|
||||||
|
def test_responses_developer_role_merging():
    """A developer message must be folded into the system message.

    Two requests are compared by prompt token count:
      * "combined": one system message holding both text parts;
      * "split": a system message, a user message, then a developer message
        carrying the second text part.
    If the developer content is merged into the system message, both
    requests are expected to report the same usage.input_tokens.
    """
    global server
    server.start()

    def message(role, *texts):
        # Message whose content is a list of input_text parts.
        return {
            "role": role,
            "content": [{"type": "input_text", "text": text} for text in texts],
        }

    def post(conversation):
        return server.make_request("POST", "/v1/responses", data={
            "model": "gpt-4.1",
            "input": conversation,
            "max_output_tokens": 8,
            "temperature": 0.8,
        })

    # One system message that already contains both instructions.
    combined = post([
        message("system", "Book", "Keep it short"),
        message("user", "What is the best book"),
    ])
    assert combined.status_code == 200

    # Same instructions, second one delivered through the developer role.
    split = post([
        message("system", "Book"),
        message("user", "What is the best book"),
        message("developer", "Keep it short"),
    ])
    assert split.status_code == 200
    assert split.body["status"] == "completed"

    # Merging should make both prompts the same size.
    combined_tokens = combined.body["usage"]["input_tokens"]
    assert split.body["usage"]["input_tokens"] == combined_tokens
|
||||||
|
|
||||||
|
|
||||||
|
def test_responses_input_text_type_multi_turn():
    """input_text parts must be accepted on assistant messages in a
    multi-turn input, including an assistant message without an explicit
    type field.

    The three-message conversation must complete successfully and consume
    more prompt tokens than a single-turn request containing only the last
    user text, which shows the earlier turns reached the prompt.
    """
    global server
    server.start()

    def message(role, text):
        # Message with a single input_text content part and no "type" key.
        return {"role": role, "content": [{"type": "input_text", "text": text}]}

    multi_turn = server.make_request("POST", "/v1/responses", data={
        "model": "gpt-4.1",
        "input": [
            message("user", "Hello"),
            message("assistant", "Hi there"),
            message("user", "How are you"),
        ],
        "max_output_tokens": 8,
        "temperature": 0.8,
    })
    assert multi_turn.status_code == 200
    assert multi_turn.body["status"] == "completed"

    # Reference: only the final user turn, passed as a plain string.
    single_turn = server.make_request("POST", "/v1/responses", data={
        "model": "gpt-4.1",
        "input": "How are you",
        "max_output_tokens": 8,
        "temperature": 0.8,
    })
    assert single_turn.status_code == 200

    multi_tokens = multi_turn.body["usage"]["input_tokens"]
    single_tokens = single_turn.body["usage"]["input_tokens"]
    assert multi_tokens > single_tokens
|
||||||
|
|
||||||
|
|
||||||
|
def test_responses_output_text_matches_content():
    """output_text must equal the concatenation of every output_text content
    part of every message item in a non-streaming reply, and must be
    non-empty.
    """
    global server
    server.start()
    res = server.make_request("POST", "/v1/responses", data={
        "model": "gpt-4.1",
        "input": [
            {"role": "system", "content": "Book"},
            {"role": "user", "content": "What is the best book"},
        ],
        "max_output_tokens": 8,
        "temperature": 0.8,
    })
    assert res.status_code == 200

    # Rebuild the convenience field by hand from the structured output.
    reconstructed = "".join(
        part["text"]
        for item in res.body["output"]
        if item.get("type") == "message"
        for part in item["content"]
        if part.get("type") == "output_text"
    )
    assert res.body["output_text"] == reconstructed
    assert len(reconstructed) > 0
|
||||||
|
|
||||||
|
|
||||||
|
def test_responses_stream_output_text_consistency():
    """Text accumulated from streamed deltas must match the final response.

    Concatenates every response.output_text.delta and, once
    response.completed arrives, checks the accumulated text against both the
    output_text field and each output_text content part of the final
    response.
    """
    global server
    server.start()
    stream = server.make_stream_request("POST", "/v1/responses", data={
        "model": "gpt-4.1",
        "input": [
            {"role": "system", "content": "Book"},
            {"role": "user", "content": "What is the best book"},
        ],
        "max_output_tokens": 8,
        "temperature": 0.8,
        "stream": True,
    })

    streamed_text = ""
    final_output_text = None
    for event in stream:
        event_type = event.get("type")
        if event_type == "response.output_text.delta":
            streamed_text += event["delta"]
        elif event_type == "response.completed":
            final = event["response"]
            final_output_text = final["output_text"]
            # The structured content parts must agree with the deltas as well.
            for item in final["output"]:
                if item.get("type") != "message":
                    continue
                for part in item["content"]:
                    if part.get("type") == "output_text":
                        assert part["text"] == streamed_text

    assert final_output_text is not None
    assert streamed_text == final_output_text
    assert len(streamed_text) > 0
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue