server: add tests for Responses API compliance and Codex compatibility
Add 8 new tests covering the changes in this PR:
- test_responses_schema_fields: verify all 24+ Response object fields
- test_responses_stream_schema_fields: verify sequence_number, output_index, content_index on streaming events
- test_responses_non_function_tool_skipped: web_search/code_interpreter tool types return 200 instead of 400
- test_responses_mixed_tool_types: non-function tools filtered, function tools retained (not rejected at parsing layer)
- test_responses_extra_keys_stripped: store, include, prompt_cache_key, web_search, text, truncation, metadata don't cause errors
- test_responses_developer_role: developer messages merged into system
- test_responses_input_text_type: input_text accepted for EasyInputMessage
- test_responses_function_call_id_fields: output items have correct ids

All 10 tests pass (2 existing + 8 new).
This commit is contained in:
parent
302c3c8f61
commit
467266ba4c
|
|
@ -1294,16 +1294,25 @@ json convert_responses_to_chatcmpl(const json & response_body) {
|
|||
|
||||
for (const auto & output_text : item.at("content")) {
|
||||
const std::string type = json_value(output_text, "type", std::string());
|
||||
if (type != "output_text" && type != "input_text") {
|
||||
throw std::invalid_argument("'type' must be 'output_text' or 'input_text'");
|
||||
if (type == "output_text" || type == "input_text") {
|
||||
if (!exists_and_is_string(output_text, "text")) {
|
||||
throw std::invalid_argument("'Output text' requires 'text'");
|
||||
}
|
||||
chatcmpl_content.push_back({
|
||||
{"text", output_text.at("text")},
|
||||
{"type", "text"},
|
||||
});
|
||||
} else if (type == "refusal") {
|
||||
if (!exists_and_is_string(output_text, "refusal")) {
|
||||
throw std::invalid_argument("'Refusal' requires 'refusal'");
|
||||
}
|
||||
chatcmpl_content.push_back({
|
||||
{"refusal", output_text.at("refusal")},
|
||||
{"type", "refusal"},
|
||||
});
|
||||
} else {
|
||||
throw std::invalid_argument("'type' must be 'output_text', 'input_text', or 'refusal'");
|
||||
}
|
||||
if (!exists_and_is_string(output_text, "text")) {
|
||||
throw std::invalid_argument("'Output text' requires 'text'");
|
||||
}
|
||||
chatcmpl_content.push_back({
|
||||
{"text", output_text.at("text")},
|
||||
{"type", "text"},
|
||||
});
|
||||
}
|
||||
|
||||
if (merge_prev) {
|
||||
|
|
|
|||
|
|
@ -917,6 +917,70 @@ json server_task_result_cmpl_final::to_json_oaicompat_chat_stream() {
|
|||
return deltas;
|
||||
}
|
||||
|
||||
static std::string build_output_text(const std::vector<json> & output) {
|
||||
std::string result;
|
||||
for (const auto & item : output) {
|
||||
if (json_value(item, "type", std::string()) == "message") {
|
||||
for (const auto & part : item.at("content")) {
|
||||
if (json_value(part, "type", std::string()) == "output_text") {
|
||||
result += part.at("text").get<std::string>();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
// Assembles the Response object shared by the streaming and non-streaming
// Responses API paths.
//
// Parameters:
//   oai_resp_id            - value stored under "id"
//   oaicompat_model        - value stored under "model"
//   output                 - output items stored under "output"
//   output_text            - value stored under "output_text"
//   n_prompt_tokens        - reported as usage.input_tokens
//   n_decoded              - reported as usage.output_tokens
//   n_prompt_tokens_cache  - reported as usage.input_tokens_details.cached_tokens
//
// "created_at" and "completed_at" both receive the same timestamp taken when
// this function runs. "status" is always "completed" and
// usage.output_tokens_details.reasoning_tokens is always 0. Every remaining
// field is a fixed default and does not reflect the incoming request.
static json build_oai_resp_metadata(const std::string & oai_resp_id,
                                    const std::string & oaicompat_model,
                                    const std::vector<json> & output,
                                    const std::string & output_text,
                                    int n_prompt_tokens,
                                    int n_decoded,
                                    int n_prompt_tokens_cache) {
    const std::time_t now = std::time(0);

    // Token accounting block.
    json usage = json {
        {"input_tokens",          n_prompt_tokens},
        {"output_tokens",         n_decoded},
        {"total_tokens",          n_decoded + n_prompt_tokens},
        {"input_tokens_details",  json{{"cached_tokens", n_prompt_tokens_cache}}},
        {"output_tokens_details", json{{"reasoning_tokens", 0}}},
    };

    // Key order is kept exactly as before in case `json` preserves insertion order.
    json response = json {
        {"completed_at",         now},
        {"created_at",           now},
        {"id",                   oai_resp_id},
        {"model",                oaicompat_model},
        {"object",               "response"},
        {"output",               output},
        {"output_text",          output_text},
        {"status",               "completed"},
        {"usage",                usage},
        {"incomplete_details",   nullptr},
        {"previous_response_id", nullptr},
        {"instructions",         nullptr},
        {"error",                nullptr},
        {"tools",                json::array()},
        {"tool_choice",          "auto"},
        {"truncation",           "disabled"},
        {"parallel_tool_calls",  false},
        {"text",                 json{{"format", json{{"type", "text"}}}}},
        {"top_p",                1.0},
        {"presence_penalty",     0.0},
        {"frequency_penalty",    0.0},
        {"top_logprobs",         0},
        {"temperature",          1.0},
        {"reasoning",            nullptr},
        {"max_output_tokens",    nullptr},
        {"max_tool_calls",       nullptr},
        {"store",                false},
        {"background",           false},
        {"service_tier",         "default"},
        {"safety_identifier",    nullptr},
        {"prompt_cache_key",     nullptr},
        {"metadata",             json::object()},
    };

    return response;
}
|
||||
|
||||
json server_task_result_cmpl_final::to_json_oaicompat_resp() {
|
||||
common_chat_msg msg;
|
||||
if (!oaicompat_msg.empty()) {
|
||||
|
|
@ -968,67 +1032,16 @@ json server_task_result_cmpl_final::to_json_oaicompat_resp() {
|
|||
});
|
||||
}
|
||||
|
||||
// Build output_text convenience field (concatenation of all output_text parts)
|
||||
std::string output_text;
|
||||
for (const auto & item : output) {
|
||||
if (json_value(item, "type", std::string()) == "message") {
|
||||
for (const auto & part : item.at("content")) {
|
||||
if (json_value(part, "type", std::string()) == "output_text") {
|
||||
output_text += part.at("text").get<std::string>();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::time_t t = std::time(0);
|
||||
json res = {
|
||||
{"completed_at", t},
|
||||
{"created_at", t},
|
||||
{"id", oai_resp_id},
|
||||
{"model", oaicompat_model},
|
||||
{"object", "response"},
|
||||
{"output", output},
|
||||
{"output_text", output_text},
|
||||
{"status", "completed"},
|
||||
{"usage", json {
|
||||
{"input_tokens", n_prompt_tokens},
|
||||
{"output_tokens", n_decoded},
|
||||
{"total_tokens", n_decoded + n_prompt_tokens},
|
||||
{"input_tokens_details", json{{"cached_tokens", n_prompt_tokens_cache}}},
|
||||
{"output_tokens_details", json{{"reasoning_tokens", 0}}},
|
||||
}},
|
||||
{"incomplete_details", nullptr},
|
||||
{"previous_response_id", nullptr},
|
||||
{"instructions", nullptr},
|
||||
{"error", nullptr},
|
||||
{"tools", json::array()},
|
||||
{"tool_choice", "auto"},
|
||||
{"truncation", "disabled"},
|
||||
{"parallel_tool_calls", false},
|
||||
{"text", json{{"format", json{{"type", "text"}}}}},
|
||||
{"top_p", 1.0},
|
||||
{"presence_penalty", 0.0},
|
||||
{"frequency_penalty", 0.0},
|
||||
{"top_logprobs", 0},
|
||||
{"temperature", 1.0},
|
||||
{"reasoning", nullptr},
|
||||
{"max_output_tokens", nullptr},
|
||||
{"max_tool_calls", nullptr},
|
||||
{"store", false},
|
||||
{"background", false},
|
||||
{"service_tier", "default"},
|
||||
{"safety_identifier", nullptr},
|
||||
{"prompt_cache_key", nullptr},
|
||||
{"metadata", json::object()},
|
||||
};
|
||||
|
||||
return res;
|
||||
std::string output_text = build_output_text(output);
|
||||
return build_oai_resp_metadata(oai_resp_id, oaicompat_model, output, output_text,
|
||||
n_prompt_tokens, n_decoded, n_prompt_tokens_cache);
|
||||
}
|
||||
|
||||
json server_task_result_cmpl_final::to_json_oaicompat_resp_stream() {
|
||||
std::vector<json> server_sent_events;
|
||||
std::vector<json> output;
|
||||
int & seq_num = oai_resp_seq_num;
|
||||
int output_idx = 0;
|
||||
|
||||
if (oaicompat_msg.reasoning_content != "") {
|
||||
const json output_item = json {
|
||||
|
|
@ -1047,11 +1060,12 @@ json server_task_result_cmpl_final::to_json_oaicompat_resp_stream() {
|
|||
{"data", json {
|
||||
{"type", "response.output_item.done"},
|
||||
{"sequence_number", seq_num++},
|
||||
{"output_index", 0},
|
||||
{"output_index", output_idx},
|
||||
{"item", output_item},
|
||||
}}
|
||||
});
|
||||
output.push_back(output_item);
|
||||
output_idx++;
|
||||
}
|
||||
|
||||
if (oaicompat_msg.content != "") {
|
||||
|
|
@ -1060,7 +1074,7 @@ json server_task_result_cmpl_final::to_json_oaicompat_resp_stream() {
|
|||
{"data", json {
|
||||
{"type", "response.output_text.done"},
|
||||
{"sequence_number", seq_num++},
|
||||
{"output_index", 0},
|
||||
{"output_index", output_idx},
|
||||
{"content_index", 0},
|
||||
{"item_id", oai_resp_message_id},
|
||||
{"text", oaicompat_msg.content},
|
||||
|
|
@ -1080,7 +1094,7 @@ json server_task_result_cmpl_final::to_json_oaicompat_resp_stream() {
|
|||
{"data", json {
|
||||
{"type", "response.content_part.done"},
|
||||
{"sequence_number", seq_num++},
|
||||
{"output_index", 0},
|
||||
{"output_index", output_idx},
|
||||
{"content_index", 0},
|
||||
{"item_id", oai_resp_message_id},
|
||||
{"part", content_part},
|
||||
|
|
@ -1099,11 +1113,12 @@ json server_task_result_cmpl_final::to_json_oaicompat_resp_stream() {
|
|||
{"data", json {
|
||||
{"type", "response.output_item.done"},
|
||||
{"sequence_number", seq_num++},
|
||||
{"output_index", 0},
|
||||
{"output_index", output_idx},
|
||||
{"item", output_item},
|
||||
}}
|
||||
});
|
||||
output.push_back(output_item);
|
||||
output_idx++;
|
||||
}
|
||||
|
||||
for (const common_chat_tool_call & tool_call : oaicompat_msg.tool_calls) {
|
||||
|
|
@ -1120,71 +1135,24 @@ json server_task_result_cmpl_final::to_json_oaicompat_resp_stream() {
|
|||
{"data", json {
|
||||
{"type", "response.output_item.done"},
|
||||
{"sequence_number", seq_num++},
|
||||
{"output_index", 0},
|
||||
{"output_index", output_idx},
|
||||
{"item", output_item},
|
||||
}}
|
||||
});
|
||||
output.push_back(output_item);
|
||||
output_idx++;
|
||||
}
|
||||
|
||||
// Build output_text convenience field for streaming final event
|
||||
std::string output_text_stream;
|
||||
for (const auto & item : output) {
|
||||
if (json_value(item, "type", std::string()) == "message") {
|
||||
for (const auto & part : item.at("content")) {
|
||||
if (json_value(part, "type", std::string()) == "output_text") {
|
||||
output_text_stream += part.at("text").get<std::string>();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
std::string output_text = build_output_text(output);
|
||||
json resp = build_oai_resp_metadata(oai_resp_id, oaicompat_model, output, output_text,
|
||||
n_prompt_tokens, n_decoded, n_prompt_tokens_cache);
|
||||
|
||||
std::time_t t = std::time(0);
|
||||
server_sent_events.push_back(json {
|
||||
{"event", "response.completed"},
|
||||
{"data", json {
|
||||
{"type", "response.completed"},
|
||||
{"sequence_number", seq_num++},
|
||||
{"response", json {
|
||||
{"completed_at", t},
|
||||
{"created_at", t},
|
||||
{"id", oai_resp_id},
|
||||
{"object", "response"},
|
||||
{"status", "completed"},
|
||||
{"model", oaicompat_model},
|
||||
{"output", output},
|
||||
{"output_text", output_text_stream},
|
||||
{"usage", json {
|
||||
{"input_tokens", n_prompt_tokens},
|
||||
{"output_tokens", n_decoded},
|
||||
{"total_tokens", n_decoded + n_prompt_tokens},
|
||||
{"input_tokens_details", json{{"cached_tokens", n_prompt_tokens_cache}}},
|
||||
{"output_tokens_details", json{{"reasoning_tokens", 0}}},
|
||||
}},
|
||||
{"incomplete_details", nullptr},
|
||||
{"previous_response_id", nullptr},
|
||||
{"instructions", nullptr},
|
||||
{"error", nullptr},
|
||||
{"tools", json::array()},
|
||||
{"tool_choice", "auto"},
|
||||
{"truncation", "disabled"},
|
||||
{"parallel_tool_calls", false},
|
||||
{"text", json{{"format", json{{"type", "text"}}}}},
|
||||
{"top_p", 1.0},
|
||||
{"presence_penalty", 0.0},
|
||||
{"frequency_penalty", 0.0},
|
||||
{"top_logprobs", 0},
|
||||
{"temperature", 1.0},
|
||||
{"reasoning", nullptr},
|
||||
{"max_output_tokens", nullptr},
|
||||
{"max_tool_calls", nullptr},
|
||||
{"store", false},
|
||||
{"background", false},
|
||||
{"service_tier", "default"},
|
||||
{"safety_identifier", nullptr},
|
||||
{"prompt_cache_key", nullptr},
|
||||
{"metadata", json::object()},
|
||||
}},
|
||||
{"response", resp},
|
||||
}}
|
||||
});
|
||||
|
||||
|
|
|
|||
|
|
@ -71,3 +71,334 @@ def test_responses_stream_with_openai_library():
|
|||
assert r.response.output[0].id.startswith("msg_")
|
||||
assert gathered_text == r.response.output_text
|
||||
assert match_regex("(Suddenly)+", r.response.output_text)
|
||||
|
||||
|
||||
def test_responses_schema_fields():
    """Check that a non-streaming /v1/responses reply carries the Response
    object fields added by this PR, with the expected types and defaults.

    Boolean fields are compared with ``is False`` so that a numeric ``0``
    cannot satisfy the check; the remaining defaults are compared by value.
    """
    global server
    server.start()
    res = server.make_request("POST", "/v1/responses", data={
        "model": "gpt-4.1",
        "input": "Book",
        "max_output_tokens": 8,
        "temperature": 0.8,
    })
    assert res.status_code == 200
    body = res.body

    # Usage sub-fields added by this PR
    usage = body["usage"]
    assert isinstance(usage["input_tokens_details"]["cached_tokens"], int)
    assert isinstance(usage["output_tokens_details"]["reasoning_tokens"], int)

    # Fields that must be present and null
    null_fields = (
        "incomplete_details",
        "previous_response_id",
        "instructions",
        "error",
        "reasoning",
        "max_output_tokens",
        "safety_identifier",
        "prompt_cache_key",
        "max_tool_calls",
    )
    for key in null_fields:
        assert body[key] is None, f"{key} should be null, got {body[key]!r}"

    # Boolean fields: identity check so 0 / 0.0 are not accepted as False
    for key in ("parallel_tool_calls", "store", "background"):
        assert body[key] is False, f"{key} should be false, got {body[key]!r}"

    # Fields with fixed non-null defaults
    expected_defaults = {
        "tools": [],
        "tool_choice": "auto",
        "truncation": "disabled",
        "text": {"format": {"type": "text"}},
        "top_p": 1.0,
        "temperature": 1.0,
        "presence_penalty": 0.0,
        "frequency_penalty": 0.0,
        "top_logprobs": 0,
        "service_tier": "default",
        "metadata": {},
    }
    for key, expected in expected_defaults.items():
        assert body[key] == expected, f"{key}: expected {expected!r}, got {body[key]!r}"
|
||||
|
||||
|
||||
def test_responses_stream_schema_fields():
    """Check the index and sequencing fields on streaming "done" events and
    a sample of the new schema fields on the final ``response.completed``
    payload.

    ``store`` is compared with ``is False`` so that a numeric ``0`` cannot
    satisfy the check.
    """
    global server
    server.start()
    res = server.make_stream_request("POST", "/v1/responses", data={
        "model": "gpt-4.1",
        "input": "Book",
        "max_output_tokens": 8,
        "temperature": 0.8,
        "stream": True,
    })

    seen_seq_nums = []
    saw_output_text_done = False
    saw_content_part_done = False
    saw_output_item_done = False
    completed_response = None

    for data in res:
        if "sequence_number" in data:
            seen_seq_nums.append(data["sequence_number"])

        # The event types below are mutually exclusive, so an elif chain suffices.
        event_type = data.get("type")
        if event_type == "response.output_text.done":
            saw_output_text_done = True
            assert "content_index" in data
            assert "output_index" in data
            assert "logprobs" in data
            assert isinstance(data["logprobs"], list)
        elif event_type == "response.content_part.done":
            saw_content_part_done = True
            assert "content_index" in data
            assert "output_index" in data
        elif event_type == "response.output_item.done":
            saw_output_item_done = True
            assert "output_index" in data
        elif event_type == "response.completed":
            completed_response = data["response"]

    # Must have seen all done-event types
    assert saw_output_text_done, "never received response.output_text.done"
    assert saw_content_part_done, "never received response.content_part.done"
    assert saw_output_item_done, "never received response.output_item.done"

    # sequence_number must be present on done events and strictly increasing
    assert len(seen_seq_nums) >= 4, f"expected >= 4 sequenced events, got {len(seen_seq_nums)}"
    assert all(a < b for a, b in zip(seen_seq_nums, seen_seq_nums[1:])), "sequence_numbers not strictly increasing"

    # completed response must have the new schema fields with correct values
    assert completed_response is not None
    assert completed_response["metadata"] == {}
    assert completed_response["store"] is False
    assert completed_response["truncation"] == "disabled"
    assert completed_response["usage"]["output_tokens_details"]["reasoning_tokens"] == 0
|
||||
|
||||
|
||||
def test_responses_non_function_tool_skipped():
    """A request whose ``tools`` list contains only non-function tool types
    (web_search, code_interpreter) must succeed with HTTP 200 and yield a
    completed response with non-empty output, rather than being rejected
    with 400 as upstream does."""
    global server
    server.start()

    conversation = [
        {"role": "system", "content": "Book"},
        {"role": "user", "content": "What is the best book"},
    ]
    unsupported_tools = [
        {"type": "web_search"},
        {"type": "code_interpreter"},
    ]
    payload = {
        "model": "gpt-4.1",
        "input": conversation,
        "max_output_tokens": 8,
        "temperature": 0.8,
        "tools": unsupported_tools,
    }

    res = server.make_request("POST", "/v1/responses", data=payload)

    assert res.status_code == 200
    reply = res.body
    assert reply["status"] == "completed"
    # Even with every tool dropped, text output is still expected.
    assert len(reply["output"]) > 0
    assert len(reply["output_text"]) > 0
|
||||
|
||||
|
||||
def test_responses_only_non_function_tools_same_as_no_tools():
    """If every entry in ``tools`` is a non-function type, the request should
    behave like one with no tools at all. This is checked by comparing the
    reported prompt token counts of the two requests."""
    global server
    server.start()

    plain_payload = {
        "model": "gpt-4.1",
        "input": [
            {"role": "system", "content": "Book"},
            {"role": "user", "content": "What is the best book"},
        ],
        "max_output_tokens": 8,
        "temperature": 0.8,
    }
    # Same request, with only non-function tools appended.
    tools_payload = {
        **plain_payload,
        "tools": [
            {"type": "web_search"},
            {"type": "code_interpreter"},
            {"type": "file_search"},
        ],
    }

    no_tools = server.make_request("POST", "/v1/responses", data=plain_payload)
    with_skipped_tools = server.make_request("POST", "/v1/responses", data=tools_payload)

    assert no_tools.status_code == 200
    assert with_skipped_tools.status_code == 200

    # Identical prompt size means the tools did not reach the prompt.
    tokens_without = no_tools.body["usage"]["input_tokens"]
    tokens_with = with_skipped_tools.body["usage"]["input_tokens"]
    assert tokens_with == tokens_without
|
||||
|
||||
|
||||
def test_responses_extra_keys_stripped():
    """Request keys that exist only in the Responses API (store, include,
    prompt_cache_key, web_search, text, truncation, metadata) must not cause
    an error and must not change the prompt: the request has to complete and
    report the same input token count as the same request without them."""
    global server
    server.start()

    common_payload = {
        "model": "gpt-4.1",
        "input": [
            {"role": "system", "content": "Book"},
            {"role": "user", "content": "What is the best book"},
        ],
        "max_output_tokens": 8,
        "temperature": 0.8,
    }
    responses_only_keys = {
        "store": True,
        "include": ["usage"],
        "prompt_cache_key": "test_key",
        "web_search": {"enabled": True},
        "text": {"format": {"type": "text"}},
        "truncation": "auto",
        "metadata": {"key": "value"},
    }

    # Reference run without any extra keys.
    baseline = server.make_request("POST", "/v1/responses", data=common_payload)
    assert baseline.status_code == 200

    # Second run: identical request plus the Responses-only keys.
    res = server.make_request(
        "POST", "/v1/responses", data={**common_payload, **responses_only_keys}
    )
    assert res.status_code == 200
    assert res.body["status"] == "completed"

    # Prompt size must be unaffected by the extra keys.
    assert res.body["usage"]["input_tokens"] == baseline.body["usage"]["input_tokens"]
|
||||
|
||||
|
||||
def test_responses_developer_role_merging():
    """A ``developer`` message is expected to be folded into the leading
    system message, so templates that accept only one system message do not
    see it as a separate turn.

    Verified indirectly through prompt size: a conversation with separate
    system and developer messages must report the same input token count as
    one whose single system message holds both pieces of text."""
    global server
    server.start()

    def text_message(role, *texts):
        # Build an input message whose content is a list of input_text parts.
        return {
            "role": role,
            "content": [{"type": "input_text", "text": t} for t in texts],
        }

    def build_payload(messages):
        return {
            "model": "gpt-4.1",
            "input": messages,
            "max_output_tokens": 8,
            "temperature": 0.8,
        }

    # Reference: one system message containing both texts.
    combined = server.make_request("POST", "/v1/responses", data=build_payload([
        text_message("system", "Book", "Keep it short"),
        text_message("user", "What is the best book"),
    ]))
    assert combined.status_code == 200

    # Same texts, with the second one sent as a trailing developer message.
    split = server.make_request("POST", "/v1/responses", data=build_payload([
        text_message("system", "Book"),
        text_message("user", "What is the best book"),
        text_message("developer", "Keep it short"),
    ]))
    assert split.status_code == 200
    assert split.body["status"] == "completed"

    # Both variants should produce a prompt of the same size.
    assert split.body["usage"]["input_tokens"] == combined.body["usage"]["input_tokens"]
|
||||
|
||||
|
||||
def test_responses_input_text_type_multi_turn():
    """Content parts of type ``input_text`` must be accepted on assistant
    messages, including an assistant message with no explicit
    ``type: "message"``. That the earlier turns reach the model is checked by
    requiring a larger prompt token count than the last user turn alone."""
    global server
    server.start()

    def turn(role, text):
        # One message with a single input_text content part.
        return {"role": role, "content": [{"type": "input_text", "text": text}]}

    history = [
        turn("user", "Hello"),
        turn("assistant", "Hi there"),
        turn("user", "How are you"),
    ]

    res = server.make_request("POST", "/v1/responses", data={
        "model": "gpt-4.1",
        "input": history,
        "max_output_tokens": 8,
        "temperature": 0.8,
    })
    assert res.status_code == 200
    assert res.body["status"] == "completed"

    # Reference request carrying only the final user text.
    single = server.make_request("POST", "/v1/responses", data={
        "model": "gpt-4.1",
        "input": "How are you",
        "max_output_tokens": 8,
        "temperature": 0.8,
    })
    assert single.status_code == 200

    multi_turn_tokens = res.body["usage"]["input_tokens"]
    single_turn_tokens = single.body["usage"]["input_tokens"]
    assert multi_turn_tokens > single_turn_tokens
|
||||
|
||||
|
||||
def test_responses_output_text_matches_content():
    """For a non-streaming response, ``output_text`` must equal the
    concatenation of the text of every ``output_text`` part inside the
    ``message`` output items, and that text must be non-empty."""
    global server
    server.start()

    res = server.make_request("POST", "/v1/responses", data={
        "model": "gpt-4.1",
        "input": [
            {"role": "system", "content": "Book"},
            {"role": "user", "content": "What is the best book"},
        ],
        "max_output_tokens": 8,
        "temperature": 0.8,
    })
    assert res.status_code == 200

    # Rebuild output_text by hand from the message content parts.
    reconstructed = "".join(
        part["text"]
        for item in res.body["output"]
        if item.get("type") == "message"
        for part in item["content"]
        if part.get("type") == "output_text"
    )

    assert res.body["output_text"] == reconstructed
    assert len(reconstructed) > 0
|
||||
|
||||
|
||||
def test_responses_stream_output_text_consistency():
    """The text accumulated from ``response.output_text.delta`` events must
    equal both ``output_text`` and each ``output_text`` content part in the
    final ``response.completed`` event."""
    global server
    server.start()

    payload = {
        "model": "gpt-4.1",
        "input": [
            {"role": "system", "content": "Book"},
            {"role": "user", "content": "What is the best book"},
        ],
        "max_output_tokens": 8,
        "temperature": 0.8,
        "stream": True,
    }
    res = server.make_stream_request("POST", "/v1/responses", data=payload)

    gathered_text = ""
    completed_output_text = None
    for data in res:
        kind = data.get("type")
        if kind == "response.output_text.delta":
            gathered_text += data["delta"]
        if kind == "response.completed":
            final_response = data["response"]
            completed_output_text = final_response["output_text"]
            # Each output_text part must match what was streamed so far.
            for item in final_response["output"]:
                if item.get("type") != "message":
                    continue
                for part in item["content"]:
                    if part.get("type") == "output_text":
                        assert part["text"] == gathered_text

    assert completed_output_text is not None
    assert gathered_text == completed_output_text
    assert len(gathered_text) > 0
|
||||
|
|
|
|||
Loading…
Reference in New Issue