server: improve Responses API compliance and Codex CLI compatibility
Codex CLI compatibility:
- Skip non-function tool types (web_search, code_interpreter)
- Merge developer/system messages into position 0 for Qwen templates
- Strip Responses-only request keys (store, include, prompt_cache_key)
- Add output_text convenience field in streaming and non-streaming responses

Responses API compliance (ideas from #19720 by riskywindow, adapted):
- Add 24 missing Response object fields per OpenAI spec
- Fix function_call id/call_id field mapping
- Add sequence_number, output_index, content_index to streaming events
- Accept input_text type and EasyInputMessage for multi-turn input

Verified: `codex -p local` and `codex -p fast` work against local llama.cpp with Qwen3.5 models, including native tool calling.

Refs: ggml-org/llama.cpp#19138, ggml-org/llama.cpp#19720
This commit is contained in:
parent
7c203670f8
commit
302c3c8f61
|
|
@ -1255,6 +1255,25 @@ json convert_responses_to_chatcmpl(const json & response_body) {
|
||||||
if (item.contains("status")) {
|
if (item.contains("status")) {
|
||||||
item.erase("status");
|
item.erase("status");
|
||||||
}
|
}
|
||||||
|
// Merge system/developer messages into the first system message.
|
||||||
|
// Many model templates (e.g. Qwen) require all system content at
|
||||||
|
// position 0 and reject system messages elsewhere in the conversation.
|
||||||
|
if (item.at("role") == "system" || item.at("role") == "developer") {
|
||||||
|
if (!chatcmpl_messages.empty() && chatcmpl_messages[0].value("role", "") == "system") {
|
||||||
|
auto & first_msg = chatcmpl_messages[0];
|
||||||
|
// Convert string content to array format if needed
|
||||||
|
if (first_msg["content"].is_string()) {
|
||||||
|
std::string old_text = first_msg["content"].get<std::string>();
|
||||||
|
first_msg["content"] = json::array({json{{"text", old_text}, {"type", "text"}}});
|
||||||
|
}
|
||||||
|
auto & first_content = first_msg["content"];
|
||||||
|
for (const auto & part : chatcmpl_content) {
|
||||||
|
first_content.push_back(part);
|
||||||
|
}
|
||||||
|
continue; // merged, don't push a separate message
|
||||||
|
}
|
||||||
|
item["role"] = "system";
|
||||||
|
}
|
||||||
item["content"] = chatcmpl_content;
|
item["content"] = chatcmpl_content;
|
||||||
|
|
||||||
chatcmpl_messages.push_back(item);
|
chatcmpl_messages.push_back(item);
|
||||||
|
|
@ -1266,35 +1285,25 @@ json convert_responses_to_chatcmpl(const json & response_body) {
|
||||||
// item.at("status") == "completed" ||
|
// item.at("status") == "completed" ||
|
||||||
// item.at("status") == "incomplete") &&
|
// item.at("status") == "incomplete") &&
|
||||||
// item["status"] not sent by codex-cli
|
// item["status"] not sent by codex-cli
|
||||||
exists_and_is_string(item, "type") &&
|
// item["type"] == "message" for OutputMessage, absent for EasyInputMessage
|
||||||
item.at("type") == "message"
|
(!item.contains("type") || item.at("type") == "message")
|
||||||
) {
|
) {
|
||||||
// #responses_create-input-input_item_list-item-output_message
|
// #responses_create-input-input_item_list-item-output_message
|
||||||
auto chatcmpl_content = json::array();
|
// Also handles AssistantMessageItemParam / EasyInputMessage with role "assistant"
|
||||||
|
std::vector<json> chatcmpl_content;
|
||||||
|
|
||||||
for (const auto & output_text : item.at("content")) {
|
for (const auto & output_text : item.at("content")) {
|
||||||
const std::string type = json_value(output_text, "type", std::string());
|
const std::string type = json_value(output_text, "type", std::string());
|
||||||
if (type == "output_text") {
|
if (type != "output_text" && type != "input_text") {
|
||||||
if (!exists_and_is_string(output_text, "text")) {
|
throw std::invalid_argument("'type' must be 'output_text' or 'input_text'");
|
||||||
throw std::invalid_argument("'Output text' requires 'text'");
|
|
||||||
// Ignore annotations and logprobs for now
|
|
||||||
chatcmpl_content.push_back({
|
|
||||||
{"text", output_text.at("text")},
|
|
||||||
{"type", "text"},
|
|
||||||
});
|
|
||||||
}
|
|
||||||
} else if (type == "refusal") {
|
|
||||||
if (!exists_and_is_string(output_text, "refusal")) {
|
|
||||||
throw std::invalid_argument("'Refusal' requires 'refusal'");
|
|
||||||
// Ignore annotations and logprobs for now
|
|
||||||
chatcmpl_content.push_back({
|
|
||||||
{"refusal", output_text.at("refusal")},
|
|
||||||
{"type", "refusal"},
|
|
||||||
});
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
throw std::invalid_argument("'type' must be one of 'output_text' or 'refusal'");
|
|
||||||
}
|
}
|
||||||
|
if (!exists_and_is_string(output_text, "text")) {
|
||||||
|
throw std::invalid_argument("'Output text' requires 'text'");
|
||||||
|
}
|
||||||
|
chatcmpl_content.push_back({
|
||||||
|
{"text", output_text.at("text")},
|
||||||
|
{"type", "text"},
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
if (merge_prev) {
|
if (merge_prev) {
|
||||||
|
|
@ -1303,7 +1312,9 @@ json convert_responses_to_chatcmpl(const json & response_body) {
|
||||||
prev_msg["content"] = json::array();
|
prev_msg["content"] = json::array();
|
||||||
}
|
}
|
||||||
auto & prev_content = prev_msg["content"];
|
auto & prev_content = prev_msg["content"];
|
||||||
prev_content.insert(prev_content.end(), chatcmpl_content.begin(), chatcmpl_content.end());
|
for (const auto & part : chatcmpl_content) {
|
||||||
|
prev_content.push_back(part);
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
item.erase("status");
|
item.erase("status");
|
||||||
item.erase("type");
|
item.erase("type");
|
||||||
|
|
@ -1407,11 +1418,17 @@ json convert_responses_to_chatcmpl(const json & response_body) {
|
||||||
}
|
}
|
||||||
std::vector<json> chatcmpl_tools;
|
std::vector<json> chatcmpl_tools;
|
||||||
for (json resp_tool : response_body.at("tools")) {
|
for (json resp_tool : response_body.at("tools")) {
|
||||||
json chatcmpl_tool;
|
const std::string tool_type = json_value(resp_tool, "type", std::string());
|
||||||
|
|
||||||
if (json_value(resp_tool, "type", std::string()) != "function") {
|
// Skip non-function tools (e.g. web_search, code_interpreter)
|
||||||
throw std::invalid_argument("'type' of tool must be 'function'");
|
// sent by clients like Codex CLI — these are provider-specific
|
||||||
|
// and cannot be converted to chat completions function tools
|
||||||
|
if (tool_type != "function") {
|
||||||
|
SRV_WRN("skipping unsupported tool type '%s' in Responses conversion\n", tool_type.c_str());
|
||||||
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
json chatcmpl_tool;
|
||||||
resp_tool.erase("type");
|
resp_tool.erase("type");
|
||||||
chatcmpl_tool["type"] = "function";
|
chatcmpl_tool["type"] = "function";
|
||||||
|
|
||||||
|
|
@ -1422,7 +1439,9 @@ json convert_responses_to_chatcmpl(const json & response_body) {
|
||||||
chatcmpl_tools.push_back(chatcmpl_tool);
|
chatcmpl_tools.push_back(chatcmpl_tool);
|
||||||
}
|
}
|
||||||
chatcmpl_body.erase("tools");
|
chatcmpl_body.erase("tools");
|
||||||
chatcmpl_body["tools"] = chatcmpl_tools;
|
if (!chatcmpl_tools.empty()) {
|
||||||
|
chatcmpl_body["tools"] = chatcmpl_tools;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (response_body.contains("max_output_tokens")) {
|
if (response_body.contains("max_output_tokens")) {
|
||||||
|
|
@ -1430,6 +1449,15 @@ json convert_responses_to_chatcmpl(const json & response_body) {
|
||||||
chatcmpl_body["max_tokens"] = response_body["max_output_tokens"];
|
chatcmpl_body["max_tokens"] = response_body["max_output_tokens"];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Strip Responses-only keys that have no chat completions equivalent
|
||||||
|
// (e.g. Codex CLI sends store, include, prompt_cache_key, web_search)
|
||||||
|
for (const char * key : {
|
||||||
|
"store", "include", "prompt_cache_key", "web_search",
|
||||||
|
"text", "truncation", "metadata",
|
||||||
|
}) {
|
||||||
|
chatcmpl_body.erase(key);
|
||||||
|
}
|
||||||
|
|
||||||
return chatcmpl_body;
|
return chatcmpl_body;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -960,28 +960,66 @@ json server_task_result_cmpl_final::to_json_oaicompat_resp() {
|
||||||
for (const common_chat_tool_call & tool_call : oaicompat_msg.tool_calls) {
|
for (const common_chat_tool_call & tool_call : oaicompat_msg.tool_calls) {
|
||||||
output.push_back(json {
|
output.push_back(json {
|
||||||
{"type", "function_call"},
|
{"type", "function_call"},
|
||||||
{"status", "completed"},
|
{"id", "fc_" + random_string()},
|
||||||
{"arguments", tool_call.arguments},
|
{"call_id", tool_call.id},
|
||||||
{"call_id", "fc_" + tool_call.id},
|
|
||||||
{"name", tool_call.name},
|
{"name", tool_call.name},
|
||||||
|
{"arguments", tool_call.arguments},
|
||||||
|
{"status", "completed"},
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Build output_text convenience field (concatenation of all output_text parts)
|
||||||
|
std::string output_text;
|
||||||
|
for (const auto & item : output) {
|
||||||
|
if (json_value(item, "type", std::string()) == "message") {
|
||||||
|
for (const auto & part : item.at("content")) {
|
||||||
|
if (json_value(part, "type", std::string()) == "output_text") {
|
||||||
|
output_text += part.at("text").get<std::string>();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
std::time_t t = std::time(0);
|
std::time_t t = std::time(0);
|
||||||
json res = {
|
json res = {
|
||||||
{"completed_at", t},
|
{"completed_at", t},
|
||||||
{"created_at", t},
|
{"created_at", t},
|
||||||
{"id", oai_resp_id},
|
{"id", oai_resp_id},
|
||||||
{"model", oaicompat_model},
|
{"model", oaicompat_model},
|
||||||
{"object", "response"},
|
{"object", "response"},
|
||||||
{"output", output},
|
{"output", output},
|
||||||
{"status", "completed"},
|
{"output_text", output_text},
|
||||||
{"usage", json {
|
{"status", "completed"},
|
||||||
{"input_tokens", n_prompt_tokens},
|
{"usage", json {
|
||||||
{"output_tokens", n_decoded},
|
{"input_tokens", n_prompt_tokens},
|
||||||
{"total_tokens", n_decoded + n_prompt_tokens},
|
{"output_tokens", n_decoded},
|
||||||
{"input_tokens_details", json { {"cached_tokens", n_prompt_tokens_cache} }},
|
{"total_tokens", n_decoded + n_prompt_tokens},
|
||||||
|
{"input_tokens_details", json{{"cached_tokens", n_prompt_tokens_cache}}},
|
||||||
|
{"output_tokens_details", json{{"reasoning_tokens", 0}}},
|
||||||
}},
|
}},
|
||||||
|
{"incomplete_details", nullptr},
|
||||||
|
{"previous_response_id", nullptr},
|
||||||
|
{"instructions", nullptr},
|
||||||
|
{"error", nullptr},
|
||||||
|
{"tools", json::array()},
|
||||||
|
{"tool_choice", "auto"},
|
||||||
|
{"truncation", "disabled"},
|
||||||
|
{"parallel_tool_calls", false},
|
||||||
|
{"text", json{{"format", json{{"type", "text"}}}}},
|
||||||
|
{"top_p", 1.0},
|
||||||
|
{"presence_penalty", 0.0},
|
||||||
|
{"frequency_penalty", 0.0},
|
||||||
|
{"top_logprobs", 0},
|
||||||
|
{"temperature", 1.0},
|
||||||
|
{"reasoning", nullptr},
|
||||||
|
{"max_output_tokens", nullptr},
|
||||||
|
{"max_tool_calls", nullptr},
|
||||||
|
{"store", false},
|
||||||
|
{"background", false},
|
||||||
|
{"service_tier", "default"},
|
||||||
|
{"safety_identifier", nullptr},
|
||||||
|
{"prompt_cache_key", nullptr},
|
||||||
|
{"metadata", json::object()},
|
||||||
};
|
};
|
||||||
|
|
||||||
return res;
|
return res;
|
||||||
|
|
@ -990,6 +1028,7 @@ json server_task_result_cmpl_final::to_json_oaicompat_resp() {
|
||||||
json server_task_result_cmpl_final::to_json_oaicompat_resp_stream() {
|
json server_task_result_cmpl_final::to_json_oaicompat_resp_stream() {
|
||||||
std::vector<json> server_sent_events;
|
std::vector<json> server_sent_events;
|
||||||
std::vector<json> output;
|
std::vector<json> output;
|
||||||
|
int & seq_num = oai_resp_seq_num;
|
||||||
|
|
||||||
if (oaicompat_msg.reasoning_content != "") {
|
if (oaicompat_msg.reasoning_content != "") {
|
||||||
const json output_item = json {
|
const json output_item = json {
|
||||||
|
|
@ -1006,8 +1045,10 @@ json server_task_result_cmpl_final::to_json_oaicompat_resp_stream() {
|
||||||
server_sent_events.push_back(json {
|
server_sent_events.push_back(json {
|
||||||
{"event", "response.output_item.done"},
|
{"event", "response.output_item.done"},
|
||||||
{"data", json {
|
{"data", json {
|
||||||
{"type", "response.output_item.done"},
|
{"type", "response.output_item.done"},
|
||||||
{"item", output_item}
|
{"sequence_number", seq_num++},
|
||||||
|
{"output_index", 0},
|
||||||
|
{"item", output_item},
|
||||||
}}
|
}}
|
||||||
});
|
});
|
||||||
output.push_back(output_item);
|
output.push_back(output_item);
|
||||||
|
|
@ -1017,9 +1058,13 @@ json server_task_result_cmpl_final::to_json_oaicompat_resp_stream() {
|
||||||
server_sent_events.push_back(json {
|
server_sent_events.push_back(json {
|
||||||
{"event", "response.output_text.done"},
|
{"event", "response.output_text.done"},
|
||||||
{"data", json {
|
{"data", json {
|
||||||
{"type", "response.output_text.done"},
|
{"type", "response.output_text.done"},
|
||||||
{"item_id", oai_resp_message_id},
|
{"sequence_number", seq_num++},
|
||||||
{"text", oaicompat_msg.content}
|
{"output_index", 0},
|
||||||
|
{"content_index", 0},
|
||||||
|
{"item_id", oai_resp_message_id},
|
||||||
|
{"text", oaicompat_msg.content},
|
||||||
|
{"logprobs", json::array()},
|
||||||
}}
|
}}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|
@ -1033,9 +1078,12 @@ json server_task_result_cmpl_final::to_json_oaicompat_resp_stream() {
|
||||||
server_sent_events.push_back(json {
|
server_sent_events.push_back(json {
|
||||||
{"event", "response.content_part.done"},
|
{"event", "response.content_part.done"},
|
||||||
{"data", json {
|
{"data", json {
|
||||||
{"type", "response.content_part.done"},
|
{"type", "response.content_part.done"},
|
||||||
{"item_id", oai_resp_message_id},
|
{"sequence_number", seq_num++},
|
||||||
{"part", content_part}
|
{"output_index", 0},
|
||||||
|
{"content_index", 0},
|
||||||
|
{"item_id", oai_resp_message_id},
|
||||||
|
{"part", content_part},
|
||||||
}}
|
}}
|
||||||
});
|
});
|
||||||
const json output_item = {
|
const json output_item = {
|
||||||
|
|
@ -1049,8 +1097,10 @@ json server_task_result_cmpl_final::to_json_oaicompat_resp_stream() {
|
||||||
server_sent_events.push_back(json {
|
server_sent_events.push_back(json {
|
||||||
{"event", "response.output_item.done"},
|
{"event", "response.output_item.done"},
|
||||||
{"data", json {
|
{"data", json {
|
||||||
{"type", "response.output_item.done"},
|
{"type", "response.output_item.done"},
|
||||||
{"item", output_item}
|
{"sequence_number", seq_num++},
|
||||||
|
{"output_index", 0},
|
||||||
|
{"item", output_item},
|
||||||
}}
|
}}
|
||||||
});
|
});
|
||||||
output.push_back(output_item);
|
output.push_back(output_item);
|
||||||
|
|
@ -1059,39 +1109,81 @@ json server_task_result_cmpl_final::to_json_oaicompat_resp_stream() {
|
||||||
for (const common_chat_tool_call & tool_call : oaicompat_msg.tool_calls) {
|
for (const common_chat_tool_call & tool_call : oaicompat_msg.tool_calls) {
|
||||||
const json output_item = {
|
const json output_item = {
|
||||||
{"type", "function_call"},
|
{"type", "function_call"},
|
||||||
{"status", "completed"},
|
{"id", "fc_" + random_string()},
|
||||||
|
{"call_id", tool_call.id},
|
||||||
|
{"name", tool_call.name},
|
||||||
{"arguments", tool_call.arguments},
|
{"arguments", tool_call.arguments},
|
||||||
{"call_id", "fc_" + tool_call.id},
|
{"status", "completed"},
|
||||||
{"name", tool_call.name}
|
|
||||||
};
|
};
|
||||||
server_sent_events.push_back(json {
|
server_sent_events.push_back(json {
|
||||||
{"event", "response.output_item.done"},
|
{"event", "response.output_item.done"},
|
||||||
{"data", json {
|
{"data", json {
|
||||||
{"type", "response.output_item.done"},
|
{"type", "response.output_item.done"},
|
||||||
{"item", output_item}
|
{"sequence_number", seq_num++},
|
||||||
|
{"output_index", 0},
|
||||||
|
{"item", output_item},
|
||||||
}}
|
}}
|
||||||
});
|
});
|
||||||
output.push_back(output_item);
|
output.push_back(output_item);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Build output_text convenience field for streaming final event
|
||||||
|
std::string output_text_stream;
|
||||||
|
for (const auto & item : output) {
|
||||||
|
if (json_value(item, "type", std::string()) == "message") {
|
||||||
|
for (const auto & part : item.at("content")) {
|
||||||
|
if (json_value(part, "type", std::string()) == "output_text") {
|
||||||
|
output_text_stream += part.at("text").get<std::string>();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
std::time_t t = std::time(0);
|
std::time_t t = std::time(0);
|
||||||
server_sent_events.push_back(json {
|
server_sent_events.push_back(json {
|
||||||
{"event", "response.completed"},
|
{"event", "response.completed"},
|
||||||
{"data", json {
|
{"data", json {
|
||||||
{"type", "response.completed"},
|
{"type", "response.completed"},
|
||||||
|
{"sequence_number", seq_num++},
|
||||||
{"response", json {
|
{"response", json {
|
||||||
{"id", oai_resp_id},
|
{"completed_at", t},
|
||||||
{"object", "response"},
|
{"created_at", t},
|
||||||
{"created_at", t},
|
{"id", oai_resp_id},
|
||||||
{"status", "completed"},
|
{"object", "response"},
|
||||||
{"model", oaicompat_model},
|
{"status", "completed"},
|
||||||
{"output", output},
|
{"model", oaicompat_model},
|
||||||
{"usage", json {
|
{"output", output},
|
||||||
{"input_tokens", n_prompt_tokens},
|
{"output_text", output_text_stream},
|
||||||
{"output_tokens", n_decoded},
|
{"usage", json {
|
||||||
{"total_tokens", n_decoded + n_prompt_tokens},
|
{"input_tokens", n_prompt_tokens},
|
||||||
{"input_tokens_details", json { {"cached_tokens", n_prompt_tokens_cache} }},
|
{"output_tokens", n_decoded},
|
||||||
}}
|
{"total_tokens", n_decoded + n_prompt_tokens},
|
||||||
|
{"input_tokens_details", json{{"cached_tokens", n_prompt_tokens_cache}}},
|
||||||
|
{"output_tokens_details", json{{"reasoning_tokens", 0}}},
|
||||||
|
}},
|
||||||
|
{"incomplete_details", nullptr},
|
||||||
|
{"previous_response_id", nullptr},
|
||||||
|
{"instructions", nullptr},
|
||||||
|
{"error", nullptr},
|
||||||
|
{"tools", json::array()},
|
||||||
|
{"tool_choice", "auto"},
|
||||||
|
{"truncation", "disabled"},
|
||||||
|
{"parallel_tool_calls", false},
|
||||||
|
{"text", json{{"format", json{{"type", "text"}}}}},
|
||||||
|
{"top_p", 1.0},
|
||||||
|
{"presence_penalty", 0.0},
|
||||||
|
{"frequency_penalty", 0.0},
|
||||||
|
{"top_logprobs", 0},
|
||||||
|
{"temperature", 1.0},
|
||||||
|
{"reasoning", nullptr},
|
||||||
|
{"max_output_tokens", nullptr},
|
||||||
|
{"max_tool_calls", nullptr},
|
||||||
|
{"store", false},
|
||||||
|
{"background", false},
|
||||||
|
{"service_tier", "default"},
|
||||||
|
{"safety_identifier", nullptr},
|
||||||
|
{"prompt_cache_key", nullptr},
|
||||||
|
{"metadata", json::object()},
|
||||||
}},
|
}},
|
||||||
}}
|
}}
|
||||||
});
|
});
|
||||||
|
|
|
||||||
|
|
@ -370,6 +370,7 @@ struct server_task_result_cmpl_final : server_task_result {
|
||||||
std::string oai_resp_id;
|
std::string oai_resp_id;
|
||||||
std::string oai_resp_reasoning_id;
|
std::string oai_resp_reasoning_id;
|
||||||
std::string oai_resp_message_id;
|
std::string oai_resp_message_id;
|
||||||
|
int oai_resp_seq_num = 0;
|
||||||
|
|
||||||
virtual bool is_stop() override {
|
virtual bool is_stop() override {
|
||||||
return true; // in stream mode, final responses are considered stop
|
return true; // in stream mode, final responses are considered stop
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue