server: improve Responses API compliance and Codex CLI compatibility

Codex CLI compatibility:
- Skip non-function tool types (web_search, code_interpreter)
- Merge developer/system messages into position 0 for Qwen templates
- Strip Responses-only request keys (store, include, prompt_cache_key)
- Add output_text convenience field in streaming and non-streaming responses

Responses API compliance (ideas from #19720 by riskywindow, adapted):
- Add 24 missing Response object fields per OpenAI spec
- Fix function_call id/call_id field mapping
- Add sequence_number, output_index, content_index to streaming events
- Accept input_text type and EasyInputMessage for multi-turn input

Verified: codex -p local and codex -p fast both work against a local
llama.cpp server with Qwen3.5 models, including native tool calling.

Refs: ggml-org/llama.cpp#19138, ggml-org/llama.cpp#19720
This commit is contained in:
Christopher Albert 2026-03-30 09:39:59 +02:00
parent 7c203670f8
commit 302c3c8f61
3 changed files with 192 additions and 71 deletions

View File

@ -1255,6 +1255,25 @@ json convert_responses_to_chatcmpl(const json & response_body) {
if (item.contains("status")) { if (item.contains("status")) {
item.erase("status"); item.erase("status");
} }
// Merge system/developer messages into the first system message.
// Many model templates (e.g. Qwen) require all system content at
// position 0 and reject system messages elsewhere in the conversation.
if (item.at("role") == "system" || item.at("role") == "developer") {
if (!chatcmpl_messages.empty() && chatcmpl_messages[0].value("role", "") == "system") {
auto & first_msg = chatcmpl_messages[0];
// Convert string content to array format if needed
if (first_msg["content"].is_string()) {
std::string old_text = first_msg["content"].get<std::string>();
first_msg["content"] = json::array({json{{"text", old_text}, {"type", "text"}}});
}
auto & first_content = first_msg["content"];
for (const auto & part : chatcmpl_content) {
first_content.push_back(part);
}
continue; // merged, don't push a separate message
}
item["role"] = "system";
}
item["content"] = chatcmpl_content; item["content"] = chatcmpl_content;
chatcmpl_messages.push_back(item); chatcmpl_messages.push_back(item);
@ -1266,35 +1285,25 @@ json convert_responses_to_chatcmpl(const json & response_body) {
// item.at("status") == "completed" || // item.at("status") == "completed" ||
// item.at("status") == "incomplete") && // item.at("status") == "incomplete") &&
// item["status"] not sent by codex-cli // item["status"] not sent by codex-cli
exists_and_is_string(item, "type") && // item["type"] == "message" for OutputMessage, absent for EasyInputMessage
item.at("type") == "message" (!item.contains("type") || item.at("type") == "message")
) { ) {
// #responses_create-input-input_item_list-item-output_message // #responses_create-input-input_item_list-item-output_message
auto chatcmpl_content = json::array(); // Also handles AssistantMessageItemParam / EasyInputMessage with role "assistant"
std::vector<json> chatcmpl_content;
for (const auto & output_text : item.at("content")) { for (const auto & output_text : item.at("content")) {
const std::string type = json_value(output_text, "type", std::string()); const std::string type = json_value(output_text, "type", std::string());
if (type == "output_text") { if (type != "output_text" && type != "input_text") {
if (!exists_and_is_string(output_text, "text")) { throw std::invalid_argument("'type' must be 'output_text' or 'input_text'");
throw std::invalid_argument("'Output text' requires 'text'");
// Ignore annotations and logprobs for now
chatcmpl_content.push_back({
{"text", output_text.at("text")},
{"type", "text"},
});
}
} else if (type == "refusal") {
if (!exists_and_is_string(output_text, "refusal")) {
throw std::invalid_argument("'Refusal' requires 'refusal'");
// Ignore annotations and logprobs for now
chatcmpl_content.push_back({
{"refusal", output_text.at("refusal")},
{"type", "refusal"},
});
}
} else {
throw std::invalid_argument("'type' must be one of 'output_text' or 'refusal'");
} }
if (!exists_and_is_string(output_text, "text")) {
throw std::invalid_argument("'Output text' requires 'text'");
}
chatcmpl_content.push_back({
{"text", output_text.at("text")},
{"type", "text"},
});
} }
if (merge_prev) { if (merge_prev) {
@ -1303,7 +1312,9 @@ json convert_responses_to_chatcmpl(const json & response_body) {
prev_msg["content"] = json::array(); prev_msg["content"] = json::array();
} }
auto & prev_content = prev_msg["content"]; auto & prev_content = prev_msg["content"];
prev_content.insert(prev_content.end(), chatcmpl_content.begin(), chatcmpl_content.end()); for (const auto & part : chatcmpl_content) {
prev_content.push_back(part);
}
} else { } else {
item.erase("status"); item.erase("status");
item.erase("type"); item.erase("type");
@ -1407,11 +1418,17 @@ json convert_responses_to_chatcmpl(const json & response_body) {
} }
std::vector<json> chatcmpl_tools; std::vector<json> chatcmpl_tools;
for (json resp_tool : response_body.at("tools")) { for (json resp_tool : response_body.at("tools")) {
json chatcmpl_tool; const std::string tool_type = json_value(resp_tool, "type", std::string());
if (json_value(resp_tool, "type", std::string()) != "function") { // Skip non-function tools (e.g. web_search, code_interpreter)
throw std::invalid_argument("'type' of tool must be 'function'"); // sent by clients like Codex CLI — these are provider-specific
// and cannot be converted to chat completions function tools
if (tool_type != "function") {
SRV_WRN("skipping unsupported tool type '%s' in Responses conversion\n", tool_type.c_str());
continue;
} }
json chatcmpl_tool;
resp_tool.erase("type"); resp_tool.erase("type");
chatcmpl_tool["type"] = "function"; chatcmpl_tool["type"] = "function";
@ -1422,7 +1439,9 @@ json convert_responses_to_chatcmpl(const json & response_body) {
chatcmpl_tools.push_back(chatcmpl_tool); chatcmpl_tools.push_back(chatcmpl_tool);
} }
chatcmpl_body.erase("tools"); chatcmpl_body.erase("tools");
chatcmpl_body["tools"] = chatcmpl_tools; if (!chatcmpl_tools.empty()) {
chatcmpl_body["tools"] = chatcmpl_tools;
}
} }
if (response_body.contains("max_output_tokens")) { if (response_body.contains("max_output_tokens")) {
@ -1430,6 +1449,15 @@ json convert_responses_to_chatcmpl(const json & response_body) {
chatcmpl_body["max_tokens"] = response_body["max_output_tokens"]; chatcmpl_body["max_tokens"] = response_body["max_output_tokens"];
} }
// Strip Responses-only keys that have no chat completions equivalent
// (e.g. Codex CLI sends store, include, prompt_cache_key, web_search)
for (const char * key : {
"store", "include", "prompt_cache_key", "web_search",
"text", "truncation", "metadata",
}) {
chatcmpl_body.erase(key);
}
return chatcmpl_body; return chatcmpl_body;
} }

View File

@ -960,28 +960,66 @@ json server_task_result_cmpl_final::to_json_oaicompat_resp() {
for (const common_chat_tool_call & tool_call : oaicompat_msg.tool_calls) { for (const common_chat_tool_call & tool_call : oaicompat_msg.tool_calls) {
output.push_back(json { output.push_back(json {
{"type", "function_call"}, {"type", "function_call"},
{"status", "completed"}, {"id", "fc_" + random_string()},
{"arguments", tool_call.arguments}, {"call_id", tool_call.id},
{"call_id", "fc_" + tool_call.id},
{"name", tool_call.name}, {"name", tool_call.name},
{"arguments", tool_call.arguments},
{"status", "completed"},
}); });
} }
// Build output_text convenience field (concatenation of all output_text parts)
std::string output_text;
for (const auto & item : output) {
if (json_value(item, "type", std::string()) == "message") {
for (const auto & part : item.at("content")) {
if (json_value(part, "type", std::string()) == "output_text") {
output_text += part.at("text").get<std::string>();
}
}
}
}
std::time_t t = std::time(0); std::time_t t = std::time(0);
json res = { json res = {
{"completed_at", t}, {"completed_at", t},
{"created_at", t}, {"created_at", t},
{"id", oai_resp_id}, {"id", oai_resp_id},
{"model", oaicompat_model}, {"model", oaicompat_model},
{"object", "response"}, {"object", "response"},
{"output", output}, {"output", output},
{"status", "completed"}, {"output_text", output_text},
{"usage", json { {"status", "completed"},
{"input_tokens", n_prompt_tokens}, {"usage", json {
{"output_tokens", n_decoded}, {"input_tokens", n_prompt_tokens},
{"total_tokens", n_decoded + n_prompt_tokens}, {"output_tokens", n_decoded},
{"input_tokens_details", json { {"cached_tokens", n_prompt_tokens_cache} }}, {"total_tokens", n_decoded + n_prompt_tokens},
{"input_tokens_details", json{{"cached_tokens", n_prompt_tokens_cache}}},
{"output_tokens_details", json{{"reasoning_tokens", 0}}},
}}, }},
{"incomplete_details", nullptr},
{"previous_response_id", nullptr},
{"instructions", nullptr},
{"error", nullptr},
{"tools", json::array()},
{"tool_choice", "auto"},
{"truncation", "disabled"},
{"parallel_tool_calls", false},
{"text", json{{"format", json{{"type", "text"}}}}},
{"top_p", 1.0},
{"presence_penalty", 0.0},
{"frequency_penalty", 0.0},
{"top_logprobs", 0},
{"temperature", 1.0},
{"reasoning", nullptr},
{"max_output_tokens", nullptr},
{"max_tool_calls", nullptr},
{"store", false},
{"background", false},
{"service_tier", "default"},
{"safety_identifier", nullptr},
{"prompt_cache_key", nullptr},
{"metadata", json::object()},
}; };
return res; return res;
@ -990,6 +1028,7 @@ json server_task_result_cmpl_final::to_json_oaicompat_resp() {
json server_task_result_cmpl_final::to_json_oaicompat_resp_stream() { json server_task_result_cmpl_final::to_json_oaicompat_resp_stream() {
std::vector<json> server_sent_events; std::vector<json> server_sent_events;
std::vector<json> output; std::vector<json> output;
int & seq_num = oai_resp_seq_num;
if (oaicompat_msg.reasoning_content != "") { if (oaicompat_msg.reasoning_content != "") {
const json output_item = json { const json output_item = json {
@ -1006,8 +1045,10 @@ json server_task_result_cmpl_final::to_json_oaicompat_resp_stream() {
server_sent_events.push_back(json { server_sent_events.push_back(json {
{"event", "response.output_item.done"}, {"event", "response.output_item.done"},
{"data", json { {"data", json {
{"type", "response.output_item.done"}, {"type", "response.output_item.done"},
{"item", output_item} {"sequence_number", seq_num++},
{"output_index", 0},
{"item", output_item},
}} }}
}); });
output.push_back(output_item); output.push_back(output_item);
@ -1017,9 +1058,13 @@ json server_task_result_cmpl_final::to_json_oaicompat_resp_stream() {
server_sent_events.push_back(json { server_sent_events.push_back(json {
{"event", "response.output_text.done"}, {"event", "response.output_text.done"},
{"data", json { {"data", json {
{"type", "response.output_text.done"}, {"type", "response.output_text.done"},
{"item_id", oai_resp_message_id}, {"sequence_number", seq_num++},
{"text", oaicompat_msg.content} {"output_index", 0},
{"content_index", 0},
{"item_id", oai_resp_message_id},
{"text", oaicompat_msg.content},
{"logprobs", json::array()},
}} }}
}); });
@ -1033,9 +1078,12 @@ json server_task_result_cmpl_final::to_json_oaicompat_resp_stream() {
server_sent_events.push_back(json { server_sent_events.push_back(json {
{"event", "response.content_part.done"}, {"event", "response.content_part.done"},
{"data", json { {"data", json {
{"type", "response.content_part.done"}, {"type", "response.content_part.done"},
{"item_id", oai_resp_message_id}, {"sequence_number", seq_num++},
{"part", content_part} {"output_index", 0},
{"content_index", 0},
{"item_id", oai_resp_message_id},
{"part", content_part},
}} }}
}); });
const json output_item = { const json output_item = {
@ -1049,8 +1097,10 @@ json server_task_result_cmpl_final::to_json_oaicompat_resp_stream() {
server_sent_events.push_back(json { server_sent_events.push_back(json {
{"event", "response.output_item.done"}, {"event", "response.output_item.done"},
{"data", json { {"data", json {
{"type", "response.output_item.done"}, {"type", "response.output_item.done"},
{"item", output_item} {"sequence_number", seq_num++},
{"output_index", 0},
{"item", output_item},
}} }}
}); });
output.push_back(output_item); output.push_back(output_item);
@ -1059,39 +1109,81 @@ json server_task_result_cmpl_final::to_json_oaicompat_resp_stream() {
for (const common_chat_tool_call & tool_call : oaicompat_msg.tool_calls) { for (const common_chat_tool_call & tool_call : oaicompat_msg.tool_calls) {
const json output_item = { const json output_item = {
{"type", "function_call"}, {"type", "function_call"},
{"status", "completed"}, {"id", "fc_" + random_string()},
{"call_id", tool_call.id},
{"name", tool_call.name},
{"arguments", tool_call.arguments}, {"arguments", tool_call.arguments},
{"call_id", "fc_" + tool_call.id}, {"status", "completed"},
{"name", tool_call.name}
}; };
server_sent_events.push_back(json { server_sent_events.push_back(json {
{"event", "response.output_item.done"}, {"event", "response.output_item.done"},
{"data", json { {"data", json {
{"type", "response.output_item.done"}, {"type", "response.output_item.done"},
{"item", output_item} {"sequence_number", seq_num++},
{"output_index", 0},
{"item", output_item},
}} }}
}); });
output.push_back(output_item); output.push_back(output_item);
} }
// Build output_text convenience field for streaming final event
std::string output_text_stream;
for (const auto & item : output) {
if (json_value(item, "type", std::string()) == "message") {
for (const auto & part : item.at("content")) {
if (json_value(part, "type", std::string()) == "output_text") {
output_text_stream += part.at("text").get<std::string>();
}
}
}
}
std::time_t t = std::time(0); std::time_t t = std::time(0);
server_sent_events.push_back(json { server_sent_events.push_back(json {
{"event", "response.completed"}, {"event", "response.completed"},
{"data", json { {"data", json {
{"type", "response.completed"}, {"type", "response.completed"},
{"sequence_number", seq_num++},
{"response", json { {"response", json {
{"id", oai_resp_id}, {"completed_at", t},
{"object", "response"}, {"created_at", t},
{"created_at", t}, {"id", oai_resp_id},
{"status", "completed"}, {"object", "response"},
{"model", oaicompat_model}, {"status", "completed"},
{"output", output}, {"model", oaicompat_model},
{"usage", json { {"output", output},
{"input_tokens", n_prompt_tokens}, {"output_text", output_text_stream},
{"output_tokens", n_decoded}, {"usage", json {
{"total_tokens", n_decoded + n_prompt_tokens}, {"input_tokens", n_prompt_tokens},
{"input_tokens_details", json { {"cached_tokens", n_prompt_tokens_cache} }}, {"output_tokens", n_decoded},
}} {"total_tokens", n_decoded + n_prompt_tokens},
{"input_tokens_details", json{{"cached_tokens", n_prompt_tokens_cache}}},
{"output_tokens_details", json{{"reasoning_tokens", 0}}},
}},
{"incomplete_details", nullptr},
{"previous_response_id", nullptr},
{"instructions", nullptr},
{"error", nullptr},
{"tools", json::array()},
{"tool_choice", "auto"},
{"truncation", "disabled"},
{"parallel_tool_calls", false},
{"text", json{{"format", json{{"type", "text"}}}}},
{"top_p", 1.0},
{"presence_penalty", 0.0},
{"frequency_penalty", 0.0},
{"top_logprobs", 0},
{"temperature", 1.0},
{"reasoning", nullptr},
{"max_output_tokens", nullptr},
{"max_tool_calls", nullptr},
{"store", false},
{"background", false},
{"service_tier", "default"},
{"safety_identifier", nullptr},
{"prompt_cache_key", nullptr},
{"metadata", json::object()},
}}, }},
}} }}
}); });

View File

@ -370,6 +370,7 @@ struct server_task_result_cmpl_final : server_task_result {
std::string oai_resp_id; std::string oai_resp_id;
std::string oai_resp_reasoning_id; std::string oai_resp_reasoning_id;
std::string oai_resp_message_id; std::string oai_resp_message_id;
int oai_resp_seq_num = 0;
virtual bool is_stop() override { virtual bool is_stop() override {
return true; // in stream mode, final responses are considered stop return true; // in stream mode, final responses are considered stop