diff --git a/tools/server/server-common.cpp b/tools/server/server-common.cpp index ed5e306fc5..974823017b 100644 --- a/tools/server/server-common.cpp +++ b/tools/server/server-common.cpp @@ -1255,6 +1255,25 @@ json convert_responses_to_chatcmpl(const json & response_body) { if (item.contains("status")) { item.erase("status"); } + // Merge system/developer messages into the first system message. + // Many model templates (e.g. Qwen) require all system content at + // position 0 and reject system messages elsewhere in the conversation. + if (item.at("role") == "system" || item.at("role") == "developer") { + if (!chatcmpl_messages.empty() && chatcmpl_messages[0].value("role", "") == "system") { + auto & first_msg = chatcmpl_messages[0]; + // Convert string content to array format if needed + if (first_msg["content"].is_string()) { + std::string old_text = first_msg["content"].get<std::string>(); + first_msg["content"] = json::array({json{{"text", old_text}, {"type", "text"}}}); + } + auto & first_content = first_msg["content"]; + for (const auto & part : chatcmpl_content) { + first_content.push_back(part); + } + continue; // merged, don't push a separate message + } + item["role"] = "system"; + } item["content"] = chatcmpl_content; chatcmpl_messages.push_back(item); @@ -1266,35 +1285,25 @@ json convert_responses_to_chatcmpl(const json & response_body) { // item.at("status") == "completed" || // item.at("status") == "incomplete") && // item["status"] not sent by codex-cli - exists_and_is_string(item, "type") && - item.at("type") == "message" + // item["type"] == "message" for OutputMessage, absent for EasyInputMessage + (!item.contains("type") || item.at("type") == "message") ) { // #responses_create-input-input_item_list-item-output_message - auto chatcmpl_content = json::array(); + // Also handles AssistantMessageItemParam / EasyInputMessage with role "assistant" + std::vector<json> chatcmpl_content; for (const auto & output_text : item.at("content")) { const std::string type = 
json_value(output_text, "type", std::string()); - if (type == "output_text") { - if (!exists_and_is_string(output_text, "text")) { - throw std::invalid_argument("'Output text' requires 'text'"); - // Ignore annotations and logprobs for now - chatcmpl_content.push_back({ - {"text", output_text.at("text")}, - {"type", "text"}, - }); - } - } else if (type == "refusal") { - if (!exists_and_is_string(output_text, "refusal")) { - throw std::invalid_argument("'Refusal' requires 'refusal'"); - // Ignore annotations and logprobs for now - chatcmpl_content.push_back({ - {"refusal", output_text.at("refusal")}, - {"type", "refusal"}, - }); - } - } else { - throw std::invalid_argument("'type' must be one of 'output_text' or 'refusal'"); + if (type != "output_text" && type != "input_text") { + throw std::invalid_argument("'type' must be 'output_text' or 'input_text'"); } + if (!exists_and_is_string(output_text, "text")) { + throw std::invalid_argument("'Output text' requires 'text'"); + } + chatcmpl_content.push_back({ + {"text", output_text.at("text")}, + {"type", "text"}, + }); } if (merge_prev) { @@ -1303,7 +1312,9 @@ json convert_responses_to_chatcmpl(const json & response_body) { prev_msg["content"] = json::array(); } auto & prev_content = prev_msg["content"]; - prev_content.insert(prev_content.end(), chatcmpl_content.begin(), chatcmpl_content.end()); + for (const auto & part : chatcmpl_content) { + prev_content.push_back(part); + } } else { item.erase("status"); item.erase("type"); @@ -1407,11 +1418,17 @@ json convert_responses_to_chatcmpl(const json & response_body) { } std::vector<json> chatcmpl_tools; for (json resp_tool : response_body.at("tools")) { - json chatcmpl_tool; + const std::string tool_type = json_value(resp_tool, "type", std::string()); - if (json_value(resp_tool, "type", std::string()) != "function") { - throw std::invalid_argument("'type' of tool must be 'function'"); + // Skip non-function tools (e.g. 
web_search, code_interpreter) + // sent by clients like Codex CLI — these are provider-specific + // and cannot be converted to chat completions function tools + if (tool_type != "function") { + SRV_WRN("skipping unsupported tool type '%s' in Responses conversion\n", tool_type.c_str()); + continue; } + + json chatcmpl_tool; resp_tool.erase("type"); chatcmpl_tool["type"] = "function"; @@ -1422,7 +1439,9 @@ json convert_responses_to_chatcmpl(const json & response_body) { chatcmpl_tools.push_back(chatcmpl_tool); } chatcmpl_body.erase("tools"); - chatcmpl_body["tools"] = chatcmpl_tools; + if (!chatcmpl_tools.empty()) { + chatcmpl_body["tools"] = chatcmpl_tools; + } } if (response_body.contains("max_output_tokens")) { @@ -1430,6 +1449,15 @@ json convert_responses_to_chatcmpl(const json & response_body) { chatcmpl_body["max_tokens"] = response_body["max_output_tokens"]; } + // Strip Responses-only keys that have no chat completions equivalent + // (e.g. Codex CLI sends store, include, prompt_cache_key, web_search) + for (const char * key : { + "store", "include", "prompt_cache_key", "web_search", + "text", "truncation", "metadata", + }) { + chatcmpl_body.erase(key); + } + return chatcmpl_body; } diff --git a/tools/server/server-task.cpp b/tools/server/server-task.cpp index 3018ac90f8..96a7e3cb33 100644 --- a/tools/server/server-task.cpp +++ b/tools/server/server-task.cpp @@ -960,28 +960,66 @@ json server_task_result_cmpl_final::to_json_oaicompat_resp() { for (const common_chat_tool_call & tool_call : oaicompat_msg.tool_calls) { output.push_back(json { {"type", "function_call"}, - {"status", "completed"}, - {"arguments", tool_call.arguments}, - {"call_id", "fc_" + tool_call.id}, + {"id", "fc_" + random_string()}, + {"call_id", tool_call.id}, {"name", tool_call.name}, + {"arguments", tool_call.arguments}, + {"status", "completed"}, }); } + // Build output_text convenience field (concatenation of all output_text parts) + std::string output_text; + for (const auto & item : 
output) { + if (json_value(item, "type", std::string()) == "message") { + for (const auto & part : item.at("content")) { + if (json_value(part, "type", std::string()) == "output_text") { + output_text += part.at("text").get<std::string>(); + } + } + } + } + + std::time_t t = std::time(0); json res = { - {"completed_at", t}, - {"created_at", t}, - {"id", oai_resp_id}, - {"model", oaicompat_model}, - {"object", "response"}, - {"output", output}, - {"status", "completed"}, - {"usage", json { - {"input_tokens", n_prompt_tokens}, - {"output_tokens", n_decoded}, - {"total_tokens", n_decoded + n_prompt_tokens}, - {"input_tokens_details", json { {"cached_tokens", n_prompt_tokens_cache} }}, + {"completed_at", t}, + {"created_at", t}, + {"id", oai_resp_id}, + {"model", oaicompat_model}, + {"object", "response"}, + {"output", output}, + {"output_text", output_text}, + {"status", "completed"}, + {"usage", json { + {"input_tokens", n_prompt_tokens}, + {"output_tokens", n_decoded}, + {"total_tokens", n_decoded + n_prompt_tokens}, + {"input_tokens_details", json{{"cached_tokens", n_prompt_tokens_cache}}}, + {"output_tokens_details", json{{"reasoning_tokens", 0}}}, }}, + {"incomplete_details", nullptr}, + {"previous_response_id", nullptr}, + {"instructions", nullptr}, + {"error", nullptr}, + {"tools", json::array()}, + {"tool_choice", "auto"}, + {"truncation", "disabled"}, + {"parallel_tool_calls", false}, + {"text", json{{"format", json{{"type", "text"}}}}}, + {"top_p", 1.0}, + {"presence_penalty", 0.0}, + {"frequency_penalty", 0.0}, + {"top_logprobs", 0}, + {"temperature", 1.0}, + {"reasoning", nullptr}, + {"max_output_tokens", nullptr}, + {"max_tool_calls", nullptr}, + {"store", false}, + {"background", false}, + {"service_tier", "default"}, + {"safety_identifier", nullptr}, + {"prompt_cache_key", nullptr}, + {"metadata", json::object()}, }; return res; @@ -990,6 +1028,7 @@ json 
server_task_result_cmpl_final::to_json_oaicompat_resp_stream() { std::vector<json> server_sent_events; std::vector<json> output; + int & seq_num = oai_resp_seq_num; if (oaicompat_msg.reasoning_content != "") { const json output_item = json { @@ -1006,8 +1045,10 @@ json server_task_result_cmpl_final::to_json_oaicompat_resp_stream() { server_sent_events.push_back(json { {"event", "response.output_item.done"}, {"data", json { - {"type", "response.output_item.done"}, - {"item", output_item} + {"type", "response.output_item.done"}, + {"sequence_number", seq_num++}, + {"output_index", 0}, + {"item", output_item}, }} }); output.push_back(output_item); @@ -1017,9 +1058,13 @@ json server_task_result_cmpl_final::to_json_oaicompat_resp_stream() { server_sent_events.push_back(json { {"event", "response.output_text.done"}, {"data", json { - {"type", "response.output_text.done"}, - {"item_id", oai_resp_message_id}, - {"text", oaicompat_msg.content} + {"type", "response.output_text.done"}, + {"sequence_number", seq_num++}, + {"output_index", 0}, + {"content_index", 0}, + {"item_id", oai_resp_message_id}, + {"text", oaicompat_msg.content}, + {"logprobs", json::array()}, }} }); @@ -1033,9 +1078,12 @@ json server_task_result_cmpl_final::to_json_oaicompat_resp_stream() { server_sent_events.push_back(json { {"event", "response.content_part.done"}, {"data", json { - {"type", "response.content_part.done"}, - {"item_id", oai_resp_message_id}, - {"part", content_part} + {"type", "response.content_part.done"}, + {"sequence_number", seq_num++}, + {"output_index", 0}, + {"content_index", 0}, + {"item_id", oai_resp_message_id}, + {"part", content_part}, }} }); const json output_item = { @@ -1049,8 +1097,10 @@ json server_task_result_cmpl_final::to_json_oaicompat_resp_stream() { server_sent_events.push_back(json { {"event", "response.output_item.done"}, {"data", json { - {"type", "response.output_item.done"}, - {"item", output_item} + {"type", "response.output_item.done"}, + {"sequence_number", 
seq_num++}, + {"output_index", 0}, + {"item", output_item}, }} }); output.push_back(output_item); @@ -1059,39 +1109,81 @@ json server_task_result_cmpl_final::to_json_oaicompat_resp_stream() { for (const common_chat_tool_call & tool_call : oaicompat_msg.tool_calls) { const json output_item = { {"type", "function_call"}, - {"status", "completed"}, + {"id", "fc_" + random_string()}, + {"call_id", tool_call.id}, + {"name", tool_call.name}, {"arguments", tool_call.arguments}, - {"call_id", "fc_" + tool_call.id}, - {"name", tool_call.name} + {"status", "completed"}, }; server_sent_events.push_back(json { {"event", "response.output_item.done"}, {"data", json { - {"type", "response.output_item.done"}, - {"item", output_item} + {"type", "response.output_item.done"}, + {"sequence_number", seq_num++}, + {"output_index", 0}, + {"item", output_item}, }} }); output.push_back(output_item); } + // Build output_text convenience field for streaming final event + std::string output_text_stream; + for (const auto & item : output) { + if (json_value(item, "type", std::string()) == "message") { + for (const auto & part : item.at("content")) { + if (json_value(part, "type", std::string()) == "output_text") { + output_text_stream += part.at("text").get<std::string>(); + } + } + } + } + std::time_t t = std::time(0); server_sent_events.push_back(json { {"event", "response.completed"}, {"data", json { - {"type", "response.completed"}, + {"type", "response.completed"}, + {"sequence_number", seq_num++}, {"response", json { - {"id", oai_resp_id}, - {"object", "response"}, - {"created_at", t}, - {"status", "completed"}, - {"model", oaicompat_model}, - {"output", output}, - {"usage", json { - {"input_tokens", n_prompt_tokens}, - {"output_tokens", n_decoded}, - {"total_tokens", n_decoded + n_prompt_tokens}, - {"input_tokens_details", json { {"cached_tokens", n_prompt_tokens_cache} }}, - }} + {"completed_at", t}, + {"created_at", t}, + {"id", oai_resp_id}, + {"object", "response"}, + {"status", "completed"}, + 
{"model", oaicompat_model}, + {"output", output}, + {"output_text", output_text_stream}, + {"usage", json { + {"input_tokens", n_prompt_tokens}, + {"output_tokens", n_decoded}, + {"total_tokens", n_decoded + n_prompt_tokens}, + {"input_tokens_details", json{{"cached_tokens", n_prompt_tokens_cache}}}, + {"output_tokens_details", json{{"reasoning_tokens", 0}}}, + }}, + {"incomplete_details", nullptr}, + {"previous_response_id", nullptr}, + {"instructions", nullptr}, + {"error", nullptr}, + {"tools", json::array()}, + {"tool_choice", "auto"}, + {"truncation", "disabled"}, + {"parallel_tool_calls", false}, + {"text", json{{"format", json{{"type", "text"}}}}}, + {"top_p", 1.0}, + {"presence_penalty", 0.0}, + {"frequency_penalty", 0.0}, + {"top_logprobs", 0}, + {"temperature", 1.0}, + {"reasoning", nullptr}, + {"max_output_tokens", nullptr}, + {"max_tool_calls", nullptr}, + {"store", false}, + {"background", false}, + {"service_tier", "default"}, + {"safety_identifier", nullptr}, + {"prompt_cache_key", nullptr}, + {"metadata", json::object()}, }}, }} }); diff --git a/tools/server/server-task.h b/tools/server/server-task.h index a49ddb594b..28ec7b8f6b 100644 --- a/tools/server/server-task.h +++ b/tools/server/server-task.h @@ -370,6 +370,7 @@ struct server_task_result_cmpl_final : server_task_result { std::string oai_resp_id; std::string oai_resp_reasoning_id; std::string oai_resp_message_id; + int oai_resp_seq_num = 0; virtual bool is_stop() override { return true; // in stream mode, final responses are considered stop