From 8686b807f71ca53d4b3b5901ddf940493f49e0ec Mon Sep 17 00:00:00 2001 From: Piotr Wilkin Date: Tue, 17 Mar 2026 17:31:52 +0100 Subject: [PATCH] common/chat, server: refactor, move all conversion functions to common, add tests --- common/CMakeLists.txt | 2 + common/chat-conversion.cpp | 616 ++++++++++++++++++++++++++++++++ common/chat-conversion.h | 24 ++ common/chat.cpp | 58 --- common/chat.h | 7 +- tests/test-chat.cpp | 129 ++++++- tools/server/server-common.cpp | 514 +------------------------- tools/server/server-common.h | 6 - tools/server/server-context.cpp | 6 +- 9 files changed, 775 insertions(+), 587 deletions(-) create mode 100644 common/chat-conversion.cpp create mode 100644 common/chat-conversion.h diff --git a/common/CMakeLists.txt b/common/CMakeLists.txt index 75c6366c7f..bb561f1924 100644 --- a/common/CMakeLists.txt +++ b/common/CMakeLists.txt @@ -55,6 +55,8 @@ add_library(${TARGET} STATIC chat-peg-parser.h chat.cpp chat.h + chat-conversion.cpp + chat-conversion.h common.cpp common.h console.cpp diff --git a/common/chat-conversion.cpp b/common/chat-conversion.cpp new file mode 100644 index 0000000000..0d34e4a87f --- /dev/null +++ b/common/chat-conversion.cpp @@ -0,0 +1,616 @@ +#include "chat-conversion.h" + +#include "common.h" +#include "log.h" +#include "jinja/caps.h" + +#include + +#define JSON_ASSERT GGML_ASSERT + +// Helper function for JSON value extraction with default +template +static T json_value(const json & body, const std::string & key, const T & default_value) { + // Fallback null to default value + if (body.contains(key) && !body.at(key).is_null()) { + try { + return body.at(key); + } catch (NLOHMANN_JSON_NAMESPACE::detail::type_error const & err) { + LOG_WRN("Wrong type supplied for parameter '%s'. Expected '%s', using default value: %s\n", key.c_str(), json(default_value).type_name(), err.what()); + return default_value; + } + } else { + return default_value; + } +} + +// Helper function to render messages to JSON (shared with chat.cpp) +static json render_message_to_json(const std::vector & msgs, const jinja::caps & c) { + if (!c.supports_string_content && !c.supports_typed_content) { + LOG_WRN("%s: Neither string content nor typed content is supported by the template. This is unexpected and may lead to issues.\n", __func__); + } + + bool only_string_accepted = c.supports_string_content && !c.supports_typed_content; + bool only_typed_accepted = !c.supports_string_content && c.supports_typed_content; + + json messages = json::array(); + for (const auto & msg : msgs) { + if (only_string_accepted) { + json jmsg = msg.to_json_oaicompat(/* concat_typed_text= */ true); + messages.push_back(jmsg); + } else if (only_typed_accepted) { + json jmsg = msg.to_json_oaicompat(/* concat_typed_text= */ false); + if (jmsg.at("content").is_string()) { + jmsg["content"] = json::array({ + json{ + {"type", "text"}, + {"text", jmsg.at("content").get()}, + } + }); + } + messages.push_back(jmsg); + } else { + json jmsg = msg.to_json_oaicompat(/* concat_typed_text= */ false); + messages.push_back(jmsg); + } + } + return messages; +} + +json common_chat_convert_responses_to_chatcmpl(const json & response_body) { + if (!response_body.contains("input")) { + throw std::invalid_argument("'input' is required"); + } + if (!json_value(response_body, "previous_response_id", std::string{}).empty()) { + throw std::invalid_argument("llama.cpp does not support 'previous_response_id'."); + } + + const json input_value = response_body.at("input"); + json chatcmpl_body = response_body; + chatcmpl_body.erase("input"); + std::vector chatcmpl_messages; + + if (response_body.contains("instructions")) { + chatcmpl_messages.push_back({ + {"role", "system"}, + {"content", json_value(response_body, "instructions", std::string())}, + }); + chatcmpl_body.erase("instructions"); + } + + if (input_value.is_string()) { + // #responses_create-input-text_input + chatcmpl_messages.push_back({ + {"role", "user"}, + {"content", input_value}, + }); + } else if (input_value.is_array()) { + // #responses_create-input-input_item_list + + static auto exists_and_is_array = [](const json & j, const char * key) -> bool { + return j.contains(key) && j.at(key).is_array(); + }; + static auto exists_and_is_string = [](const json & j, const char * key) -> bool { + return j.contains(key) && j.at(key).is_string(); + }; + + for (json item : input_value) { + bool merge_prev = !chatcmpl_messages.empty() && chatcmpl_messages.back().value("role", "") == "assistant"; + + if (exists_and_is_string(item, "content")) { + // #responses_create-input-input_item_list-input_message-content-text_input + // Only "Input message" contains item["content"]::string + // After converting item["content"]::string to item["content"]::array, + // we can treat "Input message" as sum of "Item-Input message" and "Item-Output message" + item["content"] = json::array({ + json { + {"text", item.at("content")}, + {"type", "input_text"} + } + }); + } + + if (exists_and_is_array(item, "content") && + exists_and_is_string(item, "role") && + (item.at("role") == "user" || + item.at("role") == "system" || + item.at("role") == "developer") + ) { + // #responses_create-input-input_item_list-item-input_message + std::vector chatcmpl_content; + + for (const json & input_item : item.at("content")) { + const std::string type = json_value(input_item, "type", std::string()); + + if (type == "input_text") { + if (!input_item.contains("text")) { + throw std::invalid_argument("'Input text' requires 'text'"); + } + chatcmpl_content.push_back({ + {"text", input_item.at("text")}, + {"type", "text"}, + }); + } else if (type == "input_image") { + // While `detail` is marked as required, + // it has default value("auto") and can be omitted. + + if (!input_item.contains("image_url")) { + throw std::invalid_argument("'image_url' is required"); + } + chatcmpl_content.push_back({ + {"image_url", json { + {"url", input_item.at("image_url")} + }}, + {"type", "image_url"}, + }); + } else if (type == "input_file") { + throw std::invalid_argument("'input_file' is not supported by llamacpp at this moment"); + } else { + throw std::invalid_argument("'type' must be one of 'input_text', 'input_image', or 'input_file'"); + } + } + + if (item.contains("type")) { + item.erase("type"); + } + if (item.contains("status")) { + item.erase("status"); + } + item["content"] = chatcmpl_content; + + chatcmpl_messages.push_back(item); + } else if (exists_and_is_string(item, "role") && + item.at("role") == "assistant" && + exists_and_is_string(item, "type") && + item.at("type") == "message" + ) { + // #responses_create-input-input_item_list-item-output_message + auto chatcmpl_content = json::array(); + + // Handle both string content and array content + if (item.contains("content") && item.at("content").is_string()) { + // String content - convert to text content part + chatcmpl_content.push_back({ + {"text", item.at("content")}, + {"type", "text"}, + }); + } else if (exists_and_is_array(item, "content")) { + // Array content - process each item + for (const auto & output_text : item.at("content")) { + const std::string type = json_value(output_text, "type", std::string()); + if (type == "output_text" || type == "input_text") { + // Accept both output_text and input_text (string content gets converted to input_text) + if (!exists_and_is_string(output_text, "text")) { + throw std::invalid_argument("'Output text' requires 'text'"); + } + chatcmpl_content.push_back({ + {"text", output_text.at("text")}, + {"type", "text"}, + }); + } else if (type == "refusal") { + if (!exists_and_is_string(output_text, "refusal")) { + throw std::invalid_argument("'Refusal' requires 'refusal'"); + } + chatcmpl_content.push_back({ + {"refusal", output_text.at("refusal")}, + {"type", "refusal"}, + }); + } else { + throw std::invalid_argument("'type' must be one of 'output_text' or 'refusal'"); + } + } + } + + if (merge_prev) { + auto & prev_msg = chatcmpl_messages.back(); + if (!exists_and_is_array(prev_msg, "content")) { + prev_msg["content"] = json::array(); + } + auto & prev_content = prev_msg["content"]; + prev_content.insert(prev_content.end(), chatcmpl_content.begin(), chatcmpl_content.end()); + } else { + item.erase("status"); + item.erase("type"); + item["content"] = chatcmpl_content; + chatcmpl_messages.push_back(item); + } + } else if (exists_and_is_string(item, "arguments") && + exists_and_is_string(item, "call_id") && + exists_and_is_string(item, "name") && + exists_and_is_string(item, "type") && + item.at("type") == "function_call" + ) { + // #responses_create-input-input_item_list-item-function_tool_call + json tool_call = { + {"function", json { + {"arguments", item.at("arguments")}, + {"name", item.at("name")}, + }}, + {"id", item.at("call_id")}, + {"type", "function"}, + }; + + if (merge_prev) { + auto & prev_msg = chatcmpl_messages.back(); + if (!exists_and_is_array(prev_msg, "tool_calls")) { + prev_msg["tool_calls"] = json::array(); + } + prev_msg["tool_calls"].push_back(tool_call); + } else { + chatcmpl_messages.push_back(json { + {"role", "assistant"}, + {"tool_calls", json::array({tool_call})} + }); + } + } else if (exists_and_is_string(item, "call_id") && + (exists_and_is_string(item, "output") || exists_and_is_array(item, "output")) && + exists_and_is_string(item, "type") && + item.at("type") == "function_call_output" + ) { + // #responses_create-input-input_item_list-item-function_tool_call_output + if (item.at("output").is_string()) { + chatcmpl_messages.push_back(json { + {"content", item.at("output")}, + {"role", "tool"}, + {"tool_call_id", item.at("call_id")}, + }); + } else { + json chatcmpl_outputs = item.at("output"); + for (json & chatcmpl_output : chatcmpl_outputs) { + if (!chatcmpl_output.contains("type") || chatcmpl_output.at("type") != "input_text") { + throw std::invalid_argument("Output of tool call should be 'Input text'"); + } + chatcmpl_output["type"] = "text"; + } + chatcmpl_messages.push_back(json { + {"content", chatcmpl_outputs}, + {"role", "tool"}, + {"tool_call_id", item.at("call_id")}, + }); + } + } else if (exists_and_is_array(item, "summary") && + exists_and_is_string(item, "type") && + item.at("type") == "reasoning") { + // #responses_create-input-input_item_list-item-reasoning + + if (!exists_and_is_array(item, "content")) { + throw std::invalid_argument("item['content'] is not an array"); + } + if (item.at("content").empty()) { + throw std::invalid_argument("item['content'] is empty"); + } + if (!exists_and_is_string(item.at("content")[0], "text")) { + throw std::invalid_argument("item['content']['text'] is not a string"); + } + + if (merge_prev) { + auto & prev_msg = chatcmpl_messages.back(); + prev_msg["reasoning_content"] = item.at("content")[0].at("text"); + } else { + chatcmpl_messages.push_back(json { + {"role", "assistant"}, + {"content", json::array()}, + {"reasoning_content", item.at("content")[0].at("text")}, + }); + } + } else { + throw std::invalid_argument("Cannot determine type of 'item'"); + } + } + } else { + throw std::invalid_argument("'input' must be a string or array of objects"); + } + + chatcmpl_body["messages"] = chatcmpl_messages; + + if (response_body.contains("tools")) { + if (!response_body.at("tools").is_array()) { + throw std::invalid_argument("'tools' must be an array of objects"); + } + std::vector chatcmpl_tools; + for (json resp_tool : response_body.at("tools")) { + json chatcmpl_tool; + + if (json_value(resp_tool, "type", std::string()) != "function") { + throw std::invalid_argument("'type' of tool must be 'function'"); + } + resp_tool.erase("type"); + chatcmpl_tool["type"] = "function"; + + if (!resp_tool.contains("strict")) { + resp_tool["strict"] = true; + } + chatcmpl_tool["function"] = resp_tool; + chatcmpl_tools.push_back(chatcmpl_tool); + } + chatcmpl_body.erase("tools"); + chatcmpl_body["tools"] = chatcmpl_tools; + } + + if (response_body.contains("max_output_tokens")) { + chatcmpl_body.erase("max_output_tokens"); + chatcmpl_body["max_tokens"] = response_body["max_output_tokens"]; + } + + return chatcmpl_body; +} + +json common_chat_convert_anthropic_to_oai(const json & body) { + json oai_body; + + // Convert system prompt + json oai_messages = json::array(); + auto system_param = json_value(body, "system", json()); + if (!system_param.is_null()) { + std::string system_content; + + if (system_param.is_string()) { + system_content = system_param.get(); + } else if (system_param.is_array()) { + for (const auto & block : system_param) { + if (json_value(block, "type", std::string()) == "text") { + system_content += json_value(block, "text", std::string()); + } + } + } + + oai_messages.push_back({ + {"role", "system"}, + {"content", system_content} + }); + } + + // Convert messages + if (!body.contains("messages")) { + throw std::runtime_error("'messages' is required"); + } + const json & messages = body.at("messages"); + if (messages.is_array()) { + for (const auto & msg : messages) { + std::string role = json_value(msg, "role", std::string()); + + if (!msg.contains("content")) { + if (role == "assistant") { + continue; + } + oai_messages.push_back(msg); + continue; + } + + const json & content = msg.at("content"); + + if (content.is_string()) { + oai_messages.push_back(msg); + continue; + } + + if (!content.is_array()) { + oai_messages.push_back(msg); + continue; + } + + json tool_calls = json::array(); + json converted_content = json::array(); + json tool_results = json::array(); + std::string reasoning_content; + bool has_tool_calls = false; + + for (const auto & block : content) { + std::string type = json_value(block, "type", std::string()); + + if (type == "text") { + converted_content.push_back(block); + } else if (type == "thinking") { + reasoning_content += json_value(block, "thinking", std::string()); + } else if (type == "image") { + json source = json_value(block, "source", json::object()); + std::string source_type = json_value(source, "type", std::string()); + + if (source_type == "base64") { + std::string media_type = json_value(source, "media_type", std::string("image/jpeg")); + std::string data = json_value(source, "data", std::string()); + std::ostringstream ss; + ss << "data:" << media_type << ";base64," << data; + + converted_content.push_back({ + {"type", "image_url"}, + {"image_url", { + {"url", ss.str()} + }} + }); + } else if (source_type == "url") { + std::string url = json_value(source, "url", std::string()); + converted_content.push_back({ + {"type", "image_url"}, + {"image_url", { + {"url", url} + }} + }); + } + } else if (type == "tool_use") { + tool_calls.push_back({ + {"id", json_value(block, "id", std::string())}, + {"type", "function"}, + {"function", { + {"name", json_value(block, "name", std::string())}, + {"arguments", json_value(block, "input", json::object()).dump()} + }} + }); + has_tool_calls = true; + } else if (type == "tool_result") { + std::string tool_use_id = json_value(block, "tool_use_id", std::string()); + + auto result_content = json_value(block, "content", json()); + std::string result_text; + if (result_content.is_string()) { + result_text = result_content.get(); + } else if (result_content.is_array()) { + for (const auto & c : result_content) { + if (json_value(c, "type", std::string()) == "text") { + result_text += json_value(c, "text", std::string()); + } + } + } + + tool_results.push_back({ + {"role", "tool"}, + {"tool_call_id", tool_use_id}, + {"content", result_text} + }); + } + } + + if (!converted_content.empty() || has_tool_calls || !reasoning_content.empty()) { + json new_msg = {{"role", role}}; + if (!converted_content.empty()) { + new_msg["content"] = converted_content; + } else if (has_tool_calls || !reasoning_content.empty()) { + new_msg["content"] = ""; + } + if (!tool_calls.empty()) { + new_msg["tool_calls"] = tool_calls; + } + if (!reasoning_content.empty()) { + new_msg["reasoning_content"] = reasoning_content; + } + oai_messages.push_back(new_msg); + } + + for (const auto & tool_msg : tool_results) { + oai_messages.push_back(tool_msg); + } + } + } + + oai_body["messages"] = oai_messages; + + // Convert tools + if (body.contains("tools")) { + const json & tools = body.at("tools"); + if (tools.is_array()) { + json oai_tools = json::array(); + for (const auto & tool : tools) { + oai_tools.push_back({ + {"type", "function"}, + {"function", { + {"name", json_value(tool, "name", std::string())}, + {"description", json_value(tool, "description", std::string())}, + {"parameters", tool.contains("input_schema") ? tool.at("input_schema") : json::object()} + }} + }); + } + oai_body["tools"] = oai_tools; + } + } + + // Convert tool_choice + if (body.contains("tool_choice")) { + const json & tc = body.at("tool_choice"); + if (tc.is_object()) { + std::string type = json_value(tc, "type", std::string()); + if (type == "auto") { + oai_body["tool_choice"] = "auto"; + } else if (type == "any" || type == "tool") { + oai_body["tool_choice"] = "required"; + } + } + } + + // Convert stop_sequences to stop + if (body.contains("stop_sequences")) { + oai_body["stop"] = body.at("stop_sequences"); + } + + // Handle max_tokens (required in Anthropic, but we're permissive) + if (body.contains("max_tokens")) { + oai_body["max_tokens"] = body.at("max_tokens"); + } else { + oai_body["max_tokens"] = 4096; + } + + // Pass through common params + for (const auto & key : {"temperature", "top_p", "top_k", "stream"}) { + if (body.contains(key)) { + oai_body[key] = body.at(key); + } + } + + // Handle Anthropic-specific thinking param + if (body.contains("thinking")) { + json thinking = json_value(body, "thinking", json::object()); + std::string thinking_type = json_value(thinking, "type", std::string()); + if (thinking_type == "enabled") { + int budget_tokens = json_value(thinking, "budget_tokens", 10000); + oai_body["thinking_budget_tokens"] = budget_tokens; + } + } + + // Handle Anthropic-specific metadata param + if (body.contains("metadata")) { + json metadata = json_value(body, "metadata", json::object()); + std::string user_id = json_value(metadata, "user_id", std::string()); + if (!user_id.empty()) { + oai_body["__metadata_user_id"] = user_id; + } + } + + return oai_body; +} + +// DEPRECATED: only used in tests +json common_chat_msgs_to_json_oaicompat(const std::vector & msgs, bool concat_typed_text) { + jinja::caps c; + c.supports_string_content = true; + c.supports_typed_content = !concat_typed_text; + return render_message_to_json(msgs, c); +} + +json common_chat_tools_to_json_oaicompat(const std::vector & tools) { + if (tools.empty()) { + return json(); + } + + auto result = json::array(); + for (const auto & tool : tools) { + result.push_back({ + { "type", "function" }, + { "function", + { + { "name", tool.name }, + { "description", tool.description }, + { "parameters", json::parse(tool.parameters) }, + } }, + }); + } + return result; +} + +json common_chat_msg_diff_to_json_oaicompat(const common_chat_msg_diff & diff) { + json delta = json::object(); + if (!diff.reasoning_content_delta.empty()) { + delta["reasoning_content"] = diff.reasoning_content_delta; + } + if (!diff.content_delta.empty()) { + delta["content"] = diff.content_delta; + } + if (diff.tool_call_index != std::string::npos) { + json tool_call; + tool_call["index"] = diff.tool_call_index; + if (!diff.tool_call_delta.id.empty()) { + tool_call["id"] = diff.tool_call_delta.id; + tool_call["type"] = "function"; + } + if (!diff.tool_call_delta.name.empty() || !diff.tool_call_delta.arguments.empty()) { + json function = json::object(); + if (!diff.tool_call_delta.name.empty()) { + function["name"] = diff.tool_call_delta.name; + } + if (!diff.tool_call_delta.arguments.empty()) { + function["arguments"] = diff.tool_call_delta.arguments; + } + tool_call["function"] = function; + } + delta["tool_calls"] = json::array({ tool_call }); + } + return delta; +} diff --git a/common/chat-conversion.h b/common/chat-conversion.h new file mode 100644 index 0000000000..41a552e569 --- /dev/null +++ b/common/chat-conversion.h @@ -0,0 +1,24 @@ +// Chat conversion functions for OpenAI API compatibility + +#pragma once + +#include "chat.h" +#include "nlohmann/json.hpp" + +#include +#include + +using json = nlohmann::ordered_json; + +// Convert OpenAI Responses API format to OpenAI Chat Completions API format +json common_chat_convert_responses_to_chatcmpl(const json & body); + +// Convert Anthropic Messages API format to OpenAI Chat Completions API format +json common_chat_convert_anthropic_to_oai(const json & body); + +// DEPRECATED: only used in tests +json common_chat_msgs_to_json_oaicompat(const std::vector & msgs, bool concat_typed_text = false); + +json common_chat_tools_to_json_oaicompat(const std::vector & tools); + +json common_chat_msg_diff_to_json_oaicompat(const common_chat_msg_diff & diff); diff --git a/common/chat.cpp b/common/chat.cpp index 6addf613fa..b48a98a1a0 100644 --- a/common/chat.cpp +++ b/common/chat.cpp @@ -385,14 +385,6 @@ static json render_message_to_json(const std::vector & msgs, co return messages; } -// DEPRECATED: only used in tests -json common_chat_msgs_to_json_oaicompat(const std::vector & msgs, bool concat_typed_text) { - jinja::caps c; - c.supports_string_content = true; - c.supports_typed_content = !concat_typed_text; - return render_message_to_json(msgs, c); -} - std::vector common_chat_tools_parse_oaicompat(const json & tools) { std::vector result; @@ -428,56 +420,6 @@ std::vector common_chat_tools_parse_oaicompat(const json & too return result; } -json common_chat_tools_to_json_oaicompat(const std::vector & tools) { - if (tools.empty()) { - return json(); - } - - auto result = json::array(); - for (const auto & tool : tools) { - result.push_back({ - { "type", "function" }, - { "function", - { - { "name", tool.name }, - { "description", tool.description }, - { "parameters", json::parse(tool.parameters) }, - } }, - }); - } - return result; -} - -json common_chat_msg_diff_to_json_oaicompat(const common_chat_msg_diff & diff) { - json delta = json::object(); - if (!diff.reasoning_content_delta.empty()) { - delta["reasoning_content"] = diff.reasoning_content_delta; - } - if (!diff.content_delta.empty()) { - delta["content"] = diff.content_delta; - } - if (diff.tool_call_index != std::string::npos) { - json tool_call; - tool_call["index"] = diff.tool_call_index; - if (!diff.tool_call_delta.id.empty()) { - tool_call["id"] = diff.tool_call_delta.id; - tool_call["type"] = "function"; - } - if (!diff.tool_call_delta.name.empty() || !diff.tool_call_delta.arguments.empty()) { - json function = json::object(); - if (!diff.tool_call_delta.name.empty()) { - function["name"] = diff.tool_call_delta.name; - } - if (!diff.tool_call_delta.arguments.empty()) { - function["arguments"] = diff.tool_call_delta.arguments; - } - tool_call["function"] = function; - } - delta["tool_calls"] = json::array({ tool_call }); - } - return delta; -} - bool common_chat_verify_template(const std::string & tmpl, bool use_jinja) { if (use_jinja) { try { diff --git a/common/chat.h b/common/chat.h index 23e80baf69..db88ab3153 100644 --- a/common/chat.h +++ b/common/chat.h @@ -289,13 +289,10 @@ bool common_chat_templates_support_enable_thinking(const common_chat_templates * // Parses a JSON array of messages in OpenAI's chat completion API format. std::vector common_chat_msgs_parse_oaicompat(const nlohmann::ordered_json & messages); -// DEPRECATED: only used in tests -nlohmann::ordered_json common_chat_msgs_to_json_oaicompat(const std::vector & msgs, bool concat_typed_text = false); - std::vector common_chat_tools_parse_oaicompat(const nlohmann::ordered_json & tools); -nlohmann::ordered_json common_chat_tools_to_json_oaicompat(const std::vector & tools); -nlohmann::ordered_json common_chat_msg_diff_to_json_oaicompat(const common_chat_msg_diff & diff); +// Chat conversion functions (defined in chat-conversion.h) +#include "chat-conversion.h" // get template caps, useful for reporting to server /props endpoint std::map common_chat_templates_get_caps(const common_chat_templates * chat_templates); diff --git a/tests/test-chat.cpp b/tests/test-chat.cpp index 915b6f71dc..018ec9c151 100644 --- a/tests/test-chat.cpp +++ b/tests/test-chat.cpp @@ -1247,6 +1247,117 @@ static void test_tools_oaicompat_json_conversion() { common_chat_tools_to_json_oaicompat({ special_function_tool }).dump(2)); } +static void test_convert_responses_to_chatcmpl() { + LOG_DBG("%s\n", __func__); + + // Test basic conversion with input messages (user/assistant alternating) + { + json input = json::parse(R"({ + "input": [ + { + "type": "message", + "role": "user", + "content": "hi wassup" + }, + { + "type": "message", + "role": "assistant", + "content": "Hey! 👋 Not much, just here ready to chat. What's up with you? Anything I can help you with today?" + }, + { + "type": "message", + "role": "user", + "content": "hi" + } + ], + "model": "gpt-5-mini", + "stream": false, + "text": {}, + "reasoning": { + "effort": "medium" + } + })"); + + json result = common_chat_convert_responses_to_chatcmpl(input); + + // Verify messages were converted correctly + assert_equals(true, result.contains("messages")); + assert_equals(true, result.at("messages").is_array()); + assert_equals((size_t)3, result.at("messages").size()); + + // Check first message (user) + const auto & msg0 = result.at("messages")[0]; + assert_equals(std::string("user"), msg0.at("role").get()); + assert_equals(true, msg0.at("content").is_array()); + assert_equals(std::string("text"), msg0.at("content")[0].at("type").get()); + assert_equals(std::string("hi wassup"), msg0.at("content")[0].at("text").get()); + + // Check second message (assistant) + const auto & msg1 = result.at("messages")[1]; + assert_equals(std::string("assistant"), msg1.at("role").get()); + assert_equals(true, msg1.at("content").is_array()); + assert_equals(std::string("text"), msg1.at("content")[0].at("type").get()); + assert_equals(std::string("Hey! 👋 Not much, just here ready to chat. What's up with you? Anything I can help you with today?"), msg1.at("content")[0].at("text").get()); + + // Check third message (user) + const auto & msg2 = result.at("messages")[2]; + assert_equals(std::string("user"), msg2.at("role").get()); + assert_equals(true, msg2.at("content").is_array()); + assert_equals(std::string("text"), msg2.at("content")[0].at("type").get()); + assert_equals(std::string("hi"), msg2.at("content")[0].at("text").get()); + + // Verify other fields preserved + assert_equals(std::string("gpt-5-mini"), result.at("model").get()); + assert_equals(false, result.at("stream").get()); + } + + // Test string input + { + json input = json::parse(R"({ + "input": "Hello, world!", + "model": "test-model" + })"); + + json result = common_chat_convert_responses_to_chatcmpl(input); + + assert_equals((size_t)1, result.at("messages").size()); + const auto & msg = result.at("messages")[0]; + assert_equals(std::string("user"), msg.at("role").get()); + assert_equals(std::string("Hello, world!"), msg.at("content").get()); + } + + // Test with instructions (system message) + { + json input = json::parse(R"({ + "input": "Hello", + "instructions": "You are a helpful assistant.", + "model": "test-model" + })"); + + json result = common_chat_convert_responses_to_chatcmpl(input); + + assert_equals((size_t)2, result.at("messages").size()); + const auto & sys_msg = result.at("messages")[0]; + assert_equals(std::string("system"), sys_msg.at("role").get()); + assert_equals(std::string("You are a helpful assistant."), sys_msg.at("content").get()); + } + + // Test with max_output_tokens conversion + { + json input = json::parse(R"({ + "input": "Hello", + "model": "test-model", + "max_output_tokens": 100 + })"); + + json result = common_chat_convert_responses_to_chatcmpl(input); + + assert_equals(true, result.contains("max_tokens")); + assert_equals(false, result.contains("max_output_tokens")); + assert_equals(100, result.at("max_tokens").get()); + } +} + static void test_template_output_peg_parsers(bool detailed_debug) { LOG_DBG("%s\n", __func__); @@ -2891,7 +3002,7 @@ int main(int argc, char ** argv) { bool detailed_debug = false; bool only_run_filtered = false; - // Check for --template flag + // Check for --template and --detailed flags for (int i = 1; i < argc; i++) { std::string arg = argv[i]; if (arg == "--template" && i + 1 < argc) { @@ -2912,7 +3023,20 @@ int main(int argc, char ** argv) { } #ifndef _WIN32 - if (argc > 1) { + // Check if any argument is a .jinja file (for template format detection mode) + bool has_jinja_files = false; + for (int i = 1; i < argc; i++) { + std::string arg = argv[i]; + if (arg == "--detailed") { + continue; + } + if (arg.size() >= 6 && arg.rfind(".jinja") == arg.size() - 6) { + has_jinja_files = true; + break; + } + } + + if (has_jinja_files) { common_chat_templates_inputs inputs; common_chat_msg msg; msg.role = "user"; @@ -2945,6 +3069,7 @@ int main(int argc, char ** argv) { test_msg_diffs_compute(); test_msgs_oaicompat_json_conversion(); test_tools_oaicompat_json_conversion(); + test_convert_responses_to_chatcmpl(); test_developer_role_to_system_workaround(); test_template_output_peg_parsers(detailed_debug); std::cout << "\n[chat] All tests passed!" << '\n'; diff --git a/tools/server/server-common.cpp b/tools/server/server-common.cpp index 59ea11fc47..c1d2c5bb78 100644 --- a/tools/server/server-common.cpp +++ b/tools/server/server-common.cpp @@ -5,6 +5,7 @@ #include "mtmd.h" #include "mtmd-helper.h" #include "chat.h" +#include "chat-conversion.h" #include "base64.hpp" #include "server-common.h" @@ -1142,519 +1143,6 @@ json oaicompat_chat_params_parse( return llama_params; } -json convert_responses_to_chatcmpl(const json & response_body) { - if (!response_body.contains("input")) { - throw std::invalid_argument("'input' is required"); - } - if (!json_value(response_body, "previous_response_id", std::string{}).empty()) { - throw std::invalid_argument("llama.cpp does not support 'previous_response_id'."); - } - - const json input_value = response_body.at("input"); - json chatcmpl_body = response_body; - chatcmpl_body.erase("input"); - std::vector chatcmpl_messages; - - if (response_body.contains("instructions")) { - chatcmpl_messages.push_back({ - {"role", "system"}, - {"content", json_value(response_body, "instructions", std::string())}, - }); - chatcmpl_body.erase("instructions"); - } - - if (input_value.is_string()) { - // #responses_create-input-text_input - chatcmpl_messages.push_back({ - {"role", "user"}, - {"content", input_value}, - }); - } else if (input_value.is_array()) { - // #responses_create-input-input_item_list - - static auto exists_and_is_array = [](const json & j, const char * key) -> bool { - return j.contains(key) && j.at(key).is_array(); - }; - static auto exists_and_is_string = [](const json & j, const char * key) -> bool { - return j.contains(key) && j.at(key).is_string(); - }; - - for (json item : input_value) { - bool merge_prev = !chatcmpl_messages.empty() && chatcmpl_messages.back().value("role", "") == "assistant"; - - if (exists_and_is_string(item, "content")) { - // #responses_create-input-input_item_list-input_message-content-text_input - // Only "Input message" contains item["content"]::string - // After converting item["content"]::string to item["content"]::array, - // we can treat "Input message" as sum of "Item-Input message" and "Item-Output message" - item["content"] = json::array({ - json { - {"text", item.at("content")}, - {"type", "input_text"} - } - }); - } - - if (exists_and_is_array(item, "content") && - exists_and_is_string(item, "role") && - (item.at("role") == "user" || - item.at("role") == "system" || - item.at("role") == "developer") - ) { - // #responses_create-input-input_item_list-item-input_message - std::vector chatcmpl_content; - - for (const json & input_item : item.at("content")) { - const std::string type = json_value(input_item, "type", std::string()); - - if (type == "input_text") { - if (!input_item.contains("text")) { - throw std::invalid_argument("'Input text' requires 'text'"); - } - chatcmpl_content.push_back({ - {"text", input_item.at("text")}, - {"type", "text"}, - }); - } else if (type == "input_image") { - // While `detail` is marked as required, - // it has default value("auto") and can be omitted. - - if (!input_item.contains("image_url")) { - throw std::invalid_argument("'image_url' is required"); - } - chatcmpl_content.push_back({ - {"image_url", json { - {"url", input_item.at("image_url")} - }}, - {"type", "image_url"}, - }); - } else if (type == "input_file") { - throw std::invalid_argument("'input_file' is not supported by llamacpp at this moment"); - // if (input_item.contains("file_url")) { - // // chat completion API does not support file_url - // throw std::invalid_argument("'file_url' is not supported"); - // } - // if (!input_item.contains("file_data") || !input_item.contains("filename")) { - // throw std::invalid_argument("Both 'file_data' and 'filename' are required"); - // } - // chatcmpl_content.push_back({ - // {"file", json { - // {"file_data", input_item.at("file_data")}, - // {"filename", input_item.at("filename")}, - // }}, - // {"type", "file"}, - // }); - } else { - throw std::invalid_argument("'type' must be one of 'input_text', 'input_image', or 'input_file'"); - } - } - - if (item.contains("type")) { - item.erase("type"); - } - if (item.contains("status")) { - item.erase("status"); - } - item["content"] = chatcmpl_content; - - chatcmpl_messages.push_back(item); - } else if (exists_and_is_array(item, "content") && - exists_and_is_string(item, "role") && - item.at("role") == "assistant" && - // exists_and_is_string(item, "status") && - // (item.at("status") == "in_progress" || - // item.at("status") == "completed" || - // item.at("status") == "incomplete") && - // item["status"] not sent by codex-cli - exists_and_is_string(item, "type") && - item.at("type") == "message" - ) { - // #responses_create-input-input_item_list-item-output_message - auto chatcmpl_content = json::array(); - - for (const auto & output_text : item.at("content")) { - const std::string type = json_value(output_text, "type", std::string()); - if (type == "output_text") { - if (!exists_and_is_string(output_text, "text")) { - throw std::invalid_argument("'Output text' requires 'text'"); - // Ignore annotations and logprobs for now - chatcmpl_content.push_back({ - {"text", output_text.at("text")}, - {"type", "text"}, - }); - } - } else if (type == "refusal") { - if (!exists_and_is_string(output_text, "refusal")) { - throw std::invalid_argument("'Refusal' requires 'refusal'"); - // Ignore annotations and logprobs for now - chatcmpl_content.push_back({ - {"refusal", output_text.at("refusal")}, - {"type", "refusal"}, - }); - } - } else { - throw std::invalid_argument("'type' must be one of 'output_text' or 'refusal'"); - } - } - - if (merge_prev) { - auto & prev_msg = chatcmpl_messages.back(); - if (!exists_and_is_array(prev_msg, "content")) { - prev_msg["content"] = json::array(); - } - auto & prev_content = prev_msg["content"]; - prev_content.insert(prev_content.end(), chatcmpl_content.begin(), chatcmpl_content.end()); - } else { - item.erase("status"); - item.erase("type"); - item["content"] = chatcmpl_content; - chatcmpl_messages.push_back(item); - } - } else if (exists_and_is_string(item, "arguments") && - exists_and_is_string(item, "call_id") && - exists_and_is_string(item, "name") && - exists_and_is_string(item, "type") && - item.at("type") == "function_call" - ) { - // #responses_create-input-input_item_list-item-function_tool_call - json tool_call = { - {"function", json { - {"arguments", item.at("arguments")}, - {"name", item.at("name")}, - }}, - {"id", item.at("call_id")}, - {"type", "function"}, - }; - - if (merge_prev) { - auto & prev_msg = chatcmpl_messages.back(); - if (!exists_and_is_array(prev_msg, "tool_calls")) { - prev_msg["tool_calls"] = json::array(); - } - prev_msg["tool_calls"].push_back(tool_call); - } else { - chatcmpl_messages.push_back(json { - {"role", "assistant"}, - {"tool_calls", json::array({tool_call})} - }); - } - } else if (exists_and_is_string(item, "call_id") && - (exists_and_is_string(item, "output") || exists_and_is_array(item, "output")) && - exists_and_is_string(item, "type") && - item.at("type") == "function_call_output" - ) { - // #responses_create-input-input_item_list-item-function_tool_call_output - if (item.at("output").is_string()) { - chatcmpl_messages.push_back(json { - {"content", item.at("output")}, - {"role", "tool"}, - {"tool_call_id", item.at("call_id")}, - }); - } else { - json chatcmpl_outputs = item.at("output"); - for (json & chatcmpl_output : chatcmpl_outputs) { - if (!chatcmpl_output.contains("type") || chatcmpl_output.at("type") != "input_text") { - throw std::invalid_argument("Output of tool call should be 'Input text'"); - } - chatcmpl_output["type"] = "text"; - } - chatcmpl_messages.push_back(json { - {"content", chatcmpl_outputs}, - {"role", "tool"}, - {"tool_call_id", item.at("call_id")}, - }); - } - } else if (// exists_and_is_string(item, "id") && - // item["id"] not sent by codex-cli - exists_and_is_array(item, "summary") && - exists_and_is_string(item, "type") && - item.at("type") == "reasoning") { - // #responses_create-input-input_item_list-item-reasoning - - if (!exists_and_is_array(item, "content")) { - throw std::invalid_argument("item['content'] is not an array"); - } - if (item.at("content").empty()) { - throw std::invalid_argument("item['content'] is empty"); - } - if (!exists_and_is_string(item.at("content")[0], "text")) { - throw std::invalid_argument("item['content']['text'] is not a string"); - } - - if (merge_prev) { - auto & prev_msg = chatcmpl_messages.back(); - prev_msg["reasoning_content"] = item.at("content")[0].at("text"); - } else { - chatcmpl_messages.push_back(json { - {"role", "assistant"}, - {"content", json::array()}, - {"reasoning_content", item.at("content")[0].at("text")}, - }); - } - } else { - throw std::invalid_argument("Cannot determine type of 'item'"); - } - } - } else { - throw std::invalid_argument("'input' must be a string or array of objects"); - } - - chatcmpl_body["messages"] = chatcmpl_messages; - - if (response_body.contains("tools")) { - if (!response_body.at("tools").is_array()) { - throw std::invalid_argument("'tools' must be an array of objects"); - } - std::vector chatcmpl_tools; - for (json resp_tool : response_body.at("tools")) { - json chatcmpl_tool; - - if (json_value(resp_tool, "type", std::string()) != "function") { - throw std::invalid_argument("'type' of tool must be 'function'"); - } - resp_tool.erase("type"); - chatcmpl_tool["type"] = "function"; - - if (!resp_tool.contains("strict")) { - resp_tool["strict"] = true; - } - chatcmpl_tool["function"] = resp_tool; - chatcmpl_tools.push_back(chatcmpl_tool); - } - chatcmpl_body.erase("tools"); - chatcmpl_body["tools"] = chatcmpl_tools; - } - - if (response_body.contains("max_output_tokens")) { - chatcmpl_body.erase("max_output_tokens"); - chatcmpl_body["max_tokens"] = response_body["max_output_tokens"]; - } - - return chatcmpl_body; -} - -json convert_anthropic_to_oai(const json & body) { - json oai_body; - - // Convert system prompt - json oai_messages = json::array(); - auto system_param = json_value(body, "system", json()); - if (!system_param.is_null()) { - std::string system_content; - - if (system_param.is_string()) { - system_content = system_param.get(); - } else if (system_param.is_array()) { - for (const auto & block : system_param) { - if (json_value(block, "type", std::string()) == "text") { - system_content += json_value(block, "text", std::string()); - } - } - } - - oai_messages.push_back({ - {"role", "system"}, - {"content", system_content} - }); - } - - // Convert messages - if (!body.contains("messages")) { - throw std::runtime_error("'messages' is required"); - } - const json & messages = body.at("messages"); - if (messages.is_array()) { - for (const auto & msg : messages) { - std::string role = json_value(msg, "role", std::string()); - - if (!msg.contains("content")) { - if (role == "assistant") { - continue; - } - oai_messages.push_back(msg); - continue; - } - - const json & content = msg.at("content"); - - if (content.is_string()) { - oai_messages.push_back(msg); - continue; - } - - if (!content.is_array()) { - oai_messages.push_back(msg); - continue; - } - - json tool_calls = json::array(); - json converted_content = json::array(); - json tool_results = json::array(); - std::string reasoning_content; - bool has_tool_calls = false; - - for (const auto & block : content) { - std::string type = json_value(block, "type", std::string()); - - if (type == "text") { - converted_content.push_back(block); - } else if (type == "thinking") { - reasoning_content += json_value(block, "thinking", std::string()); - } else if (type == "image") { - json source = json_value(block, "source", json::object()); - std::string source_type = json_value(source, "type", std::string()); - - if (source_type == "base64") { - std::string media_type = json_value(source, "media_type", std::string("image/jpeg")); - std::string data = json_value(source, "data", std::string()); - std::ostringstream ss; - ss << "data:" << media_type << ";base64," << data; - - converted_content.push_back({ - {"type", "image_url"}, - {"image_url", { - {"url", ss.str()} - }} - }); - } else if (source_type == "url") { - std::string url = json_value(source, "url", std::string()); - converted_content.push_back({ - {"type", "image_url"}, - {"image_url", { - {"url", url} - }} - }); - } - } else if (type == "tool_use") { - tool_calls.push_back({ - {"id", json_value(block, "id", std::string())}, - {"type", "function"}, - {"function", { - {"name", json_value(block, "name", std::string())}, - {"arguments", json_value(block, "input", json::object()).dump()} - }} - }); - has_tool_calls = true; - } else if (type == "tool_result") { - std::string tool_use_id = json_value(block, "tool_use_id", std::string()); - - auto result_content = json_value(block, "content", json()); - std::string result_text; - if (result_content.is_string()) { - result_text = result_content.get(); - } else if (result_content.is_array()) { - for (const auto & c : result_content) { - if (json_value(c, "type", std::string()) == "text") { - result_text += json_value(c, "text", std::string()); - } - } - } - - tool_results.push_back({ - {"role", "tool"}, - {"tool_call_id", tool_use_id}, - {"content", result_text} - }); - } - } - - if (!converted_content.empty() || has_tool_calls || !reasoning_content.empty()) { - json new_msg = {{"role", role}}; - if (!converted_content.empty()) { - new_msg["content"] = converted_content; - } else if (has_tool_calls || !reasoning_content.empty()) { - new_msg["content"] = ""; - } - if (!tool_calls.empty()) { - new_msg["tool_calls"] = tool_calls; - } - if (!reasoning_content.empty()) { - new_msg["reasoning_content"] = reasoning_content; - } - oai_messages.push_back(new_msg); - } - - for (const auto & tool_msg : tool_results) { - oai_messages.push_back(tool_msg); - } - } - } - - oai_body["messages"] = oai_messages; - - // Convert tools - if (body.contains("tools")) { - const json & tools = body.at("tools"); - if (tools.is_array()) { - json oai_tools = json::array(); - for (const auto & tool : tools) { - oai_tools.push_back({ - {"type", "function"}, - {"function", { - {"name", json_value(tool, "name", std::string())}, - {"description", json_value(tool, "description", std::string())}, - {"parameters", tool.contains("input_schema") ? tool.at("input_schema") : json::object()} - }} - }); - } - oai_body["tools"] = oai_tools; - } - } - - // Convert tool_choice - if (body.contains("tool_choice")) { - const json & tc = body.at("tool_choice"); - if (tc.is_object()) { - std::string type = json_value(tc, "type", std::string()); - if (type == "auto") { - oai_body["tool_choice"] = "auto"; - } else if (type == "any" || type == "tool") { - oai_body["tool_choice"] = "required"; - } - } - } - - // Convert stop_sequences to stop - if (body.contains("stop_sequences")) { - oai_body["stop"] = body.at("stop_sequences"); - } - - // Handle max_tokens (required in Anthropic, but we're permissive) - if (body.contains("max_tokens")) { - oai_body["max_tokens"] = body.at("max_tokens"); - } else { - oai_body["max_tokens"] = 4096; - } - - // Pass through common params - for (const auto & key : {"temperature", "top_p", "top_k", "stream"}) { - if (body.contains(key)) { - oai_body[key] = body.at(key); - } - } - - // Handle Anthropic-specific thinking param - if (body.contains("thinking")) { - json thinking = json_value(body, "thinking", json::object()); - std::string thinking_type = json_value(thinking, "type", std::string()); - if (thinking_type == "enabled") { - int budget_tokens = json_value(thinking, "budget_tokens", 10000); - oai_body["thinking_budget_tokens"] = budget_tokens; - } - } - - // Handle Anthropic-specific metadata param - if (body.contains("metadata")) { - json metadata = json_value(body, "metadata", json::object()); - std::string user_id = json_value(metadata, "user_id", std::string()); - if (!user_id.empty()) { - oai_body["__metadata_user_id"] = user_id; - } - } - - return oai_body; -} - json format_embeddings_response_oaicompat( const json & request, const std::string & model_name, diff --git a/tools/server/server-common.h b/tools/server/server-common.h index 213ae52bb0..06d9fb8783 100644 --- a/tools/server/server-common.h +++ b/tools/server/server-common.h @@ -302,12 +302,6 @@ json oaicompat_chat_params_parse( const server_chat_params & opt, std::vector & out_files); -// convert OpenAI Responses API format to OpenAI Chat Completions API format -json convert_responses_to_chatcmpl(const json & body); - -// convert Anthropic Messages API format to OpenAI Chat Completions API format -json convert_anthropic_to_oai(const json & body); - // TODO: move it to server-task.cpp json format_embeddings_response_oaicompat( const json & request, diff --git a/tools/server/server-context.cpp b/tools/server/server-context.cpp index 1e5ff101c8..6c6c1f7813 100644 --- a/tools/server/server-context.cpp +++ b/tools/server/server-context.cpp @@ -3670,7 +3670,7 @@ void server_routes::init_routes() { this->post_responses_oai = [this](const server_http_req & req) { auto res = create_response(); std::vector files; - json body = convert_responses_to_chatcmpl(json::parse(req.body)); + json body = common_chat_convert_responses_to_chatcmpl(json::parse(req.body)); SRV_DBG("%s\n", "Request converted: OpenAI Responses -> OpenAI Chat Completions"); SRV_DBG("converted request: %s\n", body.dump().c_str()); json body_parsed = oaicompat_chat_params_parse( @@ -3688,7 +3688,7 @@ void server_routes::init_routes() { this->post_anthropic_messages = [this](const server_http_req & req) { auto res = create_response(); std::vector files; - json body = convert_anthropic_to_oai(json::parse(req.body)); + json body = common_chat_convert_anthropic_to_oai(json::parse(req.body)); SRV_DBG("%s\n", "Request converted: Anthropic -> OpenAI Chat Completions"); SRV_DBG("converted request: %s\n", body.dump().c_str()); json body_parsed = oaicompat_chat_params_parse( @@ -3706,7 +3706,7 @@ void server_routes::init_routes() { this->post_anthropic_count_tokens = [this](const server_http_req & req) { auto res = create_response(); std::vector files; - json body = convert_anthropic_to_oai(json::parse(req.body)); + json body = common_chat_convert_anthropic_to_oai(json::parse(req.body)); SRV_DBG("%s\n", "Request converted: Anthropic -> OpenAI Chat Completions"); SRV_DBG("converted request: %s\n", body.dump().c_str()); json body_parsed = oaicompat_chat_params_parse(