From 1df28c40539df2239db91d62d39c54a49bbbcffa Mon Sep 17 00:00:00 2001
From: openingnow <>
Date: Tue, 30 Dec 2025 05:44:08 +0000
Subject: [PATCH 1/5] from previous PR

---
 requirements/requirements-tool_bench.txt      |   2 +-
 tools/server/server-common.cpp                |  60 +++++++
 tools/server/server-common.h                  |   5 +
 tools/server/server-context.cpp               |  87 ++++++++++-
 tools/server/server-context.h                 |   1 +
 tools/server/server-task.cpp                  | 147 ++++++++++++++++++
 tools/server/server-task.h                    |   7 +
 tools/server/server.cpp                       |   2 +
 tools/server/tests/requirements.txt           |   2 +-
 .../tests/unit/test_compat_oai_responses.py   |  48 ++++++
 10 files changed, 352 insertions(+), 9 deletions(-)
 create mode 100644 tools/server/tests/unit/test_compat_oai_responses.py

diff --git a/requirements/requirements-tool_bench.txt b/requirements/requirements-tool_bench.txt
index f7912aff72..3bb74fb9d0 100644
--- a/requirements/requirements-tool_bench.txt
+++ b/requirements/requirements-tool_bench.txt
@@ -3,7 +3,7 @@ pytest~=8.3.3
 huggingface_hub>=0.34.0,<1.0
 matplotlib~=3.10.0
 numpy~=1.26.4
-openai~=1.55.3
+openai~=2.14.0
 pandas~=2.2.3
 prometheus-client~=0.20.0
 requests~=2.32.3
diff --git a/tools/server/server-common.cpp b/tools/server/server-common.cpp
index b02afaefda..96ec86edbc 100644
--- a/tools/server/server-common.cpp
+++ b/tools/server/server-common.cpp
@@ -1070,6 +1070,48 @@ json oaicompat_chat_params_parse(
     return llama_params;
 }
 
+json convert_responses_to_chatcmpl(const json & body) {
+    if (!body.contains("input")) {
+        throw std::invalid_argument("'input' is required");
+    }
+    if (!json_value(body, "previous_response_id", std::string{}).empty()) {
+        throw std::invalid_argument("llama.cpp does not support 'previous_response_id'.");
+    }
+
+    const json input_value = body.at("input");
+    json chatcmpl_messages = json::array();
+
+    if (input_value.is_array()) {
+        chatcmpl_messages = input_value;
+    } else if (input_value.is_string()) {
+        chatcmpl_messages.push_back({
+            {"role", "user"},
+            {"content", input_value},
+        });
+    } else {
+        std::invalid_argument("'input' must be a string or array of objects");
+    }
+
+    const std::string instructions = json_value(body, "instructions", std::string{});
+    if (instructions != "") {
+        chatcmpl_messages.push_back({
+            {"role", "system"},
+            {"content", instructions},
+        });
+    }
+
+    json chatcmpl_body = body;
+    chatcmpl_body.erase("input");
+    chatcmpl_body["messages"] = chatcmpl_messages;
+
+    if (body.contains("max_output_tokens")) {
+        chatcmpl_body.erase("max_output_tokens");
+        chatcmpl_body["max_tokens"] = body["max_output_tokens"];
+    }
+
+    return chatcmpl_body;
+}
+
 json convert_anthropic_to_oai(const json & body) {
     json oai_body;
 
@@ -1478,6 +1520,24 @@ std::string format_oai_sse(const json & data) {
     return ss.str();
 }
 
+std::string format_oai_resp_sse(const json & data) {
+    std::ostringstream ss;
+    auto send_single = [&ss](const json & event_obj) {
+        ss << "event: " << event_obj.at("event").get<std::string>() << "\n";
+        ss << "data: " << safe_json_to_str(event_obj.at("data")) << "\n\n";
+    };
+
+    if (data.is_array()) {
+        for (const auto & item : data) {
+            send_single(item);
+        }
+    } else {
+        send_single(data);
+    }
+
+    return ss.str();
+}
+
 std::string format_anthropic_sse(const json & data) {
     std::ostringstream ss;
 
diff --git a/tools/server/server-common.h b/tools/server/server-common.h
index 152a2a3c46..5827a6fc4d 100644
--- a/tools/server/server-common.h
+++ b/tools/server/server-common.h
@@ -295,6 +295,9 @@ json oaicompat_chat_params_parse(
     const oaicompat_parser_options & opt,
     std::vector<raw_buffer> & out_files);
 
+// convert OpenAI Responses API format to OpenAI Chat Completions API format
+json convert_responses_to_chatcmpl(const json & body);
+
 // convert Anthropic Messages API format to OpenAI Chat Completions API format
 json convert_anthropic_to_oai(const json & body);
 
@@ -332,6 +335,8 @@ std::string tokens_to_output_formatted_string(const llama_context * ctx, const l
 // note: if data is a json array, it will be sent as multiple events, one per item
 std::string format_oai_sse(const json & data);
 
+std::string format_oai_resp_sse(const json & data);
+
 // format Anthropic-style SSE with event types
 std::string format_anthropic_sse(const json & data);
 
diff --git a/tools/server/server-context.cpp b/tools/server/server-context.cpp
index 9726e02522..cc2016dff2 100644
--- a/tools/server/server-context.cpp
+++ b/tools/server/server-context.cpp
@@ -2981,6 +2981,58 @@ std::unique_ptr<server_http_res> server_routes::handle_completions_impl(
         json first_result_json = first_result->to_json();
         if (res_type == TASK_RESPONSE_TYPE_ANTHROPIC) {
             res->data = format_anthropic_sse(first_result_json);
+        } else if (res_type == TASK_RESPONSE_TYPE_OAI_RESP) {
+            const json created = {
+                {"event", "response.created"},
+                {"data", json {
+                    {"type", "response.created"},
+                    {"response", json {
+                        {"object", "response"},
+                        {"status", "in_progress"}
+                    }}
+                }}
+            };
+            const json in_progress = {
+                {"event", "response.in_progress"},
+                {"data", json {
+                    {"type", "response.in_progress"},
+                    {"response", json {
+                        {"object", "response"},
+                        {"status", "in_progress"}
+                    }}
+                }}
+            };
+            const json output_item_added = {
+                {"event", "response.output_item.added"},
+                {"data", json {
+                    {"type", "response.output_item.added"},
+                    {"item", json {
+                        {"type", "message"},
+                        {"status", "in_progress"},
+                        {"content", json::array()},
+                        {"role", "assistant"}
+                    }}
+                }}
+            };
+            const json content_part_added = {
+                {"event", "response.content_part.added"},
+                {"data", json {
+                    {"type", "response.content_part.added"},
+                    {"part", json {
+                        {"type", "output_text"},
+                        {"text", ""}
+                    }}
+                }}
+            };
+
+            const json initial_events = json::array({
+                created,
+                in_progress,
+                output_item_added,
+                content_part_added
+            });
+
+            res->data = format_oai_resp_sse(initial_events) + format_oai_resp_sse(first_result_json);
         } else {
             res->data = format_oai_sse(first_result_json);
         }
@@ -3015,13 +3067,16 @@ std::unique_ptr<server_http_res> server_routes::handle_completions_impl(
         // check if there is more data
         if (!rd.has_next()) {
-            if (res_type == TASK_RESPONSE_TYPE_ANTHROPIC) {
-                // Anthropic doesn't send [DONE], message_stop was already sent
-                output = "";
-            } else if (res_type != TASK_RESPONSE_TYPE_NONE) {
-                output = "data: [DONE]\n\n";
-            } else {
-                output = "";
+            switch (res_type) {
+                case TASK_RESPONSE_TYPE_NONE:
+                case TASK_RESPONSE_TYPE_OAI_RESP:
+                case TASK_RESPONSE_TYPE_ANTHROPIC:
+                    output = "";
+                    break;
+
+                default:
+                    output = "data: [DONE]\n\n";
+                    break;
             }
             SRV_DBG("%s", "all results received, terminating stream\n");
             return false; // no more data, terminate
@@ -3049,6 +3104,8 @@ std::unique_ptr<server_http_res> server_routes::handle_completions_impl(
             json res_json = result->to_json();
             if (res_type == TASK_RESPONSE_TYPE_ANTHROPIC) {
                 output = format_anthropic_sse(res_json);
+            } else if (res_type == TASK_RESPONSE_TYPE_OAI_RESP) {
+                output = format_oai_resp_sse(res_json);
             } else {
                 output = format_oai_sse(res_json);
             }
@@ -3479,6 +3536,22 @@ void server_routes::init_routes() {
             TASK_RESPONSE_TYPE_OAI_CHAT);
     };
 
+    this->post_responses_oai = [this](const server_http_req & req) {
+        auto res = create_response();
+        std::vector<raw_buffer> files;
+        json body = convert_responses_to_chatcmpl(json::parse(req.body));
+        json body_parsed = oaicompat_chat_params_parse(
+            body,
+            ctx_server.oai_parser_opt,
+            files);
+        return handle_completions_impl(
+            req,
+            SERVER_TASK_TYPE_COMPLETION,
+            body_parsed,
+            files,
+            TASK_RESPONSE_TYPE_OAI_RESP);
+    };
+
     this->post_anthropic_messages = [this](const server_http_req & req) {
         auto res = create_response();
         std::vector<raw_buffer> files;
diff --git a/tools/server/server-context.h b/tools/server/server-context.h
index 09bec15ae1..3bf81b447e 100644
--- a/tools/server/server-context.h
+++ b/tools/server/server-context.h
@@ -95,6 +95,7 @@ struct server_routes {
     server_http_context::handler_t post_completions;
     server_http_context::handler_t post_completions_oai;
     server_http_context::handler_t post_chat_completions;
+    server_http_context::handler_t post_responses_oai;
     server_http_context::handler_t post_anthropic_messages;
     server_http_context::handler_t post_anthropic_count_tokens;
     server_http_context::handler_t post_apply_template;
diff --git a/tools/server/server-task.cpp b/tools/server/server-task.cpp
index 22f5b2059c..f8093677d5 100644
--- a/tools/server/server-task.cpp
+++ b/tools/server/server-task.cpp
@@ -578,6 +578,8 @@ json server_task_result_cmpl_final::to_json() {
             return to_json_oaicompat();
         case TASK_RESPONSE_TYPE_OAI_CHAT:
             return stream ? to_json_oaicompat_chat_stream() : to_json_oaicompat_chat();
+        case TASK_RESPONSE_TYPE_OAI_RESP:
+            return stream ? to_json_oaicompat_resp_stream() : to_json_oaicompat_resp();
         case TASK_RESPONSE_TYPE_ANTHROPIC:
             return stream ? to_json_anthropic_stream() : to_json_anthropic();
         default:
@@ -795,6 +797,122 @@ json server_task_result_cmpl_final::to_json_oaicompat_chat_stream() {
     return deltas;
 }
 
+json server_task_result_cmpl_final::to_json_oaicompat_resp() {
+    common_chat_msg msg;
+    if (!oaicompat_msg.empty()) {
+        msg = oaicompat_msg;
+    } else {
+        msg.role = "assistant";
+        msg.content = content;
+    }
+
+    const json reasoning = {
+        {"type", "reasoning"},
+        {"summary", json::array({json {
+            {"type", "summary_text"},
+            {"text", msg.reasoning_content}
+        }})}
+    };
+    const json message = {
+        {"type", "message"},
+        {"status", "completed"},
+        {"content", json::array({json {
+            {"type", "output_text"},
+            {"annotations", json::array()},
+            {"logprobs", json::array()},
+            {"text", msg.content}
+        }})},
+        {"role", msg.role}
+    };
+
+    std::time_t t = std::time(0);
+    json res = {
+        {"object", "response"},
+        {"created_at", t},
+        {"status", "completed"},
+        {"model", oaicompat_model},
+        {"output", json::array({reasoning, message})},
+        {"usage", json {
+            {"input_tokens", n_prompt_tokens},
+            {"output_tokens", n_decoded},
+            {"total_tokens", n_decoded + n_prompt_tokens}
+        }},
+    };
+
+    if (verbose) {
+        res["__verbose"] = to_json_non_oaicompat();
+    }
+    if (timings.prompt_n >= 0) {
+        res.push_back({"timings", timings.to_json()});
+    }
+
+    return res;
+}
+
+json server_task_result_cmpl_final::to_json_oaicompat_resp_stream() {
+    json server_sent_events = json::array();
+
+    server_sent_events.push_back(json {
+        {"event", "response.output_text.done"},
+        {"data", json {
+            {"type", "response.output_text.done"},
+            {"text", oaicompat_msg.content}
+        }}
+    });
+
+    const json part = {
+        {"type", "output_text"},
+        {"annotations", json::array()},
+        {"logprobs", json::array()},
+        {"text", oaicompat_msg.content}
+    };
+
+    server_sent_events.push_back(json {
+        {"event", "response.content_part.done"},
+        {"data", json {
+            {"type", "response.content_part.done"},
+            {"part", part}
+        }}
+    });
+
+    const json item = {
+        {"type", "message"},
{"status", "completed"}, + {"content", json::array({part})}, + {"role", "assistant"} + }; + + server_sent_events.push_back(json { + {"event", "response.output_item.done"}, + {"data", json { + {"type", "response.output_item.done"}, + {"item", item} + }} + }); + + std::time_t t = std::time(0); + server_sent_events.push_back(json { + {"event", "response.completed"}, + {"data", json { + {"type", "response.completed"}, + {"response", json { + {"object", "response"}, + {"created_at", t}, + {"status", "completed"}, + {"model", oaicompat_model}, + {"output", json::array({item})}, + {"usage", json { + {"input_tokens", n_prompt_tokens}, + {"output_tokens", n_decoded}, + {"total_tokens", n_decoded + n_prompt_tokens} + }} + }}, + }} + }); + + return server_sent_events; +} + json server_task_result_cmpl_final::to_json_anthropic() { std::string stop_reason = "max_tokens"; if (stop == STOP_TYPE_WORD || stop == STOP_TYPE_EOS) { @@ -989,6 +1107,8 @@ json server_task_result_cmpl_partial::to_json() { return to_json_oaicompat(); case TASK_RESPONSE_TYPE_OAI_CHAT: return to_json_oaicompat_chat(); + case TASK_RESPONSE_TYPE_OAI_RESP: + return to_json_oaicompat_resp(); case TASK_RESPONSE_TYPE_ANTHROPIC: return to_json_anthropic(); default: @@ -1058,6 +1178,33 @@ json server_task_result_cmpl_partial::to_json_oaicompat() { return res; } +json server_task_result_cmpl_partial::to_json_oaicompat_resp() { + std::vector deltas; + + for (const common_chat_msg_diff & diff : oaicompat_msg_diffs) { + if (!diff.reasoning_content_delta.empty()) { + deltas.push_back(json { + {"event", "response.reasoning_text.delta"}, + {"data", json { + {"type", "response.reasoning_text.delta"}, + {"delta", diff.reasoning_content_delta} + }} + }); + } + if (!diff.content_delta.empty()) { + deltas.push_back(json { + {"event", "response.output_text.delta"}, + {"data", json { + {"type", "response.output_text.delta"}, + {"delta", diff.content_delta} + }} + }); + } + } + + return deltas; +} + json server_task_result_cmpl_partial::to_json_oaicompat_chat() { bool first = n_decoded == 1; std::time_t t = std::time(0); diff --git a/tools/server/server-task.h b/tools/server/server-task.h index 687770de5e..3c411910d1 100644 --- a/tools/server/server-task.h +++ b/tools/server/server-task.h @@ -33,6 +33,7 @@ enum task_response_type { TASK_RESPONSE_TYPE_NONE, // llama.cpp native format TASK_RESPONSE_TYPE_OAI_CHAT, TASK_RESPONSE_TYPE_OAI_CMPL, + TASK_RESPONSE_TYPE_OAI_RESP, TASK_RESPONSE_TYPE_OAI_EMBD, TASK_RESPONSE_TYPE_ANTHROPIC, }; @@ -311,6 +312,10 @@ struct server_task_result_cmpl_final : server_task_result { json to_json_oaicompat_chat_stream(); + json to_json_oaicompat_resp(); + + json to_json_oaicompat_resp_stream(); + json to_json_anthropic(); json to_json_anthropic_stream(); @@ -354,6 +359,8 @@ struct server_task_result_cmpl_partial : server_task_result { json to_json_oaicompat_chat(); + json to_json_oaicompat_resp(); + json to_json_anthropic(); }; diff --git a/tools/server/server.cpp b/tools/server/server.cpp index 0fbc7b6d35..f73eb12763 100644 --- a/tools/server/server.cpp +++ b/tools/server/server.cpp @@ -140,6 +140,7 @@ int main(int argc, char ** argv, char ** envp) { routes.post_completions = models_routes->proxy_post; routes.post_completions_oai = models_routes->proxy_post; routes.post_chat_completions = models_routes->proxy_post; + routes.post_responses_oai = models_routes->proxy_post; routes.post_anthropic_messages = models_routes->proxy_post; routes.post_anthropic_count_tokens = models_routes->proxy_post; routes.post_infill = 
@@ -176,6 +177,7 @@
     ctx_http.post("/chat/completions", ex_wrapper(routes.post_chat_completions));
     ctx_http.post("/v1/chat/completions", ex_wrapper(routes.post_chat_completions));
     ctx_http.post("/api/chat", ex_wrapper(routes.post_chat_completions)); // ollama specific endpoint
+    ctx_http.post("/v1/responses", ex_wrapper(routes.post_responses_oai));
     ctx_http.post("/v1/messages", ex_wrapper(routes.post_anthropic_messages)); // anthropic messages API
     ctx_http.post("/v1/messages/count_tokens", ex_wrapper(routes.post_anthropic_count_tokens)); // anthropic token counting
     ctx_http.post("/infill", ex_wrapper(routes.post_infill));
diff --git a/tools/server/tests/requirements.txt b/tools/server/tests/requirements.txt
index 4ea7f19f77..ca79d025ed 100644
--- a/tools/server/tests/requirements.txt
+++ b/tools/server/tests/requirements.txt
@@ -2,7 +2,7 @@ aiohttp~=3.9.3
 pytest~=8.3.3
 huggingface_hub>=0.34.0,<1.0
 numpy~=1.26.4
-openai~=1.55.3
+openai~=2.14.0
 prometheus-client~=0.20.0
 requests~=2.32.3
 wget~=3.2
diff --git a/tools/server/tests/unit/test_compat_oai_responses.py b/tools/server/tests/unit/test_compat_oai_responses.py
new file mode 100644
index 0000000000..e168f4562d
--- /dev/null
+++ b/tools/server/tests/unit/test_compat_oai_responses.py
@@ -0,0 +1,48 @@
+import pytest
+from openai import OpenAI
+from utils import *
+
+server: ServerProcess
+
+@pytest.fixture(autouse=True)
+def create_server():
+    global server
+    server = ServerPreset.tinyllama2()
+
+def test_responses_with_openai_library():
+    global server
+    server.start()
+    client = OpenAI(api_key="dummy", base_url=f"http://{server.server_host}:{server.server_port}/v1")
+    res = client.responses.create(
+        model="gpt-4.1",
+        input=[
+            {"role": "system", "content": "Book"},
+            {"role": "user", "content": "What is the best book"},
+        ],
+        max_output_tokens=8,
+        temperature=0.8,
+    )
+    assert match_regex("(Suddenly)+", res.output_text)
+
+def test_responses_stream_with_openai_library():
+    global server
+    server.start()
+    client = OpenAI(api_key="dummy", base_url=f"http://{server.server_host}:{server.server_port}/v1")
+    stream = client.responses.create(
+        model="gpt-4.1",
+        input=[
+            {"role": "system", "content": "Book"},
+            {"role": "user", "content": "What is the best book"},
+        ],
+        max_output_tokens=8,
+        temperature=0.8,
+        stream=True,
+    )
+
+    gathered_text = ''
+    for r in stream:
+        if r.type == "response.output_text.delta":
+            gathered_text += r.delta
+        if r.type == "response.completed":
+            assert gathered_text == r.response.output_text
+            assert match_regex("(Suddenly)+", r.response.output_text)
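
Usage sketch (not part of the patch): with patch 1 applied, the new /v1/responses
endpoint can also be exercised without the openai client. Host and port below are
placeholders for a locally running llama-server, and the model name is not used for
routing. The response shape (an "output" array holding a "reasoning" item and a
"message" item, plus input/output token usage) follows to_json_oaicompat_resp() above.

    import requests

    res = requests.post(
        "http://localhost:8080/v1/responses",  # placeholder host/port
        json={
            "model": "any",                    # ignored by llama.cpp
            "instructions": "Answer briefly.",
            "input": "What is the best book?",
            "max_output_tokens": 32,           # converted to 'max_tokens' internally
        },
    )
    body = res.json()
    message = next(item for item in body["output"] if item["type"] == "message")
    print(message["content"][0]["text"])
    print(body["usage"])  # input_tokens / output_tokens / total_tokens
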
From e4571a79a6f5a42a837c46bdc426b01e16a57b9c Mon Sep 17 00:00:00 2001
From: openingnow <>
Date: Tue, 30 Dec 2025 05:44:41 +0000
Subject: [PATCH 2/5] Make instructions (system message) the first message

---
 tools/server/server-common.cpp | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/tools/server/server-common.cpp b/tools/server/server-common.cpp
index 96ec86edbc..0605c4ecae 100644
--- a/tools/server/server-common.cpp
+++ b/tools/server/server-common.cpp
@@ -1081,18 +1081,7 @@ json convert_responses_to_chatcmpl(const json & body) {
     const json input_value = body.at("input");
     json chatcmpl_messages = json::array();
 
-    if (input_value.is_array()) {
-        chatcmpl_messages = input_value;
-    } else if (input_value.is_string()) {
-        chatcmpl_messages.push_back({
-            {"role", "user"},
-            {"content", input_value},
-        });
-    } else {
-        std::invalid_argument("'input' must be a string or array of objects");
-    }
-
-    const std::string instructions = json_value(body, "instructions", std::string{});
+    const std::string instructions = json_value(body, "instructions", std::string());
     if (instructions != "") {
         chatcmpl_messages.push_back({
             {"role", "system"},
@@ -1100,6 +1089,17 @@ json convert_responses_to_chatcmpl(const json & body) {
         });
     }
 
+    if (input_value.is_string()) {
+        chatcmpl_messages.push_back({
+            {"role", "user"},
+            {"content", input_value},
+        });
+    } else if (input_value.is_array()) {
+        chatcmpl_messages = input_value;
+    } else {
+        throw std::invalid_argument("'input' must be a string or array of objects");
+    }
+
     json chatcmpl_body = body;
     chatcmpl_body.erase("input");
     chatcmpl_body["messages"] = chatcmpl_messages;
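
Sketch of the resulting ordering (illustrative Python mirror of the C++ above, not
part of the patch): instructions now land first, so they become the system message
at index 0. Note that the array branch still overwrites the message list at this
point in the series; patch 3 below reworks that branch into a per-item conversion.

    def convert_sketch(body: dict) -> dict:
        # mirrors convert_responses_to_chatcmpl() as of patch 2
        messages = []
        if body.get("instructions"):
            messages.append({"role": "system", "content": body["instructions"]})
        inp = body["input"]
        if isinstance(inp, str):
            messages.append({"role": "user", "content": inp})
        elif isinstance(inp, list):
            messages = inp  # still drops the system message here (see patch 3)
        else:
            raise ValueError("'input' must be a string or array of objects")
        out = {k: v for k, v in body.items() if k != "input"}
        out["messages"] = messages
        if "max_output_tokens" in out:
            out["max_tokens"] = out.pop("max_output_tokens")
        return out

    assert convert_sketch({"instructions": "Be brief.", "input": "Hi"})["messages"][0]["role"] == "system"
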
From d48e3418fdebda7ae5f41d548600f59462a55a28 Mon Sep 17 00:00:00 2001
From: openingnow <>
Date: Tue, 30 Dec 2025 05:45:32 +0000
Subject: [PATCH 3/5] Convert [input_message] content parts (text/image/file)

---
 tools/server/server-common.cpp | 70 +++++++++++++++++++++++++++++++++-
 1 file changed, 69 insertions(+), 1 deletion(-)

diff --git a/tools/server/server-common.cpp b/tools/server/server-common.cpp
index 0605c4ecae..32b4e67dcf 100644
--- a/tools/server/server-common.cpp
+++ b/tools/server/server-common.cpp
@@ -1095,7 +1095,75 @@ json convert_responses_to_chatcmpl(const json & body) {
             {"content", input_value},
         });
     } else if (input_value.is_array()) {
-        chatcmpl_messages = input_value;
+        for (const auto & input_message : input_value) {
+            if (!input_message.contains("content")) {
+                throw std::invalid_argument("'content' is required");
+            }
+            const json content = input_message.at("content");
+
+            if (content.is_string()) {
+                chatcmpl_messages.push_back(input_message);
+            } else if (content.is_array()) {
+                json new_content = json::array();
+
+                for (const auto & input_item : content) {
+                    const std::string type = json_value(input_item, "type", std::string());
+
+                    if (type == "input_text") {
+                        if (!input_item.contains("text")) {
+                            throw std::invalid_argument("'Input text' requires 'text'");
+                        }
+                        new_content.push_back({
+                            {"text", input_item.at("text")},
+                            {"type", "text"}
+                        });
+                    } else if (type == "input_image") {
+                        // While `detail` is marked as required,
+                        // it has a default value ("auto") and can be omitted.
+
+                        if (!input_item.contains("image_url")) {
+                            throw std::invalid_argument("'image_url' is required");
+                        }
+                        new_content.push_back({
+                            {"image_url", json {{"url", input_item.at("image_url")}}},
+                            {"type", "image_url"}
+                        });
+                    } else if (type == "input_file") {
+                        if (input_item.contains("file_url")) {
+                            // chat completion API does not support file_url
+                            throw std::invalid_argument("'file_url' is not supported");
+                        }
+                        if (!input_item.contains("file_data") || !input_item.contains("filename")) {
+                            throw std::invalid_argument("Both 'file_data' and 'filename' are required");
+                        }
+                        new_content.push_back({
+                            {"file", json {
+                                {"file_data", input_item.at("file_data")},
+                                {"filename", input_item.at("filename")}}},
+                            {"type", "file"}
+                        });
+                    } else {
+                        throw std::invalid_argument("'type' must be one of 'input_text', 'input_image', or 'input_file'");
+                    }
+                }
+
+                json new_input_message = input_message;
+                new_input_message["content"] = new_content;
+
+                chatcmpl_messages.push_back(new_input_message);
+            } else {
+                throw std::invalid_argument("'content' must be a string or array of objects");
+            }
+
+            const std::string role = json_value(input_message, "role", std::string());
+            if (role != "user" && role != "assistant" && role != "system" && role != "developer") {
+                throw std::invalid_argument("'role' must be one of user, assistant, system, or developer");
+            }
+
+            if (input_message.contains("type") && input_message.at("type") != "message") {
+                throw std::invalid_argument("If 'type' is defined, it should be 'message'");
+            }
+        }
     } else {
         throw std::invalid_argument("'input' must be a string or array of objects");
     }
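
The part-type mapping patch 3 implements, summarized as an illustrative Python
sketch (not part of the patch; error handling shortened):

    def convert_part(p: dict) -> dict:
        t = p.get("type")
        if t == "input_text":
            return {"type": "text", "text": p["text"]}
        if t == "input_image":
            # 'detail' defaults to "auto" and may be omitted
            return {"type": "image_url", "image_url": {"url": p["image_url"]}}
        if t == "input_file":
            if "file_url" in p:
                raise ValueError("'file_url' is not supported")
            return {"type": "file",
                    "file": {"file_data": p["file_data"], "filename": p["filename"]}}
        raise ValueError("'type' must be one of 'input_text', 'input_image', or 'input_file'")
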
chatcmpl_body["max_tokens"] = body["max_output_tokens"]; + chatcmpl_body["max_tokens"] = response_body["max_output_tokens"]; } return chatcmpl_body; From 9f09745e05e427a85057302ed0458627b4f2b512 Mon Sep 17 00:00:00 2001 From: openingnow <> Date: Thu, 1 Jan 2026 00:26:37 +0000 Subject: [PATCH 5/5] Initial tool call support --- tools/server/server-common.cpp | 162 ++++++++++++++++++++++++++++----- tools/server/server-task.cpp | 116 ++++++++++++++++------- 2 files changed, 219 insertions(+), 59 deletions(-) diff --git a/tools/server/server-common.cpp b/tools/server/server-common.cpp index fcf50c8eae..3772a540bd 100644 --- a/tools/server/server-common.cpp +++ b/tools/server/server-common.cpp @@ -1090,30 +1090,52 @@ json convert_responses_to_chatcmpl(const json & response_body) { } if (input_value.is_string()) { + // #responses_create-input-text_input chatcmpl_messages.push_back({ {"role", "user"}, {"content", input_value}, }); } else if (input_value.is_array()) { - for (const auto & input_message : input_value) { - if (!input_message.contains("content")) { - throw std::invalid_argument("'content' is required"); + // #responses_create-input-input_item_list + + const auto exists_and_is_array = [](const json & j, const char * key) -> bool { + return j.contains(key) && j.at(key).is_array(); + }; + const auto exists_and_is_string = [](const json & j, const char * key) -> bool { + return j.contains(key) && j.at(key).is_string(); + }; + + for (json item : input_value) { + if (exists_and_is_string(item, "content")) { + // #responses_create-input-input_item_list-input_message-content-text_input + // Only "Input message" contains item["content"]::string + // After converting item["content"]::string to item["content"]::array, + // we can treat "Input message" as sum of "Item-Input message" and "Item-Output message" + item["content"] = json::array({ + json { + {"text", item.at("content")}, + {"type", "input_text"} + } + }); } - const json content = input_message.at("content"); - if (content.is_string()) { - chatcmpl_messages.push_back(input_message); - } else if (content.is_array()) { - json new_content = json::array(); + if (exists_and_is_array(item, "content") && + exists_and_is_string(item, "role") && + (item.at("role") == "user" || + item.at("role") == "system" || + item.at("role") == "developer") + ) { + // #responses_create-input-input_item_list-item-input_message + json chatcmpl_content = json::array(); - for (const auto & input_item : content) { + for (const json & input_item : item.at("content")) { const std::string type = json_value(input_item, "type", std::string()); if (type == "input_text") { if (!input_item.contains("text")) { throw std::invalid_argument("'Input text' requires 'text'"); } - new_content.push_back({ + chatcmpl_content.push_back({ {"text", input_item.at("text")}, {"type", "text"} }); @@ -1124,7 +1146,7 @@ json convert_responses_to_chatcmpl(const json & response_body) { if (!input_item.contains("image_url")) { throw std::invalid_argument("'image_url' is required"); } - new_content.push_back({ + chatcmpl_content.push_back({ {"image_url", json {{"url", input_item.at("image_url")}}}, {"type", "image_url"} }); @@ -1136,7 +1158,7 @@ json convert_responses_to_chatcmpl(const json & response_body) { if (!input_item.contains("file_data") || !input_item.contains("filename")) { throw std::invalid_argument("Both 'file_data' and 'filename' are required"); } - new_content.push_back({ + chatcmpl_content.push_back({ {"file", json { {"file_data", input_item.at("file_data")}, {"filename", 
input_item.at("filename")}}}, @@ -1147,21 +1169,87 @@ json convert_responses_to_chatcmpl(const json & response_body) { } } - json new_input_message = input_message; - new_input_message["content"] = new_content; + item["content"] = chatcmpl_content; - chatcmpl_messages.push_back(new_input_message); + chatcmpl_messages.push_back(item); + } else if (exists_and_is_array(item, "content") && + exists_and_is_string(item, "role") && + item.at("role") == "assistant" && + exists_and_is_string(item, "status") && + (item.at("status") == "in_progress" || + item.at("status") == "completed" || + item.at("status") == "incomplete") && + exists_and_is_string(item, "type") && + item.at("type") == "message" + ) { + // #responses_create-input-input_item_list-item-output_message + json chatcmpl_content = json::array(); + + for (const auto & output_text : item.at("content")) { + const std::string type = json_value(output_text, "type", std::string()); + if (type != "output_text") { + throw std::invalid_argument("'type' must be 'output_text'"); + } + if (!exists_and_is_string(output_text, "text")) { + throw std::invalid_argument("'Output text' requires 'text'"); + } + // Ignore annotations and logprobs for now + chatcmpl_content.push_back({ + {"text", output_text.at("text")}, + {"type", "text"} + }); + } + + item.erase("status"); + item.erase("type"); + item["content"] = chatcmpl_content; + chatcmpl_messages.push_back(item); + } else if (exists_and_is_string(item, "arguments") && + exists_and_is_string(item, "call_id") && + exists_and_is_string(item, "name") && + exists_and_is_string(item, "type") && + item.at("type") == "function_call" + ) { + // #responses_create-input-input_item_list-item-function_tool_call + chatcmpl_messages.push_back(json { + {"role", "assistant"}, + {"tool_calls", json::array({ json { + {"function", json { + {"arguments", item.at("arguments")}, + {"name", item.at("name")} + }}, + {"id", item.at("call_id")}, + {"type", "function"} + }})}, + }); + } else if (exists_and_is_string(item, "call_id") && + (exists_and_is_string(item, "output") || exists_and_is_array(item, "output")) && + exists_and_is_string(item, "type") && + item.at("type") == "function_call_output" + ) { + // #responses_create-input-input_item_list-item-function_tool_call_output + if (item.at("output").is_string()) { + chatcmpl_messages.push_back(json { + {"content", item.at("output")}, + {"role", "tool"}, + {"tool_call_id", item.at("call_id")} + }); + } else { + json chatcmpl_outputs = item.at("output"); + for (json & chatcmpl_output : chatcmpl_outputs) { + if (!chatcmpl_output.contains("type") || chatcmpl_output.at("type") != "input_text") { + throw std::invalid_argument("Output of tool call should be 'Input text'"); + } + chatcmpl_output["type"] = "text"; + } + chatcmpl_messages.push_back(json { + {"content", chatcmpl_outputs}, + {"role", "tool"}, + {"tool_call_id", item.at("call_id")} + }); + } } else { - throw std::invalid_argument("'content' must be a string or array of objects"); - } - - const std::string role = json_value(input_message, "role", std::string()); - if (role != "user" && role != "assistant" && role != "system" && role != "developer") { - throw std::invalid_argument("'role' must be one of user, assistant, system, or developer"); - } - - if (input_message.contains("type") && input_message.at("type") != "message") { - throw std::invalid_argument("If 'type' is defined, it should be 'message'"); + throw std::invalid_argument("Cannot determine type of 'item'"); } } } else { @@ -1172,6 +1260,30 @@ json 
convert_responses_to_chatcmpl(const json & response_body) {
     chatcmpl_body.erase("input");
     chatcmpl_body["messages"] = chatcmpl_messages;
 
+    if (response_body.contains("tools")) {
+        if (!response_body.at("tools").is_array()) {
+            throw std::invalid_argument("'tools' must be an array of objects");
+        }
+        json chatcmpl_tools = json::array();
+        for (json resp_tool : response_body.at("tools")) {
+            json chatcmpl_tool;
+
+            if (json_value(resp_tool, "type", std::string()) != "function") {
+                throw std::invalid_argument("'type' of tool must be 'function'");
+            }
+            resp_tool.erase("type");
+            chatcmpl_tool["type"] = "function";
+
+            if (!resp_tool.contains("strict")) {
+                resp_tool["strict"] = true;
+            }
+            chatcmpl_tool["function"] = resp_tool;
+            chatcmpl_tools.push_back(chatcmpl_tool);
+        }
+        chatcmpl_body.erase("tools");
+        chatcmpl_body["tools"] = chatcmpl_tools;
+    }
+
     if (response_body.contains("max_output_tokens")) {
         chatcmpl_body.erase("max_output_tokens");
         chatcmpl_body["max_tokens"] = response_body["max_output_tokens"];
diff --git a/tools/server/server-task.cpp b/tools/server/server-task.cpp
index f8093677d5..f174531ee4 100644
--- a/tools/server/server-task.cpp
+++ b/tools/server/server-task.cpp
@@ -851,44 +851,69 @@ json server_task_result_cmpl_final::to_json_oaicompat_resp() {
 
 json server_task_result_cmpl_final::to_json_oaicompat_resp_stream() {
     json server_sent_events = json::array();
+    json output = json::array();
 
-    server_sent_events.push_back(json {
-        {"event", "response.output_text.done"},
-        {"data", json {
-            {"type", "response.output_text.done"},
-            {"text", oaicompat_msg.content}
-        }}
-    });
+    for (const common_chat_tool_call & tool_call : oaicompat_msg.tool_calls) {
+        server_sent_events.push_back(json {
+            {"event", "response.output_item.done"},
+            {"data", json {
+                {"type", "response.output_item.done"},
+                {"item", json {
+                    {"type", "function_call"},
+                    {"status", "completed"},
+                    {"arguments", tool_call.arguments},
+                    {"call_id", "call_dummy_id"},
+                    {"name", tool_call.name}
+                }}
+            }}
+        });
+        output.push_back({
+            {"type", "function_call"},
+            {"status", "completed"},
+            {"arguments", tool_call.arguments},
+            {"name", tool_call.name}
+        });
+    }
 
-    const json part = {
-        {"type", "output_text"},
-        {"annotations", json::array()},
-        {"logprobs", json::array()},
-        {"text", oaicompat_msg.content}
-    };
+    if (oaicompat_msg.content != "") {
+        server_sent_events.push_back(json {
+            {"event", "response.output_text.done"},
+            {"data", json {
+                {"type", "response.output_text.done"},
+                {"text", oaicompat_msg.content}
+            }}
+        });
 
-    server_sent_events.push_back(json {
-        {"event", "response.content_part.done"},
-        {"data", json {
-            {"type", "response.content_part.done"},
-            {"part", part}
-        }}
-    });
+        const json part = {
+            {"type", "output_text"},
+            {"annotations", json::array()},
+            {"logprobs", json::array()},
+            {"text", oaicompat_msg.content}
+        };
 
-    const json item = {
-        {"type", "message"},
-        {"status", "completed"},
-        {"content", json::array({part})},
-        {"role", "assistant"}
-    };
+        server_sent_events.push_back(json {
+            {"event", "response.content_part.done"},
+            {"data", json {
+                {"type", "response.content_part.done"},
+                {"part", part}
+            }}
+        });
 
-    server_sent_events.push_back(json {
-        {"event", "response.output_item.done"},
-        {"data", json {
-            {"type", "response.output_item.done"},
-            {"item", item}
-        }}
-    });
+        const json item = {
+            {"type", "message"},
+            {"status", "completed"},
+            {"content", json::array({part})},
+            {"role", "assistant"}
+        };
+
+        server_sent_events.push_back(json {
+            {"event", "response.output_item.done"},
+            {"data", json {
+                {"type", "response.output_item.done"},
+                {"item", item}
+            }}
+        });
+        output.push_back(item);
+    }
 
     std::time_t t = std::time(0);
     server_sent_events.push_back(json {
         {"event", "response.completed"},
@@ -896,11 +921,12 @@ json server_task_result_cmpl_final::to_json_oaicompat_resp_stream() {
         {"data", json {
             {"type", "response.completed"},
             {"response", json {
+                {"id", "resp_dummy_id"},
                 {"object", "response"},
                 {"created_at", t},
                 {"status", "completed"},
                 {"model", oaicompat_model},
-                {"output", json::array({item})},
+                {"output", output},
                 {"usage", json {
                     {"input_tokens", n_prompt_tokens},
                     {"output_tokens", n_decoded},
@@ -1191,6 +1217,28 @@ json server_task_result_cmpl_partial::to_json_oaicompat_resp() {
                 }}
             });
         }
+        if (!diff.tool_call_delta.name.empty()) {
+            deltas.push_back(json {
+                {"event", "response.output_item.added"},
+                {"data", json {
+                    {"type", "response.output_item.added"},
+                    {"item", json {
+                        {"type", "function_call"},
+                        {"status", "in_progress"},
+                        {"name", diff.tool_call_delta.name}
+                    }}
+                }}
+            });
+        }
+        if (!diff.tool_call_delta.arguments.empty()) {
+            deltas.push_back(json {
+                {"event", "response.function_call_arguments.delta"},
+                {"data", json {
+                    {"type", "response.function_call_arguments.delta"},
+                    {"delta", diff.tool_call_delta.arguments}
+                }}
+            });
+        }
         if (!diff.content_delta.empty()) {
             deltas.push_back(json {
                 {"event", "response.output_text.delta",