From 1df28c40539df2239db91d62d39c54a49bbbcffa Mon Sep 17 00:00:00 2001
From: openingnow <>
Date: Tue, 30 Dec 2025 05:44:08 +0000
Subject: [PATCH 1/5] from previous PR

---
 requirements/requirements-tool_bench.txt      |   2 +-
 tools/server/server-common.cpp                |  60 +++++++
 tools/server/server-common.h                  |   5 +
 tools/server/server-context.cpp               |  87 ++++++++++-
 tools/server/server-context.h                 |   1 +
 tools/server/server-task.cpp                  | 147 ++++++++++++++++++
 tools/server/server-task.h                    |   7 +
 tools/server/server.cpp                       |   2 +
 tools/server/tests/requirements.txt           |   2 +-
 .../tests/unit/test_compat_oai_responses.py   |  48 ++++++
 10 files changed, 352 insertions(+), 9 deletions(-)
 create mode 100644 tools/server/tests/unit/test_compat_oai_responses.py

diff --git a/requirements/requirements-tool_bench.txt b/requirements/requirements-tool_bench.txt
index f7912aff72..3bb74fb9d0 100644
--- a/requirements/requirements-tool_bench.txt
+++ b/requirements/requirements-tool_bench.txt
@@ -3,7 +3,7 @@ pytest~=8.3.3
 huggingface_hub>=0.34.0,<1.0
 matplotlib~=3.10.0
 numpy~=1.26.4
-openai~=1.55.3
+openai~=2.14.0
 pandas~=2.2.3
 prometheus-client~=0.20.0
 requests~=2.32.3
diff --git a/tools/server/server-common.cpp b/tools/server/server-common.cpp
index b02afaefda..96ec86edbc 100644
--- a/tools/server/server-common.cpp
+++ b/tools/server/server-common.cpp
@@ -1070,6 +1070,48 @@ json oaicompat_chat_params_parse(
     return llama_params;
 }
 
+json convert_responses_to_chatcmpl(const json & body) {
+    if (!body.contains("input")) {
+        throw std::invalid_argument("'input' is required");
+    }
+    if (!json_value(body, "previous_response_id", std::string{}).empty()) {
+        throw std::invalid_argument("llama.cpp does not support 'previous_response_id'.");
+    }
+
+    const json input_value = body.at("input");
+    json chatcmpl_messages = json::array();
+
+    if (input_value.is_array()) {
+        chatcmpl_messages = input_value;
+    } else if (input_value.is_string()) {
+        chatcmpl_messages.push_back({
+            {"role", "user"},
+            {"content", input_value},
+        });
+    } else {
+        std::invalid_argument("'input' must be a string or array of objects");
+    }
+
+    const std::string instructions = json_value(body, "instructions", std::string{});
+    if (instructions != "") {
+        chatcmpl_messages.push_back({
+            {"role", "system"},
+            {"content", instructions},
+        });
+    }
+
+    json chatcmpl_body = body;
+    chatcmpl_body.erase("input");
+    chatcmpl_body["messages"] = chatcmpl_messages;
+
+    if (body.contains("max_output_tokens")) {
+        chatcmpl_body.erase("max_output_tokens");
+        chatcmpl_body["max_tokens"] = body["max_output_tokens"];
+    }
+
+    return chatcmpl_body;
+}
+
 json convert_anthropic_to_oai(const json & body) {
     json oai_body;
 
@@ -1478,6 +1520,24 @@ std::string format_oai_sse(const json & data) {
     return ss.str();
 }
 
+std::string format_oai_resp_sse(const json & data) {
+    std::ostringstream ss;
+    auto send_single = [&ss](const json & event_obj) {
+        ss << "event: " << event_obj.at("event").get<std::string>() << "\n";
+        ss << "data: " << safe_json_to_str(event_obj.at("data")) << "\n\n";
+    };
+
+    if (data.is_array()) {
+        for (const auto & item : data) {
+            send_single(item);
+        }
+    } else {
+        send_single(data);
+    }
+
+    return ss.str();
+}
+
 std::string format_anthropic_sse(const json & data) {
     std::ostringstream ss;
 
diff --git a/tools/server/server-common.h b/tools/server/server-common.h
index 152a2a3c46..5827a6fc4d 100644
--- a/tools/server/server-common.h
+++ b/tools/server/server-common.h
@@ -295,6 +295,9 @@ json oaicompat_chat_params_parse(
     const oaicompat_parser_options & opt,
     std::vector<raw_buffer> & out_files);
 
+// convert OpenAI Responses API format to OpenAI Chat Completions API format
+json convert_responses_to_chatcmpl(const json & body);
+
 // convert Anthropic Messages API format to OpenAI Chat Completions API format
 json convert_anthropic_to_oai(const json & body);
 
@@ -332,6 +335,8 @@ std::string tokens_to_output_formatted_string(const llama_context * ctx, const l
 // note: if data is a json array, it will be sent as multiple events, one per item
 std::string format_oai_sse(const json & data);
 
+std::string format_oai_resp_sse(const json & data);
+
 // format Anthropic-style SSE with event types
 std::string format_anthropic_sse(const json & data);
 
diff --git a/tools/server/server-context.cpp b/tools/server/server-context.cpp
index 9726e02522..cc2016dff2 100644
--- a/tools/server/server-context.cpp
+++ b/tools/server/server-context.cpp
@@ -2981,6 +2981,58 @@ std::unique_ptr<server_http_res> server_routes::handle_completions_impl(
         json first_result_json = first_result->to_json();
         if (res_type == TASK_RESPONSE_TYPE_ANTHROPIC) {
             res->data = format_anthropic_sse(first_result_json);
+        } else if (res_type == TASK_RESPONSE_TYPE_OAI_RESP) {
+            const json created = {
+                {"event", "response.created"},
+                {"data", json {
+                    {"type", "response.created"},
+                    {"response", json {
+                        {"object", "response"},
+                        {"status", "in_progress"}
+                    }}
+                }}
+            };
+            const json in_progress = {
+                {"event", "response.in_progress"},
+                {"data", json {
+                    {"type", "response.in_progress"},
+                    {"response", json {
+                        {"object", "response"},
+                        {"status", "in_progress"}
+                    }}
+                }}
+            };
+            const json output_item_added = {
+                {"event", "response.output_item.added"},
+                {"data", json {
+                    {"type", "response.output_item.added"},
+                    {"item", json {
+                        {"type", "message"},
+                        {"status", "in_progress"},
+                        {"content", json::array()},
+                        {"role", "assistant"}
+                    }}
+                }}
+            };
+            const json content_part_added = {
+                {"event", "response.content_part.added"},
+                {"data", json {
+                    {"type", "response.content_part.added"},
+                    {"part", json {
+                        {"type", "output_text"},
+                        {"text", ""}
+                    }}
+                }}
+            };
+
+            const json initial_events = json::array({
+                created,
+                in_progress,
+                output_item_added,
+                content_part_added
+            });
+
+            res->data = format_oai_resp_sse(initial_events) + format_oai_resp_sse(first_result_json);
         } else {
             res->data = format_oai_sse(first_result_json);
         }
@@ -3015,13 +3067,16 @@ std::unique_ptr<server_http_res> server_routes::handle_completions_impl(
         // check if there is more data
         if (!rd.has_next()) {
-            if (res_type == TASK_RESPONSE_TYPE_ANTHROPIC) {
-                // Anthropic doesn't send [DONE], message_stop was already sent
-                output = "";
-            } else if (res_type != TASK_RESPONSE_TYPE_NONE) {
-                output = "data: [DONE]\n\n";
-            } else {
-                output = "";
+            switch (res_type) {
+                case TASK_RESPONSE_TYPE_NONE:
+                case TASK_RESPONSE_TYPE_OAI_RESP:
+                case TASK_RESPONSE_TYPE_ANTHROPIC:
+                    output = "";
+                    break;
+
+                default:
+                    output = "data: [DONE]\n\n";
+                    break;
             }
             SRV_DBG("%s", "all results received, terminating stream\n");
             return false; // no more data, terminate
@@ -3049,6 +3104,8 @@ std::unique_ptr<server_http_res> server_routes::handle_completions_impl(
             json res_json = result->to_json();
             if (res_type == TASK_RESPONSE_TYPE_ANTHROPIC) {
                 output = format_anthropic_sse(res_json);
+            } else if (res_type == TASK_RESPONSE_TYPE_OAI_RESP) {
+                output = format_oai_resp_sse(res_json);
             } else {
                 output = format_oai_sse(res_json);
             }
@@ -3479,6 +3536,22 @@ void server_routes::init_routes() {
             TASK_RESPONSE_TYPE_OAI_CHAT);
     };
 
+    this->post_responses_oai = [this](const server_http_req & req) {
+        auto res = create_response();
+        std::vector<raw_buffer> files;
+        json body = convert_responses_to_chatcmpl(json::parse(req.body));
+        json body_parsed = oaicompat_chat_params_parse(
+            body,
+            ctx_server.oai_parser_opt,
+            files);
+        return handle_completions_impl(
+            req,
+            SERVER_TASK_TYPE_COMPLETION,
+            body_parsed,
+            files,
+            TASK_RESPONSE_TYPE_OAI_RESP);
+    };
+
     this->post_anthropic_messages = [this](const server_http_req & req) {
         auto res = create_response();
         std::vector<raw_buffer> files;
diff --git a/tools/server/server-context.h b/tools/server/server-context.h
index 09bec15ae1..3bf81b447e 100644
--- a/tools/server/server-context.h
+++ b/tools/server/server-context.h
@@ -95,6 +95,7 @@ struct server_routes {
     server_http_context::handler_t post_completions;
     server_http_context::handler_t post_completions_oai;
     server_http_context::handler_t post_chat_completions;
+    server_http_context::handler_t post_responses_oai;
     server_http_context::handler_t post_anthropic_messages;
     server_http_context::handler_t post_anthropic_count_tokens;
     server_http_context::handler_t post_apply_template;
diff --git a/tools/server/server-task.cpp b/tools/server/server-task.cpp
index 22f5b2059c..f8093677d5 100644
--- a/tools/server/server-task.cpp
+++ b/tools/server/server-task.cpp
@@ -578,6 +578,8 @@ json server_task_result_cmpl_final::to_json() {
             return to_json_oaicompat();
         case TASK_RESPONSE_TYPE_OAI_CHAT:
             return stream ? to_json_oaicompat_chat_stream() : to_json_oaicompat_chat();
+        case TASK_RESPONSE_TYPE_OAI_RESP:
+            return stream ? to_json_oaicompat_resp_stream() : to_json_oaicompat_resp();
         case TASK_RESPONSE_TYPE_ANTHROPIC:
             return stream ? to_json_anthropic_stream() : to_json_anthropic();
         default:
@@ -795,6 +797,122 @@ json server_task_result_cmpl_final::to_json_oaicompat_chat_stream() {
     return deltas;
 }
 
+json server_task_result_cmpl_final::to_json_oaicompat_resp() {
+    common_chat_msg msg;
+    if (!oaicompat_msg.empty()) {
+        msg = oaicompat_msg;
+    } else {
+        msg.role = "assistant";
+        msg.content = content;
+    }
+
+    const json reasoning = {
+        {"type", "reasoning"},
+        {"summary", json::array({json {
+            {"type", "summary_text"},
+            {"text", msg.reasoning_content}
+        }})}
+    };
+    const json message = {
+        {"type", "message"},
+        {"status", "completed"},
+        {"content", json::array({json {
+            {"type", "output_text"},
+            {"annotations", json::array()},
+            {"logprobs", json::array()},
+            {"text", msg.content}
+        }})},
+        {"role", msg.role}
+    };
+
+    std::time_t t = std::time(0);
+    json res = {
+        {"object", "response"},
+        {"created_at", t},
+        {"status", "completed"},
+        {"model", oaicompat_model},
+        {"output", json::array({reasoning, message})},
+        {"usage", json {
+            {"input_tokens", n_prompt_tokens},
+            {"output_tokens", n_decoded},
+            {"total_tokens", n_decoded + n_prompt_tokens}
+        }},
+    };
+
+    if (verbose) {
+        res["__verbose"] = to_json_non_oaicompat();
+    }
+    if (timings.prompt_n >= 0) {
+        res.push_back({"timings", timings.to_json()});
+    }
+
+    return res;
+}
+
+json server_task_result_cmpl_final::to_json_oaicompat_resp_stream() {
+    json server_sent_events = json::array();
+
+    server_sent_events.push_back(json {
+        {"event", "response.output_text.done"},
+        {"data", json {
+            {"type", "response.output_text.done"},
+            {"text", oaicompat_msg.content}
+        }}
+    });
+
+    const json part = {
+        {"type", "output_text"},
+        {"annotations", json::array()},
+        {"logprobs", json::array()},
+        {"text", oaicompat_msg.content}
+    };
+
+    server_sent_events.push_back(json {
+        {"event", "response.content_part.done"},
+        {"data", json {
+            {"type", "response.content_part.done"},
+            {"part", part}
+        }}
+    });
+
+    const json item = {
+        {"type", "message"},
{"status", "completed"}, + {"content", json::array({part})}, + {"role", "assistant"} + }; + + server_sent_events.push_back(json { + {"event", "response.output_item.done"}, + {"data", json { + {"type", "response.output_item.done"}, + {"item", item} + }} + }); + + std::time_t t = std::time(0); + server_sent_events.push_back(json { + {"event", "response.completed"}, + {"data", json { + {"type", "response.completed"}, + {"response", json { + {"object", "response"}, + {"created_at", t}, + {"status", "completed"}, + {"model", oaicompat_model}, + {"output", json::array({item})}, + {"usage", json { + {"input_tokens", n_prompt_tokens}, + {"output_tokens", n_decoded}, + {"total_tokens", n_decoded + n_prompt_tokens} + }} + }}, + }} + }); + + return server_sent_events; +} + json server_task_result_cmpl_final::to_json_anthropic() { std::string stop_reason = "max_tokens"; if (stop == STOP_TYPE_WORD || stop == STOP_TYPE_EOS) { @@ -989,6 +1107,8 @@ json server_task_result_cmpl_partial::to_json() { return to_json_oaicompat(); case TASK_RESPONSE_TYPE_OAI_CHAT: return to_json_oaicompat_chat(); + case TASK_RESPONSE_TYPE_OAI_RESP: + return to_json_oaicompat_resp(); case TASK_RESPONSE_TYPE_ANTHROPIC: return to_json_anthropic(); default: @@ -1058,6 +1178,33 @@ json server_task_result_cmpl_partial::to_json_oaicompat() { return res; } +json server_task_result_cmpl_partial::to_json_oaicompat_resp() { + std::vector deltas; + + for (const common_chat_msg_diff & diff : oaicompat_msg_diffs) { + if (!diff.reasoning_content_delta.empty()) { + deltas.push_back(json { + {"event", "response.reasoning_text.delta"}, + {"data", json { + {"type", "response.reasoning_text.delta"}, + {"delta", diff.reasoning_content_delta} + }} + }); + } + if (!diff.content_delta.empty()) { + deltas.push_back(json { + {"event", "response.output_text.delta"}, + {"data", json { + {"type", "response.output_text.delta"}, + {"delta", diff.content_delta} + }} + }); + } + } + + return deltas; +} + json server_task_result_cmpl_partial::to_json_oaicompat_chat() { bool first = n_decoded == 1; std::time_t t = std::time(0); diff --git a/tools/server/server-task.h b/tools/server/server-task.h index 687770de5e..3c411910d1 100644 --- a/tools/server/server-task.h +++ b/tools/server/server-task.h @@ -33,6 +33,7 @@ enum task_response_type { TASK_RESPONSE_TYPE_NONE, // llama.cpp native format TASK_RESPONSE_TYPE_OAI_CHAT, TASK_RESPONSE_TYPE_OAI_CMPL, + TASK_RESPONSE_TYPE_OAI_RESP, TASK_RESPONSE_TYPE_OAI_EMBD, TASK_RESPONSE_TYPE_ANTHROPIC, }; @@ -311,6 +312,10 @@ struct server_task_result_cmpl_final : server_task_result { json to_json_oaicompat_chat_stream(); + json to_json_oaicompat_resp(); + + json to_json_oaicompat_resp_stream(); + json to_json_anthropic(); json to_json_anthropic_stream(); @@ -354,6 +359,8 @@ struct server_task_result_cmpl_partial : server_task_result { json to_json_oaicompat_chat(); + json to_json_oaicompat_resp(); + json to_json_anthropic(); }; diff --git a/tools/server/server.cpp b/tools/server/server.cpp index 0fbc7b6d35..f73eb12763 100644 --- a/tools/server/server.cpp +++ b/tools/server/server.cpp @@ -140,6 +140,7 @@ int main(int argc, char ** argv, char ** envp) { routes.post_completions = models_routes->proxy_post; routes.post_completions_oai = models_routes->proxy_post; routes.post_chat_completions = models_routes->proxy_post; + routes.post_responses_oai = models_routes->proxy_post; routes.post_anthropic_messages = models_routes->proxy_post; routes.post_anthropic_count_tokens = models_routes->proxy_post; routes.post_infill = 
@@ -176,6 +177,7 @@
     ctx_http.post("/chat/completions", ex_wrapper(routes.post_chat_completions));
     ctx_http.post("/v1/chat/completions", ex_wrapper(routes.post_chat_completions));
     ctx_http.post("/api/chat", ex_wrapper(routes.post_chat_completions)); // ollama specific endpoint
+    ctx_http.post("/v1/responses", ex_wrapper(routes.post_responses_oai));
     ctx_http.post("/v1/messages", ex_wrapper(routes.post_anthropic_messages)); // anthropic messages API
     ctx_http.post("/v1/messages/count_tokens", ex_wrapper(routes.post_anthropic_count_tokens)); // anthropic token counting
     ctx_http.post("/infill", ex_wrapper(routes.post_infill));
diff --git a/tools/server/tests/requirements.txt b/tools/server/tests/requirements.txt
index 4ea7f19f77..ca79d025ed 100644
--- a/tools/server/tests/requirements.txt
+++ b/tools/server/tests/requirements.txt
@@ -2,7 +2,7 @@ aiohttp~=3.9.3
 pytest~=8.3.3
 huggingface_hub>=0.34.0,<1.0
 numpy~=1.26.4
-openai~=1.55.3
+openai~=2.14.0
 prometheus-client~=0.20.0
 requests~=2.32.3
 wget~=3.2
diff --git a/tools/server/tests/unit/test_compat_oai_responses.py b/tools/server/tests/unit/test_compat_oai_responses.py
new file mode 100644
index 0000000000..e168f4562d
--- /dev/null
+++ b/tools/server/tests/unit/test_compat_oai_responses.py
@@ -0,0 +1,48 @@
+import pytest
+from openai import OpenAI
+from utils import *
+
+server: ServerProcess
+
+@pytest.fixture(autouse=True)
+def create_server():
+    global server
+    server = ServerPreset.tinyllama2()
+
+def test_responses_with_openai_library():
+    global server
+    server.start()
+    client = OpenAI(api_key="dummy", base_url=f"http://{server.server_host}:{server.server_port}/v1")
+    res = client.responses.create(
+        model="gpt-4.1",
+        input=[
+            {"role": "system", "content": "Book"},
+            {"role": "user", "content": "What is the best book"},
+        ],
+        max_output_tokens=8,
+        temperature=0.8,
+    )
+    assert match_regex("(Suddenly)+", res.output_text)
+
+def test_responses_stream_with_openai_library():
+    global server
+    server.start()
+    client = OpenAI(api_key="dummy", base_url=f"http://{server.server_host}:{server.server_port}/v1")
+    stream = client.responses.create(
+        model="gpt-4.1",
+        input=[
+            {"role": "system", "content": "Book"},
+            {"role": "user", "content": "What is the best book"},
+        ],
+        max_output_tokens=8,
+        temperature=0.8,
+        stream=True,
+    )
+
+    gathered_text = ''
+    for r in stream:
+        if r.type == "response.output_text.delta":
+            gathered_text += r.delta
+        if r.type == "response.completed":
+            assert gathered_text == r.response.output_text
+            assert match_regex("(Suddenly)+", r.response.output_text)
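
Usage sketch (not part of the patch): with patch 1 applied, the new /v1/responses
endpoint can also be exercised without the openai client. Host and port below are
placeholders for a locally running llama-server, and the model name is not used for
routing. The response shape (an "output" array holding a "reasoning" item and a
"message" item, plus input/output token usage) follows to_json_oaicompat_resp() above.

    import requests

    res = requests.post(
        "http://localhost:8080/v1/responses",  # placeholder host/port
        json={
            "model": "any",                    # ignored by llama.cpp
            "instructions": "Answer briefly.",
            "input": "What is the best book?",
            "max_output_tokens": 32,           # converted to 'max_tokens' internally
        },
    )
    body = res.json()
    message = next(item for item in body["output"] if item["type"] == "message")
    print(message["content"][0]["text"])
    print(body["usage"])  # input_tokens / output_tokens / total_tokens
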
From e4571a79a6f5a42a837c46bdc426b01e16a57b9c Mon Sep 17 00:00:00 2001
From: openingnow <>
Date: Tue, 30 Dec 2025 05:44:41 +0000
Subject: [PATCH 2/5] Make instructions (system message) the first message

---
 tools/server/server-common.cpp | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/tools/server/server-common.cpp b/tools/server/server-common.cpp
index 96ec86edbc..0605c4ecae 100644
--- a/tools/server/server-common.cpp
+++ b/tools/server/server-common.cpp
@@ -1081,18 +1081,7 @@ json convert_responses_to_chatcmpl(const json & body) {
     const json input_value = body.at("input");
     json chatcmpl_messages = json::array();
 
-    if (input_value.is_array()) {
-        chatcmpl_messages = input_value;
-    } else if (input_value.is_string()) {
-        chatcmpl_messages.push_back({
-            {"role", "user"},
-            {"content", input_value},
-        });
-    } else {
-        std::invalid_argument("'input' must be a string or array of objects");
-    }
-
-    const std::string instructions = json_value(body, "instructions", std::string{});
+    const std::string instructions = json_value(body, "instructions", std::string());
     if (instructions != "") {
         chatcmpl_messages.push_back({
             {"role", "system"},
@@ -1100,6 +1089,17 @@ json convert_responses_to_chatcmpl(const json & body) {
         });
     }
 
+    if (input_value.is_string()) {
+        chatcmpl_messages.push_back({
+            {"role", "user"},
+            {"content", input_value},
+        });
+    } else if (input_value.is_array()) {
+        chatcmpl_messages = input_value;
+    } else {
+        throw std::invalid_argument("'input' must be a string or array of objects");
+    }
+
     json chatcmpl_body = body;
     chatcmpl_body.erase("input");
     chatcmpl_body["messages"] = chatcmpl_messages;
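
Sketch of the resulting ordering (illustrative Python mirror of the C++ above, not
part of the patch): instructions now land first, so they become the system message
at index 0. Note that the array branch still overwrites the message list at this
point in the series; patch 3 below reworks that branch into a per-item conversion.

    def convert_sketch(body: dict) -> dict:
        # mirrors convert_responses_to_chatcmpl() as of patch 2
        messages = []
        if body.get("instructions"):
            messages.append({"role": "system", "content": body["instructions"]})
        inp = body["input"]
        if isinstance(inp, str):
            messages.append({"role": "user", "content": inp})
        elif isinstance(inp, list):
            messages = inp  # still drops the system message here (see patch 3)
        else:
            raise ValueError("'input' must be a string or array of objects")
        out = {k: v for k, v in body.items() if k != "input"}
        out["messages"] = messages
        if "max_output_tokens" in out:
            out["max_tokens"] = out.pop("max_output_tokens")
        return out

    assert convert_sketch({"instructions": "Be brief.", "input": "Hi"})["messages"][0]["role"] == "system"
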
From d48e3418fdebda7ae5f41d548600f59462a55a28 Mon Sep 17 00:00:00 2001
From: openingnow <>
Date: Tue, 30 Dec 2025 05:45:32 +0000
Subject: [PATCH 3/5] Convert [input_message] content parts (text/image/file)

---
 tools/server/server-common.cpp | 70 +++++++++++++++++++++++++++++++++-
 1 file changed, 69 insertions(+), 1 deletion(-)

diff --git a/tools/server/server-common.cpp b/tools/server/server-common.cpp
index 0605c4ecae..32b4e67dcf 100644
--- a/tools/server/server-common.cpp
+++ b/tools/server/server-common.cpp
@@ -1095,7 +1095,75 @@ json convert_responses_to_chatcmpl(const json & body) {
             {"content", input_value},
         });
     } else if (input_value.is_array()) {
-        chatcmpl_messages = input_value;
+        for (const auto & input_message : input_value) {
+            if (!input_message.contains("content")) {
+                throw std::invalid_argument("'content' is required");
+            }
+            const json content = input_message.at("content");
+
+            if (content.is_string()) {
+                chatcmpl_messages.push_back(input_message);
+            } else if (content.is_array()) {
+                json new_content = json::array();
+
+                for (const auto & input_item : content) {
+                    const std::string type = json_value(input_item, "type", std::string());
+
+                    if (type == "input_text") {
+                        if (!input_item.contains("text")) {
+                            throw std::invalid_argument("'Input text' requires 'text'");
+                        }
+                        new_content.push_back({
+                            {"text", input_item.at("text")},
+                            {"type", "text"}
+                        });
+                    } else if (type == "input_image") {
+                        // While `detail` is marked as required,
+                        // it has a default value ("auto") and can be omitted.
+
+                        if (!input_item.contains("image_url")) {
+                            throw std::invalid_argument("'image_url' is required");
+                        }
+                        new_content.push_back({
+                            {"image_url", json {{"url", input_item.at("image_url")}}},
+                            {"type", "image_url"}
+                        });
+                    } else if (type == "input_file") {
+                        if (input_item.contains("file_url")) {
+                            // chat completion API does not support file_url
+                            throw std::invalid_argument("'file_url' is not supported");
+                        }
+                        if (!input_item.contains("file_data") || !input_item.contains("filename")) {
+                            throw std::invalid_argument("Both 'file_data' and 'filename' are required");
+                        }
+                        new_content.push_back({
+                            {"file", json {
+                                {"file_data", input_item.at("file_data")},
+                                {"filename", input_item.at("filename")}}},
+                            {"type", "file"}
+                        });
+                    } else {
+                        throw std::invalid_argument("'type' must be one of 'input_text', 'input_image', or 'input_file'");
+                    }
+                }
+
+                json new_input_message = input_message;
+                new_input_message["content"] = new_content;
+
+                chatcmpl_messages.push_back(new_input_message);
+            } else {
+                throw std::invalid_argument("'content' must be a string or array of objects");
+            }
+
+            const std::string role = json_value(input_message, "role", std::string());
+            if (role != "user" && role != "assistant" && role != "system" && role != "developer") {
+                throw std::invalid_argument("'role' must be one of user, assistant, system, or developer");
+            }
+
+            if (input_message.contains("type") && input_message.at("type") != "message") {
+                throw std::invalid_argument("If 'type' is defined, it should be 'message'");
+            }
+        }
     } else {
         throw std::invalid_argument("'input' must be a string or array of objects");
     }
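
The part-type mapping patch 3 implements, summarized as an illustrative Python
sketch (not part of the patch; error handling shortened):

    def convert_part(p: dict) -> dict:
        t = p.get("type")
        if t == "input_text":
            return {"type": "text", "text": p["text"]}
        if t == "input_image":
            # 'detail' defaults to "auto" and may be omitted
            return {"type": "image_url", "image_url": {"url": p["image_url"]}}
        if t == "input_file":
            if "file_url" in p:
                raise ValueError("'file_url' is not supported")
            return {"type": "file",
                    "file": {"file_data": p["file_data"], "filename": p["filename"]}}
        raise ValueError("'type' must be one of 'input_text', 'input_image', or 'input_file'")
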
chatcmpl_body["max_tokens"] = body["max_output_tokens"]; + chatcmpl_body["max_tokens"] = response_body["max_output_tokens"]; } return chatcmpl_body; From 9f09745e05e427a85057302ed0458627b4f2b512 Mon Sep 17 00:00:00 2001 From: openingnow <> Date: Thu, 1 Jan 2026 00:26:37 +0000 Subject: [PATCH 5/5] Initial tool call support --- tools/server/server-common.cpp | 162 ++++++++++++++++++++++++++++----- tools/server/server-task.cpp | 116 ++++++++++++++++------- 2 files changed, 219 insertions(+), 59 deletions(-) diff --git a/tools/server/server-common.cpp b/tools/server/server-common.cpp index fcf50c8eae..3772a540bd 100644 --- a/tools/server/server-common.cpp +++ b/tools/server/server-common.cpp @@ -1090,30 +1090,52 @@ json convert_responses_to_chatcmpl(const json & response_body) { } if (input_value.is_string()) { + // #responses_create-input-text_input chatcmpl_messages.push_back({ {"role", "user"}, {"content", input_value}, }); } else if (input_value.is_array()) { - for (const auto & input_message : input_value) { - if (!input_message.contains("content")) { - throw std::invalid_argument("'content' is required"); + // #responses_create-input-input_item_list + + const auto exists_and_is_array = [](const json & j, const char * key) -> bool { + return j.contains(key) && j.at(key).is_array(); + }; + const auto exists_and_is_string = [](const json & j, const char * key) -> bool { + return j.contains(key) && j.at(key).is_string(); + }; + + for (json item : input_value) { + if (exists_and_is_string(item, "content")) { + // #responses_create-input-input_item_list-input_message-content-text_input + // Only "Input message" contains item["content"]::string + // After converting item["content"]::string to item["content"]::array, + // we can treat "Input message" as sum of "Item-Input message" and "Item-Output message" + item["content"] = json::array({ + json { + {"text", item.at("content")}, + {"type", "input_text"} + } + }); } - const json content = input_message.at("content"); - if (content.is_string()) { - chatcmpl_messages.push_back(input_message); - } else if (content.is_array()) { - json new_content = json::array(); + if (exists_and_is_array(item, "content") && + exists_and_is_string(item, "role") && + (item.at("role") == "user" || + item.at("role") == "system" || + item.at("role") == "developer") + ) { + // #responses_create-input-input_item_list-item-input_message + json chatcmpl_content = json::array(); - for (const auto & input_item : content) { + for (const json & input_item : item.at("content")) { const std::string type = json_value(input_item, "type", std::string()); if (type == "input_text") { if (!input_item.contains("text")) { throw std::invalid_argument("'Input text' requires 'text'"); } - new_content.push_back({ + chatcmpl_content.push_back({ {"text", input_item.at("text")}, {"type", "text"} }); @@ -1124,7 +1146,7 @@ json convert_responses_to_chatcmpl(const json & response_body) { if (!input_item.contains("image_url")) { throw std::invalid_argument("'image_url' is required"); } - new_content.push_back({ + chatcmpl_content.push_back({ {"image_url", json {{"url", input_item.at("image_url")}}}, {"type", "image_url"} }); @@ -1136,7 +1158,7 @@ json convert_responses_to_chatcmpl(const json & response_body) { if (!input_item.contains("file_data") || !input_item.contains("filename")) { throw std::invalid_argument("Both 'file_data' and 'filename' are required"); } - new_content.push_back({ + chatcmpl_content.push_back({ {"file", json { {"file_data", input_item.at("file_data")}, {"filename", 
input_item.at("filename")}}}, @@ -1147,21 +1169,87 @@ json convert_responses_to_chatcmpl(const json & response_body) { } } - json new_input_message = input_message; - new_input_message["content"] = new_content; + item["content"] = chatcmpl_content; - chatcmpl_messages.push_back(new_input_message); + chatcmpl_messages.push_back(item); + } else if (exists_and_is_array(item, "content") && + exists_and_is_string(item, "role") && + item.at("role") == "assistant" && + exists_and_is_string(item, "status") && + (item.at("status") == "in_progress" || + item.at("status") == "completed" || + item.at("status") == "incomplete") && + exists_and_is_string(item, "type") && + item.at("type") == "message" + ) { + // #responses_create-input-input_item_list-item-output_message + json chatcmpl_content = json::array(); + + for (const auto & output_text : item.at("content")) { + const std::string type = json_value(output_text, "type", std::string()); + if (type != "output_text") { + throw std::invalid_argument("'type' must be 'output_text'"); + } + if (!exists_and_is_string(output_text, "text")) { + throw std::invalid_argument("'Output text' requires 'text'"); + } + // Ignore annotations and logprobs for now + chatcmpl_content.push_back({ + {"text", output_text.at("text")}, + {"type", "text"} + }); + } + + item.erase("status"); + item.erase("type"); + item["content"] = chatcmpl_content; + chatcmpl_messages.push_back(item); + } else if (exists_and_is_string(item, "arguments") && + exists_and_is_string(item, "call_id") && + exists_and_is_string(item, "name") && + exists_and_is_string(item, "type") && + item.at("type") == "function_call" + ) { + // #responses_create-input-input_item_list-item-function_tool_call + chatcmpl_messages.push_back(json { + {"role", "assistant"}, + {"tool_calls", json::array({ json { + {"function", json { + {"arguments", item.at("arguments")}, + {"name", item.at("name")} + }}, + {"id", item.at("call_id")}, + {"type", "function"} + }})}, + }); + } else if (exists_and_is_string(item, "call_id") && + (exists_and_is_string(item, "output") || exists_and_is_array(item, "output")) && + exists_and_is_string(item, "type") && + item.at("type") == "function_call_output" + ) { + // #responses_create-input-input_item_list-item-function_tool_call_output + if (item.at("output").is_string()) { + chatcmpl_messages.push_back(json { + {"content", item.at("output")}, + {"role", "tool"}, + {"tool_call_id", item.at("call_id")} + }); + } else { + json chatcmpl_outputs = item.at("output"); + for (json & chatcmpl_output : chatcmpl_outputs) { + if (!chatcmpl_output.contains("type") || chatcmpl_output.at("type") != "input_text") { + throw std::invalid_argument("Output of tool call should be 'Input text'"); + } + chatcmpl_output["type"] = "text"; + } + chatcmpl_messages.push_back(json { + {"content", chatcmpl_outputs}, + {"role", "tool"}, + {"tool_call_id", item.at("call_id")} + }); + } } else { - throw std::invalid_argument("'content' must be a string or array of objects"); - } - - const std::string role = json_value(input_message, "role", std::string()); - if (role != "user" && role != "assistant" && role != "system" && role != "developer") { - throw std::invalid_argument("'role' must be one of user, assistant, system, or developer"); - } - - if (input_message.contains("type") && input_message.at("type") != "message") { - throw std::invalid_argument("If 'type' is defined, it should be 'message'"); + throw std::invalid_argument("Cannot determine type of 'item'"); } } } else { @@ -1172,6 +1260,30 @@ json 
convert_responses_to_chatcmpl(const json & response_body) {
     chatcmpl_body.erase("input");
     chatcmpl_body["messages"] = chatcmpl_messages;
 
+    if (response_body.contains("tools")) {
+        if (!response_body.at("tools").is_array()) {
+            throw std::invalid_argument("'tools' must be an array of objects");
+        }
+        json chatcmpl_tools = json::array();
+        for (json resp_tool : response_body.at("tools")) {
+            json chatcmpl_tool;
+
+            if (json_value(resp_tool, "type", std::string()) != "function") {
+                throw std::invalid_argument("'type' of tool must be 'function'");
+            }
+            resp_tool.erase("type");
+            chatcmpl_tool["type"] = "function";
+
+            if (!resp_tool.contains("strict")) {
+                resp_tool["strict"] = true;
+            }
+            chatcmpl_tool["function"] = resp_tool;
+            chatcmpl_tools.push_back(chatcmpl_tool);
+        }
+        chatcmpl_body.erase("tools");
+        chatcmpl_body["tools"] = chatcmpl_tools;
+    }
+
     if (response_body.contains("max_output_tokens")) {
         chatcmpl_body.erase("max_output_tokens");
         chatcmpl_body["max_tokens"] = response_body["max_output_tokens"];
diff --git a/tools/server/server-task.cpp b/tools/server/server-task.cpp
index f8093677d5..f174531ee4 100644
--- a/tools/server/server-task.cpp
+++ b/tools/server/server-task.cpp
@@ -851,44 +851,69 @@ json server_task_result_cmpl_final::to_json_oaicompat_resp() {
 
 json server_task_result_cmpl_final::to_json_oaicompat_resp_stream() {
     json server_sent_events = json::array();
+    json output = json::array();
 
-    server_sent_events.push_back(json {
-        {"event", "response.output_text.done"},
-        {"data", json {
-            {"type", "response.output_text.done"},
-            {"text", oaicompat_msg.content}
-        }}
-    });
+    for (const common_chat_tool_call & tool_call : oaicompat_msg.tool_calls) {
+        server_sent_events.push_back(json {
+            {"event", "response.output_item.done"},
+            {"data", json {
+                {"type", "response.output_item.done"},
+                {"item", json {
+                    {"type", "function_call"},
+                    {"status", "completed"},
+                    {"arguments", tool_call.arguments},
+                    {"call_id", "call_dummy_id"},
+                    {"name", tool_call.name}
+                }}
+            }}
+        });
+        output.push_back({
+            {"type", "function_call"},
+            {"status", "completed"},
+            {"arguments", tool_call.arguments},
+            {"name", tool_call.name}
+        });
+    }
 
-    const json part = {
-        {"type", "output_text"},
-        {"annotations", json::array()},
-        {"logprobs", json::array()},
-        {"text", oaicompat_msg.content}
-    };
+    if (oaicompat_msg.content != "") {
+        server_sent_events.push_back(json {
+            {"event", "response.output_text.done"},
+            {"data", json {
+                {"type", "response.output_text.done"},
+                {"text", oaicompat_msg.content}
+            }}
+        });
 
-    server_sent_events.push_back(json {
-        {"event", "response.content_part.done"},
-        {"data", json {
-            {"type", "response.content_part.done"},
-            {"part", part}
-        }}
-    });
+        const json part = {
+            {"type", "output_text"},
+            {"annotations", json::array()},
+            {"logprobs", json::array()},
+            {"text", oaicompat_msg.content}
+        };
 
-    const json item = {
-        {"type", "message"},
-        {"status", "completed"},
-        {"content", json::array({part})},
-        {"role", "assistant"}
-    };
+        server_sent_events.push_back(json {
+            {"event", "response.content_part.done"},
+            {"data", json {
+                {"type", "response.content_part.done"},
+                {"part", part}
+            }}
+        });
 
-    server_sent_events.push_back(json {
-        {"event", "response.output_item.done"},
-        {"data", json {
-            {"type", "response.output_item.done"},
-            {"item", item}
-        }}
-    });
+        const json item = {
+            {"type", "message"},
+            {"status", "completed"},
+            {"content", json::array({part})},
+            {"role", "assistant"}
+        };
+
+        server_sent_events.push_back(json {
+            {"event", "response.output_item.done"},
+            {"data", json {
+                {"type", "response.output_item.done"},
+                {"item", item}
+            }}
+        });
+        output.push_back(item);
+    }
 
     std::time_t t = std::time(0);
     server_sent_events.push_back(json {
         {"event", "response.completed"},
@@ -896,11 +921,12 @@ json server_task_result_cmpl_final::to_json_oaicompat_resp_stream() {
         {"data", json {
             {"type", "response.completed"},
             {"response", json {
+                {"id", "resp_dummy_id"},
                 {"object", "response"},
                 {"created_at", t},
                 {"status", "completed"},
                 {"model", oaicompat_model},
-                {"output", json::array({item})},
+                {"output", output},
                 {"usage", json {
                     {"input_tokens", n_prompt_tokens},
                     {"output_tokens", n_decoded},
@@ -1191,6 +1217,28 @@ json server_task_result_cmpl_partial::to_json_oaicompat_resp() {
                 }}
             });
         }
+        if (!diff.tool_call_delta.name.empty()) {
+            deltas.push_back(json {
+                {"event", "response.output_item.added"},
+                {"data", json {
+                    {"type", "response.output_item.added"},
+                    {"item", json {
+                        {"type", "function_call"},
+                        {"status", "in_progress"},
+                        {"name", diff.tool_call_delta.name}
+                    }}
+                }}
+            });
+        }
+        if (!diff.tool_call_delta.arguments.empty()) {
+            deltas.push_back(json {
+                {"event", "response.function_call_arguments.delta"},
+                {"data", json {
+                    {"type", "response.function_call_arguments.delta"},
+                    {"delta", diff.tool_call_delta.arguments}
+                }}
+            });
+        }
         if (!diff.content_delta.empty()) {
             deltas.push_back(json {
                 {"event", "response.output_text.delta",