From 323d23eb1c000b601521d7f0199906c769bc6423 Mon Sep 17 00:00:00 2001
From: Piotr Wilkin <piotr.wilkin@syndatis.com>
Date: Mon, 19 Jan 2026 00:50:29 +0100
Subject: [PATCH 01/10] beta

---
 common/arg.cpp                 |  16 ++
 common/chat-parser.cpp         |  13 +-
 common/common.h                |   5 +
 mcp_config.json                |  13 +
 mcp_dummy.py                   |  82 ++++++
 tests/CMakeLists.txt           |   4 +
 tests/test-mcp-integration.cpp |  71 +++++
 tools/cli/cli.cpp              | 196 ++++++++++---
 tools/cli/mcp.hpp              | 499 +++++++++++++++++++++++++++++++++
 9 files changed, 852 insertions(+), 47 deletions(-)
 create mode 100644 mcp_config.json
 create mode 100644 mcp_dummy.py
 create mode 100644 tests/test-mcp-integration.cpp
 create mode 100644 tools/cli/mcp.hpp

diff --git a/common/arg.cpp b/common/arg.cpp
index 5fbc9022c0..7b243abf76 100644
--- a/common/arg.cpp
+++ b/common/arg.cpp
@@ -3784,6 +3784,22 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         }
     ).set_examples({LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_CLI}));
 
+    add_opt(common_arg(
+        {"--mcp-config"}, "FILE",
+        "path to MCP configuration file",
+        [](common_params & params, const std::string & value) {
+            params.mcp_config = value;
+        }
+    ).set_examples({LLAMA_EXAMPLE_CLI}));
+
+    add_opt(common_arg(
+        {"--mcp-yolo"},
+        "auto-approve all MCP tool calls (no user confirmation)",
+        [](common_params & params) {
+            params.mcp_yolo = true;
+        }
+    ).set_examples({LLAMA_EXAMPLE_CLI}));
+
     return ctx_arg;
 }
 
diff --git a/common/chat-parser.cpp b/common/chat-parser.cpp
index 29819e48d3..b4460fd01f 100644
--- a/common/chat-parser.cpp
+++ b/common/chat-parser.cpp
@@ -1140,6 +1140,9 @@ static void common_chat_parse_hermes_2_pro(common_chat_msg_parser & builder) {
         return;
     }
 
+    // DEBUG: trace input
+    // fprintf(stderr, "hermes_parse: input='%s'\n", builder.input().substr(builder.pos()).c_str());
+
     static const common_regex open_regex(
         "(?:"
             "(```(?:xml|json)?\\n\\s*)?" // match 1 (block_start)
@@ -1160,6 +1163,7 @@ static void common_chat_parse_hermes_2_pro(common_chat_msg_parser & builder) {
     );
 
     while (auto res = builder.try_find_regex(open_regex)) {
+        // fprintf(stderr, "hermes_parse: found regex match\n");
         const auto & block_start = res->groups[1];
         std::string block_end = block_start.empty() ? "" : "```";
 
@@ -1172,16 +1176,23 @@ static void common_chat_parse_hermes_2_pro(common_chat_msg_parser & builder) {
 
             if (auto tool_call = builder.try_consume_json_with_dumped_args({{"arguments"}})) {
                 if (!builder.add_tool_call(tool_call->value) || tool_call->is_partial) {
+                    fprintf(stderr, "hermes: json tool add failed or partial (partial=%d)\n", tool_call->is_partial);
                     throw common_chat_msg_partial_exception("incomplete tool call");
                 }
                 builder.consume_spaces();
-                builder.consume_literal(close_tag);
+                // builder.consume_literal(close_tag); // Handle mismatched close tag gracefully?
+                if (!builder.try_consume_literal(close_tag)) {
+                     fprintf(stderr, "hermes: failed to consume close tag '%s'. Remaining: '%s'\n", close_tag.c_str(), builder.input().substr(builder.pos()).c_str());
+                     // If closing tag is missing, is it partial?
+                     throw common_chat_msg_partial_exception("missing close tag");
+                }
                 builder.consume_spaces();
                 if (!block_end.empty()) {
                     builder.consume_literal(block_end);
                     builder.consume_spaces();
                 }
             } else {
+                fprintf(stderr, "hermes: failed to consume json\n");
                 throw common_chat_msg_partial_exception("failed to parse tool call");
             }
         } else {
diff --git a/common/common.h b/common/common.h
index 398ebb0960..aa74464a92 100644
--- a/common/common.h
+++ b/common/common.h
@@ -543,6 +543,11 @@ struct common_params {
 
     std::map<std::string, std::string> default_template_kwargs;
 
+    // MCP params
+    std::string mcp_config  = "";
+    std::string mcp_servers = "";
+    bool        mcp_yolo    = false;
+
     // webui configs
     bool webui = true;
     std::string webui_config_json;
diff --git a/mcp_config.json b/mcp_config.json
new file mode 100644
index 0000000000..d531881607
--- /dev/null
+++ b/mcp_config.json
@@ -0,0 +1,13 @@
+{
+  "mcpServers": {
+    "serena": {
+      "command": "uvx",
+      "args": [
+        "--from",
+        "git+https://github.com/oraios/serena",
+        "serena",
+        "start-mcp-server"
+      ]
+    }
+  }
+}
\ No newline at end of file
diff --git a/mcp_dummy.py b/mcp_dummy.py
new file mode 100644
index 0000000000..1fff089407
--- /dev/null
+++ b/mcp_dummy.py
@@ -0,0 +1,82 @@
+import sys
+import os
+import json
+import logging
+
+logging.basicConfig(filename='/devel/tools/llama.cpp/mcp_dummy.log', level=logging.DEBUG)
+
+def main():
+    logging.info("Starting MCP Dummy Server")
+    while True:
+        try:
+            line = sys.stdin.readline()
+            if not line:
+                break
+            logging.info(f"Received: {line.strip()}")
+            try:
+                req = json.loads(line)
+            except json.JSONDecodeError:
+                continue
+            
+            if "method" in req:
+                method = req["method"]
+                req_id = req.get("id")
+                
+                resp = {"jsonrpc": "2.0", "id": req_id}
+                
+                if method == "initialize":
+                    resp["result"] = {
+                        "protocolVersion": "2024-11-05",
+                        "capabilities": {},
+                        "serverInfo": {"name": "dummy", "version": "1.0"}
+                    }
+                elif method == "tools/list":
+                    resp["result"] = {
+                        "tools": [
+                            {
+                                "name": "get_weather",
+                                "description": "Get weather for a location",
+                                "inputSchema": {
+                                    "type": "object",
+                                    "properties": {
+                                        "location": {"type": "string"},
+                                        "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]}
+                                    },
+                                    "required": ["location"]
+                                }
+                            }
+                        ]
+                    }
+                elif method == "tools/call":
+                    params = req.get("params", {})
+                    name = params.get("name")
+                    args = params.get("arguments", {})
+                    
+                    logging.info(f"Tool call: {name} with {args}")
+                    
+                    content = [{"type": "text", "text": f"Weather in {args.get('location')} is 25C"}]
+                    # For simplicity, return raw content or follow MCP spec?
+                    # MCP spec: result: { content: [ {type: "text", text: "..."} ] }
+                    # My mcp.hpp returns res["result"].
+                    # My cli.cpp dumps res.dump().
+                    # So passing full result object is fine.
+                    resp["result"] = {
+                        "content": content
+                    }
+                else:
+                    # Ignore notifications or other methods
+                    if req_id is not None:
+                         resp["error"] = {"code": -32601, "message": "Method not found"}
+                    else:
+                        continue
+
+                logging.info(f"Sending: {json.dumps(resp)}")
+                if req_id is not None:
+                    sys.stdout.write(json.dumps(resp) + "\n\n")
+                    sys.stdout.flush()
+        except Exception as e:
+            logging.error(f"Error: {e}")
+            break
+
+if __name__ == "__main__":
+    main()
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index c9436c5995..5f07075213 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -1,5 +1,9 @@
 llama_add_compile_flags()
 
+add_executable(test-mcp-integration test-mcp-integration.cpp)
+    target_link_libraries(test-mcp-integration PRIVATE common)
+target_include_directories(test-mcp-integration PRIVATE ../tools/cli)
+
 function(llama_build source)
     set(TEST_SOURCES ${source} ${ARGN})
 
diff --git a/tests/test-mcp-integration.cpp b/tests/test-mcp-integration.cpp
new file mode 100644
index 0000000000..fe7d189872
--- /dev/null
+++ b/tests/test-mcp-integration.cpp
@@ -0,0 +1,71 @@
+#include "../tools/cli/mcp.hpp"
+#include "common.h"
+#include "log.h"
+
+#include <chrono>
+#include <iostream>
+#include <string>
+#include <thread>
+#include <vector>
+
+int main(int argc, char ** argv) {
+    if (argc < 2) {
+        fprintf(stderr, "Usage: %s <mcp_config_file>\n", argv[0]);
+        return 1;
+    }
+
+    std::string config_file = argv[1];
+
+    printf("Testing MCP integration with config: %s\n", config_file.c_str());
+
+    mcp_context mcp;
+    std::string tool_to_run;
+    json        tool_args = json::object();
+    std::string servers_arg = "";
+
+    if (argc >= 3) {
+        tool_to_run = argv[2];
+    }
+    if (argc >= 4) {
+        try {
+            tool_args = json::parse(argv[3]);
+        } catch (const std::exception & e) {
+            fprintf(stderr, "Error parsing tool arguments JSON: %s\n", e.what());
+            return 1;
+        }
+    }
+
+    if (mcp.load_config(config_file, servers_arg)) {
+        printf("MCP config loaded successfully.\n");
+
+        auto tools = mcp.get_tools();
+        printf("Found %zu tools.\n", tools.size());
+
+        for (const auto & tool : tools) {
+            printf("Tool: %s\n", tool.name.c_str());
+            printf("  Description: %s\n", tool.description.c_str());
+        }
+
+        if (!tool_to_run.empty()) {
+            printf("Calling tool %s...\n", tool_to_run.c_str());
+            mcp.set_yolo(true);
+            json res = mcp.call_tool(tool_to_run, tool_args);
+            printf("Result: %s\n", res.dump().c_str());
+        } else if (!tools.empty()) {
+            printf("No tool specified. Calling first tool '%s' with empty args as smoke test...\n", tools[0].name.c_str());
+             json args = json::object();
+            mcp.set_yolo(true);
+            json res = mcp.call_tool(tools[0].name, args);
+            printf("Result: %s\n", res.dump().c_str());
+        }
+
+    } else {
+        printf("Failed to load MCP config.\n");
+        return 1;
+    }
+
+    // Allow some time for threads to shutdown if any
+    std::this_thread::sleep_for(std::chrono::seconds(1));
+
+    return 0;
+}
diff --git a/tools/cli/cli.cpp b/tools/cli/cli.cpp
index 02ccb72598..f9ad5094b6 100644
--- a/tools/cli/cli.cpp
+++ b/tools/cli/cli.cpp
@@ -1,22 +1,25 @@
-#include "common.h"
 #include "arg.h"
+#include "chat.h"
+#include "common.h"
 #include "console.h"
 // #include "log.h"
 
+#include "mcp.hpp"
 #include "server-context.h"
 #include "server-task.h"
 
+#include <signal.h>
+
 #include <atomic>
 #include <fstream>
 #include <thread>
-#include <signal.h>
 
 #if defined(_WIN32)
-#define WIN32_LEAN_AND_MEAN
-#ifndef NOMINMAX
-#   define NOMINMAX
-#endif
-#include <windows.h>
+#    define WIN32_LEAN_AND_MEAN
+#    ifndef NOMINMAX
+#        define NOMINMAX
+#    endif
+#    include <windows.h>
 #endif
 
 const char * LLAMA_ASCII_LOGO = R"(
@@ -30,11 +33,12 @@ const char * LLAMA_ASCII_LOGO = R"(
 )";
 
 static std::atomic<bool> g_is_interrupted = false;
+
 static bool should_stop() {
     return g_is_interrupted.load();
 }
 
-#if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__)) || defined (_WIN32)
+#if defined(__unix__) || (defined(__APPLE__) && defined(__MACH__)) || defined(_WIN32)
 static void signal_handler(int) {
     if (g_is_interrupted.load()) {
         // second Ctrl+C - exit immediately
@@ -48,10 +52,12 @@ static void signal_handler(int) {
 #endif
 
 struct cli_context {
-    server_context ctx_server;
-    json messages = json::array();
+    server_context          ctx_server;
+    json                    messages = json::array();
+    json                    tools    = json::array();
     std::vector<raw_buffer> input_files;
-    task_params defaults;
+    task_params             defaults;
+    std::set<std::string>   approved_requests;
 
     // thread for showing "loading" animation
     std::atomic<bool> loading_show;
@@ -63,12 +69,12 @@ struct cli_context {
         defaults.n_predict   = params.n_predict;
         defaults.antiprompt  = params.antiprompt;
 
-        defaults.stream = true; // make sure we always use streaming mode
-        defaults.timings_per_token = true; // in order to get timings even when we cancel mid-way
+        defaults.stream                                 = true;  // make sure we always use streaming mode
+        defaults.timings_per_token                      = true;  // in order to get timings even when we cancel mid-way
         // defaults.return_progress = true; // TODO: show progress
     }
 
-    std::string generate_completion(result_timings & out_timings) {
+    std::string generate_completion(result_timings & out_timings, server_task_result_ptr & out_result) {
         server_response_reader rd = ctx_server.get_response_reader();
         auto chat_params = format_chat();
         {
@@ -88,7 +94,7 @@ struct cli_context {
                 task.params.chat_parser_params.parser.load(chat_params.parser);
             }
 
-            rd.post_task({std::move(task)});
+            rd.post_task({ std::move(task) });
         }
 
         // wait for first result
@@ -97,7 +103,7 @@ struct cli_context {
 
         console::spinner::stop();
         std::string curr_content;
-        bool is_thinking = false;
+        bool        is_thinking = false;
 
         while (result) {
             if (should_stop()) {
@@ -110,6 +116,7 @@ struct cli_context {
                 } else {
                     console::error("Error: %s\n", err_data.dump().c_str());
                 }
+                out_result = std::move(result);
                 return curr_content;
             }
             auto res_partial = dynamic_cast<server_task_result_cmpl_partial *>(result.get());
@@ -140,6 +147,7 @@ struct cli_context {
             auto res_final = dynamic_cast<server_task_result_cmpl_final *>(result.get());
             if (res_final) {
                 out_timings = std::move(res_final->timings);
+                out_result  = std::move(result);
                 break;
             }
             result = rd.next(should_stop);
@@ -166,14 +174,14 @@ struct cli_context {
         }
     }
 
-    common_chat_params format_chat() {
+    common_chat_params format_chat() const {
         auto meta = ctx_server.get_meta();
         auto & chat_params = meta.chat_params;
 
         common_chat_templates_inputs inputs;
         inputs.messages              = common_chat_msgs_parse_oaicompat(messages);
-        inputs.tools                 = {}; // TODO
-        inputs.tool_choice           = COMMON_CHAT_TOOL_CHOICE_NONE;
+        inputs.tools                 = common_chat_tools_parse_oaicompat(tools);
+        inputs.tool_choice           = tools.empty() ? COMMON_CHAT_TOOL_CHOICE_NONE : COMMON_CHAT_TOOL_CHOICE_AUTO;
         inputs.json_schema           = ""; // TODO
         inputs.grammar               = ""; // TODO
         inputs.use_jinja             = chat_params.use_jinja;
@@ -189,7 +197,7 @@ struct cli_context {
 int main(int argc, char ** argv) {
     common_params params;
 
-    params.verbosity = LOG_LEVEL_ERROR; // by default, less verbose logs
+    params.verbosity = LOG_LEVEL_ERROR;  // by default, less verbose logs
 
     if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_CLI)) {
         return 1;
@@ -215,21 +223,43 @@ int main(int argc, char ** argv) {
 
     console::set_display(DISPLAY_TYPE_RESET);
 
-#if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__))
+#if defined(__unix__) || (defined(__APPLE__) && defined(__MACH__))
     struct sigaction sigint_action;
     sigint_action.sa_handler = signal_handler;
-    sigemptyset (&sigint_action.sa_mask);
+    sigemptyset(&sigint_action.sa_mask);
     sigint_action.sa_flags = 0;
     sigaction(SIGINT, &sigint_action, NULL);
     sigaction(SIGTERM, &sigint_action, NULL);
-#elif defined (_WIN32)
+#elif defined(_WIN32)
     auto console_ctrl_handler = +[](DWORD ctrl_type) -> BOOL {
         return (ctrl_type == CTRL_C_EVENT) ? (signal_handler(SIGINT), true) : false;
     };
     SetConsoleCtrlHandler(reinterpret_cast<PHANDLER_ROUTINE>(console_ctrl_handler), true);
 #endif
 
-    console::log("\nLoading model... "); // followed by loading animation
+    // Initialize MCP
+    mcp_context mcp;
+    if (!params.mcp_config.empty()) {
+        if (mcp.load_config(params.mcp_config, params.mcp_servers)) {
+            mcp.set_yolo(params.mcp_yolo);
+            auto mcp_tools = mcp.get_tools();
+            for (const auto & t : mcp_tools) {
+                json tool = {
+                    { "type",     "function"                                                                    },
+                    { "function",
+                     { { "name", t.name }, { "description", t.description }, { "parameters", t.input_schema } } }
+                };
+                ctx_cli.tools.push_back(tool);
+            }
+            if (!ctx_cli.tools.empty()) {
+                console::log("Enabled %d MCP tools\n", (int) ctx_cli.tools.size());
+            }
+        } else {
+            console::error("Failed to load MCP config: %s\n", params.mcp_config.c_str());
+        }
+    }
+
+    console::log("\nLoading model... ");  // followed by loading animation
     console::spinner::start();
     if (!ctx_cli.ctx_server.load_model(params)) {
         console::spinner::stop();
@@ -240,11 +270,9 @@ int main(int argc, char ** argv) {
     console::spinner::stop();
     console::log("\n");
 
-    std::thread inference_thread([&ctx_cli]() {
-        ctx_cli.ctx_server.start_loop();
-    });
+    std::thread inference_thread([&ctx_cli]() { ctx_cli.ctx_server.start_loop(); });
 
-    auto inf = ctx_cli.ctx_server.get_meta();
+    auto        inf        = ctx_cli.ctx_server.get_meta();
     std::string modalities = "text";
     if (inf.has_inp_image) {
         modalities += ", vision";
@@ -255,8 +283,8 @@ int main(int argc, char ** argv) {
 
     if (!params.system_prompt.empty()) {
         ctx_cli.messages.push_back({
-            {"role",    "system"},
-            {"content", params.system_prompt}
+            { "role",    "system"             },
+            { "content", params.system_prompt }
         });
     }
 
@@ -290,7 +318,7 @@ int main(int argc, char ** argv) {
         if (params.prompt.empty()) {
             console::log("\n> ");
             std::string line;
-            bool another_line = true;
+            bool        another_line = true;
             do {
                 another_line = console::readline(line, params.multiline_input);
                 buffer += line;
@@ -312,7 +340,7 @@ int main(int argc, char ** argv) {
             } else {
                 console::log("\n> %s\n", buffer.c_str());
             }
-            params.prompt.clear(); // only use it once
+            params.prompt.clear();  // only use it once
         }
         console::set_display(DISPLAY_TYPE_RESET);
         console::log("\n");
@@ -323,7 +351,7 @@ int main(int argc, char ** argv) {
         }
 
         // remove trailing newline
-        if (!buffer.empty() &&buffer.back() == '\n') {
+        if (!buffer.empty() && buffer.back() == '\n') {
             buffer.pop_back();
         }
 
@@ -351,11 +379,10 @@ int main(int argc, char ** argv) {
             ctx_cli.input_files.clear();
             console::log("Chat history cleared.\n");
             continue;
-        } else if (
-                (string_starts_with(buffer, "/image ") && inf.has_inp_image) ||
-                (string_starts_with(buffer, "/audio ") && inf.has_inp_audio)) {
+        } else if ((string_starts_with(buffer, "/image ") && inf.has_inp_image) ||
+                   (string_starts_with(buffer, "/audio ") && inf.has_inp_audio)) {
             // just in case (bad copy-paste for example), we strip all trailing/leading spaces
-            std::string fname = string_strip(buffer.substr(7));
+            std::string fname  = string_strip(buffer.substr(7));
             std::string marker = ctx_cli.load_input_file(fname, true);
             if (marker.empty()) {
                 console::error("file does not exist or cannot be opened: '%s'\n", fname.c_str());
@@ -365,7 +392,7 @@ int main(int argc, char ** argv) {
             console::log("Loaded media from '%s'\n", fname.c_str());
             continue;
         } else if (string_starts_with(buffer, "/read ")) {
-            std::string fname = string_strip(buffer.substr(6));
+            std::string fname  = string_strip(buffer.substr(6));
             std::string marker = ctx_cli.load_input_file(fname, false);
             if (marker.empty()) {
                 console::error("file does not exist or cannot be opened: '%s'\n", fname.c_str());
@@ -382,23 +409,100 @@ int main(int argc, char ** argv) {
         // generate response
         if (add_user_msg) {
             ctx_cli.messages.push_back({
-                {"role",    "user"},
-                {"content", cur_msg}
+                { "role",    "user"  },
+                { "content", cur_msg }
             });
             cur_msg.clear();
         }
         result_timings timings;
-        std::string assistant_content = ctx_cli.generate_completion(timings);
-        ctx_cli.messages.push_back({
-            {"role",    "assistant"},
-            {"content", assistant_content}
-        });
+        while (true) {
+            server_task_result_ptr result;
+            std::string            assistant_content = ctx_cli.generate_completion(timings, result);
+            auto                   res_final         = dynamic_cast<server_task_result_cmpl_final *>(result.get());
+
+            if (res_final && !res_final->oaicompat_msg.tool_calls.empty()) {
+                ctx_cli.messages.push_back(res_final->oaicompat_msg.to_json_oaicompat());
+
+                for (const auto & tc : res_final->oaicompat_msg.tool_calls) {
+                    json args;
+                    try {
+                        if (tc.arguments.empty()) {
+                            args = json::object();
+                        } else {
+                            args = json::parse(tc.arguments);
+                        }
+                    } catch (...) {
+                        json err_msg = {
+                            { "role",         "tool"                    },
+                            { "content",      "Error parsing arguments" },
+                            { "tool_call_id", tc.id                     },
+                            { "name",         tc.name                   }
+                        };
+                        ctx_cli.messages.push_back(err_msg);
+                        continue;
+                    }
+
+                    std::string request_id = tc.name + tc.arguments;
+                    if (!mcp.get_yolo() && ctx_cli.approved_requests.find(request_id) == ctx_cli.approved_requests.end()) {
+                         // Prompt user
+                         fprintf(stdout, "\n\033[1;33mTool call: %s\033[0m\n", tc.name.c_str());
+                         fprintf(stdout, "Arguments: %s\n", args.dump(2).c_str());
+                         fprintf(stdout, "Approve? [y]es, [n]o, [A]lways allow feature: ");
+                         fflush(stdout);
+
+                         char c = ' ';
+                         std::string line;
+                         // We are in main loop which might have console settings. 
+                         // Use console::readline or similar? 
+                         // But for single char input, standard cin/getline might interfere with console lib state if it's separate.
+                         // Given `console::readline` is used above, let's use standard input as fallback or just try cin.
+                         
+                         // Simple blocking read
+                         std::cin >> c;
+                         std::getline(std::cin, line); // consume rest
+
+                         if (c == 'y' || c == 'Y') {
+                             // approved once
+                         } else if (c == 'A') {
+                             ctx_cli.approved_requests.insert(request_id);
+                         } else {
+                             json err_msg = {
+                                { "role",         "tool"                    },
+                                { "content",      "User denied tool execution" },
+                                { "tool_call_id", tc.id                     },
+                                { "name",         tc.name                   }
+                            };
+                            ctx_cli.messages.push_back(err_msg);
+                            continue;
+                         }
+                    }
+
+                    json res      = mcp.call_tool(tc.name, args);
+                    json tool_msg = {
+                        { "role",         "tool"     },
+                        { "content",      res.dump() },
+                        { "tool_call_id", tc.id      },
+                        { "name",         tc.name    }
+                    };
+                    ctx_cli.messages.push_back(tool_msg);
+                }
+                // continue loop to generate with tool results
+            } else {
+                json assistant_msg = {
+                    { "role",    "assistant"       },
+                    { "content", assistant_content }
+                };
+                ctx_cli.messages.push_back(assistant_msg);
+                break;
+            }
+        }
         console::log("\n");
 
         if (params.show_timings) {
             console::set_display(DISPLAY_TYPE_INFO);
             console::log("\n");
-            console::log("[ Prompt: %.1f t/s | Generation: %.1f t/s ]\n", timings.prompt_per_second, timings.predicted_per_second);
+            console::log("[ Prompt: %.1f t/s | Generation: %.1f t/s ]\n", timings.prompt_per_second,
+                         timings.predicted_per_second);
             console::set_display(DISPLAY_TYPE_RESET);
         }
 
diff --git a/tools/cli/mcp.hpp b/tools/cli/mcp.hpp
new file mode 100644
index 0000000000..62662f1ef0
--- /dev/null
+++ b/tools/cli/mcp.hpp
@@ -0,0 +1,499 @@
+#pragma once
+
+#include "../../vendor/sheredom/subprocess.h"
+#include "common.h"
+#include "log.h"
+
+#include <atomic>
+#include <condition_variable>
+#include <fstream>
+#include <future>
+#include <iostream>
+#include <map>
+#include <mutex>
+#include <nlohmann/json.hpp>
+#include <sstream>
+#include <string>
+#include <thread>
+#include <vector>
+
+using json = nlohmann::ordered_json;
+
+struct mcp_tool {
+    std::string name;
+    std::string description;
+    json        input_schema;
+    std::string server_name;
+};
+
+class mcp_server {
+    std::string                        name;
+    std::string                        command;
+    std::vector<std::string>           args;
+    std::map<std::string, std::string> env;
+
+    struct subprocess_s process = {};
+    std::atomic<bool>   running{ false };
+    std::thread         read_thread;
+    std::thread         err_thread;
+    std::atomic<bool>   stop_read{ false };
+
+    std::mutex                        mutex;
+    int                               next_id = 1;
+    std::map<int, std::promise<json>> pending_requests;
+
+    // Buffer for reading
+    std::string read_buffer;
+
+  public:
+    mcp_server(const std::string &                        name,
+               const std::string &                        cmd,
+               const std::vector<std::string> &           args,
+               const std::map<std::string, std::string> & env) :
+        name(name),
+        command(cmd),
+        args(args),
+        env(env) {}
+
+    mcp_server(const mcp_server &)            = delete;
+    mcp_server & operator=(const mcp_server &) = delete;
+
+    mcp_server(mcp_server && other) noexcept :
+        name(std::move(other.name)),
+        command(std::move(other.command)),
+        args(std::move(other.args)),
+        env(std::move(other.env)),
+        process(other.process),
+        running(other.running.load()),
+        read_thread(std::move(other.read_thread)),
+        err_thread(std::move(other.err_thread)),
+        stop_read(other.stop_read.load()),
+        next_id(other.next_id),
+        pending_requests(std::move(other.pending_requests)),
+        read_buffer(std::move(other.read_buffer)) {
+        // Zero out the source process to prevent double-free
+        other.process = {};
+        other.running = false;
+    }
+
+    mcp_server & operator=(mcp_server && other) noexcept {
+        if (this != &other) {
+            stop(); // Clean up current resources
+
+            name             = std::move(other.name);
+            command          = std::move(other.command);
+            args             = std::move(other.args);
+            env              = std::move(other.env);
+            process          = other.process;
+            running          = other.running.load();
+            read_thread      = std::move(other.read_thread);
+            err_thread       = std::move(other.err_thread);
+            stop_read        = other.stop_read.load();
+            next_id          = other.next_id;
+            pending_requests = std::move(other.pending_requests);
+            read_buffer      = std::move(other.read_buffer);
+
+            // Zero out source
+            other.process = {};
+            other.running = false;
+        }
+        return *this;
+    }
+
+    ~mcp_server() { stop(); }
+
+    bool start() {
+        std::vector<const char *> cmd_args;
+        cmd_args.push_back(command.c_str());
+        for (const auto & arg : args) {
+            cmd_args.push_back(arg.c_str());
+        }
+        cmd_args.push_back(nullptr);
+
+        std::vector<const char *> env_vars;
+        std::vector<std::string>  env_strings;  // keep strings alive
+        if (!env.empty()) {
+            for (const auto & kv : env) {
+                env_strings.push_back(kv.first + "=" + kv.second);
+            }
+            for (const auto & s : env_strings) {
+                env_vars.push_back(s.c_str());
+            }
+            env_vars.push_back(nullptr);
+        }
+
+        // Blocking I/O is simpler with threads
+        int options = subprocess_option_search_user_path;
+        int result;
+        if (env.empty()) {
+            options |= subprocess_option_inherit_environment;
+            result = subprocess_create(cmd_args.data(), options, &process);
+        } else {
+            result = subprocess_create_ex(cmd_args.data(), options, env_vars.data(), &process);
+        }
+
+        if (result != 0) {
+            LOG_ERR("Failed to start MCP server %s: error %d (%s)\n", name.c_str(), errno, strerror(errno));
+            return false;
+        }
+
+        running     = true;
+        read_thread = std::thread(&mcp_server::read_loop, this);
+        err_thread  = std::thread(&mcp_server::err_loop, this);
+
+        return true;
+    }
+
+    void stop() {
+        if (!running) return;
+        LOG_INF("Stopping MCP server %s...\n", name.c_str());
+        stop_read = true;
+
+        // 1. Close stdin to signal EOF
+        if (process.stdin_file) {
+            fclose(process.stdin_file);
+            process.stdin_file = nullptr;
+        }
+
+        // 2. Wait for 10 seconds for normal termination
+        bool terminated = false;
+        for (int i = 0; i < 100; ++i) { // 100 * 100ms = 10s
+             if (subprocess_alive(&process) == 0) {
+                 terminated = true;
+                 break;
+             }
+             std::this_thread::sleep_for(std::chrono::milliseconds(100));
+        }
+
+        // 3. Terminate if still running
+        if (!terminated) {
+            LOG_WRN("MCP server %s did not exit gracefully, terminating...\n", name.c_str());
+            subprocess_terminate(&process);
+        }
+
+        // 4. Join threads
+        if (read_thread.joinable()) {
+            read_thread.join();
+        }
+        if (err_thread.joinable()) {
+            err_thread.join();
+        }
+        
+        // 5. Cleanup
+        if (running) {
+             subprocess_destroy(&process);
+             running = false;
+        }
+        LOG_INF("MCP server %s stopped.\n", name.c_str());
+    }
+
+    json send_request(const std::string & method, const json & params = json::object()) {
+        int               id;
+        std::future<json> future;
+        {
+            std::lock_guard<std::mutex> lock(mutex);
+            id     = next_id++;
+            future = pending_requests[id].get_future();
+        }
+
+        json req = {
+            { "jsonrpc", "2.0"  },
+            { "id",      id     },
+            { "method",  method },
+            { "params",  params }
+        };
+
+        std::string req_str    = req.dump() + "\n";
+        FILE *      stdin_file = subprocess_stdin(&process);
+        if (stdin_file) {
+            fwrite(req_str.c_str(), 1, req_str.size(), stdin_file);
+            fflush(stdin_file);
+        }
+
+        // Wait for response with timeout
+        if (future.wait_for(std::chrono::seconds(10)) == std::future_status::timeout) {
+            LOG_ERR("Timeout waiting for response from %s (method: %s)\n", name.c_str(), method.c_str());
+            std::lock_guard<std::mutex> lock(mutex);
+            pending_requests.erase(id);
+            return nullptr;
+        }
+
+        return future.get();
+    }
+
+    void send_notification(const std::string & method, const json & params = json::object()) {
+        json req = {
+            { "jsonrpc", "2.0"  },
+            { "method",  method },
+            { "params",  params }
+        };
+
+        std::string req_str    = req.dump() + "\n";
+        FILE *      stdin_file = subprocess_stdin(&process);
+        if (stdin_file) {
+            fwrite(req_str.c_str(), 1, req_str.size(), stdin_file);
+            fflush(stdin_file);
+        }
+    }
+
+    // Initialize handshake
+    bool initialize() {
+        // Send initialize
+        json init_params = {
+            { "protocolVersion", "2024-11-05" },
+            { "capabilities", {
+                { "roots", {
+                    { "listChanged", false }
+                } },
+                { "sampling", json::object() }
+            } },
+            { "clientInfo", {
+                { "name", "llama.cpp-cli" },
+                { "version", "0.1.0" }  // TODO: use real version
+            } }
+        };
+
+        json res = send_request("initialize", init_params);
+        if (res.is_null() || res.contains("error")) {
+            LOG_ERR("Failed to initialize MCP server %s\n", name.c_str());
+            return false;
+        }
+
+        // Send initialized notification
+        send_notification("notifications/initialized");
+        return true;
+    }
+
+    std::vector<mcp_tool> list_tools() {
+        std::vector<mcp_tool> tools;
+        json                  res = send_request("tools/list");
+        if (res.is_null() || res.contains("error")) {
+            LOG_ERR("Failed to list tools from %s\n", name.c_str());
+            return tools;
+        }
+
+        if (res.contains("result") && res["result"].contains("tools")) {
+            for (const auto & t : res["result"]["tools"]) {
+                mcp_tool tool;
+                tool.name = t["name"].get<std::string>();
+                if (t.contains("description")) {
+                    tool.description = t["description"].get<std::string>();
+                }
+                if (t.contains("inputSchema")) {
+                    tool.input_schema = t["inputSchema"];
+                }
+                tool.server_name = name;
+                tools.push_back(tool);
+            }
+        }
+        return tools;
+    }
+
+    json call_tool(const std::string & tool_name, const json & args) {
+        json params = {
+            { "name",      tool_name },
+            { "arguments", args      }
+        };
+        json res = send_request("tools/call", params);
+        if (res.is_null() || res.contains("error")) {
+            return {
+                { "error", res.contains("error") ? res["error"] : "Unknown error" }
+            };
+        }
+        if (res.contains("result")) {
+            return res["result"];
+        }
+        return {
+            { "error", "No result returned" }
+        };
+    }
+
+  private:
+    void read_loop() {
+        char buffer[4096];
+        while (!stop_read && running) {
+            unsigned bytes_read = subprocess_read_stdout(&process, buffer, sizeof(buffer));
+
+            if (bytes_read == 0) {
+                // If blocking read returns 0, it means EOF (process exited or pipe closed).
+                // We should NOT call subprocess_alive() here because it calls subprocess_join()
+                // which modifies the process struct (closes stdin) and causes race conditions/double-free
+                // when stop() is called concurrently.
+                // Just break the loop. The process is likely dead or dying.
+                if (!stop_read) {
+                    LOG_ERR("MCP process died (stdout closed)\n");
+                }
+                running = false;
+                break;
+            }
+
+            read_buffer.append(buffer, bytes_read);
+            size_t pos;
+            while ((pos = read_buffer.find('\n')) != std::string::npos) {
+                std::string line = read_buffer.substr(0, pos);
+                read_buffer.erase(0, pos + 1);
+
+                if (line.empty()) {
+                    continue;
+                }
+
+                try {
+                    json msg = json::parse(line);
+                    if (msg.contains("id")) {
+                        // Response
+                        int id = msg["id"].get<int>();
+                        std::lock_guard<std::mutex> lock(mutex);
+                        if (pending_requests.count(id)) {
+                            pending_requests[id].set_value(msg);
+                            pending_requests.erase(id);
+                        } else {
+                            // ID not found
+                        }
+                    } else {
+                        // Notification or request from server -> ignore for now or log
+                        // MCP servers might send notifications (e.g. logging)
+                        LOG_ERR("MCP Notification from %s: %s\n", name.c_str(), line.c_str());
+                    }
+                } catch (const std::exception & e) {
+                    // Not a full JSON yet? Or invalid?
+                    // If it was a line, it should be valid JSON-RPC
+                    LOG_WRN("Failed to parse JSON from %s: %s\n", name.c_str(), e.what());
+                }
+            }
+        }
+    }
+
+    void err_loop() {
+        char buffer[1024];
+        while (!stop_read && running) {
+            unsigned bytes_read = subprocess_read_stderr(&process, buffer, sizeof(buffer));
+            if (bytes_read > 0) {
+                if (stop_read) break; // Don't log stderr during shutdown
+                std::string err_str(buffer, bytes_read);
+                // Filter out empty/whitespace-only stderr if desired, or just keep it.
+                // User said "extra logging that passes the stderr here is unnecessary" referring to shutdown.
+                LOG_WRN("[%s stderr] %s", name.c_str(), err_str.c_str());
+            } else {
+                // EOF
+                break;
+            }
+        }
+    }
+};
+
+class mcp_context {
+    std::map<std::string, std::shared_ptr<mcp_server>> servers;
+    std::vector<mcp_tool>                              tools;
+    bool                                               yolo = false;
+
+  public:
+    void set_yolo(bool y) { yolo = y; }
+
+    bool load_config(const std::string & config_path, const std::string & enabled_servers_str) {
+        std::ifstream f(config_path);
+        if (!f) {
+            return false;
+        }
+
+        json config;
+        try {
+            f >> config;
+        } catch (...) {
+            return false;
+        }
+
+        std::vector<std::string> enabled_list;
+        std::stringstream        ss(enabled_servers_str);
+        std::string              item;
+        while (std::getline(ss, item, ',')) {
+            if (!item.empty()) {
+                enabled_list.push_back(item);
+            }
+        }
+
+        if (config.contains("mcpServers")) {
+            std::string server_list;
+            for (auto & [key, val] : config["mcpServers"].items()) {
+                if (!server_list.empty()) server_list += ", ";
+                server_list += key;
+            }
+            LOG_INF("MCP configuration found with servers: %s\n", server_list.c_str());
+
+            for (auto & [key, val] : config["mcpServers"].items()) {
+                // If enabled_servers_str is empty, enable all? User said "possibility to pick which MCP servers to enable".
+                // If the user specifies explicit list, we filter. If not, maybe we shouldn't enable any or enable all?
+                // The prompt says "possibility to pick".
+                // Let's assume if list provided, use it. If not, enable all? Or none?
+                // User provided example implies explicit enabling might be desired.
+                // Let's assume if `enabled_servers_str` is not empty, we filter.
+
+                bool enabled = true;
+                if (!enabled_list.empty()) {
+                    bool found = false;
+                    for (const auto & s : enabled_list) {
+                        if (s == key) {
+                            found = true;
+                        }
+                    }
+                    if (!found) {
+                        enabled = false;
+                    }
+                }
+
+                if (enabled) {
+                    std::string                        cmd  = val["command"].get<std::string>();
+                    std::vector<std::string>           args = val.value("args", std::vector<std::string>{});
+                    std::map<std::string, std::string> env;
+                    if (val.contains("env")) {
+                        for (auto & [ek, ev] : val["env"].items()) {
+                            env[ek] = ev.get<std::string>();
+                        }
+                    }
+
+                    auto server = std::make_shared<mcp_server>(key, cmd, args, env);
+                    LOG_INF("Trying to start MCP server: %s...\n", key.c_str());
+                    if (server->start()) {
+                        if (server->initialize()) {
+                            servers[key] = server;
+                            LOG_INF("MCP Server '%s' started and initialized.\n", key.c_str());
+
+                            auto server_tools = server->list_tools();
+                            tools.insert(tools.end(), server_tools.begin(), server_tools.end());
+                        } else {
+                            LOG_ERR("MCP Server '%s' failed to initialize.\n", key.c_str());
+                        }
+                    }
+                }
+            }
+        }
+        return true;
+    }
+
+    std::vector<mcp_tool> get_tools() const { return tools; }
+
+    bool get_yolo() const { return yolo; }
+
+    json call_tool(const std::string & tool_name, const json & args) {
+        // Find which server has this tool
+        std::string server_name;
+        for (const auto & t : tools) {
+            if (t.name == tool_name) {
+                server_name = t.server_name;
+                break;
+            }
+        }
+
+        if (server_name.empty()) {
+            return {
+                { "error", "Tool not found" }
+            };
+        }
+
+        if (servers.count(server_name)) {
+            return servers[server_name]->call_tool(tool_name, args);
+        }
+        return {
+            { "error", "Server not found" }
+        };
+    }
+};

From 4e1fb733e23e999646a602eb06c1b4f2f92f0ef4 Mon Sep 17 00:00:00 2001
From: Piotr Wilkin <piotr.wilkin@syndatis.com>
Date: Thu, 5 Feb 2026 19:17:37 +0100
Subject: [PATCH 02/10] Minor stuff

---
 tests/test-chat.cpp |  8 ++---
 tools/cli/cli.cpp   | 73 ++++++++++++++++++++++-----------------------
 tools/cli/mcp.hpp   | 38 +++++++++++++++++------
 3 files changed, 68 insertions(+), 51 deletions(-)

diff --git a/tests/test-chat.cpp b/tests/test-chat.cpp
index 4378a8db71..e12805e2f3 100644
--- a/tests/test-chat.cpp
+++ b/tests/test-chat.cpp
@@ -69,8 +69,8 @@ static common_chat_msg normalize(const common_chat_msg & msg) {
     for (auto & tool_call : normalized.tool_calls) {
         try {
             tool_call.arguments = json::parse(tool_call.arguments).dump();
-        } catch (const std::exception &) {
-            // Do nothing
+        } catch (const std::exception &e) {
+            LOG_DBG("Normalize failed on tool call: %s\n", e.what());
         }
     }
     return normalized;
@@ -183,7 +183,7 @@ static void assert_msg_equals(const common_chat_msg & expected, const common_cha
     }
 }
 
-common_chat_tool special_function_tool {
+static common_chat_tool special_function_tool {
     /* .name = */ "special_function",
     /* .description = */ "I'm special",
     /* .parameters = */ R"({
@@ -197,7 +197,7 @@ common_chat_tool special_function_tool {
         "required": ["arg1"]
     })",
 };
-common_chat_tool special_function_tool_with_optional_param {
+static common_chat_tool special_function_tool_with_optional_param {
     /* .name = */ "special_function_with_opt",
     /* .description = */ "I'm special but have optional stuff",
     /* .parameters = */ R"({
diff --git a/tools/cli/cli.cpp b/tools/cli/cli.cpp
index f9ad5094b6..1e63ab9aa5 100644
--- a/tools/cli/cli.cpp
+++ b/tools/cli/cli.cpp
@@ -22,7 +22,7 @@
 #    include <windows.h>
 #endif
 
-const char * LLAMA_ASCII_LOGO = R"(
+static const char * LLAMA_ASCII_LOGO = R"(
 ▄▄ ▄▄
 ██ ██
 ██ ██  ▀▀█▄ ███▄███▄  ▀▀█▄    ▄████ ████▄ ████▄
@@ -39,7 +39,7 @@ static bool should_stop() {
 }
 
 #if defined(__unix__) || (defined(__APPLE__) && defined(__MACH__)) || defined(_WIN32)
-static void signal_handler(int) {
+static void signal_handler(int /*unused*/) {
     if (g_is_interrupted.load()) {
         // second Ctrl+C - exit immediately
         // make sure to clear colors before exiting (not using LOG or console.cpp here to avoid deadlock)
@@ -119,9 +119,9 @@ struct cli_context {
                 out_result = std::move(result);
                 return curr_content;
             }
-            auto res_partial = dynamic_cast<server_task_result_cmpl_partial *>(result.get());
+            auto *res_partial = dynamic_cast<server_task_result_cmpl_partial *>(result.get());
             if (res_partial) {
-                out_timings = std::move(res_partial->timings);
+                out_timings = res_partial->timings;
                 for (const auto & diff : res_partial->oaicompat_msg_diffs) {
                     if (!diff.content_delta.empty()) {
                         if (is_thinking) {
@@ -144,9 +144,9 @@ struct cli_context {
                     }
                 }
             }
-            auto res_final = dynamic_cast<server_task_result_cmpl_final *>(result.get());
+            auto *res_final = dynamic_cast<server_task_result_cmpl_final *>(result.get());
             if (res_final) {
-                out_timings = std::move(res_final->timings);
+                out_timings = res_final->timings;
                 out_result  = std::move(result);
                 break;
             }
@@ -168,10 +168,8 @@ struct cli_context {
             buf.assign((std::istreambuf_iterator<char>(file)), std::istreambuf_iterator<char>());
             input_files.push_back(std::move(buf));
             return mtmd_default_marker();
-        } else {
-            std::string content((std::istreambuf_iterator<char>(file)), std::istreambuf_iterator<char>());
-            return content;
-        }
+        } 
+        return std::string((std::istreambuf_iterator<char>(file)), std::istreambuf_iterator<char>());
     }
 
     common_chat_params format_chat() const {
@@ -365,7 +363,8 @@ int main(int argc, char ** argv) {
         // process commands
         if (string_starts_with(buffer, "/exit")) {
             break;
-        } else if (string_starts_with(buffer, "/regen")) {
+        } 
+        if (string_starts_with(buffer, "/regen")) {
             if (ctx_cli.messages.size() >= 2) {
                 size_t last_idx = ctx_cli.messages.size() - 1;
                 ctx_cli.messages.erase(last_idx);
@@ -418,7 +417,7 @@ int main(int argc, char ** argv) {
         while (true) {
             server_task_result_ptr result;
             std::string            assistant_content = ctx_cli.generate_completion(timings, result);
-            auto                   res_final         = dynamic_cast<server_task_result_cmpl_final *>(result.get());
+            auto *                 res_final         = dynamic_cast<server_task_result_cmpl_final *>(result.get());
 
             if (res_final && !res_final->oaicompat_msg.tool_calls.empty()) {
                 ctx_cli.messages.push_back(res_final->oaicompat_msg.to_json_oaicompat());
@@ -443,38 +442,36 @@ int main(int argc, char ** argv) {
                     }
 
                     std::string request_id = tc.name + tc.arguments;
-                    if (!mcp.get_yolo() && ctx_cli.approved_requests.find(request_id) == ctx_cli.approved_requests.end()) {
-                         // Prompt user
-                         fprintf(stdout, "\n\033[1;33mTool call: %s\033[0m\n", tc.name.c_str());
-                         fprintf(stdout, "Arguments: %s\n", args.dump(2).c_str());
-                         fprintf(stdout, "Approve? [y]es, [n]o, [A]lways allow feature: ");
-                         fflush(stdout);
+                    if (!mcp.get_yolo() &&
+                        ctx_cli.approved_requests.find(request_id) == ctx_cli.approved_requests.end()) {
+                        // Prompt user
+                        fprintf(stdout, "\n\n\033[1;33mTool call: %s\033[0m\n", tc.name.c_str());
+                        fprintf(stdout, "Arguments: %s\n", args.dump(2).c_str());
+                        fprintf(stdout, "Approve? [y]es, [n]o, [A]lways allow feature: ");
+                        fflush(stdout);
 
-                         char c = ' ';
-                         std::string line;
-                         // We are in main loop which might have console settings. 
-                         // Use console::readline or similar? 
-                         // But for single char input, standard cin/getline might interfere with console lib state if it's separate.
-                         // Given `console::readline` is used above, let's use standard input as fallback or just try cin.
-                         
-                         // Simple blocking read
-                         std::cin >> c;
-                         std::getline(std::cin, line); // consume rest
+                        char        c = ' ';
+                        std::string line;
 
-                         if (c == 'y' || c == 'Y') {
-                             // approved once
-                         } else if (c == 'A') {
-                             ctx_cli.approved_requests.insert(request_id);
-                         } else {
-                             json err_msg = {
-                                { "role",         "tool"                    },
+                        console::readline(line, false);
+                        if (!line.empty()) {
+                            c = line[0];
+                        }
+
+                        if (c == 'y' || c == 'Y') {
+                            // approved once
+                        } else if (c == 'A') {
+                            ctx_cli.approved_requests.insert(request_id);
+                        } else {
+                            json err_msg = {
+                                { "role",         "tool"                       },
                                 { "content",      "User denied tool execution" },
-                                { "tool_call_id", tc.id                     },
-                                { "name",         tc.name                   }
+                                { "tool_call_id", tc.id                        },
+                                { "name",         tc.name                      }
                             };
                             ctx_cli.messages.push_back(err_msg);
                             continue;
-                         }
+                        }
                     }
 
                     json res      = mcp.call_tool(tc.name, args);
diff --git a/tools/cli/mcp.hpp b/tools/cli/mcp.hpp
index 62662f1ef0..422cd2a807 100644
--- a/tools/cli/mcp.hpp
+++ b/tools/cli/mcp.hpp
@@ -188,6 +188,11 @@ class mcp_server {
     }
 
     json send_request(const std::string & method, const json & params = json::object()) {
+        if (!running) {
+            LOG_ERR("Cannot send request to %s: server not running\n", name.c_str());
+            return nullptr;
+        }
+
         int               id;
         std::future<json> future;
         {
@@ -206,13 +211,19 @@ class mcp_server {
         std::string req_str    = req.dump() + "\n";
         FILE *      stdin_file = subprocess_stdin(&process);
         if (stdin_file) {
+            LOG_DBG("MCP request to %s [id=%d]: %s\n", name.c_str(), id, method.c_str());
             fwrite(req_str.c_str(), 1, req_str.size(), stdin_file);
             fflush(stdin_file);
+        } else {
+            LOG_ERR("Cannot send request to %s: stdin is null\n", name.c_str());
+            std::lock_guard<std::mutex> lock(mutex);
+            pending_requests.erase(id);
+            return nullptr;
         }
 
         // Wait for response with timeout
-        if (future.wait_for(std::chrono::seconds(10)) == std::future_status::timeout) {
-            LOG_ERR("Timeout waiting for response from %s (method: %s)\n", name.c_str(), method.c_str());
+        if (future.wait_for(std::chrono::seconds(30)) == std::future_status::timeout) {
+            LOG_ERR("Timeout waiting for response from %s (method: %s, id: %d)\n", name.c_str(), method.c_str(), id);
             std::lock_guard<std::mutex> lock(mutex);
             pending_requests.erase(id);
             return nullptr;
@@ -310,6 +321,7 @@ class mcp_server {
 
   private:
     void read_loop() {
+        LOG_DBG("MCP read_loop started for %s\n", name.c_str());
         char buffer[4096];
         while (!stop_read && running) {
             unsigned bytes_read = subprocess_read_stdout(&process, buffer, sizeof(buffer));
@@ -321,7 +333,7 @@ class mcp_server {
                 // when stop() is called concurrently.
                 // Just break the loop. The process is likely dead or dying.
                 if (!stop_read) {
-                    LOG_ERR("MCP process died (stdout closed)\n");
+                    LOG_ERR("MCP server %s: read_loop exiting (stdout closed/EOF)\n", name.c_str());
                 }
                 running = false;
                 break;
@@ -339,28 +351,36 @@ class mcp_server {
 
                 try {
                     json msg = json::parse(line);
-                    if (msg.contains("id")) {
-                        // Response
-                        int id = msg["id"].get<int>();
+                    if (msg.contains("id") && !msg["id"].is_null()) {
+                        // Response - handle both int and string IDs (JSON-RPC allows both)
+                        int id;
+                        if (msg["id"].is_string()) {
+                            id = std::stoi(msg["id"].get<std::string>());
+                        } else {
+                            id = msg["id"].get<int>();
+                        }
                         std::lock_guard<std::mutex> lock(mutex);
                         if (pending_requests.count(id)) {
+                            LOG_DBG("MCP response received from %s [id=%d]\n", name.c_str(), id);
                             pending_requests[id].set_value(msg);
                             pending_requests.erase(id);
                         } else {
-                            // ID not found
+                            LOG_WRN("MCP response for unknown id %d from %s: %s\n", id, name.c_str(), line.c_str());
                         }
                     } else {
                         // Notification or request from server -> ignore for now or log
                         // MCP servers might send notifications (e.g. logging)
-                        LOG_ERR("MCP Notification from %s: %s\n", name.c_str(), line.c_str());
+                        LOG_INF("MCP Notification from %s: %s\n", name.c_str(), line.c_str());
                     }
                 } catch (const std::exception & e) {
                     // Not a full JSON yet? Or invalid?
                     // If it was a line, it should be valid JSON-RPC
-                    LOG_WRN("Failed to parse JSON from %s: %s\n", name.c_str(), e.what());
+                    LOG_WRN("Failed to parse JSON from %s: %s (line: %s)\n", name.c_str(), e.what(), line.c_str());
                 }
             }
         }
+        LOG_DBG("MCP read_loop exiting for %s (stop_read=%d, running=%d)\n",
+                name.c_str(), stop_read.load(), running.load());
     }
 
     void err_loop() {

From 47b7f0f1e3f30011fdc515cd82a9552cf56fb9dc Mon Sep 17 00:00:00 2001
From: Piotr Wilkin <piotr.wilkin@syndatis.com>
Date: Thu, 5 Feb 2026 19:19:41 +0100
Subject: [PATCH 03/10] EC

---
 mcp_config.json   |  2 +-
 mcp_dummy.py      | 10 +++++-----
 tools/cli/cli.cpp |  4 ++--
 tools/cli/mcp.hpp |  2 +-
 4 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/mcp_config.json b/mcp_config.json
index d531881607..700b12ee9f 100644
--- a/mcp_config.json
+++ b/mcp_config.json
@@ -10,4 +10,4 @@
       ]
     }
   }
-}
\ No newline at end of file
+}
diff --git a/mcp_dummy.py b/mcp_dummy.py
index 1fff089407..b20035dd4e 100644
--- a/mcp_dummy.py
+++ b/mcp_dummy.py
@@ -17,13 +17,13 @@ def main():
                 req = json.loads(line)
             except json.JSONDecodeError:
                 continue
-            
+
             if "method" in req:
                 method = req["method"]
                 req_id = req.get("id")
-                
+
                 resp = {"jsonrpc": "2.0", "id": req_id}
-                
+
                 if method == "initialize":
                     resp["result"] = {
                         "protocolVersion": "2024-11-05",
@@ -51,9 +51,9 @@ def main():
                     params = req.get("params", {})
                     name = params.get("name")
                     args = params.get("arguments", {})
-                    
+
                     logging.info(f"Tool call: {name} with {args}")
-                    
+
                     content = [{"type": "text", "text": f"Weather in {args.get('location')} is 25C"}]
                     # For simplicity, return raw content or follow MCP spec?
                     # MCP spec: result: { content: [ {type: "text", text: "..."} ] }
diff --git a/tools/cli/cli.cpp b/tools/cli/cli.cpp
index 1e63ab9aa5..0a4b096897 100644
--- a/tools/cli/cli.cpp
+++ b/tools/cli/cli.cpp
@@ -168,7 +168,7 @@ struct cli_context {
             buf.assign((std::istreambuf_iterator<char>(file)), std::istreambuf_iterator<char>());
             input_files.push_back(std::move(buf));
             return mtmd_default_marker();
-        } 
+        }
         return std::string((std::istreambuf_iterator<char>(file)), std::istreambuf_iterator<char>());
     }
 
@@ -363,7 +363,7 @@ int main(int argc, char ** argv) {
         // process commands
         if (string_starts_with(buffer, "/exit")) {
             break;
-        } 
+        }
         if (string_starts_with(buffer, "/regen")) {
             if (ctx_cli.messages.size() >= 2) {
                 size_t last_idx = ctx_cli.messages.size() - 1;
diff --git a/tools/cli/mcp.hpp b/tools/cli/mcp.hpp
index 422cd2a807..8d8ce4de98 100644
--- a/tools/cli/mcp.hpp
+++ b/tools/cli/mcp.hpp
@@ -178,7 +178,7 @@ class mcp_server {
         if (err_thread.joinable()) {
             err_thread.join();
         }
-        
+
         // 5. Cleanup
         if (running) {
              subprocess_destroy(&process);

From 1a38f3311c4a258dc4f25b20c81c375fb6ba0606 Mon Sep 17 00:00:00 2001
From: Piotr Wilkin <piotr.wilkin@syndatis.com>
Date: Thu, 5 Feb 2026 19:24:13 +0100
Subject: [PATCH 04/10] Silence ya pyright!

---
 mcp_dummy.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/mcp_dummy.py b/mcp_dummy.py
index b20035dd4e..a910f53b6e 100644
--- a/mcp_dummy.py
+++ b/mcp_dummy.py
@@ -1,5 +1,4 @@
 import sys
-import os
 import json
 import logging
 

From f9ad19620a8304e2395a11aa9b48e9f2a822f5d9 Mon Sep 17 00:00:00 2001
From: Piotr Wilkin <piotr.wilkin@syndatis.com>
Date: Thu, 5 Feb 2026 19:30:00 +0100
Subject: [PATCH 05/10] This wasn't supposed to be here...

---
 common/chat-parser.cpp | 13 +------------
 1 file changed, 1 insertion(+), 12 deletions(-)

diff --git a/common/chat-parser.cpp b/common/chat-parser.cpp
index b4460fd01f..29819e48d3 100644
--- a/common/chat-parser.cpp
+++ b/common/chat-parser.cpp
@@ -1140,9 +1140,6 @@ static void common_chat_parse_hermes_2_pro(common_chat_msg_parser & builder) {
         return;
     }
 
-    // DEBUG: trace input
-    // fprintf(stderr, "hermes_parse: input='%s'\n", builder.input().substr(builder.pos()).c_str());
-
     static const common_regex open_regex(
         "(?:"
             "(```(?:xml|json)?\\n\\s*)?" // match 1 (block_start)
@@ -1163,7 +1160,6 @@ static void common_chat_parse_hermes_2_pro(common_chat_msg_parser & builder) {
     );
 
     while (auto res = builder.try_find_regex(open_regex)) {
-        // fprintf(stderr, "hermes_parse: found regex match\n");
         const auto & block_start = res->groups[1];
         std::string block_end = block_start.empty() ? "" : "```";
 
@@ -1176,23 +1172,16 @@ static void common_chat_parse_hermes_2_pro(common_chat_msg_parser & builder) {
 
             if (auto tool_call = builder.try_consume_json_with_dumped_args({{"arguments"}})) {
                 if (!builder.add_tool_call(tool_call->value) || tool_call->is_partial) {
-                    fprintf(stderr, "hermes: json tool add failed or partial (partial=%d)\n", tool_call->is_partial);
                     throw common_chat_msg_partial_exception("incomplete tool call");
                 }
                 builder.consume_spaces();
-                // builder.consume_literal(close_tag); // Handle mismatched close tag gracefully?
-                if (!builder.try_consume_literal(close_tag)) {
-                     fprintf(stderr, "hermes: failed to consume close tag '%s'. Remaining: '%s'\n", close_tag.c_str(), builder.input().substr(builder.pos()).c_str());
-                     // If closing tag is missing, is it partial?
-                     throw common_chat_msg_partial_exception("missing close tag");
-                }
+                builder.consume_literal(close_tag);
                 builder.consume_spaces();
                 if (!block_end.empty()) {
                     builder.consume_literal(block_end);
                     builder.consume_spaces();
                 }
             } else {
-                fprintf(stderr, "hermes: failed to consume json\n");
                 throw common_chat_msg_partial_exception("failed to parse tool call");
             }
         } else {

From 37d705e3c7d12586723b57bcebe625a9310a90dd Mon Sep 17 00:00:00 2001
From: Piotr Wilkin <piotr.wilkin@syndatis.com>
Date: Thu, 5 Feb 2026 19:32:29 +0100
Subject: [PATCH 06/10] Hardcoding my devel paths probably not a good idea, but
 hey, at least it isn't an API key!

---
 mcp_dummy.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mcp_dummy.py b/mcp_dummy.py
index a910f53b6e..267199e8b2 100644
--- a/mcp_dummy.py
+++ b/mcp_dummy.py
@@ -2,7 +2,7 @@ import sys
 import json
 import logging
 
-logging.basicConfig(filename='/devel/tools/llama.cpp/mcp_dummy.log', level=logging.DEBUG)
+logging.basicConfig(filename='mcp_dummy.log', level=logging.DEBUG)
 
 def main():
     logging.info("Starting MCP Dummy Server")

From f9c0653e7de09a5852a3ed1f1fd9907892173634 Mon Sep 17 00:00:00 2001
From: Piotr Wilkin <piotr.wilkin@syndatis.com>
Date: Thu, 5 Feb 2026 20:24:36 +0100
Subject: [PATCH 07/10] Fix flake8

---
 mcp_dummy.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/mcp_dummy.py b/mcp_dummy.py
index 267199e8b2..32b8ff7716 100644
--- a/mcp_dummy.py
+++ b/mcp_dummy.py
@@ -4,6 +4,7 @@ import logging
 
 logging.basicConfig(filename='mcp_dummy.log', level=logging.DEBUG)
 
+
 def main():
     logging.info("Starting MCP Dummy Server")
     while True:
@@ -65,7 +66,7 @@ def main():
                 else:
                     # Ignore notifications or other methods
                     if req_id is not None:
-                         resp["error"] = {"code": -32601, "message": "Method not found"}
+                        resp["error"] = {"code": -32601, "message": "Method not found"}
                     else:
                         continue
 
@@ -77,5 +78,6 @@ def main():
             logging.error(f"Error: {e}")
             break
 
+
 if __name__ == "__main__":
     main()

From ed887e0d8c02d950904de94361d1b267a33b6ffe Mon Sep 17 00:00:00 2001
From: Piotr Wilkin <piotr.wilkin@syndatis.com>
Date: Thu, 5 Feb 2026 20:29:29 +0100
Subject: [PATCH 08/10] Revert formatting changes

---
 tools/cli/cli.cpp | 46 +++++++++++++++++++++++-----------------------
 1 file changed, 23 insertions(+), 23 deletions(-)

diff --git a/tools/cli/cli.cpp b/tools/cli/cli.cpp
index 0a4b096897..9993e96a8a 100644
--- a/tools/cli/cli.cpp
+++ b/tools/cli/cli.cpp
@@ -1,6 +1,5 @@
-#include "arg.h"
-#include "chat.h"
 #include "common.h"
+#include "arg.h"
 #include "console.h"
 // #include "log.h"
 
@@ -8,11 +7,10 @@
 #include "server-context.h"
 #include "server-task.h"
 
-#include <signal.h>
-
 #include <atomic>
 #include <fstream>
 #include <thread>
+#include <signal.h>
 
 #if defined(_WIN32)
 #    define WIN32_LEAN_AND_MEAN
@@ -22,7 +20,7 @@
 #    include <windows.h>
 #endif
 
-static const char * LLAMA_ASCII_LOGO = R"(
+const char * LLAMA_ASCII_LOGO = R"(
 ▄▄ ▄▄
 ██ ██
 ██ ██  ▀▀█▄ ███▄███▄  ▀▀█▄    ▄████ ████▄ ████▄
@@ -39,7 +37,7 @@ static bool should_stop() {
 }
 
 #if defined(__unix__) || (defined(__APPLE__) && defined(__MACH__)) || defined(_WIN32)
-static void signal_handler(int /*unused*/) {
+static void signal_handler(int) {
     if (g_is_interrupted.load()) {
         // second Ctrl+C - exit immediately
         // make sure to clear colors before exiting (not using LOG or console.cpp here to avoid deadlock)
@@ -268,9 +266,11 @@ int main(int argc, char ** argv) {
     console::spinner::stop();
     console::log("\n");
 
-    std::thread inference_thread([&ctx_cli]() { ctx_cli.ctx_server.start_loop(); });
+    std::thread inference_thread([&ctx_cli]() {
+        ctx_cli.ctx_server.start_loop();
+    });
 
-    auto        inf        = ctx_cli.ctx_server.get_meta();
+    auto inf = ctx_cli.ctx_server.get_meta();
     std::string modalities = "text";
     if (inf.has_inp_image) {
         modalities += ", vision";
@@ -281,8 +281,8 @@ int main(int argc, char ** argv) {
 
     if (!params.system_prompt.empty()) {
         ctx_cli.messages.push_back({
-            { "role",    "system"             },
-            { "content", params.system_prompt }
+            {"role",    "system"},
+            {"content", params.system_prompt}
         });
     }
 
@@ -316,7 +316,7 @@ int main(int argc, char ** argv) {
         if (params.prompt.empty()) {
             console::log("\n> ");
             std::string line;
-            bool        another_line = true;
+            bool another_line = true;
             do {
                 another_line = console::readline(line, params.multiline_input);
                 buffer += line;
@@ -338,7 +338,7 @@ int main(int argc, char ** argv) {
             } else {
                 console::log("\n> %s\n", buffer.c_str());
             }
-            params.prompt.clear();  // only use it once
+            params.prompt.clear(); // only use it once
         }
         console::set_display(DISPLAY_TYPE_RESET);
         console::log("\n");
@@ -349,7 +349,7 @@ int main(int argc, char ** argv) {
         }
 
         // remove trailing newline
-        if (!buffer.empty() && buffer.back() == '\n') {
+        if (!buffer.empty() &&buffer.back() == '\n') {
             buffer.pop_back();
         }
 
@@ -378,10 +378,11 @@ int main(int argc, char ** argv) {
             ctx_cli.input_files.clear();
             console::log("Chat history cleared.\n");
             continue;
-        } else if ((string_starts_with(buffer, "/image ") && inf.has_inp_image) ||
-                   (string_starts_with(buffer, "/audio ") && inf.has_inp_audio)) {
+        } else if (
+                (string_starts_with(buffer, "/image ") && inf.has_inp_image) ||
+                (string_starts_with(buffer, "/audio ") && inf.has_inp_audio)) {
             // just in case (bad copy-paste for example), we strip all trailing/leading spaces
-            std::string fname  = string_strip(buffer.substr(7));
+            std::string fname = string_strip(buffer.substr(7));
             std::string marker = ctx_cli.load_input_file(fname, true);
             if (marker.empty()) {
                 console::error("file does not exist or cannot be opened: '%s'\n", fname.c_str());
@@ -391,7 +392,7 @@ int main(int argc, char ** argv) {
             console::log("Loaded media from '%s'\n", fname.c_str());
             continue;
         } else if (string_starts_with(buffer, "/read ")) {
-            std::string fname  = string_strip(buffer.substr(6));
+            std::string fname = string_strip(buffer.substr(6));
             std::string marker = ctx_cli.load_input_file(fname, false);
             if (marker.empty()) {
                 console::error("file does not exist or cannot be opened: '%s'\n", fname.c_str());
@@ -408,8 +409,8 @@ int main(int argc, char ** argv) {
         // generate response
         if (add_user_msg) {
             ctx_cli.messages.push_back({
-                { "role",    "user"  },
-                { "content", cur_msg }
+                {"role",    "user"},
+                {"content", cur_msg}
             });
             cur_msg.clear();
         }
@@ -486,8 +487,8 @@ int main(int argc, char ** argv) {
                 // continue loop to generate with tool results
             } else {
                 json assistant_msg = {
-                    { "role",    "assistant"       },
-                    { "content", assistant_content }
+                    {"role",   "assistant"},
+                    {"content", assistant_content}
                 };
                 ctx_cli.messages.push_back(assistant_msg);
                 break;
@@ -498,8 +499,7 @@ int main(int argc, char ** argv) {
         if (params.show_timings) {
             console::set_display(DISPLAY_TYPE_INFO);
             console::log("\n");
-            console::log("[ Prompt: %.1f t/s | Generation: %.1f t/s ]\n", timings.prompt_per_second,
-                         timings.predicted_per_second);
+            console::log("[ Prompt: %.1f t/s | Generation: %.1f t/s ]\n", timings.prompt_per_second, timings.predicted_per_second);
             console::set_display(DISPLAY_TYPE_RESET);
         }
 

From 17919e700dc29d07881a4f0ca55f83ffa7c252fb Mon Sep 17 00:00:00 2001
From: Piotr Wilkin <piotr.wilkin@syndatis.com>
Date: Thu, 5 Feb 2026 20:48:01 +0100
Subject: [PATCH 09/10] Revert formatting cd.

---
 tools/cli/cli.cpp | 29 ++++++++++++++---------------
 1 file changed, 14 insertions(+), 15 deletions(-)

diff --git a/tools/cli/cli.cpp b/tools/cli/cli.cpp
index 9993e96a8a..838128fe04 100644
--- a/tools/cli/cli.cpp
+++ b/tools/cli/cli.cpp
@@ -13,11 +13,11 @@
 #include <signal.h>
 
 #if defined(_WIN32)
-#    define WIN32_LEAN_AND_MEAN
-#    ifndef NOMINMAX
-#        define NOMINMAX
-#    endif
-#    include <windows.h>
+#define WIN32_LEAN_AND_MEAN
+#ifndef NOMINMAX
+#   define NOMINMAX
+#endif
+#include <windows.h>
 #endif
 
 const char * LLAMA_ASCII_LOGO = R"(
@@ -31,12 +31,11 @@ const char * LLAMA_ASCII_LOGO = R"(
 )";
 
 static std::atomic<bool> g_is_interrupted = false;
-
 static bool should_stop() {
     return g_is_interrupted.load();
 }
 
-#if defined(__unix__) || (defined(__APPLE__) && defined(__MACH__)) || defined(_WIN32)
+#if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__)) || defined (_WIN32)
 static void signal_handler(int) {
     if (g_is_interrupted.load()) {
         // second Ctrl+C - exit immediately
@@ -67,8 +66,8 @@ struct cli_context {
         defaults.n_predict   = params.n_predict;
         defaults.antiprompt  = params.antiprompt;
 
-        defaults.stream                                 = true;  // make sure we always use streaming mode
-        defaults.timings_per_token                      = true;  // in order to get timings even when we cancel mid-way
+        defaults.stream = true; // make sure we always use streaming mode
+        defaults.timings_per_token = true; // in order to get timings even when we cancel mid-way
         // defaults.return_progress = true; // TODO: show progress
     }
 
@@ -92,7 +91,7 @@ struct cli_context {
                 task.params.chat_parser_params.parser.load(chat_params.parser);
             }
 
-            rd.post_task({ std::move(task) });
+            rd.post_task({std::move(task)});
         }
 
         // wait for first result
@@ -101,7 +100,7 @@ struct cli_context {
 
         console::spinner::stop();
         std::string curr_content;
-        bool        is_thinking = false;
+        bool is_thinking = false;
 
         while (result) {
             if (should_stop()) {
@@ -193,7 +192,7 @@ struct cli_context {
 int main(int argc, char ** argv) {
     common_params params;
 
-    params.verbosity = LOG_LEVEL_ERROR;  // by default, less verbose logs
+    params.verbosity = LOG_LEVEL_ERROR; // by default, less verbose logs
 
     if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_CLI)) {
         return 1;
@@ -219,14 +218,14 @@ int main(int argc, char ** argv) {
 
     console::set_display(DISPLAY_TYPE_RESET);
 
-#if defined(__unix__) || (defined(__APPLE__) && defined(__MACH__))
+#if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__))
     struct sigaction sigint_action;
     sigint_action.sa_handler = signal_handler;
-    sigemptyset(&sigint_action.sa_mask);
+    sigemptyset (&sigint_action.sa_mask);
     sigint_action.sa_flags = 0;
     sigaction(SIGINT, &sigint_action, NULL);
     sigaction(SIGTERM, &sigint_action, NULL);
-#elif defined(_WIN32)
+#elif defined (_WIN32)
     auto console_ctrl_handler = +[](DWORD ctrl_type) -> BOOL {
         return (ctrl_type == CTRL_C_EVENT) ? (signal_handler(SIGINT), true) : false;
     };

From 9e60c0c83ff3dd8a618c6448508d8a38bc463284 Mon Sep 17 00:00:00 2001
From: Piotr Wilkin <piotr.wilkin@syndatis.com>
Date: Thu, 5 Feb 2026 21:28:24 +0100
Subject: [PATCH 10/10] Separation of concerns (separate transport and comms
 layer)

---
 tools/cli/mcp.hpp | 356 ++++++++++++++++++++++++++--------------------
 1 file changed, 203 insertions(+), 153 deletions(-)

diff --git a/tools/cli/mcp.hpp b/tools/cli/mcp.hpp
index 8d8ce4de98..e5d0d9d6c8 100644
--- a/tools/cli/mcp.hpp
+++ b/tools/cli/mcp.hpp
@@ -1,12 +1,11 @@
 #pragma once
 
 #include "../../vendor/sheredom/subprocess.h"
-#include "common.h"
 #include "log.h"
 
 #include <atomic>
-#include <condition_variable>
 #include <fstream>
+#include <functional>
 #include <future>
 #include <iostream>
 #include <map>
@@ -26,7 +25,9 @@ struct mcp_tool {
     std::string server_name;
 };
 
-class mcp_server {
+// Low-level transport layer for subprocess-based MCP communication.
+// Handles subprocess lifecycle, raw I/O, and line-based message framing.
+class mcp_transport {
     std::string                        name;
     std::string                        command;
     std::vector<std::string>           args;
@@ -38,27 +39,24 @@ class mcp_server {
     std::thread         err_thread;
     std::atomic<bool>   stop_read{ false };
 
-    std::mutex                        mutex;
-    int                               next_id = 1;
-    std::map<int, std::promise<json>> pending_requests;
-
-    // Buffer for reading
     std::string read_buffer;
 
+    std::function<void(const std::string &)> line_handler;
+
   public:
-    mcp_server(const std::string &                        name,
-               const std::string &                        cmd,
-               const std::vector<std::string> &           args,
-               const std::map<std::string, std::string> & env) :
+    mcp_transport(const std::string &                        name,
+                  const std::string &                        cmd,
+                  const std::vector<std::string> &           args,
+                  const std::map<std::string, std::string> & env) :
         name(name),
         command(cmd),
         args(args),
         env(env) {}
 
-    mcp_server(const mcp_server &)            = delete;
-    mcp_server & operator=(const mcp_server &) = delete;
+    mcp_transport(const mcp_transport &)            = delete;
+    mcp_transport & operator=(const mcp_transport &) = delete;
 
-    mcp_server(mcp_server && other) noexcept :
+    mcp_transport(mcp_transport && other) noexcept :
         name(std::move(other.name)),
         command(std::move(other.command)),
         args(std::move(other.args)),
@@ -68,39 +66,41 @@ class mcp_server {
         read_thread(std::move(other.read_thread)),
         err_thread(std::move(other.err_thread)),
         stop_read(other.stop_read.load()),
-        next_id(other.next_id),
-        pending_requests(std::move(other.pending_requests)),
-        read_buffer(std::move(other.read_buffer)) {
-        // Zero out the source process to prevent double-free
+        read_buffer(std::move(other.read_buffer)),
+        line_handler(std::move(other.line_handler)) {
         other.process = {};
         other.running = false;
     }
 
-    mcp_server & operator=(mcp_server && other) noexcept {
+    mcp_transport & operator=(mcp_transport && other) noexcept {
         if (this != &other) {
-            stop(); // Clean up current resources
+            stop();
 
-            name             = std::move(other.name);
-            command          = std::move(other.command);
-            args             = std::move(other.args);
-            env              = std::move(other.env);
-            process          = other.process;
-            running          = other.running.load();
-            read_thread      = std::move(other.read_thread);
-            err_thread       = std::move(other.err_thread);
-            stop_read        = other.stop_read.load();
-            next_id          = other.next_id;
-            pending_requests = std::move(other.pending_requests);
-            read_buffer      = std::move(other.read_buffer);
+            name         = std::move(other.name);
+            command      = std::move(other.command);
+            args         = std::move(other.args);
+            env          = std::move(other.env);
+            process      = other.process;
+            running      = other.running.load();
+            read_thread  = std::move(other.read_thread);
+            err_thread   = std::move(other.err_thread);
+            stop_read    = other.stop_read.load();
+            read_buffer  = std::move(other.read_buffer);
+            line_handler = std::move(other.line_handler);
 
-            // Zero out source
             other.process = {};
             other.running = false;
         }
         return *this;
     }
 
-    ~mcp_server() { stop(); }
+    ~mcp_transport() { stop(); }
+
+    void set_line_handler(std::function<void(const std::string &)> handler) {
+        line_handler = std::move(handler);
+    }
+
+    bool is_running() const { return running; }
 
     bool start() {
         std::vector<const char *> cmd_args;
@@ -122,7 +122,6 @@ class mcp_server {
             env_vars.push_back(nullptr);
         }
 
-        // Blocking I/O is simpler with threads
         int options = subprocess_option_search_user_path;
         int result;
         if (env.empty()) {
@@ -138,14 +137,16 @@ class mcp_server {
         }
 
         running     = true;
-        read_thread = std::thread(&mcp_server::read_loop, this);
-        err_thread  = std::thread(&mcp_server::err_loop, this);
+        read_thread = std::thread(&mcp_transport::read_loop, this);
+        err_thread  = std::thread(&mcp_transport::err_loop, this);
 
         return true;
     }
 
     void stop() {
-        if (!running) return;
+        if (!running) {
+            return;
+        }
         LOG_INF("Stopping MCP server %s...\n", name.c_str());
         stop_read = true;
 
@@ -157,12 +158,12 @@ class mcp_server {
 
         // 2. Wait for 10 seconds for normal termination
         bool terminated = false;
-        for (int i = 0; i < 100; ++i) { // 100 * 100ms = 10s
-             if (subprocess_alive(&process) == 0) {
-                 terminated = true;
-                 break;
-             }
-             std::this_thread::sleep_for(std::chrono::milliseconds(100));
+        for (int i = 0; i < 100; ++i) {  // 100 * 100ms = 10s
+            if (subprocess_alive(&process) == 0) {
+                terminated = true;
+                break;
+            }
+            std::this_thread::sleep_for(std::chrono::milliseconds(100));
         }
 
         // 3. Terminate if still running
@@ -181,14 +182,131 @@ class mcp_server {
 
         // 5. Cleanup
         if (running) {
-             subprocess_destroy(&process);
-             running = false;
+            subprocess_destroy(&process);
+            running = false;
         }
         LOG_INF("MCP server %s stopped.\n", name.c_str());
     }
 
+    bool send(const std::string & data) {
+        FILE * f = subprocess_stdin(&process);
+        if (!f) {
+            return false;
+        }
+        fwrite(data.c_str(), 1, data.size(), f);
+        fflush(f);
+        return true;
+    }
+
+  private:
+    void read_loop() {
+        LOG_DBG("MCP read_loop started for %s\n", name.c_str());
+        char buffer[4096];
+        while (!stop_read && running) {
+            unsigned bytes_read = subprocess_read_stdout(&process, buffer, sizeof(buffer));
+
+            if (bytes_read == 0) {
+                // If blocking read returns 0, it means EOF (process exited or pipe closed).
+                // We should NOT call subprocess_alive() here because it calls subprocess_join()
+                // which modifies the process struct (closes stdin) and causes race conditions/double-free
+                // when stop() is called concurrently.
+                // Just break the loop. The process is likely dead or dying.
+                if (!stop_read) {
+                    LOG_ERR("MCP server %s: read_loop exiting (stdout closed/EOF)\n", name.c_str());
+                }
+                running = false;
+                break;
+            }
+
+            read_buffer.append(buffer, bytes_read);
+            size_t pos;
+            while ((pos = read_buffer.find('\n')) != std::string::npos) {
+                std::string line = read_buffer.substr(0, pos);
+                read_buffer.erase(0, pos + 1);
+
+                if (line.empty()) {
+                    continue;
+                }
+
+                if (line_handler) {
+                    line_handler(line);
+                }
+            }
+        }
+        LOG_DBG("MCP read_loop exiting for %s (stop_read=%d, running=%d)\n", name.c_str(), stop_read.load(),
+                running.load());
+    }
+
+    void err_loop() {
+        char buffer[1024];
+        while (!stop_read && running) {
+            unsigned bytes_read = subprocess_read_stderr(&process, buffer, sizeof(buffer));
+            if (bytes_read > 0) {
+                if (stop_read) {
+                    break;  // Don't log stderr during shutdown
+                }
+                std::string err_str(buffer, bytes_read);
+                LOG_WRN("[%s stderr] %s", name.c_str(), err_str.c_str());
+            } else {
+                // EOF
+                break;
+            }
+        }
+    }
+};
+
+// JSON-RPC layer for MCP server communication.
+// Handles request/response correlation, message formatting, and MCP protocol methods.
+class mcp_server {
+    std::string   name;
+    mcp_transport transport;
+
+    std::mutex                        mutex;
+    int                               next_id = 1;
+    std::map<int, std::promise<json>> pending_requests;
+
+  public:
+    mcp_server(const std::string &                        name,
+               const std::string &                        cmd,
+               const std::vector<std::string> &           args,
+               const std::map<std::string, std::string> & env) :
+        name(name),
+        transport(name, cmd, args, env) {}
+
+    mcp_server(const mcp_server &)            = delete;
+    mcp_server & operator=(const mcp_server &) = delete;
+
+    mcp_server(mcp_server && other) noexcept :
+        name(std::move(other.name)),
+        transport(std::move(other.transport)),
+        next_id(other.next_id),
+        pending_requests(std::move(other.pending_requests)) {}
+
+    mcp_server & operator=(mcp_server && other) noexcept {
+        if (this != &other) {
+            stop();
+
+            name             = std::move(other.name);
+            transport        = std::move(other.transport);
+            next_id          = other.next_id;
+            pending_requests = std::move(other.pending_requests);
+        }
+        return *this;
+    }
+
+    ~mcp_server() { stop(); }
+
+    bool start() {
+        transport.set_line_handler([this](const std::string & line) {
+            handle_line(line);
+        });
+        return transport.start();
+    }
+
+    void stop() { transport.stop(); }
+
     json send_request(const std::string & method, const json & params = json::object()) {
-        if (!running) {
+        if (!transport.is_running()) {
             LOG_ERR("Cannot send request to %s: server not running\n", name.c_str());
             return nullptr;
         }
@@ -208,13 +326,9 @@ class mcp_server {
             { "params",  params }
         };
 
-        std::string req_str    = req.dump() + "\n";
-        FILE *      stdin_file = subprocess_stdin(&process);
-        if (stdin_file) {
-            LOG_DBG("MCP request to %s [id=%d]: %s\n", name.c_str(), id, method.c_str());
-            fwrite(req_str.c_str(), 1, req_str.size(), stdin_file);
-            fflush(stdin_file);
-        } else {
+        std::string req_str = req.dump() + "\n";
+        LOG_DBG("MCP request to %s [id=%d]: %s\n", name.c_str(), id, method.c_str());
+        if (!transport.send(req_str)) {
             LOG_ERR("Cannot send request to %s: stdin is null\n", name.c_str());
             std::lock_guard<std::mutex> lock(mutex);
             pending_requests.erase(id);
@@ -239,29 +353,19 @@ class mcp_server {
             { "params",  params }
         };
 
-        std::string req_str    = req.dump() + "\n";
-        FILE *      stdin_file = subprocess_stdin(&process);
-        if (stdin_file) {
-            fwrite(req_str.c_str(), 1, req_str.size(), stdin_file);
-            fflush(stdin_file);
-        }
+        transport.send(req.dump() + "\n");
     }
 
     // Initialize handshake
     bool initialize() {
         // Send initialize
         json init_params = {
-            { "protocolVersion", "2024-11-05" },
-            { "capabilities", {
-                { "roots", {
-                    { "listChanged", false }
-                } },
-                { "sampling", json::object() }
-            } },
-            { "clientInfo", {
-                { "name", "llama.cpp-cli" },
-                { "version", "0.1.0" }  // TODO: use real version
-            } }
+            { "protocolVersion", "2024-11-05"                                                                  },
+            { "capabilities",    { { "roots", { { "listChanged", false } } }, { "sampling", json::object() } } },
+            { "clientInfo",
+             {
+                  { "name", "llama.cpp-cli" }, { "version", "0.1.0" }  // TODO: use real version
+              }                                                                                                }
         };
 
         json res = send_request("initialize", init_params);
@@ -320,83 +424,34 @@ class mcp_server {
     }
 
   private:
-    void read_loop() {
-        LOG_DBG("MCP read_loop started for %s\n", name.c_str());
-        char buffer[4096];
-        while (!stop_read && running) {
-            unsigned bytes_read = subprocess_read_stdout(&process, buffer, sizeof(buffer));
-
-            if (bytes_read == 0) {
-                // If blocking read returns 0, it means EOF (process exited or pipe closed).
-                // We should NOT call subprocess_alive() here because it calls subprocess_join()
-                // which modifies the process struct (closes stdin) and causes race conditions/double-free
-                // when stop() is called concurrently.
-                // Just break the loop. The process is likely dead or dying.
-                if (!stop_read) {
-                    LOG_ERR("MCP server %s: read_loop exiting (stdout closed/EOF)\n", name.c_str());
+    void handle_line(const std::string & line) {
+        try {
+            json msg = json::parse(line);
+            if (msg.contains("id") && !msg["id"].is_null()) {
+                // Response - handle both int and string IDs (JSON-RPC allows both)
+                int id;
+                if (msg["id"].is_string()) {
+                    id = std::stoi(msg["id"].get<std::string>());
+                } else {
+                    id = msg["id"].get<int>();
                 }
-                running = false;
-                break;
-            }
-
-            read_buffer.append(buffer, bytes_read);
-            size_t pos;
-            while ((pos = read_buffer.find('\n')) != std::string::npos) {
-                std::string line = read_buffer.substr(0, pos);
-                read_buffer.erase(0, pos + 1);
-
-                if (line.empty()) {
-                    continue;
+                std::lock_guard<std::mutex> lock(mutex);
+                if (pending_requests.count(id)) {
+                    LOG_DBG("MCP response received from %s [id=%d]\n", name.c_str(), id);
+                    pending_requests[id].set_value(msg);
+                    pending_requests.erase(id);
+                } else {
+                    LOG_WRN("MCP response for unknown id %d from %s: %s\n", id, name.c_str(), line.c_str());
                 }
-
-                try {
-                    json msg = json::parse(line);
-                    if (msg.contains("id") && !msg["id"].is_null()) {
-                        // Response - handle both int and string IDs (JSON-RPC allows both)
-                        int id;
-                        if (msg["id"].is_string()) {
-                            id = std::stoi(msg["id"].get<std::string>());
-                        } else {
-                            id = msg["id"].get<int>();
-                        }
-                        std::lock_guard<std::mutex> lock(mutex);
-                        if (pending_requests.count(id)) {
-                            LOG_DBG("MCP response received from %s [id=%d]\n", name.c_str(), id);
-                            pending_requests[id].set_value(msg);
-                            pending_requests.erase(id);
-                        } else {
-                            LOG_WRN("MCP response for unknown id %d from %s: %s\n", id, name.c_str(), line.c_str());
-                        }
-                    } else {
-                        // Notification or request from server -> ignore for now or log
-                        // MCP servers might send notifications (e.g. logging)
-                        LOG_INF("MCP Notification from %s: %s\n", name.c_str(), line.c_str());
-                    }
-                } catch (const std::exception & e) {
-                    // Not a full JSON yet? Or invalid?
-                    // If it was a line, it should be valid JSON-RPC
-                    LOG_WRN("Failed to parse JSON from %s: %s (line: %s)\n", name.c_str(), e.what(), line.c_str());
-                }
-            }
-        }
-        LOG_DBG("MCP read_loop exiting for %s (stop_read=%d, running=%d)\n",
-                name.c_str(), stop_read.load(), running.load());
-    }
-
-    void err_loop() {
-        char buffer[1024];
-        while (!stop_read && running) {
-            unsigned bytes_read = subprocess_read_stderr(&process, buffer, sizeof(buffer));
-            if (bytes_read > 0) {
-                if (stop_read) break; // Don't log stderr during shutdown
-                std::string err_str(buffer, bytes_read);
-                // Filter out empty/whitespace-only stderr if desired, or just keep it.
-                // User said "extra logging that passes the stderr here is unnecessary" referring to shutdown.
-                LOG_WRN("[%s stderr] %s", name.c_str(), err_str.c_str());
             } else {
-                // EOF
-                break;
+                // Notification or request from server -> ignore for now or log
+                // MCP servers might send notifications (e.g. logging)
+                LOG_INF("MCP Notification from %s: %s\n", name.c_str(), line.c_str());
             }
+        } catch (const std::exception & e) {
+            // Not a full JSON yet? Or invalid?
+            // If it was a line, it should be valid JSON-RPC
+            LOG_WRN("Failed to parse JSON from %s: %s (line: %s)\n", name.c_str(), e.what(), line.c_str());
         }
     }
 };
@@ -434,19 +489,14 @@ class mcp_context {
         if (config.contains("mcpServers")) {
             std::string server_list;
             for (auto & [key, val] : config["mcpServers"].items()) {
-                if (!server_list.empty()) server_list += ", ";
+                if (!server_list.empty()) {
+                    server_list += ", ";
+                }
                 server_list += key;
             }
             LOG_INF("MCP configuration found with servers: %s\n", server_list.c_str());
 
             for (auto & [key, val] : config["mcpServers"].items()) {
-                // If enabled_servers_str is empty, enable all? User said "possibility to pick which MCP servers to enable".
-                // If the user specifies explicit list, we filter. If not, maybe we shouldn't enable any or enable all?
-                // The prompt says "possibility to pick".
-                // Let's assume if list provided, use it. If not, enable all? Or none?
-                // User provided example implies explicit enabling might be desired.
-                // Let's assume if `enabled_servers_str` is not empty, we filter.
-
                 bool enabled = true;
                 if (!enabled_list.empty()) {
                     bool found = false;