diff --git a/common/CMakeLists.txt b/common/CMakeLists.txt
index 295ae9ea25..689fd367da 100644
--- a/common/CMakeLists.txt
+++ b/common/CMakeLists.txt
@@ -48,10 +48,10 @@ add_library(${TARGET} STATIC
     arg.cpp
     arg.h
     base64.hpp
-    chat-parser.cpp
-    chat-parser.h
-    chat-parser-xml-toolcall.h
-    chat-parser-xml-toolcall.cpp
+    chat-auto-parser-analyzer.cpp
+    chat-auto-parser-generator.cpp
+    chat-auto-parser-helpers.cpp
+    chat-auto-parser.h
     chat-peg-parser.cpp
     chat-peg-parser.h
     chat.cpp
diff --git a/common/chat-auto-parser-analyzer.cpp b/common/chat-auto-parser-analyzer.cpp
new file mode 100644
index 0000000000..db6aa2c547
--- /dev/null
+++ b/common/chat-auto-parser-analyzer.cpp
@@ -0,0 +1,1461 @@
+#include "chat-auto-parser-helpers.h"
+#include "chat-auto-parser.h"
+#include "chat.h"
+#include "log.h"
+#include "nlohmann/json.hpp"
+
+using json = nlohmann::ordered_json;
+
+template_analysis_result template_analyzer::analyze_template(const common_chat_template & tmpl) {
+    LOG_DBG("=== STARTING UNIFIED TEMPLATE ANALYSIS ===\n");
+
+    template_analysis_result result;
+
+    // Phase 1: Analyze content and reasoning structure (no tools involved)
+    result.content = analyze_content_structure(tmpl);
+
+    // Phase 2: Analyze tool call structure (layered on Phase 1)
+    result.tools = analyze_tool_structure(tmpl, result.content);
+
+    // Post-processing: Extract reasoning markers from tool_section_start if Phase 1 didn't detect them
+    // Some templates (like Command-R7B) include reasoning markers in tool outputs but not in prompts
+    if (result.content.reasoning_start.empty() && !result.tools.tool_section_start.empty()) {
+        // Known reasoning end marker patterns that might be embedded in tool_section_start
+        std::vector<std::pair<std::string, std::string>> reasoning_patterns = {
+            { "<|START_THINKING|>", "<|END_THINKING|>" },
+            { "<|START_THOUGHT|>",  "<|END_THOUGHT|>"  },
+            { "<|START_REASON|>",   "<|END_REASON|>"   },
+            { "<think>",            "</think>"         },
+            { "<thinking>",         "</thinking>"      },
+        };
+
+        for (const auto & [start_marker, end_marker] : reasoning_patterns) {
+            size_t end_pos = result.tools.tool_section_start.find(end_marker);
+            if (end_pos != std::string::npos) {
+                // Found reasoning end marker in tool_section_start
+                // Extract it and clean up tool_section_start
+                result.content.reasoning_start = start_marker;
+                result.content.reasoning_end   = end_marker;
+                result.content.reasoning_mode  = content_structure::REASONING_OPTIONAL;
+
+                // Clean up tool_section_start: remove everything before and including the end marker
+                size_t after_end = end_pos + end_marker.length();
+                if (after_end < result.tools.tool_section_start.length()) {
+                    result.tools.tool_section_start = result.tools.tool_section_start.substr(after_end);
+                    // Trim leading whitespace
+                    size_t first_non_ws             = result.tools.tool_section_start.find_first_not_of(" \t\n\r");
+                    if (first_non_ws != std::string::npos && first_non_ws > 0) {
+                        result.tools.tool_section_start = result.tools.tool_section_start.substr(first_non_ws);
+                    }
+                }
+
+                LOG_DBG("Post-processing: Extracted reasoning markers from tool_section_start\n");
+                LOG_DBG("  reasoning_start: '%s', reasoning_end: '%s'\n", result.content.reasoning_start.c_str(),
+                        result.content.reasoning_end.c_str());
+                LOG_DBG("  cleaned tool_section_start: '%s'\n", result.tools.tool_section_start.c_str());
+                break;
+            }
+        }
+    }
+
+    // Post-processing: Detect content markers for recipient-based format
+    // For recipient-based format, content is prefixed with tool_call_start_marker + recipient_name + \n
+    // (e.g., ">>>all\n"). We need to detect and extract this as the content_start marker.
+    if (result.tools.function_format == tool_call_structure::FUNC_RECIPIENT_BASED &&
+        result.content.content_start.empty() && !result.tools.tool_section_start.empty()) {
+        // Render template with content only (no tools) to detect the content marker
+        templates_params inputs;
+        inputs.messages = {
+            { { "role", "user" },      { "content", "Hello" }               },
+            { { "role", "assistant" }, { "content", "ACTUAL_CONTENT_HERE" } }
+        };
+        inputs.add_generation_prompt = true;
+
+        std::string output;
+        try {
+            output = common_chat_template_direct_apply(tmpl, inputs);
+        } catch (...) {
+            output = "";
+        }
+
+        if (!output.empty()) {
+            // Find where the actual content starts
+            size_t content_pos = output.find("ACTUAL_CONTENT_HERE");
+
+            if (content_pos != std::string::npos) {
+                // For recipient-based format, find the last occurrence of tool_call_start_marker
+                // before the content. The marker is from that position to the content (including the newline).
+                size_t marker_pos = output.rfind(result.tools.tool_section_start, content_pos);
+
+                if (marker_pos != std::string::npos && marker_pos < content_pos) {
+                    // Find the newline after the marker
+                    size_t newline_pos = output.find('\n', marker_pos);
+
+                    if (newline_pos != std::string::npos && newline_pos < content_pos) {
+                        // Extract everything up to and including the newline after the marker
+                        std::string detected_marker = output.substr(marker_pos, newline_pos - marker_pos + 1);
+
+                        // Verify the marker starts with tool_call_start_marker
+                        if (detected_marker.find(result.tools.tool_section_start) == 0) {
+                            result.content.content_start = detected_marker;
+                            result.content.content_mode  = content_structure::CONTENT_ALWAYS_WRAPPED;
+                            LOG_DBG("Post-processing: Detected recipient-based content marker: '%s'\n",
+                                    result.content.content_start.c_str());
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    // Collect preserved tokens from both phases
+    collect_preserved_tokens(result);
+
+    LOG_DBG("=== UNIFIED TEMPLATE ANALYSIS COMPLETE ===\n");
+    LOG_DBG("Content structure:\n");
+    LOG_DBG("  reasoning_mode: %d\n", static_cast<int>(result.content.reasoning_mode));
+    LOG_DBG("  reasoning_start: '%s'\n", result.content.reasoning_start.c_str());
+    LOG_DBG("  reasoning_end: '%s'\n", result.content.reasoning_end.c_str());
+    LOG_DBG("  content_mode: %d\n", static_cast<int>(result.content.content_mode));
+    LOG_DBG("  content_start: '%s'\n", result.content.content_start.c_str());
+    LOG_DBG("  content_end: '%s'\n", result.content.content_end.c_str());
+    LOG_DBG("Tool structure:\n");
+    LOG_DBG("  supports_tools: %s\n", result.tools.supports_tools ? "true" : "false");
+    LOG_DBG("  function_format: %d\n", static_cast<int>(result.tools.function_format));
+    LOG_DBG("  argument_format: %d\n", static_cast<int>(result.tools.argument_format));
+    LOG_DBG("  tool_section_start: '%s'\n", result.tools.tool_section_start.c_str());
+    LOG_DBG("  tool_section_end: '%s'\n", result.tools.tool_section_end.c_str());
+
+    return result;
+}
+
+content_structure template_analyzer::analyze_content_structure(const common_chat_template & tmpl) {
+    LOG_DBG("=== PHASE 1: ANALYZING CONTENT STRUCTURE ===\n");
+
+    content_structure cs;
+
+    // Step 1: Detect reasoning markers by toggling enable_thinking
+    detect_reasoning_markers(tmpl, cs);
+
+    // Step 2: Detect content wrapping markers
+    detect_content_markers(tmpl, cs);
+
+    // Step 3: Determine reasoning mode (NONE, OPTIONAL, FORCED_OPEN)
+    templates_params inputs;
+    inputs.messages = {
+        { { "role", "user" }, { "content", "Hello" } }
+    };
+    inputs.add_generation_prompt            = true;
+    inputs.enable_thinking                  = true;
+
+    std::string prompt;
+    try {
+        prompt = common_chat_template_direct_apply(tmpl, inputs);
+    } catch (...) {
+        LOG_DBG("Failed to render template for reasoning mode detection\n");
+        return cs;
+    }
+
+    cs.reasoning_mode = detect_reasoning_mode(cs, prompt);
+
+    LOG_DBG("Phase 1 complete: reasoning_mode=%d, content_mode=%d\n", static_cast<int>(cs.reasoning_mode),
+            static_cast<int>(cs.content_mode));
+
+    return cs;
+}
+
+void template_analyzer::detect_reasoning_markers(const common_chat_template & tmpl, content_structure & cs) {
+    LOG_DBG("=== DETECTING REASONING MARKERS ===\n");
+
+    // Method 1: Compare outputs with reasoning_content field present vs absent
+    json reasoning_msg = {
+        { "role",              "assistant"      },
+        { "content",           "CONTENT_MARKER" },
+        { "reasoning_content", "THOUGHT_MARKER" }
+    };
+
+    json base_msg = {
+        { "role",    "assistant"      },
+        { "content", "CONTENT_MARKER" }
+    };
+
+    templates_params inputs;
+
+    inputs.messages = { reasoning_msg };
+    std::string reasoning_output;
+    try {
+        reasoning_output = common_chat_template_direct_apply(tmpl, inputs);
+    } catch (...) {
+        LOG_DBG("Failed to render template with reasoning_content\n");
+        reasoning_output = "";
+    }
+
+    inputs.messages = { base_msg };
+    std::string base_output;
+    try {
+        base_output = common_chat_template_direct_apply(tmpl, inputs);
+    } catch (...) {
+        LOG_DBG("Failed to render base template\n");
+        base_output = "";
+    }
+
+    // If outputs differ and we can find THOUGHT_MARKER, extract the reasoning markers
+    if (!reasoning_output.empty() && reasoning_output != base_output) {
+        size_t thought_pos = reasoning_output.find("THOUGHT_MARKER");
+        size_t content_pos = reasoning_output.find("CONTENT_MARKER");
+
+        if (thought_pos != std::string::npos && content_pos != std::string::npos && content_pos > thought_pos) {
+            // Extract what's between THOUGHT_MARKER and CONTENT_MARKER as the end marker
+            size_t thought_end = thought_pos + strlen("THOUGHT_MARKER");
+            cs.reasoning_end   = reasoning_output.substr(thought_end, content_pos - thought_end);
+
+            // Find what's before THOUGHT_MARKER by comparing with base_output
+            size_t diff_start = 0;
+            while (diff_start < base_output.length() && diff_start < reasoning_output.length() &&
+                   base_output[diff_start] == reasoning_output[diff_start]) {
+                diff_start++;
+            }
+
+            // If diff_start is in the middle of a tag (previous char is '<'), back up to include it
+            // This handles cases like base="</think>" vs reasoning="<think>" where both share '<'
+            if (diff_start > 0 && diff_start < reasoning_output.length() &&
+                reasoning_output[diff_start - 1] == '<') {
+                diff_start--;
+            }
+
+            if (diff_start < thought_pos) {
+                cs.reasoning_start = reasoning_output.substr(diff_start, thought_pos - diff_start);
+            }
+
+            trim_whitespace(cs.reasoning_start);
+            trim_whitespace(cs.reasoning_end);
+
+            // If we found reasoning_end but not reasoning_start, try to derive it from reasoning_end
+            // For example: </think> -> <think>, </|END_THINKING|> -> <|START_THINKING|>
+            if (cs.reasoning_start.empty() && !cs.reasoning_end.empty()) {
+                // First, try to derive directly from the closing tag format
+                if (cs.reasoning_end.length() > 3 && cs.reasoning_end[0] == '<' && cs.reasoning_end[1] == '/') {
+                    // Standard XML closing tag like </think> -> <think>
+                    size_t tag_end_pos = cs.reasoning_end.find('>');
+                    if (tag_end_pos != std::string::npos) {
+                        std::string tag_name = cs.reasoning_end.substr(2, tag_end_pos - 2);
+                        cs.reasoning_start   = "<" + tag_name + ">";
+                        LOG_DBG("Method 1: Derived reasoning_start from closing tag format\n");
+                        LOG_DBG("  start: '%s', end: '%s'\n", cs.reasoning_start.c_str(), cs.reasoning_end.c_str());
+                    }
+                } else if (cs.reasoning_end.find("<|END_") == 0 || cs.reasoning_end.find("<|/") == 0) {
+                    // Special token format like <|END_THINKING|> -> <|START_THINKING|>
+                    // or <|/think|> -> <|think|>
+                    if (cs.reasoning_end.find("<|END_") == 0) {
+                        std::string core   = cs.reasoning_end.substr(6);  // Remove "<|END_"
+                        cs.reasoning_start = "<|START_" + core;
+                    } else {
+                        std::string core   = cs.reasoning_end.substr(3);  // Remove "<|/"
+                        cs.reasoning_start = "<|" + core;
+                    }
+                    LOG_DBG("Method 1: Derived reasoning_start from special token format\n");
+                    LOG_DBG("  start: '%s', end: '%s'\n", cs.reasoning_start.c_str(), cs.reasoning_end.c_str());
+                }
+            }
+
+            if (!cs.reasoning_start.empty()) {
+                LOG_DBG("Method 1: Found reasoning markers via reasoning_content field\n");
+                LOG_DBG("  start: '%s', end: '%s'\n", cs.reasoning_start.c_str(), cs.reasoning_end.c_str());
+            }
+        }
+    }
+
+    // Method 2: Compare prompts with enable_thinking true vs false
+    if (cs.reasoning_start.empty()) {
+        LOG_DBG("Method 1 failed, trying Method 2 (enable_thinking toggle)\n");
+
+        json user_msg = {
+            { "role",    "user"  },
+            { "content", "Hello" }
+        };
+
+        templates_params inputs_prompt;
+        inputs_prompt.messages              = { user_msg };
+        inputs_prompt.add_generation_prompt = true;
+        inputs_prompt.enable_thinking = false;
+        std::string prompt_no_think;
+        try {
+            prompt_no_think = common_chat_template_direct_apply(tmpl, inputs_prompt);
+        } catch (...) {
+            prompt_no_think = "";
+        }
+
+        inputs_prompt.enable_thinking = true;
+        std::string prompt_think;
+        try {
+            prompt_think = common_chat_template_direct_apply(tmpl, inputs_prompt);
+        } catch (...) {
+            prompt_think = "";
+        }
+
+        if (!prompt_think.empty() && prompt_think != prompt_no_think) {
+            // Find the difference - this should be the reasoning start marker
+            size_t diff_pos = 0;
+            while (diff_pos < prompt_no_think.length() && diff_pos < prompt_think.length() &&
+                   prompt_no_think[diff_pos] == prompt_think[diff_pos]) {
+                diff_pos++;
+            }
+
+            // Check which direction has extra content
+            if (prompt_think.length() > prompt_no_think.length()) {
+                // Normal case: enable_thinking=true adds content (e.g., <think> at the end)
+                std::string diff = prompt_think.substr(diff_pos);
+
+                // Only use if it looks like a tag
+                if (diff.find('<') != std::string::npos || diff.find('[') != std::string::npos) {
+                    cs.reasoning_start = diff;
+                    cs.reasoning_end   = create_closing_tag(diff);
+                    trim_whitespace(cs.reasoning_start);
+                    trim_whitespace(cs.reasoning_end);
+
+                    LOG_DBG("Method 2: Found reasoning markers via enable_thinking toggle\n");
+                    LOG_DBG("  start: '%s', end: '%s'\n", cs.reasoning_start.c_str(), cs.reasoning_end.c_str());
+                }
+            } else {
+                // Reverse case: enable_thinking=false adds content (e.g., GLM-4.6 adds <think></think>)
+                // This means the template adds an empty thinking block when thinking is disabled
+                std::string diff = prompt_no_think.substr(diff_pos);
+
+                // Look for adjacent opening and closing tags like <think></think>
+                size_t open_start = diff.find('<');
+                if (open_start != std::string::npos) {
+                    size_t open_end = diff.find('>', open_start);
+                    if (open_end != std::string::npos) {
+                        std::string opening_tag = diff.substr(open_start, open_end - open_start + 1);
+                        // Skip if it looks like a role marker
+                        if (opening_tag.find("assistant") == std::string::npos &&
+                            opening_tag.find("user") == std::string::npos &&
+                            opening_tag.find("system") == std::string::npos) {
+                            std::string expected_close = create_closing_tag(opening_tag);
+                            // Check if the closing tag follows immediately (empty thinking block)
+                            size_t      close_pos      = diff.find(expected_close, open_end + 1);
+                            if (close_pos != std::string::npos) {
+                                // Verify only whitespace between tags
+                                std::string between = diff.substr(open_end + 1, close_pos - open_end - 1);
+                                bool        only_ws = true;
+                                for (char c : between) {
+                                    if (!std::isspace(static_cast<unsigned char>(c))) {
+                                        only_ws = false;
+                                        break;
+                                    }
+                                }
+                                if (only_ws) {
+                                    cs.reasoning_start = opening_tag;
+                                    cs.reasoning_end   = expected_close;
+                                    trim_whitespace(cs.reasoning_start);
+                                    trim_whitespace(cs.reasoning_end);
+
+                                    LOG_DBG("Method 2: Found reasoning markers via enable_thinking toggle (reverse)\n");
+                                    LOG_DBG("  start: '%s', end: '%s'\n", cs.reasoning_start.c_str(),
+                                            cs.reasoning_end.c_str());
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    // Method 3: Check if the prompt ends with an unclosed reasoning tag
+    if (cs.reasoning_start.empty()) {
+        LOG_DBG("Method 2 failed, trying Method 3 (prompt ending with open tag)\n");
+
+        json user_msg = {
+            { "role",    "user"  },
+            { "content", "Hello" }
+        };
+
+        templates_params inputs_prompt;
+        inputs_prompt.messages                         = { user_msg };
+        inputs_prompt.add_generation_prompt            = true;
+        inputs_prompt.enable_thinking                  = true;
+
+        std::string prompt;
+        try {
+            prompt = common_chat_template_direct_apply(tmpl, inputs_prompt);
+        } catch (...) {
+            prompt = "";
+        }
+
+        if (!prompt.empty()) {
+            // Save trailing whitespace before trimming
+            std::string trailing_ws;
+            size_t      end_pos = prompt.length();
+            while (end_pos > 0 && (prompt[end_pos - 1] == '\n' || prompt[end_pos - 1] == '\r')) {
+                trailing_ws = prompt[end_pos - 1] + trailing_ws;
+                end_pos--;
+            }
+
+            trim_trailing_newlines(prompt);
+
+            // Find the last tag in the prompt
+            size_t last_open_angle  = prompt.rfind('<');
+            size_t last_close_angle = prompt.rfind('>');
+
+            // Check for closed tags at the end
+            if (last_open_angle != std::string::npos && last_close_angle != std::string::npos &&
+                last_close_angle == prompt.length() - 1 && last_close_angle > last_open_angle) {
+                std::string tag = prompt.substr(last_open_angle);
+
+                // Check if this looks like a reasoning tag (not a role marker)
+                std::vector<std::string> blacklisted_tags = {
+                    "<|CHATBOT_TOKEN|>", "<|SYSTEM_TOKEN|>",  "<|USER_TOKEN|>",  "<|ASSISTANT_TOKEN|>", "<|im_start|>",
+                    "<|im_end|>",        "<|start_of_role|>", "<|end_of_role|>", "<|end_of_text|>",     "<|end|>",
+                    "<|assistant|>",     "<|user|>",          "<|system|>",      "<assistant>",         "<user>",
+                    "<system>"
+                };
+
+                bool is_blacklisted = false;
+                for (const auto & blacklisted : blacklisted_tags) {
+                    if (tag == blacklisted) {
+                        is_blacklisted = true;
+                        break;
+                    }
+                }
+
+                // Check if it looks like a thinking/reasoning tag
+                std::string lower_tag = tag;
+                std::transform(lower_tag.begin(), lower_tag.end(), lower_tag.begin(), ::tolower);
+                bool looks_like_reasoning = lower_tag.find("think") != std::string::npos ||
+                                            lower_tag.find("reason") != std::string::npos ||
+                                            lower_tag.find("thought") != std::string::npos;
+
+                if (!is_blacklisted && looks_like_reasoning) {
+                    // Check if the detected tag is a close tag (starts with </)
+                    // This handles templates like DeepSeek-V3.1 that end with </think> when thinking is disabled
+                    bool is_close_tag = (tag.size() > 2 && tag[0] == '<' && tag[1] == '/');
+
+                    if (is_close_tag) {
+                        // The tag is a close tag (e.g., </think>)
+                        // Derive the open tag by removing the '/'
+                        std::string tag_name = extract_tag_name(tag);  // Returns "/think" for </think>
+                        if (!tag_name.empty() && tag_name[0] == '/') {
+                            tag_name = tag_name.substr(1);             // Remove leading '/'
+                        }
+                        cs.reasoning_start = "<" + tag_name + ">";
+                        cs.reasoning_end   = tag;
+                        trim_whitespace(cs.reasoning_start);
+                        trim_whitespace(cs.reasoning_end);
+
+                        LOG_DBG("Method 3: Found reasoning markers via prompt ending with CLOSE tag\n");
+                        LOG_DBG("  start: '%s', end: '%s'\n", cs.reasoning_start.c_str(), cs.reasoning_end.c_str());
+
+                        // Note: The prompt ends with the close tag, meaning thinking is disabled.
+                        // The reasoning_mode will be set in detect_reasoning_mode() which will
+                        // correctly identify this as NOT forced open since the prompt ends with
+                        // the end marker, not the start marker.
+                    } else {
+                        // Standard case: open tag at the end (e.g., <think>)
+                        cs.reasoning_start = tag + trailing_ws;
+                        cs.reasoning_end   = create_closing_tag(tag) + trailing_ws;
+                        trim_whitespace(cs.reasoning_start);
+                        trim_whitespace(cs.reasoning_end);
+
+                        LOG_DBG("Method 3: Found reasoning markers via prompt ending with tag\n");
+                        LOG_DBG("  start: '%s', end: '%s'\n", cs.reasoning_start.c_str(), cs.reasoning_end.c_str());
+                    }
+                }
+            }
+        }
+    }
+
+    // Method 4: Look for adjacent opening/closing tag pairs with common content in prompt
+    // This detects patterns like <think></think>, <|START_THINKING|><|END_THINKING|>, [think][/think]
+    if (cs.reasoning_start.empty()) {
+        LOG_DBG("Method 3 failed, trying Method 4 (adjacent tag pairs with common content)\n");
+
+        json user_msg = {
+            { "role",    "user"  },
+            { "content", "Hello" }
+        };
+
+        templates_params inputs_prompt;
+        inputs_prompt.messages                         = { user_msg };
+        inputs_prompt.add_generation_prompt            = true;
+        // Try with thinking disabled - templates may output empty thinking blocks
+        inputs_prompt.enable_thinking                  = false;
+
+        std::string prompt;
+        try {
+            prompt = common_chat_template_direct_apply(tmpl, inputs_prompt);
+        } catch (...) {
+            prompt = "";
+        }
+
+        if (!prompt.empty()) {
+            // Look for patterns like <tag1><tag2> or <tag1>...<tag2> where tag1 and tag2 share a common word
+            // Common patterns:
+            //   <think></think>
+            //   <|START_THINKING|><|END_THINKING|>
+            //   [think][/think]
+
+            // Find potential tag pairs by looking for closing tags that immediately follow opening tags
+            // Pattern: opening tag followed by closing tag with same keyword
+            std::vector<std::tuple<std::string, std::string, std::string>> tag_patterns = {
+                // (opening pattern, closing pattern, keyword to match)
+                { "<|START_",   "<|END_",      "THINKING" },
+                { "<|START_",   "<|END_",      "THOUGHT"  },
+                { "<|START_",   "<|END_",      "REASON"   },
+                { "<think>",    "</think>",    ""         },
+                { "<Think>",    "</Think>",    ""         },
+                { "<THINK>",    "</THINK>",    ""         },
+                { "[think]",    "[/think]",    ""         },
+                { "[THINK]",    "[/THINK]",    ""         },
+                { "<thinking>", "</thinking>", ""         },
+                { "<THINKING>", "</THINKING>", ""         },
+                { "<|think|>",  "<|/think|>",  ""         },
+            };
+
+            for (const auto & [open_prefix, close_prefix, keyword] : tag_patterns) {
+                size_t open_pos = prompt.find(open_prefix);
+                if (open_pos == std::string::npos) {
+                    continue;
+                }
+
+                std::string start_tag;
+                std::string end_tag;
+
+                if (!keyword.empty()) {
+                    // Pattern like <|START_THINKING|><|END_THINKING|>
+                    std::string full_open     = open_prefix + keyword;
+                    size_t      full_open_pos = prompt.find(full_open);
+                    if (full_open_pos == std::string::npos) {
+                        continue;
+                    }
+
+                    // Find the end of this tag (look for |> or >)
+                    size_t tag_end = prompt.find("|>", full_open_pos + full_open.length());
+                    if (tag_end == std::string::npos) {
+                        tag_end = prompt.find('>', full_open_pos + full_open.length());
+                    }
+                    if (tag_end == std::string::npos) {
+                        continue;
+                    }
+
+                    start_tag =
+                        prompt.substr(full_open_pos, tag_end - full_open_pos + (prompt[tag_end] == '|' ? 2 : 1));
+
+                    // Look for the corresponding end tag
+                    std::string expected_close = close_prefix + keyword;
+                    size_t      close_pos      = prompt.find(expected_close, tag_end);
+                    if (close_pos == std::string::npos) {
+                        continue;
+                    }
+
+                    // Find end of close tag
+                    size_t close_end = prompt.find("|>", close_pos + expected_close.length());
+                    if (close_end == std::string::npos) {
+                        close_end = prompt.find('>', close_pos + expected_close.length());
+                    }
+                    if (close_end == std::string::npos) {
+                        continue;
+                    }
+
+                    end_tag = prompt.substr(close_pos, close_end - close_pos + (prompt[close_end] == '|' ? 2 : 1));
+                } else {
+                    // Simple pattern like <think></think>
+                    start_tag        = open_prefix;
+                    size_t close_pos = prompt.find(close_prefix, open_pos + start_tag.length());
+                    if (close_pos == std::string::npos) {
+                        continue;
+                    }
+                    end_tag = close_prefix;
+                }
+
+                // Verify the tags are adjacent or nearly adjacent (only whitespace between)
+                size_t start_end_pos = prompt.find(start_tag) + start_tag.length();
+                size_t end_start_pos = prompt.find(end_tag, start_end_pos);
+                if (end_start_pos != std::string::npos) {
+                    std::string between         = prompt.substr(start_end_pos, end_start_pos - start_end_pos);
+                    // Allow only whitespace between the tags (empty thinking block)
+                    bool        only_whitespace = true;
+                    for (char c : between) {
+                        if (!std::isspace(static_cast<unsigned char>(c))) {
+                            only_whitespace = false;
+                            break;
+                        }
+                    }
+
+                    if (only_whitespace) {
+                        cs.reasoning_start = start_tag;
+                        cs.reasoning_end   = end_tag;
+                        LOG_DBG("Method 4: Found reasoning markers via adjacent tag pairs\n");
+                        LOG_DBG("  start: '%s', end: '%s'\n", cs.reasoning_start.c_str(), cs.reasoning_end.c_str());
+                        break;
+                    }
+                }
+            }
+        }
+    }
+
+    if (cs.reasoning_start.empty()) {
+        LOG_DBG("No reasoning markers detected\n");
+    }
+}
+
+void template_analyzer::detect_content_markers(const common_chat_template & tmpl, content_structure & cs) {
+    LOG_DBG("=== DETECTING CONTENT MARKERS ===\n");
+
+    // Render template with a unique content marker
+    json user_msg = {
+        { "role",    "user"  },
+        { "content", "Hello" }
+    };
+    json assistant_msg = {
+        { "role",    "assistant"            },
+        { "content", "UNIQUE_CONTENT_12345" }
+    };
+
+    templates_params inputs;
+    inputs.messages                         = { user_msg, assistant_msg };
+    // Try with thinking enabled first (some templates only wrap content when reasoning is present)
+    inputs.extra_context["thinking"]        = true;
+    inputs.enable_thinking                  = true;
+
+    std::string output_with_thinking;
+    try {
+        output_with_thinking = common_chat_template_direct_apply(tmpl, inputs);
+    } catch (...) {
+        output_with_thinking = "";
+    }
+
+    // Also render without thinking
+    inputs.extra_context["thinking"]        = false;
+    inputs.enable_thinking                  = false;
+
+    std::string output_no_thinking;
+    try {
+        output_no_thinking = common_chat_template_direct_apply(tmpl, inputs);
+    } catch (...) {
+        output_no_thinking = "";
+    }
+
+    // Check both outputs for content markers
+    auto find_content_markers = [&](const std::string & output) -> std::pair<std::string, std::string> {
+        size_t marker_pos = output.find("UNIQUE_CONTENT_12345");
+        if (marker_pos == std::string::npos) {
+            return { "", "" };
+        }
+
+        // Known content marker patterns
+        std::vector<std::pair<std::string, std::string>> patterns = {
+            { "<|START_RESPONSE|>", "<|END_RESPONSE|>"      },
+            { "<|response|>",       "<|/response|>"         },
+            { "<response>",         "</response>"           },
+            { "<output>",           "</output>"             },
+            { "<answer>",           "</answer>"             },
+            { "<|CHATBOT_TOKEN|>",  "<|END_OF_TURN_TOKEN|>" },
+        };
+
+        for (const auto & [start_pattern, end_pattern] : patterns) {
+            size_t start_pos = output.rfind(start_pattern, marker_pos);
+            if (start_pos != std::string::npos) {
+                // Check that there's only whitespace between the start pattern and our marker
+                std::string between =
+                    output.substr(start_pos + start_pattern.length(), marker_pos - start_pos - start_pattern.length());
+                size_t first_non_ws = between.find_first_not_of(" \t\n\r");
+                if (first_non_ws == std::string::npos) {
+                    // Found valid start marker, look for end marker
+                    size_t marker_end = marker_pos + strlen("UNIQUE_CONTENT_12345");
+                    size_t end_pos    = output.find(end_pattern, marker_end);
+                    if (end_pos != std::string::npos) {
+                        std::string after              = output.substr(marker_end, end_pos - marker_end);
+                        size_t      first_non_ws_after = after.find_first_not_of(" \t\n\r");
+                        if (first_non_ws_after == std::string::npos) {
+                            return { start_pattern, end_pattern };
+                        }
+                    }
+                }
+            }
+        }
+
+        return { "", "" };
+    };
+
+    auto [start_with_thinking, end_with_thinking] = find_content_markers(output_with_thinking);
+    auto [start_no_thinking, end_no_thinking]     = find_content_markers(output_no_thinking);
+
+    if (!start_with_thinking.empty() && !start_no_thinking.empty()) {
+        // Content is always wrapped
+        cs.content_mode  = content_structure::CONTENT_ALWAYS_WRAPPED;
+        cs.content_start = start_with_thinking;
+        cs.content_end   = end_with_thinking;
+        LOG_DBG("Content markers found in both thinking modes (ALWAYS_WRAPPED)\n");
+    } else if (!start_with_thinking.empty() && start_no_thinking.empty()) {
+        // Content is wrapped only when reasoning is present
+        cs.content_mode  = content_structure::CONTENT_WRAPPED_WITH_REASONING;
+        cs.content_start = start_with_thinking;
+        cs.content_end   = end_with_thinking;
+        LOG_DBG("Content markers found only with thinking enabled (WRAPPED_WITH_REASONING)\n");
+    } else if (!start_no_thinking.empty()) {
+        // Unusual: content wrapped without thinking but not with? Use what we found
+        cs.content_mode  = content_structure::CONTENT_ALWAYS_WRAPPED;
+        cs.content_start = start_no_thinking;
+        cs.content_end   = end_no_thinking;
+        LOG_DBG("Content markers found only without thinking (treating as ALWAYS_WRAPPED)\n");
+    } else {
+        cs.content_mode = content_structure::CONTENT_PLAIN;
+        LOG_DBG("No content markers detected (PLAIN)\n");
+    }
+
+    LOG_DBG("Content markers: start='%s', end='%s'\n", cs.content_start.c_str(), cs.content_end.c_str());
+}
+
+content_structure::reasoning_mode_type template_analyzer::detect_reasoning_mode(const content_structure & cs,
+                                                                                const std::string &       prompt) {
+    LOG_DBG("=== DETECTING REASONING MODE ===\n");
+
+    // If both markers are empty, mode is NONE
+    if (cs.reasoning_start.empty() && cs.reasoning_end.empty()) {
+        LOG_DBG("No reasoning markers, mode=REASONING_NONE\n");
+        return content_structure::REASONING_NONE;
+    }
+
+    // Handle case with end marker but no start marker (implicit start)
+    if (cs.reasoning_start.empty() && !cs.reasoning_end.empty()) {
+        LOG_DBG("Reasoning end marker present but no start marker, mode=REASONING_FORCED_OPEN\n");
+        return content_structure::REASONING_FORCED_OPEN;
+    }
+
+    // Check if the prompt ends with the reasoning start marker (forced open)
+    std::string trimmed_prompt = prompt;
+    trim_trailing_newlines(trimmed_prompt);
+
+    std::string trimmed_marker = cs.reasoning_start;
+    trim_whitespace(trimmed_marker);
+
+    if (string_ends_with(trimmed_prompt, trimmed_marker)) {
+        LOG_DBG("Prompt ends with reasoning start marker, mode=REASONING_FORCED_OPEN\n");
+        return content_structure::REASONING_FORCED_OPEN;
+    }
+
+    // Otherwise, reasoning is optional
+    LOG_DBG("Reasoning markers present but not forced, mode=REASONING_OPTIONAL\n");
+    return content_structure::REASONING_OPTIONAL;
+}
+
+tool_call_structure template_analyzer::analyze_tool_structure(const common_chat_template & tmpl,
+                                                              const content_structure &    content) {
+    (void) content;  // May be used in future for better tool detection
+
+    LOG_DBG("=== PHASE 2: ANALYZING TOOL STRUCTURE ===\n");
+
+    tool_call_structure ts;
+
+    // Use differential analysis to detect tool patterns
+    // This now includes a robust test that renders two payloads:
+    // 1. Tool definitions + content only
+    // 2. Tool definitions + content + tool calls
+    // If outputs are identical, the template doesn't support tool calls
+    auto discovered = analyze_by_differential(tmpl);
+    auto format     = determine_format_from_patterns(discovered);
+
+    // Strip EOS tokens from discovered patterns (handles both standard <|eos|> and fullwidth <｜end▁of▁sentence｜>)
+    if (!discovered.tool_call_closer.empty()) {
+        LOG_DBG("Before stripping: tool_call_closer='%s' (len=%zu)\n", discovered.tool_call_closer.c_str(),
+                discovered.tool_call_closer.length());
+        discovered.tool_call_closer = strip_eos_token(discovered.tool_call_closer);
+        LOG_DBG("After stripping: tool_call_closer='%s'\n", discovered.tool_call_closer.c_str());
+    }
+    if (!discovered.tool_call_end_marker.empty()) {
+        discovered.tool_call_end_marker = strip_eos_token(discovered.tool_call_end_marker);
+    }
+
+    if (format == FORMAT_UNKNOWN) {
+        LOG_DBG("Template does not support tool calls (differential analysis returned no patterns)\n");
+        ts.supports_tools = false;
+        return ts;
+    }
+
+    // Propagate requires_nonnull_content flag from differential analysis
+    ts.requires_nonnull_content = discovered.requires_nonnull_content;
+    if (ts.requires_nonnull_content) {
+        LOG_DBG("Template requires non-null content (renders null as 'None')\n");
+    }
+
+    // Check if minja reports tool call support (for informational purposes)
+    auto caps = tmpl.original_caps();
+    if (!caps.supports_tool_calls) {
+        LOG_DBG("Note: minja caps indicate no tool support, but differential analysis found patterns\n");
+    }
+
+    if (format == FORMAT_JSON_NATIVE) {
+        analyze_json_format(ts, discovered);
+    } else if (format == FORMAT_XML_CONSTRUCTED) {
+        analyze_xml_format(ts, discovered);
+    } else if (format == FORMAT_BRACKET_TAG) {
+        analyze_bracket_tag_format(ts, discovered);
+    } else if (format == FORMAT_RECIPIENT_BASED) {
+        analyze_recipient_based_format(ts, discovered);
+    } else if (format == FORMAT_MARKDOWN_CODE_BLOCK) {
+        analyze_markdown_code_block_format(ts, discovered);
+    }
+
+    return ts;
+}
+
+void template_analyzer::collect_preserved_tokens(template_analysis_result & result) {
+    LOG_DBG("=== COLLECTING PRESERVED TOKENS ===\n");
+
+    std::vector<std::string> tokens;
+
+    // Add reasoning markers
+    if (!result.content.reasoning_start.empty()) {
+        tokens.push_back(result.content.reasoning_start);
+    }
+    if (!result.content.reasoning_end.empty()) {
+        tokens.push_back(result.content.reasoning_end);
+    }
+
+    // Add content markers
+    if (!result.content.content_start.empty()) {
+        tokens.push_back(result.content.content_start);
+    }
+    if (!result.content.content_end.empty()) {
+        tokens.push_back(result.content.content_end);
+    }
+
+    // Add tool section markers
+    if (!result.tools.tool_section_start.empty()) {
+        tokens.push_back(result.tools.tool_section_start);
+    }
+    if (!result.tools.tool_section_end.empty()) {
+        tokens.push_back(result.tools.tool_section_end);
+    }
+
+    // Add function markers for tag-based formats
+    if (result.tools.function_format == tool_call_structure::FUNC_TAG_WITH_NAME) {
+        if (!result.tools.function_prefix.empty()) {
+            tokens.push_back(result.tools.function_prefix);
+        }
+        if (!result.tools.function_close.empty()) {
+            tokens.push_back(result.tools.function_close);
+        }
+    }
+
+    // Add markers for prefixed-indexed formats (e.g., Kimi-K2)
+    if (result.tools.function_format == tool_call_structure::FUNC_PREFIXED_INDEXED) {
+        if (!result.tools.per_call_start.empty()) {
+            tokens.push_back(result.tools.per_call_start);
+        }
+        if (!result.tools.args_marker.empty()) {
+            tokens.push_back(result.tools.args_marker);
+        }
+        if (!result.tools.per_call_end.empty()) {
+            tokens.push_back(result.tools.per_call_end);
+        }
+    }
+
+    // Add argument markers for tagged formats
+    if (result.tools.argument_format == tool_call_structure::ARGS_TAGGED) {
+        if (!result.tools.arg_prefix.empty()) {
+            tokens.push_back(result.tools.arg_prefix);
+        }
+        if (!result.tools.arg_close.empty()) {
+            tokens.push_back(result.tools.arg_close);
+        }
+    }
+
+    // Add markers for markdown code block format (Cohere Command-R Plus)
+    if (result.tools.function_format == tool_call_structure::FUNC_MARKDOWN_CODE_BLOCK) {
+        if (!result.tools.code_block_marker.empty()) {
+            tokens.push_back(result.tools.code_block_marker);
+        }
+        if (!result.tools.tool_section_end.empty()) {
+            tokens.push_back(result.tools.tool_section_end);  // Closing code fence ```
+        }
+    }
+
+    result.preserved_tokens = tokens;
+    LOG_DBG("Collected %zu preserved tokens\n", tokens.size());
+}
+
+void template_analyzer::analyze_json_format(tool_call_structure & ts, const internal_discovered_pattern & discovered) {
+    ts.supports_tools     = true;
+    ts.function_format    = tool_call_structure::FUNC_JSON_OBJECT;
+    ts.argument_format    = tool_call_structure::ARGS_JSON;
+    ts.tool_section_start = discovered.tool_call_start_marker;
+    ts.tool_section_end   = discovered.tool_call_end_marker;
+    ts.name_field         = discovered.tool_name_field;
+    ts.args_field         = discovered.tool_args_field;
+    ts.id_field           = discovered.tool_id_field;
+
+    // Check for FUNC_NAME_AS_KEY format (e.g. Apertus: {"function_name": args})
+    // This is characterized by the opener ending in {" and no explicit name field found yet
+    if (!discovered.tool_call_opener.empty() && discovered.tool_call_opener.length() >= 2 &&
+        discovered.tool_call_opener.substr(discovered.tool_call_opener.length() - 2) == "{\"") {
+        LOG_DBG("Detected FUNC_NAME_AS_KEY format from tool_call_opener ending in '{\"' \n");
+        ts.function_format = tool_call_structure::FUNC_NAME_AS_KEY;
+    }
+
+    // For JSON_NATIVE format, clean up tool_section_end to only include the closing tag
+    // The differential analysis may include JSON closing braces (e.g., "}}\n</tool_call>")
+    // but the parser handles JSON separately, so we only need the tag marker
+    if (!ts.tool_section_end.empty()) {
+        size_t tag_start = ts.tool_section_end.find("</");
+        if (tag_start != std::string::npos) {
+            size_t tag_end = ts.tool_section_end.find('>', tag_start);
+            if (tag_end != std::string::npos) {
+                // Check if there is a closing bracket ']' before the tag
+                size_t bracket_pos = ts.tool_section_end.rfind(']', tag_start);
+                if (bracket_pos != std::string::npos) {
+                    // Include the bracket
+                    ts.tool_section_end = ts.tool_section_end.substr(bracket_pos, tag_end - bracket_pos + 1);
+                } else {
+                    ts.tool_section_end = ts.tool_section_end.substr(tag_start, tag_end - tag_start + 1);
+                }
+            }
+        } else {
+            // Try other closing patterns like ]<|END_ACTION|>
+            tag_start = ts.tool_section_end.find("<|");
+            if (tag_start != std::string::npos) {
+                size_t tag_end = ts.tool_section_end.find("|>", tag_start);
+                if (tag_end != std::string::npos) {
+                    // Include the opening bracket if present
+                    size_t bracket_pos = ts.tool_section_end.rfind(']', tag_start);
+                    if (bracket_pos != std::string::npos && bracket_pos + 1 == tag_start) {
+                        ts.tool_section_end = ts.tool_section_end.substr(bracket_pos, tag_end - bracket_pos + 2);
+                    } else {
+                        ts.tool_section_end = ts.tool_section_end.substr(tag_start, tag_end - tag_start + 2);
+                    }
+                }
+            }
+        }
+    }
+}
+
+void template_analyzer::analyze_xml_format(tool_call_structure & ts, const internal_discovered_pattern & discovered) {
+    ts.supports_tools     = true;
+    ts.function_format    = tool_call_structure::FUNC_TAG_WITH_NAME;
+    ts.tool_section_start = discovered.tool_call_start_marker;
+    ts.tool_section_end   = discovered.tool_call_end_marker;
+
+    // Extract function tag patterns
+    if (!discovered.function_opener.empty()) {
+        char first = discovered.function_opener[0];
+        if (first != '<' && first != '{' && first != '[') {
+            // Non-XML/JSON prefix format (e.g., ">>>", "##", etc.)
+            // Function name follows prefix directly, ends with newline
+            ts.function_prefix = discovered.function_opener;
+            ts.function_suffix = "\n";  // Function name typically ends with newline
+            ts.function_close  = "";    // No closing tag for prefix formats
+        } else {
+            size_t eq_pos = discovered.function_opener.find('=');
+            if (eq_pos != std::string::npos) {
+                // Check if there's a quote after the equals sign
+                if (eq_pos + 1 < discovered.function_opener.length() &&
+                    (discovered.function_opener[eq_pos + 1] == '"' || discovered.function_opener[eq_pos + 1] == '\'')) {
+                    ts.function_prefix = discovered.function_opener.substr(0, eq_pos + 2);
+                } else {
+                    ts.function_prefix = discovered.function_opener.substr(0, eq_pos + 1);
+                }
+                ts.function_suffix = discovered.function_name_suffix;
+
+                // For formats like <function=name>{args}</function>, where function_prefix
+                // IS the section start (no separate wrapper), tool_section_end is the function close.
+                // But for nested formats like <tool_call><function=name>...</function></tool_call>,
+                // the function_close is separate from tool_section_end.
+                // We detect the non-nested case when tool_section_start matches function_prefix
+                // (or tool_section_start was already cleared because it matched).
+                bool section_start_matches_prefix = ts.tool_section_start.empty() ||
+                                                    ts.tool_section_start.find(ts.function_prefix) == 0 ||
+                                                    ts.function_prefix.find(ts.tool_section_start) == 0;
+                if (section_start_matches_prefix && ts.function_prefix.find('<') == 0 && !ts.tool_section_end.empty() &&
+                    ts.tool_section_end.find("</") == 0) {
+                    ts.function_close   = ts.tool_section_end;
+                    ts.tool_section_end = "";  // Clear to avoid double wrapping
+                } else {
+                    ts.function_close = discovered.function_closer;
+                }
+            } else if (!discovered.function_opener.empty() && discovered.function_opener[0] == '<') {
+                // Check for FUNC_PREFIXED_INDEXED format
+                // Detected by: function_opener ends with "." (namespace separator)
+                //              AND function_name_suffix starts with ":" followed by digit (index)
+                // Example: <|tool_call_begin|>functions.name:0<|tool_call_argument_begin|>
+                size_t namespace_dot = discovered.function_opener.rfind('.');
+                bool   has_namespace =
+                    (namespace_dot != std::string::npos && namespace_dot == discovered.function_opener.length() - 1);
+
+                bool has_index =
+                    (!discovered.function_name_suffix.empty() && discovered.function_name_suffix[0] == ':' &&
+                     discovered.function_name_suffix.length() > 1 &&
+                     std::isdigit(static_cast<unsigned char>(discovered.function_name_suffix[1])));
+
+                if (has_namespace && has_index) {
+                    LOG_DBG("Detected FUNC_PREFIXED_INDEXED format: namespace ends with '.', suffix has ':N' index\n");
+                    ts.function_format = tool_call_structure::FUNC_PREFIXED_INDEXED;
+
+                    // Split function_opener into per_call_start and function_namespace
+                    // e.g., "<|tool_call_begin|>functions." -> "<|tool_call_begin|>" + "functions."
+                    // Find where the namespace starts (after the last '>' before the '.')
+                    size_t namespace_start = discovered.function_opener.rfind('>');
+                    if (namespace_start != std::string::npos && namespace_start < namespace_dot) {
+                        ts.per_call_start     = discovered.function_opener.substr(0, namespace_start + 1);
+                        ts.function_namespace = discovered.function_opener.substr(namespace_start + 1);
+                    } else {
+                        // Fallback: namespace is just the part ending with '.'
+                        ts.per_call_start     = discovered.function_opener.substr(0, namespace_dot);
+                        ts.function_namespace = ".";
+                    }
+
+                    // Extract args_marker from function_name_suffix
+                    // Format: ":0<|some_marker|>" -> index is ":0", args_marker is "<|some_marker|>"
+                    size_t args_marker_start = discovered.function_name_suffix.find('<');
+                    if (args_marker_start != std::string::npos) {
+                        size_t args_marker_end = discovered.function_name_suffix.find('>', args_marker_start);
+                        if (args_marker_end != std::string::npos) {
+                            ts.args_marker = discovered.function_name_suffix.substr(
+                                args_marker_start, args_marker_end - args_marker_start + 1);
+                        }
+                    }
+
+                    // Derive per_call_end from tool_call_closer by finding corresponding end marker
+                    // tool_call_closer contains per_call_end + tool_section_end
+                    // We find per_call_end by looking for a marker that structurally matches per_call_start
+                    if (!discovered.tool_call_closer.empty() && !ts.per_call_start.empty()) {
+                        // Extract structural pattern from per_call_start
+                        // e.g., "<|tool_call_begin|>" -> look for "<|tool_call_...|>" in closer
+                        size_t start_marker_begin = ts.per_call_start.find("<|");
+                        size_t start_marker_end   = ts.per_call_start.rfind("|>");
+                        if (start_marker_begin != std::string::npos && start_marker_end != std::string::npos) {
+                            // Find the base pattern (e.g., "<|tool_call" from "<|tool_call_begin|>")
+                            std::string start_content = ts.per_call_start.substr(
+                                start_marker_begin + 2, start_marker_end - start_marker_begin - 2);
+                            // Find a related marker in the closer
+                            size_t closer_pos = discovered.tool_call_closer.find("<|");
+                            while (closer_pos != std::string::npos) {
+                                size_t closer_end = discovered.tool_call_closer.find("|>", closer_pos);
+                                if (closer_end != std::string::npos) {
+                                    std::string candidate =
+                                        discovered.tool_call_closer.substr(closer_pos, closer_end - closer_pos + 2);
+                                    // Check if this marker shares a common prefix with per_call_start
+                                    // (ignoring _begin vs _end suffix differences)
+                                    std::string candidate_content = candidate.substr(2, candidate.length() - 4);
+                                    // Find common prefix between start_content and candidate_content
+                                    size_t      common_len        = 0;
+                                    while (common_len < start_content.length() &&
+                                           common_len < candidate_content.length() &&
+                                           start_content[common_len] == candidate_content[common_len]) {
+                                        common_len++;
+                                    }
+                                    // If substantial overlap (>50%), this is likely the per_call_end
+                                    if (common_len > start_content.length() / 2 &&
+                                        candidate_content.find("end") != std::string::npos) {
+                                        ts.per_call_end = candidate;
+                                        break;
+                                    }
+                                }
+                                closer_pos = discovered.tool_call_closer.find("<|", closer_pos + 1);
+                            }
+                        }
+                    }
+
+                    // Derive tool_section_end from tool_section_start by finding matching end marker
+                    // For FUNC_PREFIXED_INDEXED, we always derive this to get the correct marker
+                    // (the default discovered.tool_call_end_marker may contain extra content)
+                    if (!ts.tool_section_start.empty()) {
+                        size_t start_marker_begin = ts.tool_section_start.find("<|");
+                        size_t start_marker_end   = ts.tool_section_start.rfind("|>");
+                        if (start_marker_begin != std::string::npos && start_marker_end != std::string::npos) {
+                            std::string start_content = ts.tool_section_start.substr(
+                                start_marker_begin + 2, start_marker_end - start_marker_begin - 2);
+                            size_t closer_pos = discovered.tool_call_closer.find("<|");
+                            while (closer_pos != std::string::npos) {
+                                size_t closer_end = discovered.tool_call_closer.find("|>", closer_pos);
+                                if (closer_end != std::string::npos) {
+                                    std::string candidate =
+                                        discovered.tool_call_closer.substr(closer_pos, closer_end - closer_pos + 2);
+                                    std::string candidate_content = candidate.substr(2, candidate.length() - 4);
+                                    size_t      common_len        = 0;
+                                    while (common_len < start_content.length() &&
+                                           common_len < candidate_content.length() &&
+                                           start_content[common_len] == candidate_content[common_len]) {
+                                        common_len++;
+                                    }
+                                    if (common_len > start_content.length() / 2 &&
+                                        candidate_content.find("end") != std::string::npos) {
+                                        ts.tool_section_end = candidate;
+                                        break;
+                                    }
+                                }
+                                closer_pos = discovered.tool_call_closer.find("<|", closer_pos + 1);
+                            }
+                        }
+                    }
+
+                    LOG_DBG(
+                        "FUNC_PREFIXED_INDEXED: per_call_start='%s', namespace='%s', args_marker='%s', "
+                        "per_call_end='%s'\n",
+                        ts.per_call_start.c_str(), ts.function_namespace.c_str(), ts.args_marker.c_str(),
+                        ts.per_call_end.c_str());
+                } else {
+                    // Other formats like <|tool_call_begin|>name (non-indexed)
+                    // Use function_opener as default, but try to use full tool_call_opener if it contains more
+                    ts.function_prefix = discovered.function_opener;
+                    LOG_DBG("Initial function_prefix: '%s', tool_call_opener: '%s', tool_section_start: '%s'\n",
+                            ts.function_prefix.c_str(), discovered.tool_call_opener.c_str(),
+                            ts.tool_section_start.c_str());
+                    if (!ts.tool_section_start.empty() &&
+                        discovered.tool_call_opener.find(ts.tool_section_start) == 0) {
+                        std::string remainder = discovered.tool_call_opener.substr(ts.tool_section_start.length());
+                        LOG_DBG("Derived remainder: '%s'\n", remainder.c_str());
+                        if (remainder.length() > ts.function_prefix.length()) {
+                            ts.function_prefix = remainder;
+                        }
+                    }
+                    ts.function_suffix = discovered.function_name_suffix;
+                    ts.function_close  = discovered.function_closer;
+                }
+            }
+        }
+    }
+
+    // Fix for templates where tool_section_start matches function_prefix (double wrapping)
+    // e.g. Functionary: tool_section_start="<function=", function_prefix="<function="
+    if (!ts.tool_section_start.empty() && !ts.function_prefix.empty() && ts.tool_section_start == ts.function_prefix) {
+        LOG_DBG("tool_section_start matches function_prefix, clearing section start to avoid double wrapping\n");
+        ts.tool_section_start = "";
+    }
+
+    // Similar check for tool_section_end matching function_close
+    if (!ts.tool_section_end.empty() && !ts.function_close.empty() && ts.tool_section_end == ts.function_close) {
+        LOG_DBG("tool_section_end matches function_close, clearing section end to avoid double wrapping\n");
+        ts.tool_section_end = "";
+    }
+
+    // Handle nested container markers (e.g., DeepSeek R1 style)
+    // If function_suffix contains markdown code block (```), the template uses nested markers
+    // tool_section_start might be: <｜tool▁calls▁begin｜><｜tool▁call▁begin｜>function
+    // We need to derive tool_section_end from the outer marker pattern
+    if (ts.function_suffix.find("```") != std::string::npos && !ts.tool_section_start.empty()) {
+        // Check if tool_section_start contains nested markers (both outer and per-call)
+        // Pattern: <X_calls_begin><X_call_begin>...
+        // We look for "calls" pattern which indicates an outer container
+        size_t calls_pos = ts.tool_section_start.find("calls");
+        if (calls_pos != std::string::npos && calls_pos < ts.tool_section_start.length()) {
+            // Find where the outer marker ends (after the first >)
+            size_t first_close = ts.tool_section_start.find('>', calls_pos);
+            if (first_close != std::string::npos && first_close < ts.tool_section_start.length() - 1) {
+                // Extract the outer marker (e.g., "<｜tool▁calls▁begin｜>")
+                std::string outer_start = ts.tool_section_start.substr(0, first_close + 1);
+                // Derive the outer end marker by replacing "begin" with "end"
+                size_t      begin_pos   = outer_start.find("begin");
+                if (begin_pos != std::string::npos) {
+                    std::string outer_end =
+                        outer_start.substr(0, begin_pos) + "end" + outer_start.substr(begin_pos + 5);
+                    ts.tool_section_end = outer_end;
+
+                    // Strip outer marker from function_prefix and function_opener if they were combined
+                    if (ts.tool_section_start.find(outer_start) == 0) {
+                        std::string remainder    = ts.tool_section_start.substr(outer_start.length());
+                        // Trim leading whitespace from remainder
+                        size_t      first_non_ws = remainder.find_first_not_of(" \t\n\r");
+                        if (first_non_ws != std::string::npos && first_non_ws > 0) {
+                            remainder = remainder.substr(first_non_ws);
+                        }
+
+                        // Concatenate with existing function_prefix (e.g. separator tag)
+                        // but avoid double-concatenation if already present
+                        if (!remainder.empty() && ts.function_prefix.find(remainder) == std::string::npos) {
+                            ts.function_prefix = remainder + ts.function_prefix;
+                        }
+                    }
+
+                    // Update tool_section_start to be just the outer marker
+                    ts.tool_section_start = outer_start;
+
+                    // Check if there's a fence in tool_call_closer that should be in function_close
+                    // (DeepSeek R1 wraps JSON in markdown blocks within the custom tags)
+                    if (discovered.tool_call_closer.find("```") != std::string::npos) {
+                        size_t fence_pos = discovered.tool_call_closer.find("```");
+                        // Include leading newlines if present before the fence
+                        while (fence_pos > 0 && (discovered.tool_call_closer[fence_pos - 1] == '\n' ||
+                                                 discovered.tool_call_closer[fence_pos - 1] == '\r')) {
+                            fence_pos--;
+                        }
+                        ts.function_close = discovered.tool_call_closer.substr(fence_pos);
+
+                        // Clip function_close to not include tool_section_end (if they were combined in differential analysis)
+                        if (!ts.tool_section_end.empty()) {
+                            size_t end_pos = ts.function_close.find(ts.tool_section_end);
+                            if (end_pos != std::string::npos) {
+                                ts.function_close = ts.function_close.substr(0, end_pos);
+                            }
+                        }
+
+                        // Further trim any trailing EOS or prompt garbage
+                        ts.function_close     = strip_eos_token(ts.function_close);
+                        size_t prompt_garbage = ts.function_close.find("<｜");
+                        if (prompt_garbage != std::string::npos && prompt_garbage > 0 &&
+                            ts.function_close.substr(prompt_garbage).find("Assistant") != std::string::npos) {
+                            ts.function_close = ts.function_close.substr(0, prompt_garbage);
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    // General cleanup for tool_section_end when tool_section_start uses token markers (<|...|> or <｜...｜>)
+    // If tool_section_start contains a token marker with "begin" and tool_section_end is messy (contains }
+    // or multiple markers), derive tool_section_end by finding matching end marker in tool_call_closer
+    if (!ts.tool_section_start.empty() && !discovered.tool_call_closer.empty()) {
+        // Check if tool_section_start contains a token marker
+        size_t start_opener_pos = find_token_opener(ts.tool_section_start, 0);
+        size_t start_closer_pos = find_token_closer(ts.tool_section_start, start_opener_pos);
+        if (start_opener_pos != std::string::npos && start_closer_pos != std::string::npos) {
+            size_t      opener_len    = get_token_opener_length(ts.tool_section_start, start_opener_pos);
+            // Extract the token content (between opener and closer)
+            std::string start_content = ts.tool_section_start.substr(start_opener_pos + opener_len,
+                                                                     start_closer_pos - start_opener_pos - opener_len);
+
+            // Check if tool_section_end needs cleanup (starts with } or contains multiple markers)
+            bool needs_cleanup = false;
+            if (!ts.tool_section_end.empty() && ts.tool_section_end[0] == '}') {
+                needs_cleanup = true;
+            }
+            // Count tokens in tool_section_end
+            size_t token_count = 0;
+            size_t pos         = 0;
+            while ((pos = find_token_opener(ts.tool_section_end, pos)) != std::string::npos) {
+                token_count++;
+                pos += get_token_opener_length(ts.tool_section_end, pos);
+            }
+            if (token_count > 1) {
+                needs_cleanup = true;
+            }
+
+            if (needs_cleanup) {
+                // Find matching end marker in tool_call_closer
+                // Look for a token that has similar content but with "end" instead of "begin"
+                pos = 0;
+                while ((pos = find_token_opener(discovered.tool_call_closer, pos)) != std::string::npos) {
+                    size_t end_closer_pos = find_token_closer(discovered.tool_call_closer, pos);
+                    if (end_closer_pos != std::string::npos) {
+                        size_t      op_len    = get_token_opener_length(discovered.tool_call_closer, pos);
+                        size_t      cl_len    = get_token_closer_length(discovered.tool_call_closer, end_closer_pos);
+                        std::string candidate = discovered.tool_call_closer.substr(pos, end_closer_pos + cl_len - pos);
+                        std::string candidate_content =
+                            discovered.tool_call_closer.substr(pos + op_len, end_closer_pos - pos - op_len);
+
+                        // Check if this candidate matches our start marker structure
+                        // Start content might be "tool▁calls▁begin" and candidate might be "tool▁calls▁end"
+                        size_t begin_in_start   = start_content.find("begin");
+                        size_t end_in_candidate = candidate_content.find("end");
+                        if (begin_in_start != std::string::npos && end_in_candidate != std::string::npos) {
+                            // Check if they share a common prefix (e.g., "tool▁calls▁")
+                            std::string start_base = start_content.substr(0, begin_in_start);
+                            std::string cand_base  = candidate_content.substr(0, end_in_candidate);
+                            if (start_base == cand_base) {
+                                ts.tool_section_end = candidate;
+                                LOG_DBG(
+                                    "Derived tool_section_end='%s' from tool_section_start='%s' using token matching\n",
+                                    ts.tool_section_end.c_str(), ts.tool_section_start.c_str());
+                                break;
+                            }
+                        }
+                    }
+                    pos += get_token_opener_length(discovered.tool_call_closer, pos);
+                }
+            }
+        }
+    }
+
+    // Determine argument format
+    if (!discovered.parameter_key_prefix.empty() && discovered.parameter_key_prefix.find('<') != std::string::npos) {
+        ts.argument_format = tool_call_structure::ARGS_TAGGED;
+        ts.arg_prefix      = discovered.parameter_key_prefix;
+        ts.arg_suffix      = discovered.parameter_key_suffix;
+        ts.arg_close       = discovered.parameter_closer;
+        ts.arg_separator   = discovered.argument_separator;
+
+        // Check for specific GLM-4 style key-value tags
+        // Format: <arg_key>key</arg_key>\n<arg_value>value</arg_value>
+        // Analyzer detects suffix as: </arg_key>\n<arg_value>
+        if (ts.arg_suffix.find("<arg_value>") != std::string::npos) {
+            ts.argument_format = tool_call_structure::ARGS_KEY_VALUE_TAGS;
+
+            // Clean up suffix to be just the key closer
+            size_t val_opener = ts.arg_suffix.find("<arg_value>");
+            if (val_opener != std::string::npos) {
+                // Extract just the </arg_key> part (trimming whitespace/newlines before <arg_value>)
+                std::string key_closer = ts.arg_suffix.substr(0, val_opener);
+                // Trim trailing whitespace/newlines
+                while (!key_closer.empty() &&
+                       (key_closer.back() == '\n' || key_closer.back() == '\r' || key_closer.back() == ' ')) {
+                    key_closer.pop_back();
+                }
+                ts.arg_suffix = key_closer;
+            }
+        }
+    } else {
+        ts.argument_format = tool_call_structure::ARGS_JSON;
+    }
+
+    LOG_DBG("%s: final markers: section_start='%s', section_end='%s', prefix='%s', close='%s'\n", __func__,
+            ts.tool_section_start.c_str(), ts.tool_section_end.c_str(), ts.function_prefix.c_str(),
+            ts.function_close.c_str());
+}
+
+void template_analyzer::analyze_bracket_tag_format(tool_call_structure &               ts,
+                                                   const internal_discovered_pattern & discovered) {
+    // Bracket-tag format: [TOOL_CALLS]name[CALL_ID]id[ARGS]{...} (Mistral Small 3.2)
+    ts.supports_tools  = true;
+    ts.function_format = tool_call_structure::FUNC_BRACKET_TAG;
+    ts.argument_format = tool_call_structure::ARGS_JSON;
+
+    // The function_opener contains the bracket tag before the function name (e.g., "[TOOL_CALLS]")
+    // Each tool call starts with this tag, so it's the per_call_start, not a section wrapper
+    // tool_section_start/end should be empty since there's no overall section wrapper
+    ts.tool_section_start = "";
+    ts.tool_section_end   = "";
+    ts.per_call_start     = discovered.function_opener;
+
+    // Extract markers from function_name_suffix (e.g., "[CALL_ID]call_0001[ARGS]" or just "[ARGS]")
+    // Pattern: [ID_MARKER]...[ARGS_MARKER] or just [ARGS_MARKER]
+    if (!discovered.function_name_suffix.empty()) {
+        // Find all bracket tags in the suffix
+        std::vector<std::string> tags;
+        size_t                   pos = 0;
+        while ((pos = discovered.function_name_suffix.find('[', pos)) != std::string::npos) {
+            size_t end = discovered.function_name_suffix.find(']', pos);
+            if (end != std::string::npos) {
+                tags.push_back(discovered.function_name_suffix.substr(pos, end - pos + 1));
+                pos = end + 1;
+            } else {
+                break;
+            }
+        }
+
+        // Classify tags: args marker contains "ARG", id marker contains "ID" or "CALL"
+        for (const auto & tag : tags) {
+            std::string upper_tag = tag;
+            for (auto & c : upper_tag) {
+                c = static_cast<char>(std::toupper(static_cast<unsigned char>(c)));
+            }
+            if (upper_tag.find("ARG") != std::string::npos) {
+                ts.args_marker = tag;
+            } else if (upper_tag.find("ID") != std::string::npos || upper_tag.find("CALL") != std::string::npos) {
+                ts.id_marker = tag;
+            }
+        }
+    }
+
+    LOG_DBG("FUNC_BRACKET_TAG: per_call_start='%s', id_marker='%s', args_marker='%s'\n", ts.per_call_start.c_str(),
+            ts.id_marker.c_str(), ts.args_marker.c_str());
+}
+
+void template_analyzer::analyze_recipient_based_format(tool_call_structure &               ts,
+                                                       const internal_discovered_pattern & discovered) {
+    // Recipient-based format (Functionary v3.2): >>>recipient\n{content}
+    // where recipient is either "all" (for content) or a function name (for tools)
+    ts.supports_tools  = true;
+    ts.function_format = tool_call_structure::FUNC_RECIPIENT_BASED;
+    ts.argument_format = tool_call_structure::ARGS_JSON;  // Python dict format, parse as JSON
+
+    // The tool_call_start_marker is used as the recipient delimiter
+    ts.tool_section_start = discovered.tool_call_start_marker;
+    ts.tool_section_end   = "";
+
+    // For recipient-based format, content is wrapped in tool_call_start_marker + "all\n"
+    // This needs to be detected and stripped. We detect this by checking if the
+    // content_start marker (from phase 1 analysis) starts with tool_call_start_marker
+    // If not already detected, infer it from the pattern.
+    // Note: This is set on the ContentStructure result, not ToolCallStructure
+    // The caller (analyze_template) will have the ContentStructure to modify
+
+    LOG_DBG("FUNC_RECIPIENT_BASED: delimiter='%s'\n", ts.tool_section_start.c_str());
+}
+
+void template_analyzer::analyze_markdown_code_block_format(tool_call_structure &               ts,
+                                                           const internal_discovered_pattern & discovered) {
+    // Markdown code block format (Cohere Command-R Plus):
+    // Action:
+    // ```json
+    // [
+    //     {
+    //         "tool_name": "...",
+    //         "parameters": {...}
+    //     }
+    // ]
+    // ```
+    ts.supports_tools  = true;
+    ts.function_format = tool_call_structure::FUNC_MARKDOWN_CODE_BLOCK;
+    ts.argument_format = tool_call_structure::ARGS_JSON;
+
+    // Extract the code block marker (e.g., "Action:")
+    // The tool_call_start_marker should contain "Action:" followed by newline
+    if (!discovered.tool_call_start_marker.empty()) {
+        // Extract just the marker text (e.g., "Action:")
+        // The marker may be followed by whitespace/newline in the template
+        size_t marker_end = discovered.tool_call_start_marker.find_first_of(" \n\r\t");
+        if (marker_end != std::string::npos) {
+            ts.code_block_marker = discovered.tool_call_start_marker.substr(0, marker_end);
+        } else {
+            ts.code_block_marker = discovered.tool_call_start_marker;
+        }
+    }
+
+    // Extract the code block language (e.g., "json")
+    // For Command-R Plus format: Action:\n```json\n[...]
+    // The code fence is in tool_call_opener (before the function name), not function_name_suffix
+    if (!discovered.function_name_suffix.empty() && discovered.function_name_suffix.find("```") != std::string::npos) {
+        // Format: ```json or ```json\n
+        size_t code_fence_pos = discovered.function_name_suffix.find("```");
+        size_t lang_start     = code_fence_pos + 3;
+        // Find the end of the language identifier (newline, space, or end of string)
+        size_t lang_end       = discovered.function_name_suffix.find_first_of(" \n\r\t", lang_start);
+        if (lang_end != std::string::npos && lang_end > lang_start) {
+            ts.code_block_language = discovered.function_name_suffix.substr(lang_start, lang_end - lang_start);
+        } else {
+            // No language identifier after ```, will use "json" as default
+            ts.code_block_language = "json";
+        }
+    } else if (!discovered.tool_call_opener.empty() && discovered.tool_call_opener.find("```") != std::string::npos) {
+        // Code fence is in tool_call_opener (before the function name)
+        // Format: Action:\n```json\n[...
+        size_t code_fence_pos = discovered.tool_call_opener.find("```");
+        size_t lang_start     = code_fence_pos + 3;
+        // Find the end of the language identifier (newline, space, or end of string)
+        size_t lang_end       = discovered.tool_call_opener.find_first_of(" \n\r\t", lang_start);
+        if (lang_end != std::string::npos && lang_end > lang_start) {
+            ts.code_block_language = discovered.tool_call_opener.substr(lang_start, lang_end - lang_start);
+        } else {
+            // No language identifier after ```, will use "json" as default
+            ts.code_block_language = "json";
+        }
+    } else {
+        // Default to "json" if no code fence found
+        ts.code_block_language = "json";
+    }
+
+    // The tool_section_end should be the closing code fence: ```
+    if (!discovered.tool_call_closer.empty() && discovered.tool_call_closer.find("```") != std::string::npos) {
+        // Extract just the closing code fence (may have trailing content)
+        size_t fence_pos = discovered.tool_call_closer.find("```");
+        size_t fence_end = fence_pos + 3;
+        // Include any non-newline characters after ``` (like language identifier if present)
+        while (fence_end < discovered.tool_call_closer.length() && discovered.tool_call_closer[fence_end] != '\n' &&
+               discovered.tool_call_closer[fence_end] != '\r') {
+            fence_end++;
+        }
+        ts.tool_section_end = discovered.tool_call_closer.substr(fence_pos, fence_end - fence_pos);
+    } else {
+        // Default closing code fence
+        ts.tool_section_end = "```";
+    }
+
+    // JSON array format for function calls
+    ts.name_field = discovered.tool_name_field;
+    ts.args_field = discovered.tool_args_field;
+    ts.id_field   = discovered.tool_id_field;
+
+    LOG_DBG("FUNC_MARKDOWN_CODE_BLOCK: marker='%s', language='%s', section_end='%s'\n", ts.code_block_marker.c_str(),
+            ts.code_block_language.c_str(), ts.tool_section_end.c_str());
+}
diff --git a/common/chat-auto-parser-generator.cpp b/common/chat-auto-parser-generator.cpp
new file mode 100644
index 0000000000..0f4d153d06
--- /dev/null
+++ b/common/chat-auto-parser-generator.cpp
@@ -0,0 +1,250 @@
+#include "chat-auto-parser-helpers.h"
+#include "chat-auto-parser.h"
+#include "chat-peg-parser.h"
+#include "chat.h"
+#include "json-schema-to-grammar.h"
+#include "log.h"
+#include "nlohmann/json.hpp"
+
+#include <optional>
+
+using json = nlohmann::ordered_json;
+
+common_chat_params universal_peg_generator::generate_parser(const template_analysis_result & analysis,
+                                                            const common_chat_template &     tmpl,
+                                                            const struct templates_params &  inputs) {
+    common_chat_params data;
+
+    try {
+        LOG_DBG("%s\n", __func__);
+
+        // Patch messages if template requires non-null content
+        // Some templates (e.g., iquest) render null as "None" when concatenating strings
+        std::optional<json> messages_override;
+        if (analysis.tools.requires_nonnull_content && !inputs.messages.empty()) {
+            LOG_DBG("Patching null content to empty string (template requires non-null content)\n");
+            json patched_messages = inputs.messages;
+            for (auto & msg : patched_messages) {
+                if (msg.contains("content") && msg["content"].is_null()) {
+                    msg["content"] = "";
+                }
+            }
+            messages_override = patched_messages;
+        }
+
+        if (inputs.messages.empty()) {
+            // Some templates don't handle empty messages well - always leave something in
+            json message = {
+                { { "role", "user" }, { "content", "Hello" } }
+            };
+            messages_override.emplace(message);
+        }
+
+        // Calculate prompt first to detect forced thinking
+        data.prompt = common_chat_template_direct_apply(tmpl, inputs, messages_override);
+
+        // Determine if thinking is forced open based on prompt ending
+        bool thinking_forced_open = false;
+        if (analysis.content.reasoning_mode == content_structure::REASONING_FORCED_OPEN) {
+            if (inputs.enable_thinking) {
+                thinking_forced_open = true;
+                LOG_DBG("Thinking forced open based on template analysis\n");
+            } else {
+                // Template ends with reasoning start marker but thinking is disabled
+                // Append the end marker to close it
+                data.prompt += analysis.content.reasoning_end;
+                LOG_DBG("Appended reasoning end marker since thinking is disabled\n");
+            }
+        }
+        data.thinking_forced_open = thinking_forced_open;
+
+        // Build the unified parser
+        auto arena  = build_parser(analysis, tmpl, inputs, thinking_forced_open);
+        data.parser = arena.save();
+
+        // Determine format
+        bool has_tools =
+            inputs.tools.is_array() && !inputs.tools.empty() && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE;
+
+        if (has_tools && analysis.tools.supports_tools) {
+            // Unified format that handles both JSON and tagged tool calls
+            data.format = COMMON_CHAT_FORMAT_PEG_NATIVE;
+            LOG_DBG("Generated unified parser with tool support (format: PEG_NATIVE)\n");
+        } else if (analysis.content.reasoning_mode != content_structure::REASONING_NONE) {
+            // Reasoning markers detected - use PEG parser to handle thinking blocks
+            data.format = COMMON_CHAT_FORMAT_PEG_NATIVE;
+            LOG_DBG("Generated unified parser for reasoning handling (format: PEG_NATIVE)\n");
+        } else if (analysis.content.content_mode != content_structure::CONTENT_PLAIN) {
+            // Content markers detected - use PEG parser to strip them even without tools
+            data.format = COMMON_CHAT_FORMAT_PEG_NATIVE;
+            LOG_DBG("Generated unified parser for content marker stripping (format: PEG_NATIVE)\n");
+        } else if (analysis.tools.function_format == tool_call_structure::FUNC_RECIPIENT_BASED) {
+            // Recipient-based format (e.g., Functionary v3.2): >>>recipient\n{content}
+            // Need PEG parser to handle recipient delimiter parsing
+            data.format = COMMON_CHAT_FORMAT_PEG_NATIVE;
+            LOG_DBG("Generated unified parser for recipient-based format (format: PEG_NATIVE)\n");
+        } else if (analysis.tools.function_format == tool_call_structure::FUNC_TAG_WITH_NAME) {
+            // Tag-with-name format (e.g., func_name\n{args} for Functionary)
+            // Need PEG parser to handle function name parsing
+            data.format = COMMON_CHAT_FORMAT_PEG_NATIVE;
+            LOG_DBG("Generated unified parser for tag-with-name format (format: PEG_NATIVE)\n");
+        } else if (analysis.tools.function_format == tool_call_structure::FUNC_BRACKET_TAG) {
+            // Bracket-tag format (e.g., [TOOL_CALLS]name[CALL_ID]id[ARGS]{...} for Mistral Small 3.2)
+            // Need PEG parser to handle bracket tag parsing
+            data.format = COMMON_CHAT_FORMAT_PEG_NATIVE;
+            LOG_DBG("Generated unified parser for bracket-tag format (format: PEG_NATIVE)\n");
+        } else if (analysis.tools.function_format == tool_call_structure::FUNC_PREFIXED_INDEXED) {
+            // Prefixed-indexed format (e.g., Kimi-K2)
+            // Need PEG parser to handle namespace and indexed format
+            data.format = COMMON_CHAT_FORMAT_PEG_NATIVE;
+            LOG_DBG("Generated unified parser for prefixed-indexed format (format: PEG_NATIVE)\n");
+        } else {
+            data.format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
+            LOG_DBG("Generated unified parser without tools or content markers (format: CONTENT_ONLY)\n");
+        }
+
+        // Determine trigger word for lazy grammar
+        std::string trigger_word;
+        if (!analysis.tools.tool_section_start.empty() ||
+            analysis.tools.function_format == tool_call_structure::FUNC_RECIPIENT_BASED) {
+            trigger_word = analysis.tools.tool_section_start;
+        } else if (analysis.tools.function_format == tool_call_structure::FUNC_TAG_WITH_NAME) {
+            trigger_word = analysis.tools.function_prefix;
+        } else if (analysis.tools.function_format == tool_call_structure::FUNC_BRACKET_TAG ||
+                   analysis.tools.function_format == tool_call_structure::FUNC_PREFIXED_INDEXED) {
+            // For formats with per-call markers, use per_call_start as trigger
+            trigger_word = analysis.tools.per_call_start;
+        }
+
+        // Build grammar for tool calls
+        data.grammar_lazy = analysis.tools.supports_tools && has_tools;
+
+        // For FUNC_TAG_WITH_NAME with empty prefix (Functionary), disable lazy grammar
+        // since there's no clear trigger word - constrain from the start
+        if (analysis.tools.function_format == tool_call_structure::FUNC_TAG_WITH_NAME &&
+            analysis.tools.function_prefix.empty()) {
+            data.grammar_lazy = false;
+        }
+
+        if (data.grammar_lazy) {
+            if (!trigger_word.empty()) {
+                data.grammar_triggers.push_back({ COMMON_GRAMMAR_TRIGGER_TYPE_WORD, trigger_word });
+            }
+        }
+
+        // Build grammar
+        data.grammar = build_grammar([&](const common_grammar_builder & builder) {
+            if (inputs.tools.is_array()) {
+                for (const auto & tool : inputs.tools) {
+                    if (!tool.contains("type") || tool.at("type") != "function" || !tool.contains("function")) {
+                        continue;
+                    }
+                    const auto & function = tool.at("function");
+                    if (function.contains("parameters")) {
+                        auto params = function.at("parameters");
+                        builder.resolve_refs(params);
+                    }
+                }
+            }
+            arena.build_grammar(builder, data.grammar_lazy);
+        });
+
+        // Set preserved tokens from analysis
+        data.preserved_tokens = analysis.preserved_tokens;
+
+        LOG_DBG("=== UNIFIED PEG PARSER GENERATION COMPLETED ===\n");
+
+    } catch (const std::exception & e) {
+        LOG_DBG("Unified parser generation failed: %s\n", e.what());
+        throw;
+    }
+
+    return data;
+}
+
+common_peg_arena universal_peg_generator::build_parser(const template_analysis_result & analysis,
+                                                       const common_chat_template &     tmpl,
+                                                       const struct templates_params &  inputs,
+                                                       bool                             thinking_forced_open) {
+    GGML_UNUSED(tmpl);
+
+    auto parser = build_chat_peg_unified_parser([&](common_chat_peg_unified_builder & p) {
+        // Build reasoning block using ContentStructure
+        auto reasoning = p.build_reasoning_block(analysis.content, inputs.reasoning_format, thinking_forced_open);
+
+        // Build content block using ContentStructure
+        // Note: we don't pass tool_section_start here because content-before-tools handling
+        // is done inline in each branch below with p.content(p.until(marker))
+        auto content = p.build_content_block(analysis.content, inputs.reasoning_format);
+
+        // Build tool section using ToolCallStructure (if applicable)
+        bool has_tools =
+            inputs.tools.is_array() && !inputs.tools.empty() && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE;
+
+        if (has_tools && analysis.tools.supports_tools) {
+            bool force_calls = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED;
+            auto tool_section =
+                p.build_tool_section(analysis.tools, inputs.tools, inputs.parallel_tool_calls, force_calls);
+
+            // Compose: reasoning -> content before tools -> tool_section -> trailing content
+            // When thinking is forced open, the reasoning block expects </think>.
+            // For tool-only messages (no thinking content), the model may output tools directly
+            // without the </think> tag, so we need to make reasoning optional in that case.
+            // But if reasoning_format is NONE, the reasoning block is already eps() - don't wrap it
+            // in optional() as that would generate invalid grammar.
+            auto reasoning_for_tools =
+                (thinking_forced_open && inputs.reasoning_format != COMMON_REASONING_FORMAT_NONE) ?
+                    p.optional(reasoning) :
+                    reasoning;
+
+            if (!analysis.tools.tool_section_start.empty()) {
+                // With section markers: look for start marker to delimit content
+                auto content_before_tools = p.content(p.until(analysis.tools.tool_section_start));
+                return p.sequence({ reasoning_for_tools, p.space(), content_before_tools, p.space(), tool_section,
+                                    p.space(), p.optional(p.content(p.rest())), p.end() });
+            }
+            if (analysis.tools.function_format == tool_call_structure::FUNC_TAG_WITH_NAME &&
+                !analysis.tools.function_prefix.empty()) {
+                // Tag-with-name format (e.g., >>>func_name): content stops at function prefix
+                auto content_before_tools = p.content(p.until(analysis.tools.function_prefix));
+                return p.sequence(
+                    { reasoning_for_tools, p.space(), content_before_tools, p.space(), tool_section, p.end() });
+            }
+            if (analysis.tools.function_format == tool_call_structure::FUNC_TAG_WITH_NAME) {
+                // Functionary-style format: tool call starts immediately (e.g., func_name\n{args})
+                // No content before tools in this format - the entire output is the tool call
+                return p.sequence({ reasoning_for_tools, p.space(), tool_section, p.end() });
+            }
+            if (analysis.tools.function_format == tool_call_structure::FUNC_BRACKET_TAG ||
+                analysis.tools.function_format == tool_call_structure::FUNC_PREFIXED_INDEXED) {
+                // Bracket-tag (Mistral Small 3.2) or prefixed-indexed (Kimi-K2) format:
+                // Tool calls start with per_call_start marker (e.g., [TOOL_CALLS], <|tool_call_begin|>)
+                if (!analysis.tools.per_call_start.empty()) {
+                    auto content_before_tools = p.content(p.until(analysis.tools.per_call_start));
+                    return p.sequence(
+                        { reasoning_for_tools, p.space(), content_before_tools, p.space(), tool_section, p.end() });
+                }
+                // Fallback: no content before tools
+                return p.sequence({ reasoning_for_tools, p.space(), tool_section, p.end() });
+            }
+            if (analysis.tools.function_format == tool_call_structure::FUNC_MARKDOWN_CODE_BLOCK &&
+                !analysis.tools.code_block_marker.empty()) {
+                // Markdown code block format (Cohere Command-R Plus):
+                // Content stops at the code_block_marker (e.g., "Action:")
+                auto content_before_tools = p.content(p.until(analysis.tools.code_block_marker));
+                return p.sequence(
+                    { reasoning_for_tools, p.space(), content_before_tools, p.space(), tool_section, p.end() });
+            }
+            // No section markers (raw JSON format): content must stop at JSON object start
+            // Tool calls start with "{", so use that as a delimiter
+            auto content_before_tools = p.content(p.until("{"));
+            return p.sequence(
+                { reasoning_for_tools, p.space(), content_before_tools, p.space(), tool_section, p.end() });
+        }
+
+        // No tools - just reasoning (if any) followed by content
+        return p.sequence({ reasoning, p.space(), content, p.end() });
+    });
+
+    return parser;
+}
diff --git a/common/chat-auto-parser-helpers.cpp b/common/chat-auto-parser-helpers.cpp
new file mode 100644
index 0000000000..c63012c2a8
--- /dev/null
+++ b/common/chat-auto-parser-helpers.cpp
@@ -0,0 +1,1419 @@
+#include "chat-auto-parser-helpers.h"
+
+#include "chat-auto-parser.h"
+#include "chat.h"
+#include "log.h"
+
+#include "nlohmann/json.hpp"
+
+using json = nlohmann::ordered_json;
+
+bool string_ends_with(const std::string & str, const std::string & suffix) {
+    return str.size() >= suffix.size() && str.compare(str.size() - suffix.size(), suffix.size(), suffix) == 0;
+}
+
+void trim_whitespace(std::string & str) {
+    if (str.empty()) {
+        return;
+    }
+    size_t first = str.find_first_not_of(" \n\t\r");
+    if (first == std::string::npos) {
+        str.clear();
+        return;
+    }
+    size_t last = str.find_last_not_of(" \n\t\r");
+    str         = str.substr(first, (last - first + 1));
+}
+
+void trim_trailing_newlines(std::string & str) {
+    while (!str.empty() && (str.back() == '\n' || str.back() == '\r')) {
+        str.pop_back();
+    }
+}
+
+size_t count_non_whitespace(const std::string & str) {
+    size_t count = 0;
+    for (char c : str) {
+        if (c != ' ' && c != '\t' && c != '\n' && c != '\r') {
+            count++;
+        }
+    }
+    return count;
+}
+
+size_t find_last_of_any(const std::string & str, const std::string & chars, size_t start_pos) {
+    size_t last_pos = std::string::npos;
+    for (char c : chars) {
+        size_t pos = str.rfind(c, start_pos);
+        if (pos != std::string::npos && (last_pos == std::string::npos || pos > last_pos)) {
+            last_pos = pos;
+        }
+    }
+    return last_pos;
+}
+
+std::string extract_tag_name(const std::string & tag) {
+    if (tag.empty() || tag[0] != '<') {
+        return "";
+    }
+    std::string tag_name    = tag.substr(1);
+    size_t      end_bracket = tag_name.find_first_of(" >");
+    if (end_bracket != std::string::npos) {
+        tag_name = tag_name.substr(0, end_bracket);
+    }
+    return tag_name;
+}
+
+std::string create_closing_tag(const std::string & opening_tag) {
+    if (opening_tag.empty()) {
+        return "";
+    }
+    if (opening_tag[0] == '<') {
+        std::string name = extract_tag_name(opening_tag);
+        return "</" + name + ">";
+    }
+    if (opening_tag.front() == '[' && opening_tag.back() == ']') {
+        std::string name = opening_tag.substr(1, opening_tag.length() - 2);
+        return "[/" + name + "]";
+    }
+    return "";
+}
+
+std::string find_common_prefix(const std::vector<std::string> & strings) {
+    if (strings.empty()) {
+        return "";
+    }
+    if (strings.size() == 1) {
+        return strings[0];
+    }
+
+    std::string common = strings[0];
+    for (size_t i = 1; i < strings.size(); ++i) {
+        const std::string & current = strings[i];
+        std::string         temp_common;
+        for (size_t j = 0; j < common.length() && j < current.length(); ++j) {
+            if (common[j] == current[j]) {
+                temp_common += common[j];
+            } else {
+                break;
+            }
+        }
+        common = temp_common;
+    }
+    return common;
+}
+
+std::string find_common_suffix_generic(const std::vector<std::string> & strings) {
+    if (strings.empty()) {
+        return "";
+    }
+    if (strings.size() == 1) {
+        return strings[0];
+    }
+
+    std::string common = strings[0];
+    for (size_t i = 1; i < strings.size(); ++i) {
+        const std::string & current = strings[i];
+        std::string         temp_common;
+        size_t              min_len = std::min(common.length(), current.length());
+        for (size_t j = 0; j < min_len; ++j) {
+            size_t pos_common  = common.length() - j - 1;
+            size_t pos_current = current.length() - j - 1;
+            if (common[pos_common] == current[pos_current]) {
+                temp_common = common[pos_common] + temp_common;
+            } else {
+                break;
+            }
+        }
+        common = temp_common;
+    }
+    return common;
+}
+
+std::string find_common_substring_limited(const std::vector<std::string> & strings,
+                                          size_t                           max_length,
+                                          const std::string &              delimiters) {
+    std::string common = find_common_prefix(strings);
+    if (common.length() > max_length) {
+        size_t pos = find_last_of_any(common, delimiters, common.length() - 1);
+        if (pos != std::string::npos && pos > 0) {
+            return common.substr(0, pos + 1);
+        }
+        return common.substr(0, max_length);
+    }
+    return common;
+}
+
+std::string apply_template(common_chat_template      &    tmpl,
+                           const struct templates_params & inputs,
+                           const std::optional<json> &     messages_override,
+                           const std::optional<json> &     tools_override,
+                           const std::optional<json> &     additional_context) {
+    struct templates_params final_inputs(inputs);
+    final_inputs.messages = messages_override ? *messages_override : inputs.messages;
+    if (tools_override) {
+        final_inputs.tools = *tools_override;
+    } else {
+        final_inputs.tools = inputs.tools.empty() ? json() : inputs.tools;
+    }
+    final_inputs.add_generation_prompt            = inputs.add_generation_prompt;
+    final_inputs.extra_context                    = inputs.extra_context;
+    final_inputs.extra_context["enable_thinking"] = inputs.enable_thinking;
+    if (additional_context) {
+        final_inputs.extra_context.merge_patch(*additional_context);
+    }
+
+    try {
+        return common_chat_template_direct_apply(tmpl, inputs);
+    } catch (const std::exception & e) {
+        LOG_ERR("Template application failed: %s\n", e.what());
+        return "";
+    }
+}
+
+std::string adjust_to_token_boundary(const std::string & str) {
+    if (str.empty()) {
+        return str;
+    }
+
+    // Check if the string ends in the middle of a <|...|> token
+    // Look for unmatched <| at the end
+
+    // Find the last <| in the string
+    size_t last_open = str.rfind("<|");
+    if (last_open == std::string::npos) {
+        return str;  // No special tokens
+    }
+
+    // Find if there's a |> after the last <|
+    size_t matching_close = str.find("|>", last_open + 2);
+    if (matching_close != std::string::npos) {
+        // The token is complete, return as-is
+        return str;
+    }
+
+    // The string is truncated mid-token
+    // Truncate to just before the incomplete token
+    std::string result = str.substr(0, last_open);
+
+    // Trim any trailing whitespace
+    while (!result.empty() && (result.back() == ' ' || result.back() == '\t' || result.back() == '\n')) {
+        result.pop_back();
+    }
+
+    return result;
+}
+
+// Fullwidth vertical bar: ｜ (U+FF5C) is 3 bytes in UTF-8: 0xEF 0xBD 0x9C
+static const std::string FULLWIDTH_PIPE   = "\xef\xbd\x9c";        // ｜
+static const std::string TOKEN_OPENER_STD = "<|";
+static const std::string TOKEN_OPENER_FW  = "<" + FULLWIDTH_PIPE;  // <｜
+static const std::string TOKEN_CLOSER_STD = "|>";
+static const std::string TOKEN_CLOSER_FW  = FULLWIDTH_PIPE + ">";  // ｜>
+
+size_t find_token_opener(const std::string & str, size_t start_pos) {
+    size_t pos_std = str.find(TOKEN_OPENER_STD, start_pos);
+    size_t pos_fw  = str.find(TOKEN_OPENER_FW, start_pos);
+
+    if (pos_std == std::string::npos) {
+        return pos_fw;
+    }
+    if (pos_fw == std::string::npos) {
+        return pos_std;
+    }
+    return std::min(pos_std, pos_fw);
+}
+
+size_t find_token_closer(const std::string & str, size_t start_pos) {
+    size_t pos_std = str.find(TOKEN_CLOSER_STD, start_pos);
+    size_t pos_fw  = str.find(TOKEN_CLOSER_FW, start_pos);
+
+    if (pos_std == std::string::npos) {
+        return pos_fw;
+    }
+    if (pos_fw == std::string::npos) {
+        return pos_std;
+    }
+    return std::min(pos_std, pos_fw);
+}
+
+size_t get_token_opener_length(const std::string & str, size_t pos) {
+    if (pos >= str.length()) {
+        return 0;
+    }
+    if (str.compare(pos, TOKEN_OPENER_FW.length(), TOKEN_OPENER_FW) == 0) {
+        return TOKEN_OPENER_FW.length();  // 4 bytes for <｜
+    }
+    if (str.compare(pos, TOKEN_OPENER_STD.length(), TOKEN_OPENER_STD) == 0) {
+        return TOKEN_OPENER_STD.length();  // 2 bytes for <|
+    }
+    return 0;
+}
+
+size_t get_token_closer_length(const std::string & str, size_t pos) {
+    if (pos >= str.length()) {
+        return 0;
+    }
+    if (str.compare(pos, TOKEN_CLOSER_FW.length(), TOKEN_CLOSER_FW) == 0) {
+        return TOKEN_CLOSER_FW.length();  // 4 bytes for ｜>
+    }
+    if (str.compare(pos, TOKEN_CLOSER_STD.length(), TOKEN_CLOSER_STD) == 0) {
+        return TOKEN_CLOSER_STD.length();  // 2 bytes for |>
+    }
+    return 0;
+}
+
+std::string strip_eos_token(const std::string & str) {
+    if (str.empty()) {
+        return str;
+    }
+
+    // Find the last token in the string
+    // We need to find a token that looks like an EOS marker
+    // Common patterns:
+    // - <|eot_id|>, <|eos|>, <|end|>, <|endoftext|>
+    // - <｜end▁of▁sentence｜> (DeepSeek fullwidth)
+
+    size_t last_closer = std::string::npos;
+    size_t search_pos  = str.length();
+
+    // Search backwards for the last token closer
+    while (search_pos > 0) {
+        // Check for fullwidth closer first (it's longer)
+        if (search_pos >= TOKEN_CLOSER_FW.length()) {
+            size_t check_pos = search_pos - TOKEN_CLOSER_FW.length();
+            if (str.compare(check_pos, TOKEN_CLOSER_FW.length(), TOKEN_CLOSER_FW) == 0) {
+                last_closer = check_pos;
+                break;
+            }
+        }
+        // Check for standard closer
+        if (search_pos >= TOKEN_CLOSER_STD.length()) {
+            size_t check_pos = search_pos - TOKEN_CLOSER_STD.length();
+            if (str.compare(check_pos, TOKEN_CLOSER_STD.length(), TOKEN_CLOSER_STD) == 0) {
+                last_closer = check_pos;
+                break;
+            }
+        }
+        search_pos--;
+    }
+
+    if (last_closer == std::string::npos) {
+        return str;  // No token closer found
+    }
+
+    // Find the corresponding opener
+    size_t opener_search_start = (last_closer > 100) ? last_closer - 100 : 0;
+    size_t last_opener         = std::string::npos;
+    size_t opener_len          = 0;
+
+    for (size_t pos = opener_search_start; pos < last_closer; pos++) {
+        size_t len = get_token_opener_length(str, pos);
+        if (len > 0) {
+            last_opener = pos;
+            opener_len  = len;
+        }
+    }
+
+    if (last_opener == std::string::npos) {
+        return str;  // No matching opener found
+    }
+
+    // Extract the token content to check if it's an EOS marker
+    size_t closer_len     = get_token_closer_length(str, last_closer);
+    size_t content_start  = last_opener + opener_len;
+    size_t content_length = last_closer - content_start;
+
+    if (content_length == 0 || content_length > 50) {
+        return str;  // Invalid or too long token content
+    }
+
+    std::string token_content = str.substr(content_start, content_length);
+
+    // Convert to lowercase for comparison (ASCII only, sufficient for EOS markers)
+    std::string lower_content;
+    for (char c : token_content) {
+        lower_content += (c >= 'A' && c <= 'Z') ? (c + 32) : c;
+    }
+
+    // Check if this looks like an EOS token
+    // True EOS tokens:
+    //   - <|eos|>, <|eot_id|>, <|end_of_text|>, <|endoftext|>
+    //   - <｜end▁of▁sentence｜> (DeepSeek fullwidth)
+    // NOT EOS tokens (structural markers):
+    //   - <|END_ACTION|>, <|TOOL_CALL_END|>, <|end_thinking|>, etc.
+
+    bool is_eos = false;
+
+    // Check for specific EOS patterns
+    if (lower_content == "eos" || lower_content == "eot_id" || lower_content == "eot" ||
+        lower_content == "end_of_text" || lower_content == "endoftext") {
+        is_eos = true;
+    }
+    // DeepSeek's end_of_sentence uses fullwidth underscore (▁) which is preserved in lower_content
+    // The token content would be "end▁of▁sentence" (with ▁ = U+2581)
+    else if (token_content.find("sentence") != std::string::npos ||
+             token_content.find("\xe2\x96\x81of\xe2\x96\x81sentence") != std::string::npos) {
+        is_eos = true;
+    }
+
+    if (!is_eos) {
+        return str;  // Not an EOS token
+    }
+
+    // Strip the EOS token
+    std::string result = str.substr(0, last_opener);
+
+    LOG_DBG("Stripped EOS token '%s' from string\n",
+            str.substr(last_opener, last_closer + closer_len - last_opener).c_str());
+
+    return result;
+}
+
+std::string find_string_difference(const std::string & base, const std::string & extended) {
+    size_t common_prefix = 0;
+    while (common_prefix < base.length() && common_prefix < extended.length() &&
+           base[common_prefix] == extended[common_prefix]) {
+        common_prefix++;
+    }
+    return extended.substr(common_prefix);
+}
+
+std::string extract_json_field_name(const std::string &              opener,
+                                    const std::string &              default_name,
+                                    const std::vector<std::string> & candidates) {
+    for (const auto & candidate : candidates) {
+        std::string pattern = "\"" + candidate + "\"";
+        if (opener.find(pattern) != std::string::npos) {
+            LOG_DBG("Found JSON field name '%s' in opener\n", candidate.c_str());
+            return candidate;
+        }
+    }
+    return default_name;
+}
+
+std::string find_closing_pattern(const std::string & diff, size_t func_pos) {
+    std::vector<std::string> closers = { "</", "}", "]", ">", " " };
+
+    std::string best_pattern;
+    size_t      best_pos = std::string::npos;
+
+    for (const auto & pattern : closers) {
+        size_t pos = diff.find(pattern, func_pos);
+        if (pos != std::string::npos) {
+            if (pos < best_pos) {
+                if (pattern == "</") {
+                    size_t end_pos = diff.find('>', pos);
+                    if (end_pos != std::string::npos) {
+                        best_pattern = diff.substr(pos, end_pos - pos + 1);
+                        best_pos     = pos;
+                    }
+                } else {
+                    best_pattern = pattern;
+                    best_pos     = pos;
+                }
+            }
+        }
+    }
+    return best_pattern;
+}
+
+std::string find_tool_call_start(const std::string & diff) {
+    std::vector<std::string> start_patterns = { "<", "[", "{", "call", "func", "tool", "TOOL" };
+    for (const auto & pattern : start_patterns) {
+        size_t pos = diff.find(pattern);
+        if (pos < 5) {
+            if (pattern == "<") {
+                size_t end_pos = diff.find('>', pos);
+                if (end_pos != std::string::npos) {
+                    return diff.substr(pos, end_pos - pos + 1);
+                }
+            }
+            if (pattern == "[" || pattern == "{") {
+                size_t chunk_len = std::min(diff.length() - pos, (size_t) 60);
+                return diff.substr(pos, chunk_len);
+            }
+
+            size_t end_pos = diff.find_first_of(">]} \n", pos);
+            if (end_pos != std::string::npos) {
+                if (diff[end_pos] == '>' || diff[end_pos] == ']' || diff[end_pos] == '}') {
+                    return diff.substr(pos, end_pos - pos + 1);
+                }
+                return diff.substr(pos, end_pos - pos);
+            }
+            return diff.substr(pos, pattern.length());
+        }
+    }
+    return "";
+}
+
+std::string find_tool_call_end(const std::string & diff, size_t func_pos) {
+    char        opener_char = 0;
+    std::string start_tag_name;
+
+    std::string openers         = "[{<";
+    size_t      last_opener_pos = std::string::npos;
+    for (char c : openers) {
+        size_t p = diff.rfind(c, func_pos);
+        if (p != std::string::npos) {
+            if (last_opener_pos == std::string::npos || p > last_opener_pos) {
+                last_opener_pos = p;
+                opener_char     = c;
+            }
+        }
+    }
+
+    size_t unclosed_bracket = diff.rfind('[', func_pos);
+    if (unclosed_bracket != std::string::npos) {
+        size_t closer = diff.find(']', unclosed_bracket);
+        if (closer == std::string::npos || closer > func_pos) {
+            opener_char = '[';
+        }
+    }
+
+    if (opener_char == '<') {
+        size_t tag_start = diff.find('<', last_opener_pos);
+        if (tag_start != std::string::npos) {
+            // Include '=' in search to handle <function=name> style tags
+            // where the closing tag is </function>, not </function=name>
+            size_t tag_end = diff.find_first_of(" >=\n", tag_start);
+            if (tag_end != std::string::npos) {
+                start_tag_name = diff.substr(tag_start + 1, tag_end - (tag_start + 1));
+            }
+        }
+    }
+
+    if (!start_tag_name.empty()) {
+        std::string expected_closer = "</" + start_tag_name + ">";
+        size_t      pos             = diff.find(expected_closer, func_pos);
+        if (pos != std::string::npos) {
+            if (opener_char == '[') {
+                size_t bracket_pos = diff.rfind(']', pos);
+                if (bracket_pos != std::string::npos && bracket_pos > func_pos) {
+                    return diff.substr(bracket_pos, (pos + expected_closer.length()) - bracket_pos);
+                }
+            }
+            return expected_closer;
+        }
+    }
+
+    std::vector<std::string> end_patterns = { "</", "]", "}", ">", "```", "\n", " " };
+    std::string              best_pattern;
+    size_t                   best_pos = std::string::npos;
+
+    auto is_structural = [](const std::string & s) {
+        if (s.empty()) {
+            return false;
+        }
+        return s[0] == ']' || s[0] == '}' || s[0] == '>' || (s.size() >= 2 && s.substr(0, 2) == "</") ||
+               (s.size() >= 3 && s.substr(0, 3) == "```");
+    };
+
+    for (const auto & pattern : end_patterns) {
+        size_t pos = diff.find(pattern, func_pos);
+        if (pos == std::string::npos) {
+            continue;
+        }
+
+        bool current_is_struct = is_structural(pattern);
+        bool best_is_struct    = is_structural(best_pattern);
+
+        bool better = false;
+        if (best_pattern.empty()) {
+            better = true;
+        } else if (pos < best_pos) {
+            better = !(best_is_struct && !current_is_struct) &&
+                     !(opener_char == '[' && best_pattern[0] == ']' && pattern[0] == '}');
+        } else {
+            if (!best_is_struct && current_is_struct && pos < best_pos + 400) {
+                better = true;
+            } else if (best_is_struct && current_is_struct && opener_char == '[' && pattern[0] == ']' &&
+                       best_pattern[0] == '}') {
+                if (pos < best_pos + 100) {
+                    better = true;
+                }
+            }
+        }
+
+        if (better) {
+            best_pattern = pattern;
+            best_pos     = pos;
+
+            if (current_is_struct && (pattern == "]" || pattern == "}" || pattern == "```")) {
+                size_t tag_start = diff.find('<', best_pos + pattern.length());
+                if (tag_start != std::string::npos && tag_start < best_pos + pattern.length() + 5) {
+                    size_t tag_end = diff.find('>', tag_start);
+                    if (tag_end != std::string::npos) {
+                        best_pattern = diff.substr(best_pos, tag_end - best_pos + 1);
+                    }
+                }
+            }
+        }
+    }
+
+    return best_pattern;
+}
+
+std::string infer_tool_call_opener(const std::string & diff1, const std::string & diff2, const std::string & diff3) {
+    std::vector<std::string> differences = { diff1, diff2, diff3 };
+    return find_common_prefix(differences);
+}
+
+std::string infer_tool_call_closer(const std::string & diff1, const std::string & diff2, const std::string & diff3) {
+    std::vector<std::string> differences = { diff1, diff2, diff3 };
+    return find_common_suffix_generic(differences);
+}
+
+internal_discovered_pattern extract_patterns_from_differences(const std::string & tool1_diff,
+                                                              const std::string & tool2_diff,
+                                                              const std::string & tool3_diff,
+                                                              const std::string & tool1_full) {
+    LOG_DBG("%s\n", __func__);
+
+    internal_discovered_pattern patterns;
+
+    size_t func1_pos = tool1_diff.rfind("test_function_name");
+    size_t func2_pos = tool2_diff.rfind("test_function_name");
+
+    if (func1_pos != std::string::npos && func2_pos != std::string::npos) {
+        patterns.tool_call_opener = tool1_diff.substr(0, func1_pos);
+
+        if (tool1_full.length() >= tool1_diff.length()) {
+            size_t diff_start = tool1_full.length() - tool1_diff.length();
+
+            if (diff_start > 0 && tool1_full[diff_start - 1] == '<' && !patterns.tool_call_opener.empty() &&
+                patterns.tool_call_opener[0] != '<') {
+                patterns.tool_call_opener = "<" + patterns.tool_call_opener;
+            }
+        }
+
+        if (func1_pos == 0 && !tool1_full.empty()) {
+            size_t func_in_full = tool1_full.rfind("test_function_name");
+            if (func_in_full != std::string::npos && func_in_full > 0) {
+                // Look backwards from function name to find prefix pattern
+                // Find where the prefix ends (skip whitespace immediately before function name)
+                size_t prefix_end = func_in_full;
+                while (prefix_end > 0 && (tool1_full[prefix_end - 1] == ' ' || tool1_full[prefix_end - 1] == '\t')) {
+                    prefix_end--;
+                }
+
+                // Find where the prefix starts by looking for newline or alphanumeric boundary
+                size_t prefix_start = prefix_end;
+                while (prefix_start > 0) {
+                    char c = tool1_full[prefix_start - 1];
+                    // Stop at newline
+                    if (c == '\n' || c == '\r') {
+                        break;
+                    }
+                    // Stop if we hit alphanumeric (probably content, not a prefix delimiter)
+                    if (std::isalnum(static_cast<unsigned char>(c)) || c == '_') {
+                        prefix_start = prefix_end;  // Reset - no valid prefix found
+                        break;
+                    }
+                    prefix_start--;
+                }
+
+                // Extract the prefix if we found something meaningful
+                if (prefix_start < prefix_end) {
+                    std::string prefix      = tool1_full.substr(prefix_start, prefix_end - prefix_start);
+                    // Validate: prefix should contain non-whitespace and be reasonable length
+                    bool        has_content = false;
+                    for (char c : prefix) {
+                        if (c != ' ' && c != '\t' && c != '\n' && c != '\r') {
+                            has_content = true;
+                            break;
+                        }
+                    }
+                    if (has_content && prefix.length() >= 2 && prefix.length() <= 20) {
+                        LOG_DBG("Found prefix pattern in full output: '%s'\n", prefix.c_str());
+                        patterns.function_opener        = prefix;
+                        patterns.tool_call_start_marker = prefix;
+                    }
+                }
+            }
+        }
+
+        patterns.tool_name_field = extract_json_field_name(patterns.tool_call_opener, "name",
+                                                           { "tool_name", "name", "function_name", "function" });
+
+        patterns.tool_args_field =
+            extract_json_field_name(patterns.tool_call_opener + tool1_diff.substr(func1_pos), "arguments",
+                                    { "parameters", "arguments", "args", "params", "input" });
+
+        patterns.tool_id_field =
+            extract_json_field_name(tool1_diff, "", { "tool_call_id", "tool_id", "id", "call_id" });
+
+        size_t param1_pos       = tool2_diff.find("\"param1\"");
+        bool   param_has_quotes = (param1_pos != std::string::npos);
+        size_t param2_pos       = tool2_diff.find("\"param2\"");
+        size_t value1_pos       = tool2_diff.find("\"value1\"");
+
+        if (param1_pos == std::string::npos) {
+            param1_pos = tool2_diff.find("param1");
+        }
+        if (param_has_quotes && param1_pos != std::string::npos) {
+            param1_pos++;
+        }
+        if (param2_pos == std::string::npos) {
+            param2_pos = tool2_diff.find("param2");
+        }
+        if (param_has_quotes && param2_pos != std::string::npos) {
+            param2_pos++;
+        }
+        if (value1_pos == std::string::npos) {
+            value1_pos = tool2_diff.find("value1");
+        }
+        // Only skip quote if value was actually found quoted
+        bool value_has_quotes = (value1_pos != std::string::npos && tool2_diff[value1_pos] == '"');
+        if (value_has_quotes) {
+            value1_pos++;
+        }
+
+        if (param1_pos != std::string::npos && value1_pos != std::string::npos) {
+            size_t      search_start = (param1_pos > 20) ? param1_pos - 20 : 0;
+            std::string pre_param    = tool2_diff.substr(search_start, param1_pos - search_start);
+
+            size_t delim_pos = pre_param.find_last_of('\n');
+            if (delim_pos == std::string::npos) {
+                delim_pos = pre_param.find_last_of('>');
+            }
+
+            if (delim_pos != std::string::npos) {
+                patterns.parameter_key_prefix = pre_param.substr(delim_pos + 1);
+
+                // If prefix is empty after '>', check for GLM-style key-value tags
+                // Pattern: <arg_key>param1</arg_key><arg_value>value1</arg_value>
+                // In this case, the '>' ends the opening tag, and we should include the whole tag
+                if (patterns.parameter_key_prefix.empty() && delim_pos > 0) {
+                    // Look for matching '<' before the '>'
+                    size_t open_bracket = pre_param.rfind('<', delim_pos);
+                    if (open_bracket != std::string::npos) {
+                        // Extract the whole tag as the prefix
+                        patterns.parameter_key_prefix = pre_param.substr(open_bracket);
+                    }
+                }
+            } else {
+                size_t start_marker = pre_param.find_last_of("<{[ \"");
+                if (start_marker != std::string::npos) {
+                    patterns.parameter_key_prefix = pre_param.substr(start_marker);
+                } else {
+                    patterns.parameter_key_prefix = pre_param;
+                }
+            }
+
+            trim_whitespace(patterns.parameter_key_prefix);
+
+            size_t key_end = param1_pos + std::string("param1").length();
+            if (value1_pos > key_end) {
+                patterns.parameter_key_suffix = tool2_diff.substr(key_end, value1_pos - key_end);
+            }
+
+            size_t value1_end = value1_pos + std::string("value1").length();
+            if (value1_end < tool2_diff.length()) {
+                // Try to find XML-style closing tag like </parameter>
+                size_t close_start = tool2_diff.find("</", value1_end);
+                if (close_start != std::string::npos) {
+                    size_t close_end = tool2_diff.find('>', close_start);
+                    if (close_end != std::string::npos) {
+                        patterns.parameter_closer = tool2_diff.substr(close_start, close_end - close_start + 1);
+                    }
+                }
+            }
+        }
+
+        const std::string & func_context = tool1_diff;
+        size_t              open_pos     = func_context.rfind('<', func1_pos);
+        if (open_pos != std::string::npos && open_pos < func1_pos) {
+            size_t close_pos = func_context.find('>', open_pos);
+            if (close_pos != std::string::npos && close_pos < func1_pos) {
+                bool is_adjacent = true;
+                for (size_t k = close_pos + 1; k < func1_pos; ++k) {
+                    char c = func_context[k];
+                    if (c != ' ' && c != '\t' && c != '\n' && c != '\r') {
+                        is_adjacent = false;
+                        break;
+                    }
+                }
+                if (is_adjacent) {
+                    patterns.function_opener = func_context.substr(open_pos, close_pos - open_pos + 1);
+                }
+            } else {
+                patterns.function_opener = func_context.substr(open_pos, func1_pos - open_pos);
+            }
+        }
+
+        if (func1_pos > 0 && patterns.function_opener.empty()) {
+            size_t prefix_end = func1_pos;
+            // Skip whitespace immediately before function name
+            while (prefix_end > 0 && (func_context[prefix_end - 1] == ' ' || func_context[prefix_end - 1] == '\t')) {
+                prefix_end--;
+            }
+
+            // Find prefix start - look for newline or alphanumeric boundary
+            size_t prefix_start = prefix_end;
+            while (prefix_start > 0) {
+                char c = func_context[prefix_start - 1];
+                if (c == '\n' || c == '\r') {
+                    break;
+                }
+                if (std::isalnum(static_cast<unsigned char>(c)) || c == '_') {
+                    prefix_start = prefix_end;  // Reset - no valid prefix
+                    break;
+                }
+                prefix_start--;
+            }
+
+            if (prefix_start < prefix_end) {
+                // ...
+            }
+        }
+
+        // Fallback: look for standard delimiters
+        if (patterns.function_opener.empty()) {
+            for (int i = (int) func1_pos - 1; i >= 0; i--) {
+                if (func_context[i] == '{' || func_context[i] == '[' || func_context[i] == '(' ||
+                    func_context[i] == '<') {
+                    patterns.function_opener = func_context.substr(i, func1_pos - i);
+                    break;
+                }
+            }
+        }
+
+        size_t func_name_end = func1_pos + std::string("test_function_name").length();
+        if (func_name_end < func_context.length()) {
+            char next_char = func_context[func_name_end];
+            if (next_char == '>' || next_char == ']' || next_char == '}') {
+                patterns.function_name_suffix = std::string(1, next_char);
+            } else if (next_char == '"') {
+                if (func_name_end + 1 < func_context.length() && func_context[func_name_end + 1] == '>') {
+                    patterns.function_name_suffix = "\">";
+                } else {
+                    patterns.function_name_suffix = "\"";
+                }
+            } else if (next_char == '<') {
+                // Check if it's an XML-like tag suffix (e.g. <|tool_call_argument_begin|>)
+                // But NOT if it's a closing tag (e.g., </tool_call>) - that should be function_closer
+                if (func_name_end + 1 < func_context.length() && func_context[func_name_end + 1] == '/') {
+                    // This is a closing tag like </tool_call>, not a suffix
+                    // Leave function_name_suffix empty; function_closer will capture this
+                } else {
+                    size_t tag_close = func_context.find('>', func_name_end);
+                    if (tag_close != std::string::npos) {
+                        // It seems to be a tag, use it as suffix
+                        patterns.function_name_suffix = func_context.substr(func_name_end, tag_close - func_name_end + 1);
+                    }
+                }
+            } else if (next_char == '[') {
+                // Bracket-tag format: [CALL_ID]id[ARGS] (Mistral Small 3.2 style)
+                // Find where the JSON arguments start (at '{')
+                size_t json_start = func_context.find('{', func_name_end);
+                if (json_start != std::string::npos) {
+                    patterns.function_name_suffix = func_context.substr(func_name_end, json_start - func_name_end);
+                    LOG_DBG("Found bracket-tag suffix: '%s'\n", patterns.function_name_suffix.c_str());
+                }
+            } else if (next_char == ':') {
+                // Indexed format: function_name:0<|marker|> or function_name:0{args}
+                // Find where the suffix ends - either at a tag marker or at the JSON args start
+                size_t suffix_end = func_name_end + 1;
+                // Skip the index digits
+                while (suffix_end < func_context.length() &&
+                       std::isdigit(static_cast<unsigned char>(func_context[suffix_end]))) {
+                    suffix_end++;
+                }
+                if (suffix_end < func_context.length()) {
+                    char after_index = func_context[suffix_end];
+                    if (after_index == '<') {
+                        // There's a marker after the index (e.g., :0<|tool_call_argument_begin|>)
+                        size_t tag_close = func_context.find('>', suffix_end);
+                        if (tag_close != std::string::npos) {
+                            patterns.function_name_suffix =
+                                func_context.substr(func_name_end, tag_close - func_name_end + 1);
+                        } else {
+                            patterns.function_name_suffix =
+                                func_context.substr(func_name_end, suffix_end - func_name_end);
+                        }
+                    } else {
+                        // Just the index part (e.g., :0)
+                        patterns.function_name_suffix = func_context.substr(func_name_end, suffix_end - func_name_end);
+                    }
+                }
+            } else if (next_char == '\n' || next_char == '\r') {
+                // Check for markdown code block pattern (e.g., DeepSeek R1): \n```json\n{...}\n```<end>
+                size_t code_block_start = func_context.find("```", func_name_end);
+                if (code_block_start != std::string::npos && code_block_start < func_name_end + 10) {
+                    // Found code block start after function name
+                    // Skip the optional language tag (e.g., "json")
+                    size_t newline_after_lang = func_context.find('\n', code_block_start + 3);
+                    if (newline_after_lang != std::string::npos) {
+                        // function_name_suffix should include everything up to (and including) the newline after language tag
+                        patterns.function_name_suffix =
+                            func_context.substr(func_name_end, newline_after_lang - func_name_end + 1);
+                        LOG_DBG("Found markdown code block suffix: '%s'\n", patterns.function_name_suffix.c_str());
+                    }
+                }
+            }
+        }
+
+        // Function closer
+        size_t search_start = func_name_end;
+        if (!patterns.function_name_suffix.empty()) {
+            search_start += patterns.function_name_suffix.length();
+        }
+        patterns.function_closer = find_closing_pattern(func_context, search_start);
+
+        // Fix for XML-style tag formats where function_closer was detected as "}" (JSON closing)
+        // but should be the actual tag closer (e.g., <|tool_call_end|> or <｜tool▁call▁end｜>)
+        if (patterns.function_closer == "}" && !patterns.function_opener.empty() &&
+            patterns.function_opener[0] == '<') {
+            // This is an XML-style tag format, so the closer should be a tag, not just "}"
+            // Find the next tag marker after the search position
+            size_t next_tag = func_context.find('<', search_start);
+            if (next_tag != std::string::npos) {
+                // Handle both standard <|...|> and fullwidth <｜...｜> formats
+                size_t closer_pos = find_token_closer(func_context, next_tag);
+                if (closer_pos != std::string::npos) {
+                    size_t closer_len        = get_token_closer_length(func_context, closer_pos);
+                    patterns.function_closer = func_context.substr(next_tag, closer_pos - next_tag + closer_len);
+                    LOG_DBG("Adjusted function_closer from '}' to tag '%s' for XML-style format\n",
+                            patterns.function_closer.c_str());
+                }
+            }
+        }
+
+        if (patterns.function_closer == "}" && !patterns.function_name_suffix.empty() &&
+            patterns.function_name_suffix.find("```") != std::string::npos) {
+            // function_name_suffix contains a code block opener, look for the closing code block
+            size_t code_block_end = func_context.find("```", search_start);
+            if (code_block_end != std::string::npos) {
+                // Found closing code block, extract everything from ``` to end of tool call
+                // The closer should be \n```<per_call_end> (everything from ``` to the end marker)
+                size_t after_block = code_block_end + 3;
+                // Find the next tag marker (e.g., <|tool_call_end|>)
+                size_t next_tag    = func_context.find('<', after_block);
+                if (next_tag != std::string::npos) {
+                    size_t tag_end = func_context.find('>', next_tag);
+                    if (tag_end != std::string::npos) {
+                        // Don't include leading newline - the JSON args parser consumes trailing whitespace
+                        // So start exactly at the ``` (code_block_end)
+                        patterns.function_closer = func_context.substr(code_block_end, tag_end - code_block_end + 1);
+                        LOG_DBG("Detected markdown code block args, adjusted function_closer to: '%s'\n",
+                                patterns.function_closer.c_str());
+                    }
+                }
+            }
+        }
+
+        // Tool call start marker
+        if (patterns.function_opener.length() > 0 &&
+            patterns.tool_call_opener.length() > patterns.function_opener.length()) {
+            size_t opener_start = patterns.tool_call_opener.length() - patterns.function_opener.length();
+            if (opener_start > 0) {
+                std::string before_func    = patterns.tool_call_opener.substr(0, opener_start);
+                size_t      last_bracket   = before_func.find_last_of('[');
+                size_t      tool_obj_brace = std::string::npos;
+                if (last_bracket != std::string::npos && last_bracket + 1 < before_func.length()) {
+                    tool_obj_brace = before_func.find('{', last_bracket + 1);
+                }
+
+                if (tool_obj_brace != std::string::npos) {
+                    patterns.tool_call_start_marker = before_func.substr(0, tool_obj_brace);
+                } else if (last_bracket != std::string::npos) {
+                    patterns.tool_call_start_marker = before_func.substr(0, last_bracket + 1);
+                } else {
+                    patterns.tool_call_start_marker = before_func;
+                }
+            }
+        } else if (patterns.tool_call_start_marker.empty()) {
+            // Only search if not already set (e.g., by >>> prefix detection)
+            patterns.tool_call_start_marker = find_tool_call_start(tool1_diff);
+        }
+
+        if (patterns.tool_call_opener.empty()) {
+            patterns.tool_call_opener = infer_tool_call_opener(tool1_diff, tool2_diff, tool3_diff);
+            if (func1_pos != std::string::npos && patterns.tool_call_opener.length() > func1_pos) {
+                patterns.tool_call_opener = patterns.tool_call_opener.substr(0, func1_pos);
+            }
+        }
+        if (patterns.tool_call_closer.empty()) {
+            patterns.tool_call_closer = infer_tool_call_closer(tool1_diff, tool2_diff, tool3_diff);
+        }
+
+        patterns.tool_call_end_marker = find_tool_call_end(func_context, func1_pos);
+
+        if (!patterns.tool_call_end_marker.empty() && patterns.tool_call_end_marker.length() > 1) {
+            size_t eos_pos = patterns.tool_call_end_marker.find("<|");
+            if (eos_pos == 1) {
+                // Check if there's a bracket/brace before the token
+                char first_char = patterns.tool_call_end_marker[0];
+                if (first_char == ']' || first_char == '}') {
+                    // Check if this is an actual EOS token (contains "eot_id" or "eos")
+                    std::string token_content = patterns.tool_call_end_marker.substr(eos_pos);
+                    if (token_content.find("eot_id") != std::string::npos ||
+                        token_content.find("eos") != std::string::npos) {
+                        // This is an EOS token, strip it
+                        patterns.tool_call_end_marker = patterns.tool_call_end_marker.substr(0, 1);
+                    }
+                }
+            }
+        }
+
+        // Trim whitespace
+        if (!patterns.tool_call_end_marker.empty()) {
+            size_t first = patterns.tool_call_end_marker.find_first_not_of(" \n\t");
+            size_t last  = patterns.tool_call_end_marker.find_last_not_of(" \n\t");
+            if (first != std::string::npos && last != std::string::npos) {
+                patterns.tool_call_end_marker = patterns.tool_call_end_marker.substr(first, (last - first + 1));
+            }
+        }
+
+        // If tool_call_end_marker matches function_closer, it found the wrong tag.
+        // Use tool_call_closer instead which is derived from common suffix of diffs.
+        if (!patterns.function_closer.empty() && patterns.tool_call_end_marker == patterns.function_closer) {
+            if (!patterns.tool_call_closer.empty()) {
+                // Try to extract a proper closing tag from tool_call_closer
+                // Use rfind to get the LAST closing tag (e.g.,  not </function>)
+                size_t close_start = patterns.tool_call_closer.rfind("</");
+                if (close_start != std::string::npos) {
+                    size_t close_end = patterns.tool_call_closer.find('>', close_start);
+                    if (close_end != std::string::npos) {
+                        patterns.tool_call_end_marker =
+                            patterns.tool_call_closer.substr(close_start, close_end - close_start + 1);
+                    }
+                }
+            }
+        } else if (patterns.tool_call_end_marker == ">" && !patterns.tool_call_closer.empty() &&
+                   patterns.tool_call_closer.length() > 3) {
+            // If the specific end marker is just ">", but the common suffix (tool_call_closer) is substantial (e.g. <|tool_calls_section_end|>)
+            // then prefer the common suffix, as finding ">" might just be hitting the end of the last function call
+            if (patterns.tool_call_closer.find(patterns.tool_call_end_marker) != std::string::npos) {
+                patterns.tool_call_end_marker = patterns.tool_call_closer;
+            }
+        }
+
+        if (patterns.tool_call_start_marker.empty()) {
+            std::vector<std::string> diffs  = { tool1_diff, tool2_diff, tool3_diff };
+            patterns.tool_call_start_marker = find_common_substring_limited(diffs, 20, " \n\t<[{");
+        }
+
+        // Truncate if needed, but skip if func_pos is 0 (marker found via full output)
+        if (func1_pos != std::string::npos && func1_pos > 0 && patterns.tool_call_start_marker.length() > func1_pos) {
+            std::string candidate   = patterns.tool_call_start_marker.substr(0, func1_pos);
+            size_t      last_opener = candidate.find_last_of("{[");
+            if (last_opener != std::string::npos) {
+                patterns.tool_call_start_marker = candidate.substr(0, last_opener);
+            } else {
+                patterns.tool_call_start_marker = candidate;
+            }
+        }
+
+        // Ensure we don't truncate in the middle of <|...|> tokens
+        patterns.tool_call_start_marker = adjust_to_token_boundary(patterns.tool_call_start_marker);
+        patterns.tool_call_end_marker   = adjust_to_token_boundary(patterns.tool_call_end_marker);
+
+        // Final trim
+        if (!patterns.tool_call_start_marker.empty()) {
+            size_t first = patterns.tool_call_start_marker.find_first_not_of(" \n\t\r");
+            size_t last  = patterns.tool_call_start_marker.find_last_not_of(" \n\t\r");
+            if (first != std::string::npos && last != std::string::npos) {
+                patterns.tool_call_start_marker = patterns.tool_call_start_marker.substr(first, (last - first + 1));
+            }
+        }
+    }
+
+    return patterns;
+}
+
+internal_tool_format determine_format_from_patterns(const internal_discovered_pattern & patterns) {
+    LOG_DBG("%s\n", __func__);
+
+    if (patterns.tool_call_opener.empty() && patterns.tool_call_closer.empty() && patterns.function_opener.empty() &&
+        patterns.function_closer.empty() && patterns.parameter_opener.empty() && patterns.parameter_closer.empty() &&
+        patterns.argument_separator.empty() && patterns.tool_call_start_marker.empty() &&
+        patterns.tool_call_end_marker.empty()) {
+        LOG_DBG("All patterns are empty - template doesn't support tool calls\n");
+        return FORMAT_UNKNOWN;
+    }
+
+    // Check for markdown code block format (Cohere Command-R Plus)
+    // STRUCTURAL PATTERN: Action:\n```json\n[...]\n```
+    // Key indicators:
+    // 1. tool_call_start_marker contains "Action:" or similar plain text marker
+    // 2. function_name_suffix or tool_call_closer contains "```" (markdown code fence)
+    // 3. tool_call_opener starts with "[" indicating JSON array
+    bool has_code_fence = false;
+    if (!patterns.function_name_suffix.empty() && patterns.function_name_suffix.find("```") != std::string::npos) {
+        has_code_fence = true;
+    }
+    if (!patterns.tool_call_closer.empty() && patterns.tool_call_closer.find("```") != std::string::npos) {
+        has_code_fence = true;
+    }
+    bool has_action_marker = false;
+    if (!patterns.tool_call_start_marker.empty()) {
+        std::string marker_lower = patterns.tool_call_start_marker;
+        std::transform(marker_lower.begin(), marker_lower.end(), marker_lower.begin(), ::tolower);
+        if (marker_lower.find("action") != std::string::npos) {
+            has_action_marker = true;
+        }
+    }
+    if (has_code_fence && has_action_marker) {
+        LOG_DBG("Detected MARKDOWN_CODE_BLOCK format (Action: + ```json code fence)\n");
+        return FORMAT_MARKDOWN_CODE_BLOCK;
+    }
+
+    // Check for recipient-based routing format (e.g., Functionary v3.2)
+    // STRUCTURAL PATTERN: The same marker is used for both content routing and tool routing
+    // Key indicators:
+    // 1. tool_call_start_marker == function_opener (same marker used for both)
+    // 2. No parameter markers (arguments are plain dict/JSON, not wrapped in tags)
+    // 3. No XML-style tags (differentiates from FUNC_TAG_WITH_NAME)
+    // 4. function_opener doesn't start with structural chars like {, [, < (differentiates from other formats)
+    if (!patterns.tool_call_start_marker.empty() && !patterns.function_opener.empty() &&
+        patterns.tool_call_start_marker == patterns.function_opener) {
+        // Check this isn't an XML-tagged format (opener would start with '<')
+        if (patterns.function_opener[0] != '<' && patterns.function_opener[0] != '{' &&
+            patterns.function_opener[0] != '[') {
+            // Check there are no parameter markers
+            if (patterns.parameter_opener.empty() && patterns.parameter_closer.empty()) {
+                LOG_DBG("Detected RECIPIENT_BASED format (tool_call_start_marker == function_opener = '%s')\n",
+                        patterns.tool_call_start_marker.c_str());
+                return FORMAT_RECIPIENT_BASED;
+            }
+        }
+    }
+
+    if (!patterns.tool_call_opener.empty()) {
+        if (patterns.tool_call_opener.find("{\"name\":") != std::string::npos ||
+            patterns.tool_call_opener.find("{&quot;name&quot;:") != std::string::npos) {
+            LOG_DBG("Detected JSON_NATIVE format from tool_call_opener JSON structure\n");
+            return FORMAT_JSON_NATIVE;
+        }
+    }
+
+    if (!patterns.function_opener.empty() && patterns.function_opener.find('<') == 0) {
+        bool has_substantial_param_markers = false;
+        if (!patterns.parameter_opener.empty()) {
+            has_substantial_param_markers = (count_non_whitespace(patterns.parameter_opener) > 1);
+        }
+        if (!has_substantial_param_markers && !patterns.parameter_closer.empty()) {
+            has_substantial_param_markers = (count_non_whitespace(patterns.parameter_closer) > 1);
+        }
+
+        if (!has_substantial_param_markers) {
+            if ((!patterns.tool_call_opener.empty() && (patterns.tool_call_opener.find('[') != std::string::npos ||
+                                                        patterns.tool_call_opener.find('{') != std::string::npos)) ||
+                (!patterns.tool_call_start_marker.empty() &&
+                 (patterns.tool_call_start_marker.find('[') != std::string::npos ||
+                  patterns.tool_call_start_marker.find('{') != std::string::npos))) {
+                LOG_DBG("Detected JSON_NATIVE format (XML markers but JSON structure)\n");
+                return FORMAT_JSON_NATIVE;
+            }
+        }
+
+        LOG_DBG("Detected XML_CONSTRUCTED format from function_opener\n");
+        return FORMAT_XML_CONSTRUCTED;
+    }
+
+    if (!patterns.function_opener.empty() && patterns.function_opener.find('{') == 0) {
+        LOG_DBG("Detected JSON_NATIVE format from function_opener\n");
+        return FORMAT_JSON_NATIVE;
+    }
+
+    // Check for bracket-tag format: [TOOL_CALLS]name[CALL_ID]id[ARGS]{...}
+    // Detected when function_name_suffix contains bracket tags like [CALL_ID]...[ARGS]
+    if (!patterns.function_name_suffix.empty() && patterns.function_name_suffix.find('[') != std::string::npos &&
+        patterns.function_name_suffix.find(']') != std::string::npos) {
+        LOG_DBG("Detected BRACKET_TAG format from function_name_suffix containing bracket tags\n");
+        return FORMAT_BRACKET_TAG;
+    }
+
+    if (!patterns.tool_call_start_marker.empty() &&
+        (patterns.tool_call_start_marker.find('<') == 0 || patterns.tool_call_start_marker.find('[') == 0)) {
+        bool is_prefix_marker =
+            patterns.tool_call_start_marker.find("<|") == 0 || patterns.tool_call_start_marker.find("[|") == 0;
+        // Check for bracket-tag format: [TAG] style without | (e.g., [TOOL_CALLS])
+        bool is_bracket_tag = patterns.tool_call_start_marker.find('[') == 0 &&
+                              patterns.tool_call_start_marker.find("[|") != 0 &&
+                              patterns.tool_call_start_marker.find(']') != std::string::npos;
+        if (is_bracket_tag) {
+            LOG_DBG("Detected BRACKET_TAG format from tool_call_start_marker\n");
+            return FORMAT_BRACKET_TAG;
+        }
+        if (is_prefix_marker) {
+            LOG_DBG("Detected JSON_NATIVE format from tool_call_start_marker (instruction-based)\n");
+            return FORMAT_JSON_NATIVE;
+        }
+
+        LOG_DBG("Detected XML_CONSTRUCTED format from tool_call_start_marker\n");
+        return FORMAT_XML_CONSTRUCTED;
+    }
+
+    if (!patterns.tool_call_start_marker.empty() && patterns.tool_call_start_marker.find('{') == 0) {
+        LOG_DBG("Detected JSON_NATIVE format from tool_call_start_marker\n");
+        return FORMAT_JSON_NATIVE;
+    }
+
+    if (!patterns.tool_call_end_marker.empty() && patterns.tool_call_end_marker.find('>') == 0) {
+        LOG_DBG("Detected XML_CONSTRUCTED format from tool_call_end_marker\n");
+        return FORMAT_XML_CONSTRUCTED;
+    }
+
+    if (!patterns.tool_call_end_marker.empty() && patterns.tool_call_end_marker.find('}') == 0) {
+        LOG_DBG("Detected JSON_NATIVE format from tool_call_end_marker\n");
+        return FORMAT_JSON_NATIVE;
+    }
+
+    LOG_DBG("Format could not be determined from patterns\n");
+    return FORMAT_UNKNOWN;
+}
+
+internal_discovered_pattern analyze_by_differential(const common_chat_template & tmpl) {
+    internal_discovered_pattern patterns;
+
+    try {
+        LOG_DBG("%s\n", __func__);
+
+        auto caps                      = tmpl.original_caps();
+        bool minja_supports_tool_calls = caps.supports_tool_calls;
+        if (!minja_supports_tool_calls) {
+            LOG_DBG("Template doesn't support standard tool calls (per minja caps detection)\n");
+        }
+
+        // Define tools for testing
+        json tools = {
+            { { "type", "function" },
+             { "function",
+                { { "name", "test_function_name" },
+                  { "description", "A test function" },
+                  { "parameters",
+                    { { "type", "object" },
+                      { "properties",
+                        { { "param1", { { "type", "string" }, { "description", "First parameter" } } },
+                          { "param2", { { "type", "string" }, { "description", "Second parameter" } } } } },
+                      { "required", json::array({ "param1", "param2" }) } } } } } },
+            { { "type", "function" },
+             { "function",
+                { { "name", "another_test_function" },
+                  { "description", "Another test function" },
+                  { "parameters",
+                    { { "type", "object" },
+                      { "properties",
+                        { { "param1", { { "type", "string" }, { "description", "First parameter" } } } } },
+                      { "required", json::array({ "param1" }) } } } } }           }
+        };
+
+        // Test payload 1: Tool definitions + user + assistant with content only (no tool calls)
+        json user_msg = {
+            { "role",    "user"                        },
+            { "content", "Please help me with a task." }
+        };
+
+        json assistant_content_only = {
+            { "role",    "assistant"                                },
+            { "content", "I'll help you with that task right away." }
+        };
+
+        // Test payload 2: Tool definitions + user + assistant with content + tool calls
+        json assistant_content_with_tool = {
+            { "role",       "assistant"                                                                              },
+            { "content",    "I'll help you with that task right away."                                               },
+            { "tool_calls",
+             json::array(
+                  { { { "id", "call_0001" },
+                      { "type", "function" },
+                      { "function",
+                        { { "name", "test_function_name" },
+                          { "arguments", json::object({ { "param1", "value1" }, { "param2", "value2" } }) } } } } }) }
+        };
+
+        // Also test with content = null + tool calls (some templates check for this)
+        json assistant_null_content_with_tool = {
+            { "role",       "assistant"                                                                              },
+            { "content",    nullptr                                                                                  },
+            { "tool_calls",
+             json::array(
+                  { { { "id", "call_0001" },
+                      { "type", "function" },
+                      { "function",
+                        { { "name", "test_function_name" },
+                          { "arguments", json::object({ { "param1", "value1" }, { "param2", "value2" } }) } } } } }) }
+        };
+
+        struct templates_params inputs;
+        inputs.tools = tools;
+        inputs.add_generation_prompt = false;
+
+        // Helper function to safely render template, handling null content issues
+        auto safe_render = [&](const json & messages) -> std::string {
+            try {
+                // First try with the original messages
+                inputs.messages = messages;
+                return common_chat_template_direct_apply(tmpl, inputs);
+            } catch (const std::exception & e) {
+                // If it fails, try replacing null content with empty string
+                json fixed_messages = messages;
+                for (auto & msg : fixed_messages) {
+                    if (msg.contains("content") && msg["content"].is_null()) {
+                        msg["content"] = "";
+                    }
+                }
+                inputs.messages = fixed_messages;
+                try {
+                    return common_chat_template_direct_apply(tmpl, inputs);
+                } catch (...) {
+                    return "";
+                }
+            }
+        };
+
+        // Render payload 1: content only
+        std::string output_content_only = safe_render({ user_msg, assistant_content_only });
+
+        // Render payload 2: content + tool calls
+        std::string output_content_with_tool = safe_render({ user_msg, assistant_content_with_tool });
+
+        // Render payload 3: null content + tool calls
+        std::string output_null_content_with_tool = safe_render({ user_msg, assistant_null_content_with_tool });
+
+        LOG_DBG("Output 1 (content only): %s\n", output_content_only.c_str());
+        LOG_DBG("Output 2 (content + tools): %s\n", output_content_with_tool.c_str());
+        LOG_DBG("Output 3 (null + tools): %s\n", output_null_content_with_tool.c_str());
+
+        // Check if the template renders tool calls in any scenario
+        // Test 1: content vs content+tool_calls (for templates that render both)
+        // Test 2: content vs null+tool_calls (for templates that only render tools when content is null)
+        bool renders_tool_calls_with_content    = (output_content_only != output_content_with_tool);
+        bool renders_tool_calls_without_content = (output_content_only != output_null_content_with_tool);
+
+        if (!renders_tool_calls_with_content && !renders_tool_calls_without_content) {
+            LOG_DBG("Template does NOT render tool calls in any scenario\n");
+            // Return empty patterns to indicate no tool support
+            return patterns;
+        }
+
+        LOG_DBG("Template renders tool calls, proceeding with differential analysis\n");
+
+        // If we get here, the template does support tool calls
+        // Use the original differential analysis approach but now we know it's valid
+        json base_msg = {
+            { "role",    "assistant" },
+            { "content", "MARKER"    }
+        };
+
+        // Use nullptr for content to trigger tool_calls branch in templates that check "content is none"
+        // Include "id" field as some templates (e.g., Mistral Nemo) require it
+        json tool_msg1 = {
+            { "role",       "assistant"                                                                          },
+            { "content",    nullptr                                                                              },
+            { "tool_calls",
+             json::array(
+                  { { { "id", "call_0001" },
+                      { "type", "function" },
+                      { "function", { { "name", "test_function_name" }, { "arguments", json::object() } } } } }) }
+        };
+
+        json tool_msg2 = {
+            { "role",       "assistant"                                                                              },
+            { "content",    nullptr                                                                                  },
+            { "tool_calls",
+             json::array(
+                  { { { "id", "call_0001" },
+                      { "type", "function" },
+                      { "function",
+                        { { "name", "test_function_name" },
+                          { "arguments", json::object({ { "param1", "value1" }, { "param2", "value2" } }) } } } } }) }
+        };
+
+        json tool_msg3 = {
+            { "role",       "assistant"                                                                             },
+            { "content",    nullptr                                                                                 },
+            { "tool_calls",
+             json::array(
+                  { { { "id", "call_0001" },
+                      { "type", "function" },
+                      { "function", { { "name", "test_function_name" }, { "arguments", json::object() } } } },
+                    { { "id", "call_0002" },
+                      { "type", "function" },
+                      { "function", { { "name", "another_test_function" }, { "arguments", json::object() } } } } }) }
+        };
+
+        inputs.messages  = { user_msg, base_msg };
+        auto base_output = safe_render({ user_msg, base_msg });
+
+        inputs.messages   = { user_msg, tool_msg1 };
+        auto tool1_output = safe_render({ user_msg, tool_msg1 });
+
+        // Detect if template renders null content as "None" (Python/Jinja string representation)
+        // This happens when templates concatenate content without null checks, e.g.:
+        //   {{ '<|im_start|>' + message.role + '\n' + content }}
+        // Check if "None" appears in the tool output where it shouldn't
+        if (tool1_output.find("None") != std::string::npos) {
+            // Verify this is actually from null content by checking if it goes away with empty string
+            json tool_msg1_empty_content       = tool_msg1;
+            tool_msg1_empty_content["content"] = "";
+            auto tool1_output_empty            = safe_render({ user_msg, tool_msg1_empty_content });
+            if (tool1_output_empty.find("None") == std::string::npos) {
+                LOG_DBG("Template renders null content as 'None', switching to empty string\n");
+                patterns.requires_nonnull_content = true;
+                tool1_output                      = tool1_output_empty;
+
+                // Update tool messages to use empty string instead of null
+                tool_msg1["content"] = "";
+                tool_msg2["content"] = "";
+                tool_msg3["content"] = "";
+            }
+        }
+
+        inputs.messages   = { user_msg, tool_msg2 };
+        auto tool2_output = safe_render({ user_msg, tool_msg2 });
+
+        inputs.messages   = { user_msg, tool_msg3 };
+        auto tool3_output = safe_render({ user_msg, tool_msg3 });
+
+        std::string tool1_diff = find_string_difference(base_output, tool1_output);
+        std::string tool2_diff = find_string_difference(base_output, tool2_output);
+        std::string tool3_diff = find_string_difference(base_output, tool3_output);
+
+        LOG_DBG("Tool1 diff length: %zu\n", tool1_diff.length());
+        LOG_DBG("Tool2 diff length: %zu\n", tool2_diff.length());
+        LOG_DBG("Tool3 diff length: %zu\n", tool3_diff.length());
+
+        if (tool1_diff.empty() && tool2_diff.empty() && tool3_diff.empty()) {
+            LOG_DBG("All diffs are empty - trying without add_generation_prompt\n");
+            // Try with add_generation_prompt variations
+            json alternative_base_msg = {
+                { "role",    "assistant" },
+                { "content", "MARKER"    }
+            };
+
+            templates_params alt_inputs;
+            alt_inputs.tools                 = tools;
+            alt_inputs.messages              = { user_msg, alternative_base_msg };
+            alt_inputs.add_generation_prompt = false;
+            auto alt_base                    = common_chat_template_direct_apply(tmpl, alt_inputs);
+
+            alt_inputs.messages = { user_msg, tool_msg1 };
+            auto alt_tool1      = common_chat_template_direct_apply(tmpl, alt_inputs);
+
+            tool1_diff = find_string_difference(alt_base, alt_tool1);
+            if (!tool1_diff.empty()) {
+                // If we found a diff using the alternative approach, we must use the corresponding
+                // full output for pattern extraction (otherwise diff indices will be invalid)
+                tool1_output = alt_tool1;
+
+                alt_inputs.messages = { user_msg, tool_msg2 };
+                tool2_diff          = find_string_difference(alt_base, common_chat_template_direct_apply(tmpl, inputs));
+                alt_inputs.messages = { user_msg, tool_msg3 };
+                tool3_diff          = find_string_difference(alt_base, common_chat_template_direct_apply(tmpl, inputs));
+            }
+        }
+
+        patterns = extract_patterns_from_differences(tool1_diff, tool2_diff, tool3_diff, tool1_output);
+
+        LOG_DBG("=== ENDING TEMPLATE DIFFERENTIAL ANALYSIS ===\n");
+
+    } catch (const std::exception & e) {
+        LOG_DBG("Template differential analysis failed: %s\n", e.what());
+    }
+
+    return patterns;
+}
diff --git a/common/chat-auto-parser-helpers.h b/common/chat-auto-parser-helpers.h
new file mode 100644
index 0000000000..5162b09fbe
--- /dev/null
+++ b/common/chat-auto-parser-helpers.h
@@ -0,0 +1,133 @@
+#pragma once
+
+#include <optional>
+#include <string>
+#include <vector>
+
+#include "chat.h"
+#include "nlohmann/json.hpp"
+
+using json = nlohmann::ordered_json;
+
+namespace minja {
+class chat_template;
+}
+
+void   trim_whitespace(std::string & str);
+void   trim_trailing_newlines(std::string & str);
+size_t count_non_whitespace(const std::string & str);
+size_t find_last_of_any(const std::string & str, const std::string & chars, size_t start_pos);
+
+std::string extract_tag_name(const std::string & tag);
+std::string create_closing_tag(const std::string & opening_tag);
+
+std::string find_common_prefix(const std::vector<std::string> & strings);
+std::string find_common_suffix_generic(const std::vector<std::string> & strings);
+std::string find_common_substring_limited(const std::vector<std::string> & strings,
+                                          size_t                           max_length,
+                                          const std::string &              delimiters);
+
+bool        string_ends_with(const std::string & str, const std::string & suffix);
+std::string apply_template(common_chat_template      &    tmpl,
+                           const struct templates_params & inputs,
+                           const std::optional<json> &     messages_override  = std::nullopt,
+                           const std::optional<json> &     tools_override     = std::nullopt,
+                           const std::optional<json> &     additional_context = std::nullopt);
+
+// Adjust a marker string to ensure it ends at a complete <|...|> token boundary
+// This prevents truncation mid-token
+std::string adjust_to_token_boundary(const std::string & str);
+
+// Find the position of a token opener (<| or <｜) in a string
+// Returns std::string::npos if not found
+size_t find_token_opener(const std::string & str, size_t start_pos = 0);
+
+// Find the position of a token closer (|> or ｜>) in a string
+// Returns std::string::npos if not found
+size_t find_token_closer(const std::string & str, size_t start_pos = 0);
+
+// Get the length of the token opener at the given position (2 for <| or 4 for <｜)
+// Returns 0 if no valid opener at position
+size_t get_token_opener_length(const std::string & str, size_t pos);
+
+// Get the length of the token closer at the given position (2 for |> or 4 for ｜>)
+// Returns 0 if no valid closer at position
+size_t get_token_closer_length(const std::string & str, size_t pos);
+
+// Strip EOS/end-of-sentence tokens from the end of a string
+// Handles both standard (<|eos|>, <|eot_id|>) and fullwidth (<｜end▁of▁sentence｜>) formats
+std::string strip_eos_token(const std::string & str);
+
+// Internal structure for differential analysis (used during pattern extraction)
+struct internal_discovered_pattern {
+    std::string tool_call_opener;
+    std::string tool_call_closer;
+    std::string function_opener;
+    std::string function_closer;
+    std::string function_name_suffix;
+    std::string parameter_opener;
+    std::string parameter_closer;
+    std::string argument_separator;
+    std::string parameter_key_prefix;
+    std::string parameter_key_suffix;
+    std::string tool_call_start_marker;
+    std::string tool_call_end_marker;
+    std::string reasoning_start_marker;
+    std::string reasoning_end_marker;
+    std::string content_start_marker;
+    std::string content_end_marker;
+    std::string tool_name_field = "name";
+    std::string tool_args_field = "arguments";
+    std::string tool_id_field;
+    // For markdown code block format (Cohere Command-R Plus)
+    std::string code_block_marker;    // e.g., "Action:"
+    std::string code_block_language;  // e.g., "json"
+    // Flag: template renders null content as "None" string, requires empty string instead
+    bool        requires_nonnull_content = false;
+};
+
+// Internal enum for format classification
+enum internal_tool_format {
+    FORMAT_JSON_NATIVE,
+    FORMAT_XML_CONSTRUCTED,
+    FORMAT_BRACKET_TAG,          // [TOOL_CALLS]name[CALL_ID]id[ARGS]{...} (Mistral Small 3.2)
+    FORMAT_RECIPIENT_BASED,      // >>>recipient\n{content} (Functionary v3.2)
+    FORMAT_MARKDOWN_CODE_BLOCK,  // Action:\n```json\n[...]\n``` (Cohere Command-R Plus)
+    FORMAT_CONTENT_ONLY,
+    FORMAT_UNKNOWN
+};
+
+// Find the suffix that differentiates an extended string from a base string
+std::string find_string_difference(const std::string & base, const std::string & extended);
+
+// Extract JSON field name from an opener string
+std::string extract_json_field_name(const std::string &              opener,
+                                    const std::string &              default_name,
+                                    const std::vector<std::string> & candidates);
+
+// Find a closing pattern in a string starting from a given position
+std::string find_closing_pattern(const std::string & diff, size_t func_pos);
+
+// Find the tool call start marker in a difference string
+std::string find_tool_call_start(const std::string & diff);
+
+// Find the tool call end marker in a difference string
+std::string find_tool_call_end(const std::string & diff, size_t func_pos);
+
+// Infer the tool call opener from multiple difference strings
+std::string infer_tool_call_opener(const std::string & diff1, const std::string & diff2, const std::string & diff3);
+
+// Infer the tool call closer from multiple difference strings
+std::string infer_tool_call_closer(const std::string & diff1, const std::string & diff2, const std::string & diff3);
+
+// Extract patterns from differences between tool calls
+internal_discovered_pattern extract_patterns_from_differences(const std::string & tool1_diff,
+                                                            const std::string & tool2_diff,
+                                                            const std::string & tool3_diff,
+                                                            const std::string & tool1_full = "");
+
+// Determine the format classification from discovered patterns
+internal_tool_format determine_format_from_patterns(const internal_discovered_pattern & patterns);
+
+// Analyze template using differential analysis (internal use)
+internal_discovered_pattern analyze_by_differential(const common_chat_template & tmpl);
diff --git a/common/chat-auto-parser.h b/common/chat-auto-parser.h
new file mode 100644
index 0000000000..6062f4d37a
--- /dev/null
+++ b/common/chat-auto-parser.h
@@ -0,0 +1,183 @@
+#pragma once
+
+#include "chat.h"
+#include "common.h"
+#include "jinja/runtime.h"
+
+#include <chrono>
+#include <string>
+#include <vector>
+
+using json = nlohmann::ordered_json;
+
+// Phase 1 result: Content and reasoning structure (analyzed without tools)
+struct content_structure {
+    // Reasoning handling mode
+    enum reasoning_mode_type {
+        REASONING_NONE,         // No reasoning markers detected
+        REASONING_OPTIONAL,     // <think>...</think> may appear before content
+        REASONING_FORCED_OPEN,  // Template ends with open reasoning tag (thinking_forced_open)
+    };
+
+    reasoning_mode_type reasoning_mode = REASONING_NONE;
+    std::string         reasoning_start;  // e.g., "<think>", "<|START_THINKING|>"
+    std::string         reasoning_end;    // e.g., "</think>", "<|END_THINKING|>"
+
+    // Content wrapping mode
+    enum content_mode_type {
+        CONTENT_PLAIN,                   // No content markers
+        CONTENT_ALWAYS_WRAPPED,          // <response>...</response> always present
+        CONTENT_WRAPPED_WITH_REASONING,  // Content wrapped only when reasoning present
+    };
+
+    content_mode_type content_mode = CONTENT_PLAIN;
+    std::string       content_start;  // e.g., "<response>", "<|START_RESPONSE|>"
+    std::string       content_end;    // e.g., "</response>", "<|END_RESPONSE|>"
+};
+
+// Phase 2 result: Tool call structure (layered on Phase 1)
+struct tool_call_structure {
+    bool supports_tools = false;
+
+    // Container markers (what wraps all tool calls)
+    std::string tool_section_start;  // e.g., "<tool_call>", "[TOOL_CALLS]", "<TOOLCALL>", ""
+    std::string tool_section_end;    // e.g., "</tool_call>", "]", "</TOOLCALL>", ""
+
+    // Function format (how individual functions are structured)
+    enum function_format {
+        FUNC_JSON_OBJECT,       // {"name": "X", "arguments": {...}}
+        FUNC_TAG_WITH_NAME,     // <function=X>{...}</function>
+        FUNC_TAG_NAME_ONLY,     // <X>...</X> where X is function name (rare)
+        FUNC_PREFIXED_INDEXED,  // <|tool_call_begin|>functions.X:0<|tool_call_argument_begin|>{...}<|tool_call_end|>
+        FUNC_NAME_AS_KEY,       // [{"function_name": {...arguments...}}] (Apertus-style)
+        FUNC_BRACKET_TAG,       // [TOOL_CALLS]X[CALL_ID]id[ARGS]{...} (Mistral Small 3.2 style)
+        FUNC_RECIPIENT_BASED,   // >>>recipient\n{content} where recipient is "all" (content) or function name (tools)
+        FUNC_MARKDOWN_CODE_BLOCK,  // Action:\n```json\n[...]\n``` (Cohere Command-R Plus style)
+    };
+
+    function_format function_format = FUNC_JSON_OBJECT;
+
+    // For FUNC_JSON_OBJECT format - field names (may vary between templates)
+    std::string name_field = "name";       // Could be "tool_name", "function"
+    std::string args_field = "arguments";  // Could be "parameters", "params", "input"
+    std::string id_field;                  // Optional: "id", "tool_call_id", ""
+
+    // For FUNC_TAG_WITH_NAME format
+    std::string function_prefix;  // e.g., "<function="
+    std::string function_suffix;  // e.g., ">"
+    std::string function_close;   // e.g., "</function>"
+
+    // For FUNC_PREFIXED_INDEXED format (e.g., Kimi-K2)
+    std::string per_call_start;      // e.g., "<|tool_call_begin|>"
+    std::string function_namespace;  // e.g., "functions." (prefix before function name)
+    std::string args_marker;         // e.g., "<|tool_call_argument_begin|>"
+    std::string per_call_end;        // e.g., "<|tool_call_end|>"
+
+    // For FUNC_BRACKET_TAG format (e.g., Mistral Small 3.2)
+    std::string id_marker;  // e.g., "[CALL_ID]" - marker before tool call ID
+
+    // For FUNC_MARKDOWN_CODE_BLOCK format (e.g., Cohere Command-R Plus)
+    std::string code_block_marker;    // e.g., "Action:" - text marker before code block
+    std::string code_block_language;  // e.g., "json" - language identifier in code fence
+
+    // Argument format (how arguments are structured within a function)
+    enum argument_format {
+        ARGS_JSON,            // Standard JSON object: {"key": "value", ...}
+        ARGS_TAGGED,          // XML-style: <param=key>value</param>
+        ARGS_KEY_VALUE_TAGS,  // <arg_key>key</arg_key><arg_value>value</arg_value> (GLM-4.6)
+    };
+
+    argument_format argument_format = ARGS_JSON;
+
+    // For ARGS_TAGGED format
+    std::string arg_prefix;     // e.g., "<param=", "<parameter="
+    std::string arg_suffix;     // e.g., ">"
+    std::string arg_close;      // e.g., "</param>", "</parameter>"
+    std::string arg_separator;  // e.g., "", "\n"
+
+    // Flag: template renders null content as "None" string, requires empty string instead
+    bool requires_nonnull_content = false;
+};
+
+// Combined result of unified template analysis
+struct template_analysis_result {
+    content_structure   content;
+    tool_call_structure tools;
+
+    // Preserved tokens for tokenizer (union of all markers)
+    std::vector<std::string> preserved_tokens;
+};
+
+// Template analyzer that uses two-phase differential analysis
+class template_analyzer {
+  public:
+    // Main entry point: Unified two-phase analysis
+    static template_analysis_result analyze_template(const common_chat_template & tmpl);
+
+    // Phase 1 - Analyze content and reasoning structure (no tools)
+    static content_structure analyze_content_structure(const common_chat_template & tmpl);
+
+    // Phase 2 - Analyze tool call structure (layered on Phase 1)
+    static tool_call_structure analyze_tool_structure(const common_chat_template & tmpl,
+                                                      const content_structure &    content);
+
+  private:
+    // Phase 1 detection helpers
+    static void detect_reasoning_markers(const common_chat_template & tmpl, content_structure & cs);
+    static void detect_content_markers(const common_chat_template & tmpl, content_structure & cs);
+    static content_structure::reasoning_mode_type detect_reasoning_mode(const content_structure & cs,
+                                                                        const std::string &       prompt);
+
+    // Phase 2 detection helpers
+    static void detect_tool_markers(const common_chat_template & tmpl, tool_call_structure & ts);
+    static void detect_function_format(const common_chat_template & tmpl, tool_call_structure & ts);
+    static void detect_argument_format(const common_chat_template & tmpl, tool_call_structure & ts);
+
+    // Phase 2 helper methods
+    static void analyze_json_format(tool_call_structure & ts, const struct internal_discovered_pattern & discovered);
+    static void analyze_xml_format(tool_call_structure & ts, const struct internal_discovered_pattern & discovered);
+    static void analyze_bracket_tag_format(tool_call_structure &                      ts,
+                                           const struct internal_discovered_pattern & discovered);
+    static void analyze_recipient_based_format(tool_call_structure &                      ts,
+                                               const struct internal_discovered_pattern & discovered);
+    static void analyze_markdown_code_block_format(tool_call_structure &                      ts,
+                                                   const struct internal_discovered_pattern & discovered);
+
+    // Helper to collect preserved tokens from analysis result
+    static void collect_preserved_tokens(template_analysis_result & result);
+};
+
+struct templates_params {
+    json                                  messages;
+    json                                  tools;
+    common_chat_tool_choice               tool_choice = COMMON_CHAT_TOOL_CHOICE_AUTO;
+    json                                  json_schema;
+    bool                                  parallel_tool_calls = true;
+    common_reasoning_format               reasoning_format = COMMON_REASONING_FORMAT_AUTO;
+    bool                                  stream = true;
+    std::string                           grammar;
+    bool                                  add_generation_prompt = false;
+    bool                                  enable_thinking = true;
+    std::chrono::system_clock::time_point now = std::chrono::system_clock::now();
+    json                                  extra_context;
+    bool                                  add_bos = false;
+    bool                                  add_eos = false;
+    bool                                  is_inference = true;
+    bool                                  add_inference = false;
+    bool                                  mark_input = true; // whether to mark input strings in the jinja context
+};
+
+class universal_peg_generator {
+  public:
+    // Generate parser from analysis result
+    static common_chat_params generate_parser(const template_analysis_result & analysis,
+                                              const common_chat_template &     tmpl,
+                                              const struct templates_params &  inputs);
+
+  private:
+    // Build unified parser (single code path for all formats)
+    static common_peg_arena build_parser(const template_analysis_result & analysis,
+                                         const common_chat_template &     tmpl,
+                                         const struct templates_params &  inputs,
+                                         bool                             thinking_forced_open);
+};
diff --git a/common/chat-parser-xml-toolcall.cpp b/common/chat-parser-xml-toolcall.cpp
deleted file mode 100644
index a80900ff8d..0000000000
--- a/common/chat-parser-xml-toolcall.cpp
+++ /dev/null
@@ -1,879 +0,0 @@
-#include "chat.h"
-#include "chat-parser.h"
-#include "common.h"
-#include "json-partial.h"
-#include "json-schema-to-grammar.h"
-#include "log.h"
-#include "regex-partial.h"
-
-using json = nlohmann::ordered_json;
-
-class xml_toolcall_syntax_exception : public std::runtime_error {
-  public:
-    xml_toolcall_syntax_exception(const std::string & message) : std::runtime_error(message) {}
-};
-
-template<typename T>
-inline void sort_uniq(std::vector<T> &vec) {
-    std::sort(vec.begin(), vec.end());
-    vec.erase(std::unique(vec.begin(), vec.end()), vec.end());
-}
-
-template<typename T>
-inline bool all_space(const T &str) {
-    return std::all_of(str.begin(), str.end(), [](unsigned char ch) { return std::isspace(ch); });
-}
-
-static size_t utf8_truncate_safe(const std::string_view s) {
-    size_t len = s.size();
-    if (len == 0) return 0;
-    size_t i = len;
-    for (size_t back = 0; back < 4 && i > 0; ++back) {
-        --i;
-        unsigned char c = s[i];
-        if ((c & 0x80) == 0) {
-            return len;
-        } else if ((c & 0xC0) == 0xC0) {
-            size_t expected_len = 0;
-            if ((c & 0xE0) == 0xC0) expected_len = 2;
-            else if ((c & 0xF0) == 0xE0) expected_len = 3;
-            else if ((c & 0xF8) == 0xF0) expected_len = 4;
-            else return i;
-            if (len - i >= expected_len) {
-                return len;
-            } else {
-                return i;
-            }
-        }
-    }
-    return len - std::min(len, size_t(3));
-}
-
-inline void utf8_truncate_safe_resize(std::string &s) {
-    s.resize(utf8_truncate_safe(s));
-}
-
-inline std::string_view utf8_truncate_safe_view(const std::string_view s) {
-    return s.substr(0, utf8_truncate_safe(s));
-}
-
-static std::optional<common_chat_msg_parser::find_regex_result> try_find_2_literal_splited_by_spaces(common_chat_msg_parser & builder, const std::string & literal1, const std::string & literal2) {
-    if (literal1.size() == 0) return builder.try_find_literal(literal2);
-    const auto saved_pos = builder.pos();
-    while (auto res = builder.try_find_literal(literal1)) {
-        builder.consume_spaces();
-        const auto match_len = std::min(literal2.size(), builder.input().size() - builder.pos());
-        if (builder.input().compare(builder.pos(), match_len, literal2, 0, match_len) == 0) {
-            if (res->prelude.size() != res->groups[0].begin - saved_pos) {
-                res->prelude = builder.str({saved_pos, res->groups[0].begin});
-            }
-            builder.move_to(builder.pos() + match_len);
-            res->groups[0].end = builder.pos();
-            GGML_ASSERT(res->groups[0].begin != res->groups[0].end);
-            return res;
-        }
-        builder.move_to(res->groups[0].begin + 1);
-    }
-    builder.move_to(saved_pos);
-    return std::nullopt;
-}
-
-/**
- * make a GBNF that accept any strings except those containing any of the forbidden strings.
- */
-std::string make_gbnf_excluding(std::vector<std::string> forbids) {
-    constexpr auto charclass_escape = [](unsigned char c) -> std::string {
-        if (c == '\\' || c == ']' || c == '^' || c == '-') {
-            std::string s = "\\";
-            s.push_back((char)c);
-            return s;
-        }
-        if (isprint(c)) {
-            return std::string(1, (char)c);
-        }
-        char buf[16];
-        snprintf(buf, 15, "\\x%02X", c);
-        return std::string(buf);
-    };
-    constexpr auto build_expr = [charclass_escape](auto self, const std::vector<std::string>& forbids, int l, int r, int depth) -> std::string {
-        std::vector<std::pair<unsigned char, std::pair<int,int>>> children;
-        int i = l;
-        while (i < r) {
-            const std::string &s = forbids[i];
-            if ((int)s.size() == depth) {
-                ++i;
-                continue;
-            }
-            unsigned char c = (unsigned char)s[depth];
-            int j = i;
-            while (j < r && (int)forbids[j].size() > depth &&
-                   (unsigned char)forbids[j][depth] == c) {
-                ++j;
-            }
-            children.push_back({c, {i, j}});
-            i = j;
-        }
-        std::vector<std::string> alts;
-        if (!children.empty()) {
-            std::string cls;
-            for (auto &ch : children) cls += charclass_escape(ch.first);
-            alts.push_back(std::string("[^") + cls + "]");
-        }
-        for (auto &ch : children) {
-            std::string childExpr = self(self, forbids, ch.second.first, ch.second.second, depth+1);
-            if (!childExpr.empty()) {
-                std::string quoted_ch = "\"";
-                if (ch.first == '\\') quoted_ch += "\\\\";
-                else if (ch.first == '"') quoted_ch += "\\\"";
-                else if (isprint(ch.first)) quoted_ch.push_back(ch.first);
-                else {
-                    char buf[16];
-                    snprintf(buf, 15, "\\x%02X", ch.first);
-                    quoted_ch += buf;
-                }
-                quoted_ch += "\"";
-                std::string branch = quoted_ch + std::string(" ") + childExpr;
-                alts.push_back(branch);
-            }
-        }
-        if (alts.empty()) return "";
-        std::ostringstream oss;
-        oss << "( ";
-        for (size_t k = 0; k < alts.size(); ++k) {
-            if (k) oss << " | ";
-            oss << alts[k];
-        }
-        oss << " )";
-        return oss.str();
-    };
-    if (forbids.empty()) return "( . )*";
-    sort(forbids.begin(), forbids.end());
-    std::string expr = build_expr(build_expr, forbids, 0, forbids.size(), 0);
-    if (expr.empty()) {
-        std::string cls;
-        for (auto &s : forbids) if (!s.empty()) cls += charclass_escape((unsigned char)s[0]);
-        expr = std::string("( [^") + cls + "] )";
-    }
-    if (forbids.size() == 1)
-        return expr + "*";
-    else
-        return std::string("( ") + expr + " )*";
-}
-
-/**
- * Build grammar for xml-style tool call
- * form.scope_start and form.scope_end can be empty.
- * Requires data.format for model-specific hacks.
- */
-void build_grammar_xml_tool_call(common_chat_params & data, const json & tools, const struct xml_tool_call_format & form) {
-    GGML_ASSERT(!form.tool_start.empty());
-    GGML_ASSERT(!form.tool_sep.empty());
-    GGML_ASSERT(!form.key_start.empty());
-    GGML_ASSERT(!form.val_end.empty());
-    GGML_ASSERT(!form.tool_end.empty());
-
-    std::string key_val_sep = form.key_val_sep;
-    if (form.key_val_sep2) {
-        key_val_sep += "\n";
-        key_val_sep += *form.key_val_sep2;
-    }
-    GGML_ASSERT(!key_val_sep.empty());
-
-    if (tools.is_array() && !tools.empty()) {
-        data.grammar = build_grammar([&](const common_grammar_builder &builder) {
-            auto string_arg_val = form.last_val_end ?
-                    builder.add_rule("string-arg-val", make_gbnf_excluding({form.val_end, *form.last_val_end})) :
-                    builder.add_rule("string-arg-val", make_gbnf_excluding({form.val_end}));
-
-            std::vector<std::string> tool_rules;
-            for (const auto & tool : tools) {
-                if (!tool.contains("type") || tool.at("type") != "function" || !tool.contains("function")) {
-                    LOG_WRN("Skipping tool without function: %s", tool.dump(2).c_str());
-                    continue;
-                }
-                const auto & function = tool.at("function");
-                if (!function.contains("name") || !function.at("name").is_string()) {
-                    LOG_WRN("Skipping invalid function (invalid name): %s", function.dump(2).c_str());
-                    continue;
-                }
-                if (!function.contains("parameters") || !function.at("parameters").is_object()) {
-                    LOG_WRN("Skipping invalid function (invalid parameters): %s", function.dump(2).c_str());
-                    continue;
-                }
-                std::string name = function.at("name");
-                auto parameters = function.at("parameters");
-                builder.resolve_refs(parameters);
-
-                struct parameter_rule {
-                    std::string symbol_name;
-                    bool is_required;
-                };
-                std::vector<parameter_rule> arg_rules;
-                if (!parameters.contains("properties") || !parameters.at("properties").is_object()) {
-                    LOG_WRN("Skipping invalid function (invalid properties): %s", function.dump(2).c_str());
-                    continue;
-                } else {
-                    std::vector<std::string> requiredParameters;
-                    if (parameters.contains("required")) {
-                        try { parameters.at("required").get_to(requiredParameters); }
-                        catch (const std::runtime_error&) {
-                            LOG_WRN("Invalid function required parameters, ignoring: %s", function.at("required").dump(2).c_str());
-                        }
-                    }
-                    sort_uniq(requiredParameters);
-                    for (const auto & [key, value] : parameters.at("properties").items()) {
-                        std::string quoted_key = key;
-                        bool required = std::binary_search(requiredParameters.begin(), requiredParameters.end(), key);
-                        if (form.key_start.back() == '"' && key_val_sep[0] == '"') {
-                            quoted_key = gbnf_format_literal(key);
-                            quoted_key = quoted_key.substr(1, quoted_key.size() - 2);
-                        }
-                        arg_rules.push_back(parameter_rule {builder.add_rule("func-" + name + "-kv-" + key,
-                            gbnf_format_literal(form.key_start) + " " +
-                            gbnf_format_literal(quoted_key) + " " +
-                            gbnf_format_literal(key_val_sep) + " " +
-                            ((value.contains("type") && value["type"].is_string() && value["type"] == "string" && (!form.raw_argval || *form.raw_argval)) ?
-                                    (form.raw_argval ?
-                                            string_arg_val :
-                                            "( " + string_arg_val + " | " + builder.add_schema(name + "-arg-" + key, value) + " )"
-                                    ) :
-                                    builder.add_schema(name + "-arg-" + key, value)
-                            )
-                        ), required});
-                    }
-                }
-
-                auto next_arg_with_sep = builder.add_rule(name + "-last-arg-end", form.last_val_end ? gbnf_format_literal(*form.last_val_end) : gbnf_format_literal(form.val_end));
-                decltype(next_arg_with_sep) next_arg = "\"\"";
-                for (auto i = arg_rules.size() - 1; /* i >= 0 && */ i < arg_rules.size(); --i) {
-                    std::string include_this_arg = arg_rules[i].symbol_name + " " + next_arg_with_sep;
-                    next_arg = builder.add_rule(name + "-arg-after-" + std::to_string(i), arg_rules[i].is_required ?
-                            include_this_arg : "( " + include_this_arg + " ) | " + next_arg
-                    );
-                    include_this_arg = gbnf_format_literal(form.val_end) + " " + include_this_arg;
-                    next_arg_with_sep = builder.add_rule(name + "-arg-after-" + std::to_string(i) + "-with-sep", arg_rules[i].is_required ?
-                            include_this_arg : "( " + include_this_arg + " ) | " + next_arg_with_sep
-                    );
-                }
-
-                std::string quoted_name = name;
-                if (form.tool_start.back() == '"' && form.tool_sep[0] == '"') {
-                    quoted_name = gbnf_format_literal(name);
-                    quoted_name = quoted_name.substr(1, quoted_name.size() - 2);
-                }
-                quoted_name = gbnf_format_literal(quoted_name);
-                // Kimi-K2 uses functions.{{ tool_call['function']['name'] }}:{{ loop.index }} as function name
-                if (data.format == COMMON_CHAT_FORMAT_KIMI_K2) {
-                    quoted_name = "\"functions.\" " + quoted_name + " \":\" [0-9]+";
-                }
-                tool_rules.push_back(builder.add_rule(name + "-call",
-                        gbnf_format_literal(form.tool_start) + " " +
-                        quoted_name + " " +
-                        gbnf_format_literal(form.tool_sep) + " " +
-                        next_arg
-                ));
-            }
-
-            auto tool_call_once = builder.add_rule("root-tool-call-once", string_join(tool_rules, " | "));
-            auto tool_call_more = builder.add_rule("root-tool-call-more", gbnf_format_literal(form.tool_end) + " " + tool_call_once);
-            auto call_end = builder.add_rule("root-call-end", form.last_tool_end ? gbnf_format_literal(*form.last_tool_end) : gbnf_format_literal(form.tool_end));
-            auto tool_call_multiple_with_end = builder.add_rule("root-tool-call-multiple-with-end", tool_call_once + " " + tool_call_more + "* " + call_end);
-            builder.add_rule("root",
-                (form.scope_start.empty() ? "" : gbnf_format_literal(form.scope_start) + " ") +
-                tool_call_multiple_with_end  + "?" +
-                (form.scope_end.empty() ? "" : " " + gbnf_format_literal(form.scope_end))
-            );
-        });
-
-        // grammar trigger for tool call
-        data.grammar_triggers.push_back({ COMMON_GRAMMAR_TRIGGER_TYPE_WORD, form.scope_start + form.tool_start });
-    }
-}
-
-/**
- * Parse XML-Style tool call for given xml_tool_call_format. Return false for invalid syntax and get the position untouched.
- * Throws xml_toolcall_syntax_exception if there is invalid syntax and cannot recover the original status for common_chat_msg_parser.
- * form.scope_start, form.tool_sep and form.scope_end can be empty.
- */
-inline bool parse_xml_tool_calls(common_chat_msg_parser & builder, const struct xml_tool_call_format & form) {
-    GGML_ASSERT(!form.tool_start.empty());
-    GGML_ASSERT(!form.key_start.empty());
-    GGML_ASSERT(!form.key_val_sep.empty());
-    GGML_ASSERT(!form.val_end.empty());
-    GGML_ASSERT(!form.tool_end.empty());
-
-    // Helper to choose return false or throw error
-    constexpr auto return_error = [](common_chat_msg_parser & builder, auto &start_pos, const bool &recovery) {
-        LOG_DBG("Failed to parse XML-Style tool call at position: %s\n", gbnf_format_literal(builder.consume_rest().substr(0, 20)).c_str());
-        if (recovery) {
-            builder.move_to(start_pos);
-            return false;
-        } else throw xml_toolcall_syntax_exception("Tool call parsing failed with unrecoverable errors. Try using a grammar to constrain the model’s output.");
-    };
-    // Drop substring from needle to end from a JSON
-    constexpr auto partial_json = [](std::string &json_str, std::string_view needle = "XML_TOOL_CALL_PARTIAL_FLAG") {
-        auto pos = json_str.rfind(needle);
-        if (pos == std::string::npos) {
-            return false;
-        }
-        for (auto i = pos + needle.size(); i < json_str.size(); ++i) {
-            unsigned char ch = static_cast<unsigned char>(json_str[i]);
-            if (ch != '\'' && ch != '"' && ch != '}' && ch != ':' && !std::isspace(ch)) {
-                return false;
-            }
-        }
-        if (pos != 0 && json_str[pos - 1] == '"') {
-            --pos;
-        }
-        json_str.resize(pos);
-        return true;
-    };
-    // Helper to generate a partial argument JSON
-    constexpr auto gen_partial_json = [partial_json](auto set_partial_arg, auto &arguments, auto &builder, auto &function_name) {
-        auto rest = builder.consume_rest();
-        utf8_truncate_safe_resize(rest);
-        set_partial_arg(rest, "XML_TOOL_CALL_PARTIAL_FLAG");
-        auto tool_str = arguments.dump();
-        if (partial_json(tool_str)) {
-            if (builder.add_tool_call(function_name, "", tool_str)) {
-                return;
-            }
-        }
-        LOG_DBG("Failed to parse partial XML-Style tool call, fallback to non-partial: %s\n", tool_str.c_str());
-    };
-    // Helper to find a close (because there may be form.last_val_end or form.last_tool_end)
-    constexpr auto try_find_close = [](
-            common_chat_msg_parser & builder,
-            const std::string & end,
-            const std::optional<std::string> & alt_end,
-            const std::string & end_next,
-            const std::optional<std::string> & alt_end_next
-    ) {
-        auto saved_pos = builder.pos();
-        auto tc = builder.try_find_literal(end);
-        auto val_end_size = end.size();
-        if (alt_end) {
-            auto pos_1 = builder.pos();
-            builder.move_to(saved_pos);
-            auto tc2 = try_find_2_literal_splited_by_spaces(builder, *alt_end, end_next);
-            if (alt_end_next) {
-                builder.move_to(saved_pos);
-                auto tc3 = try_find_2_literal_splited_by_spaces(builder, *alt_end, *alt_end_next);
-                if (tc3 && (!tc2 || tc2->prelude.size() > tc3->prelude.size())) {
-                    tc2 = tc3;
-                }
-            }
-            if (tc2 && (!tc || tc->prelude.size() > tc2->prelude.size())) {
-                tc = tc2;
-                tc->groups[0].end = std::min(builder.input().size(), tc->groups[0].begin + alt_end->size());
-                builder.move_to(tc->groups[0].end);
-                val_end_size = alt_end->size();
-            } else {
-                builder.move_to(pos_1);
-            }
-        }
-        return std::make_pair(val_end_size, tc);
-    };
-    // Helper to find a val_end or last_val_end, returns matched pattern size
-    const auto try_find_val_end = [try_find_close, &builder, &form]() {
-        return try_find_close(builder, form.val_end, form.last_val_end, form.tool_end, form.last_tool_end);
-    };
-    // Helper to find a tool_end or last_tool_end, returns matched pattern size
-    const auto try_find_tool_end = [try_find_close, &builder, &form]() {
-        return try_find_close(builder, form.tool_end, form.last_tool_end, form.scope_end, std::nullopt);
-    };
-
-    bool recovery = true;
-    const auto start_pos = builder.pos();
-    if (!all_space(form.scope_start)) {
-        if (auto tc = builder.try_find_literal(form.scope_start)) {
-            if (all_space(tc->prelude)) {
-                if (form.scope_start.size() != tc->groups[0].end - tc->groups[0].begin)
-                    throw common_chat_msg_partial_exception("Partial literal: " + gbnf_format_literal(form.scope_start));
-            } else {
-                builder.move_to(start_pos);
-                return false;
-            }
-        } else return false;
-    }
-    while (auto tc = builder.try_find_literal(form.tool_start)) {
-        if (!all_space(tc->prelude)) {
-            LOG_DBG("XML-Style tool call: Expected %s, but found %s, trying to match next pattern\n",
-                    gbnf_format_literal(form.tool_start).c_str(),
-                    gbnf_format_literal(tc->prelude).c_str()
-            );
-            builder.move_to(tc->groups[0].begin - tc->prelude.size());
-            break;
-        }
-
-        // Find tool name
-        auto func_name = builder.try_find_literal(all_space(form.tool_sep) ? form.key_start : form.tool_sep);
-        if (!func_name) {
-            auto [sz, tc] = try_find_tool_end();
-            func_name = tc;
-        }
-        if (!func_name) {
-            // Partial tool name not supported
-            throw common_chat_msg_partial_exception("incomplete tool_call");
-        }
-        // If the model generate multiple tool call and the first tool call has no argument
-        if (func_name->prelude.find(form.tool_end) != std::string::npos || (form.last_tool_end ? func_name->prelude.find(*form.last_tool_end) != std::string::npos : false)) {
-            builder.move_to(func_name->groups[0].begin - func_name->prelude.size());
-            auto [sz, tc] = try_find_tool_end();
-            func_name = tc;
-        }
-
-        // Parse tool name
-        builder.move_to(all_space(form.tool_sep) ? func_name->groups[0].begin : func_name->groups[0].end);
-        std::string function_name = string_strip(func_name->prelude);
-        // Kimi-K2 uses functions.{{ tool_call['function']['name'] }}:{{ loop.index }} as function name
-        if (builder.syntax().format == COMMON_CHAT_FORMAT_KIMI_K2) {
-            if (string_starts_with(function_name, "functions.")) {
-                static const std::regex re(":\\d+$");
-                if (std::regex_search(function_name, re)) {
-                    function_name = function_name.substr(10, function_name.rfind(":") - 10);
-                }
-            }
-        }
-
-        // Argument JSON
-        json arguments = json::object();
-
-        // Helper to generate a partial argument JSON
-        const auto gen_partial_args = [&](auto set_partial_arg) {
-            gen_partial_json(set_partial_arg, arguments, builder, function_name);
-        };
-
-        // Parse all arg_key/arg_value pairs
-        while (auto tc = builder.try_find_literal(form.key_start)) {
-            if (!all_space(tc->prelude)) {
-                LOG_DBG("XML-Style tool call: Expected %s, but found %s, trying to match next pattern\n",
-                        gbnf_format_literal(form.key_start).c_str(),
-                        gbnf_format_literal(tc->prelude).c_str()
-                );
-                builder.move_to(tc->groups[0].begin - tc->prelude.size());
-                break;
-            }
-            if (tc->groups[0].end - tc->groups[0].begin != form.key_start.size()) {
-                auto tool_call_arg = arguments.dump();
-                if (tool_call_arg.size() != 0 && tool_call_arg[tool_call_arg.size() - 1] == '}') {
-                    tool_call_arg.resize(tool_call_arg.size() - 1);
-                }
-                builder.add_tool_call(function_name, "", tool_call_arg);
-                throw common_chat_msg_partial_exception("Partial literal: " + gbnf_format_literal(form.key_start));
-            }
-
-            // Parse arg_key
-            auto key_res = builder.try_find_literal(form.key_val_sep);
-            if (!key_res) {
-                gen_partial_args([&](auto &rest, auto &needle) {arguments[rest + needle] = "";});
-                throw common_chat_msg_partial_exception("Expected " + gbnf_format_literal(form.key_val_sep) + " after " + gbnf_format_literal(form.key_start));
-            }
-            if (key_res->groups[0].end - key_res->groups[0].begin != form.key_val_sep.size()) {
-                gen_partial_args([&](auto &, auto &needle) {arguments[key_res->prelude + needle] = "";});
-                throw common_chat_msg_partial_exception("Partial literal: " + gbnf_format_literal(form.key_val_sep));
-            }
-            auto &key = key_res->prelude;
-            recovery = false;
-
-            // Parse arg_value
-            if (form.key_val_sep2) {
-                if (auto tc = builder.try_find_literal(*form.key_val_sep2)) {
-                    if (!all_space(tc->prelude)) {
-                        LOG_DBG("Failed to parse XML-Style tool call: Unexcepted %s between %s and %s\n",
-                                gbnf_format_literal(tc->prelude).c_str(),
-                                gbnf_format_literal(form.key_val_sep).c_str(),
-                                gbnf_format_literal(*form.key_val_sep2).c_str()
-                        );
-                        return return_error(builder, start_pos, false);
-                    }
-                    if (tc->groups[0].end - tc->groups[0].begin != form.key_val_sep2->size()) {
-                        gen_partial_args([&](auto &, auto &needle) {arguments[key] = needle;});
-                        throw common_chat_msg_partial_exception("Partial literal: " + gbnf_format_literal(*form.key_val_sep2));
-                    }
-                } else {
-                    gen_partial_args([&](auto &, auto &needle) {arguments[key] = needle;});
-                    throw common_chat_msg_partial_exception("Expected " + gbnf_format_literal(*form.key_val_sep2) + " after " + gbnf_format_literal(form.key_val_sep));
-                }
-            }
-            auto val_start = builder.pos();
-
-            // Test if arg_val is a partial JSON
-            std::optional<common_json> value_json = std::nullopt;
-            if (!form.raw_argval || !*form.raw_argval) {
-                try { value_json = builder.try_consume_json(); }
-                catch (const std::runtime_error&) { builder.move_to(val_start); }
-                // TODO: Delete this when json_partial adds top-level support for null/true/false
-                if (builder.pos() == val_start) {
-                    const static std::regex number_regex(R"([0-9-][0-9]*(\.\d*)?([eE][+-]?\d*)?)");
-                    builder.consume_spaces();
-                    std::string_view sv = utf8_truncate_safe_view(builder.input());
-                    sv.remove_prefix(builder.pos());
-                    std::string rest = "a";
-                    if (sv.size() < 6) rest = sv;
-                    if (string_starts_with("null", rest) || string_starts_with("true", rest) || string_starts_with("false", rest) || std::regex_match(sv.begin(), sv.end(), number_regex)) {
-                        value_json = {123, {"123", "123"}};
-                        builder.consume_rest();
-                    } else {
-                        builder.move_to(val_start);
-                    }
-                }
-            }
-
-            // If it is a JSON and followed by </arg_value>, parse as json
-            // cannot support streaming because it may be a plain text starting with JSON
-            if (value_json) {
-                auto json_end = builder.pos();
-                builder.consume_spaces();
-                if (builder.pos() == builder.input().size()) {
-                    if (form.raw_argval && !*form.raw_argval && (value_json->json.is_string() || value_json->json.is_object() || value_json->json.is_array())) {
-                        arguments[key] = value_json->json;
-                        auto json_str = arguments.dump();
-                        if (!value_json->healing_marker.json_dump_marker.empty()) {
-                            GGML_ASSERT(std::string::npos != json_str.rfind(value_json->healing_marker.json_dump_marker));
-                            json_str.resize(json_str.rfind(value_json->healing_marker.json_dump_marker));
-                        } else {
-                            GGML_ASSERT(json_str.back() == '}');
-                            json_str.resize(json_str.size() - 1);
-                        }
-                        builder.add_tool_call(function_name, "", json_str);
-                    } else {
-                        gen_partial_args([&](auto &, auto &needle) {arguments[key] = needle;});
-                    }
-                    LOG_DBG("Possible JSON arg_value: %s\n", value_json->json.dump().c_str());
-                    throw common_chat_msg_partial_exception("JSON arg_value detected. Waiting for more tokens for validations.");
-                }
-                builder.move_to(json_end);
-                auto [val_end_size, tc] = try_find_val_end();
-                if (tc && all_space(tc->prelude) && value_json->healing_marker.marker.empty()) {
-                    if (tc->groups[0].end - tc->groups[0].begin != val_end_size) {
-                        gen_partial_args([&](auto &, auto &needle) {arguments[key] = needle;});
-                        LOG_DBG("Possible terminated JSON arg_value: %s\n", value_json->json.dump().c_str());
-                        throw common_chat_msg_partial_exception("Partial literal: " + gbnf_format_literal(form.val_end) + (form.last_val_end ? gbnf_format_literal(*form.last_val_end) : ""));
-                    } else arguments[key] = value_json->json;
-                } else builder.move_to(val_start);
-            }
-
-            // If not, parse as plain text
-            if (val_start == builder.pos()) {
-                if (auto [val_end_size, value_plain] = try_find_val_end(); value_plain) {
-                    auto &value_str = value_plain->prelude;
-                    if (form.trim_raw_argval) value_str = string_strip(value_str);
-                    if (value_plain->groups[0].end - value_plain->groups[0].begin != val_end_size) {
-                        gen_partial_args([&](auto &, auto &needle) {arguments[key] = value_str + needle;});
-                        throw common_chat_msg_partial_exception(
-                                "Expected " + gbnf_format_literal(form.val_end) +
-                                " after " + gbnf_format_literal(form.key_val_sep) +
-                                (form.key_val_sep2 ? " " + gbnf_format_literal(*form.key_val_sep2) : "")
-                        );
-                    }
-                    arguments[key] = value_str;
-                } else {
-                    if (form.trim_raw_argval) {
-                        gen_partial_args([&](auto &rest, auto &needle) {arguments[key] = string_strip(rest) + needle;});
-                    } else {
-                        gen_partial_args([&](auto &rest, auto &needle) {arguments[key] = rest + needle;});
-                    }
-                    throw common_chat_msg_partial_exception(
-                            "Expected " + gbnf_format_literal(form.val_end) +
-                            " after " + gbnf_format_literal(form.key_val_sep) +
-                            (form.key_val_sep2 ? " " + gbnf_format_literal(*form.key_val_sep2) : "")
-                    );
-                }
-            }
-        }
-
-        // Consume closing tag
-        if (auto [tool_end_size, tc] = try_find_tool_end(); tc) {
-            if (!all_space(tc->prelude)) {
-                LOG_DBG("Failed to parse XML-Style tool call: Expected %s, but found %s\n",
-                        gbnf_format_literal(form.tool_end).c_str(),
-                        gbnf_format_literal(tc->prelude).c_str()
-                );
-                return return_error(builder, start_pos, recovery);
-            }
-            if (tc->groups[0].end - tc->groups[0].begin == tool_end_size) {
-                // Add the parsed tool call
-                if (!builder.add_tool_call(function_name, "", arguments.dump())) {
-                    throw common_chat_msg_partial_exception("Failed to add XML-Style tool call");
-                }
-                recovery = false;
-                continue;
-            }
-        }
-
-        auto tool_call_arg = arguments.dump();
-        if (tool_call_arg.size() != 0 && tool_call_arg[tool_call_arg.size() - 1] == '}') {
-            tool_call_arg.resize(tool_call_arg.size() - 1);
-        }
-        builder.add_tool_call(function_name, "", tool_call_arg);
-        throw common_chat_msg_partial_exception("Expected " + gbnf_format_literal(form.tool_end) + " after " + gbnf_format_literal(form.val_end));
-    }
-    if (auto tc = builder.try_find_literal(form.scope_end)) {
-        if (!all_space(tc->prelude)) {
-            LOG_DBG("Failed to parse XML-Style tool call: Expected %s, but found %s\n",
-                    gbnf_format_literal(form.scope_end).c_str(),
-                    gbnf_format_literal(tc->prelude).c_str()
-            );
-            return return_error(builder, start_pos, recovery);
-        }
-    } else {
-        if (all_space(form.scope_end)) return true;
-        builder.consume_spaces();
-        if (builder.pos() == builder.input().size())
-            throw common_chat_msg_partial_exception("incomplete tool calls");
-        LOG_DBG("Failed to parse XML-Style tool call: Expected %s, but found %s\n",
-                gbnf_format_literal(form.scope_end).c_str(),
-                gbnf_format_literal(builder.consume_rest()).c_str()
-        );
-        return return_error(builder, start_pos, recovery);
-    }
-
-    return true;
-}
-
-/**
- * Parse XML-Style tool call for given xml_tool_call_format. Return false for invalid syntax and get the position untouched.
- * May cause std::runtime_error if there is invalid syntax because partial valid tool call is already sent out to client.
- * form.scope_start, form.tool_sep and form.scope_end can be empty.
- */
-bool common_chat_msg_parser::try_consume_xml_tool_calls(const struct xml_tool_call_format & form) {
-    auto pos = pos_;
-    auto tsize = result_.tool_calls.size();
-    try { return parse_xml_tool_calls(*this, form); }
-    catch (const xml_toolcall_syntax_exception&) {}
-    move_to(pos);
-    result_.tool_calls.resize(tsize);
-    return false;
-}
-
-/**
- * Parse content uses reasoning and XML-Style tool call
- * TODO: Note that form.allow_toolcall_in_think is not tested yet. If anyone confirms it works, this comment can be removed.
- */
-inline void parse_msg_with_xml_tool_calls(common_chat_msg_parser & builder, const struct xml_tool_call_format & form, const std::string & start_think = "<think>", const std::string & end_think = "</think>") {
-    constexpr auto rstrip = [](std::string &s) {
-        s.resize(std::distance(s.begin(), std::find_if(s.rbegin(), s.rend(), [](unsigned char ch) { return !std::isspace(ch); }).base()));
-    };
-    // Erase substring from l to r, along with additional spaces nearby
-    constexpr auto erase_spaces = [](auto &str, size_t l, size_t r) {
-        while (/* l > -1 && */ --l < str.size() && std::isspace(static_cast<unsigned char>(str[l])));
-        ++l;
-        while (++r < str.size() && std::isspace(static_cast<unsigned char>(str[r])));
-        if (l < r) str[l] = '\n';
-        if (l + 1 < r) str[l + 1] = '\n';
-        if (l != 0) l += 2;
-        str.erase(l, r - l);
-        return l;
-    };
-    constexpr auto trim_suffix = [](std::string &content, std::initializer_list<std::string_view> list) {
-        auto best_match = content.size();
-        for (auto pattern: list) {
-            if (pattern.size() == 0) continue;
-            for (auto match_idx = content.size() - std::min(pattern.size(), content.size()); content.size() > match_idx; match_idx++) {
-                auto match_len = content.size() - match_idx;
-                if (content.compare(match_idx, match_len, pattern.data(), match_len) == 0 && best_match > match_idx) {
-                    best_match = match_idx;
-                }
-            }
-        }
-        if (content.size() > best_match) {
-            content.erase(best_match);
-        }
-    };
-    const auto trim_potential_partial_word = [&start_think, &end_think, &form, trim_suffix](std::string &content) {
-        return trim_suffix(content, {
-            start_think, end_think, form.scope_start, form.tool_start, form.tool_sep, form.key_start,
-            form.key_val_sep, form.key_val_sep2 ? form.key_val_sep2->c_str() : "",
-            form.val_end, form.last_val_end ? form.last_val_end->c_str() : "",
-            form.tool_end, form.last_tool_end ? form.last_tool_end->c_str() : "",
-            form.scope_end
-        });
-    };
-
-
-    // Trim leading spaces without affecting keyword matching
-    static const common_regex spaces_regex("\\s*");
-    {
-        auto tc = builder.consume_regex(spaces_regex);
-        auto spaces = builder.str(tc.groups[0]);
-        auto s1 = spaces.size();
-        trim_potential_partial_word(spaces);
-        auto s2 = spaces.size();
-        builder.move_to(builder.pos() - (s1 - s2));
-    }
-
-    // Parse content
-    bool reasoning_unclosed = builder.syntax().thinking_forced_open;
-    std::string unclosed_reasoning_content("");
-    for (;;) {
-        auto tc = try_find_2_literal_splited_by_spaces(builder, form.scope_start, form.tool_start);
-        std::string content;
-        std::string tool_call_start;
-
-        if (tc) {
-            content = std::move(tc->prelude);
-            tool_call_start = builder.str(tc->groups[0]);
-            LOG_DBG("Matched tool start: %s\n", gbnf_format_literal(tool_call_start).c_str());
-        } else {
-            content = builder.consume_rest();
-            utf8_truncate_safe_resize(content);
-        }
-
-        // Handle unclosed think block
-        if (reasoning_unclosed) {
-            if (auto pos = content.find(end_think); pos == std::string::npos && builder.pos() != builder.input().size()) {
-                unclosed_reasoning_content += content;
-                if (!(form.allow_toolcall_in_think && tc)) {
-                    unclosed_reasoning_content += tool_call_start;
-                    continue;
-                }
-            } else {
-                reasoning_unclosed = false;
-                std::string reasoning_content;
-                if (pos == std::string::npos) {
-                    reasoning_content = std::move(content);
-                } else {
-                    reasoning_content = content.substr(0, pos);
-                    content.erase(0, pos + end_think.size());
-                }
-                if (builder.pos() == builder.input().size() && all_space(content)) {
-                    rstrip(reasoning_content);
-                    trim_potential_partial_word(reasoning_content);
-                    rstrip(reasoning_content);
-                    if (reasoning_content.empty()) {
-                        rstrip(unclosed_reasoning_content);
-                        trim_potential_partial_word(unclosed_reasoning_content);
-                        rstrip(unclosed_reasoning_content);
-                        if (unclosed_reasoning_content.empty()) continue;
-                    }
-                }
-                if (builder.syntax().reasoning_format == COMMON_REASONING_FORMAT_NONE || builder.syntax().reasoning_in_content) {
-                    builder.add_content(start_think);
-                    builder.add_content(unclosed_reasoning_content);
-                    builder.add_content(reasoning_content);
-                    if (builder.pos() != builder.input().size() || !all_space(content))
-                        builder.add_content(end_think);
-                } else {
-                    builder.add_reasoning_content(unclosed_reasoning_content);
-                    builder.add_reasoning_content(reasoning_content);
-                }
-                unclosed_reasoning_content.clear();
-            }
-        }
-
-        // Handle multiple think block
-        bool toolcall_in_think = false;
-        for (auto think_start = content.find(start_think); think_start != std::string::npos; think_start = content.find(start_think, think_start)) {
-            if (auto think_end = content.find(end_think, think_start + start_think.size()); think_end != std::string::npos) {
-                if (builder.syntax().reasoning_format != COMMON_REASONING_FORMAT_NONE && !builder.syntax().reasoning_in_content) {
-                    auto reasoning_content = content.substr(think_start + start_think.size(), think_end - think_start - start_think.size());
-                    builder.add_reasoning_content(reasoning_content);
-                    think_start = erase_spaces(content, think_start, think_end + end_think.size() - 1);
-                } else {
-                    think_start = think_end + end_think.size() - 1;
-                }
-            } else {
-                // This <tool_call> start is in thinking block, skip this tool call
-                // This <tool_call> start is in thinking block
-                if (form.allow_toolcall_in_think) {
-                    unclosed_reasoning_content = content.substr(think_start + start_think.size());
-                } else {
-                    unclosed_reasoning_content = content.substr(think_start + start_think.size()) + tool_call_start;
-                }
-                reasoning_unclosed = true;
-                content.resize(think_start);
-                toolcall_in_think = true;
-            }
-        }
-
-        if (builder.syntax().reasoning_format != COMMON_REASONING_FORMAT_NONE && !builder.syntax().reasoning_in_content) {
-            rstrip(content);
-            // Handle unclosed </think> token from content: delete all </think> token
-            if (auto pos = content.rfind(end_think); pos != std::string::npos) {
-                while (pos != std::string::npos) {
-                    pos = erase_spaces(content, pos, pos + end_think.size() - 1);
-                    pos = content.rfind(end_think, pos);
-                }
-            }
-            // Strip if needed
-            if (content.size() > 0 && std::isspace(static_cast<unsigned char>(content[0]))) {
-                content = string_strip(content);
-            }
-        }
-
-        // remove potential partial suffix
-        if (builder.pos() == builder.input().size()) {
-            if (unclosed_reasoning_content.empty()) {
-                rstrip(content);
-                trim_potential_partial_word(content);
-                rstrip(content);
-            } else {
-                rstrip(unclosed_reasoning_content);
-                trim_potential_partial_word(unclosed_reasoning_content);
-                rstrip(unclosed_reasoning_content);
-            }
-        }
-
-        // consume unclosed_reasoning_content if allow_toolcall_in_think is set
-        if (form.allow_toolcall_in_think && !unclosed_reasoning_content.empty()) {
-            if (builder.syntax().reasoning_format != COMMON_REASONING_FORMAT_NONE && !builder.syntax().reasoning_in_content) {
-                builder.add_reasoning_content(unclosed_reasoning_content);
-            } else {
-                if (content.empty()) {
-                    content = start_think + unclosed_reasoning_content;
-                } else {
-                    content += "\n\n" + start_think;
-                    content += unclosed_reasoning_content;
-                }
-            }
-            unclosed_reasoning_content.clear();
-        }
-
-        // Add content
-        if (!content.empty()) {
-            // If there are multiple content blocks
-            if (builder.syntax().reasoning_format != COMMON_REASONING_FORMAT_NONE && !builder.syntax().reasoning_in_content && builder.result().content.size() != 0) {
-                builder.add_content("\n\n");
-            }
-            builder.add_content(content);
-        }
-
-        // This <tool_call> start is in thinking block and toolcall_in_think not set, skip this tool call
-        if (toolcall_in_think && !form.allow_toolcall_in_think) {
-            continue;
-        }
-
-        // There is no tool call and all content is parsed
-        if (!tc) {
-            GGML_ASSERT(builder.pos() == builder.input().size());
-            GGML_ASSERT(unclosed_reasoning_content.empty());
-            if (!form.allow_toolcall_in_think) GGML_ASSERT(!reasoning_unclosed);
-            break;
-        }
-
-        builder.move_to(tc->groups[0].begin);
-        if (builder.try_consume_xml_tool_calls(form)) {
-            auto end_of_tool = builder.pos();
-            builder.consume_spaces();
-            if (builder.pos() != builder.input().size()) {
-                builder.move_to(end_of_tool);
-                if (!builder.result().content.empty()) {
-                    builder.add_content("\n\n");
-                }
-            }
-        } else {
-            static const common_regex next_char_regex(".");
-            auto c = builder.str(builder.consume_regex(next_char_regex).groups[0]);
-            rstrip(c);
-            builder.add_content(c);
-        }
-    }
-}
-
-/**
- * Parse content uses reasoning and XML-Style tool call
- */
-void common_chat_msg_parser::consume_reasoning_with_xml_tool_calls(const struct xml_tool_call_format & form, const std::string & start_think, const std::string & end_think) {
-    parse_msg_with_xml_tool_calls(*this, form, start_think, end_think);
-}
diff --git a/common/chat-parser-xml-toolcall.h b/common/chat-parser-xml-toolcall.h
deleted file mode 100644
index b309fb6670..0000000000
--- a/common/chat-parser-xml-toolcall.h
+++ /dev/null
@@ -1,45 +0,0 @@
-#pragma once
-
-#include "chat.h"
-
-#include <nlohmann/json.hpp>
-
-#include <optional>
-#include <string>
-#include <vector>
-
-
-// Sample config:
-// MiniMax-M2 (left): <minimax:tool_call>\n<invoke name="tool-name">\n<parameter name="key">value</parameter>\n...</invoke>\n...</minimax:tool_call>
-// GLM 4.5   (right): <tool_call>function_name\n<arg_key>key</arg_key>\n<arg_value>value</arg_value>\n</tool_call>
-struct xml_tool_call_format {
-    std::string scope_start; // <minimax:tool_call>\n  // \n                      // can be empty
-    std::string tool_start;  // <invoke name=\"        // <tool_call>
-    std::string tool_sep;    // \">\n                  // \n                      // can be empty only for parse_xml_tool_calls
-    std::string key_start;   // <parameter name=\"     // <arg_key>
-    std::string key_val_sep; // \">                    // </arg_key>\n<arg_value>
-    std::string val_end;     // </parameter>\n         // </arg_value>\n
-    std::string tool_end;    // </invoke>\n            // </tool_call>\n
-    std::string scope_end;   // </minimax:tool_call>   //                         // can be empty
-    // Set this if there can be dynamic spaces inside key_val_sep.
-    // e.g. key_val_sep=</arg_key> key_val_sep2=<arg_value> for GLM4.5
-    std::optional<std::string> key_val_sep2 = std::nullopt;
-    // Set true if argval should only be raw string. e.g. Hello "world" hi
-    // Set false if argval should only be json string. e.g. "Hello \"world\" hi"
-    // Defaults to std::nullopt, both will be allowed.
-    std::optional<bool> raw_argval = std::nullopt;
-    std::optional<std::string> last_val_end = std::nullopt;
-    std::optional<std::string> last_tool_end = std::nullopt;
-    bool trim_raw_argval = false;
-    bool allow_toolcall_in_think = false;
-};
-
-// make a GBNF that accept any strings except those containing any of the forbidden strings.
-std::string make_gbnf_excluding(std::vector<std::string> forbids);
-
-/**
- * Build grammar for xml-style tool call
- * form.scope_start and form.scope_end can be empty.
- * Requires data.format for model-specific hacks.
- */
-void build_grammar_xml_tool_call(common_chat_params & data, const nlohmann::ordered_json & tools, const struct xml_tool_call_format & form);
diff --git a/common/chat-parser.cpp b/common/chat-parser.cpp
deleted file mode 100644
index 29819e48d3..0000000000
--- a/common/chat-parser.cpp
+++ /dev/null
@@ -1,1669 +0,0 @@
-#include "chat-parser.h"
-#include "chat-peg-parser.h"
-#include "common.h"
-#include "log.h"
-#include "peg-parser.h"
-#include "regex-partial.h"
-
-#include <algorithm>
-#include <cctype>
-#include <optional>
-#include <stdexcept>
-#include <string>
-#include <string_view>
-#include <vector>
-
-using json = nlohmann::ordered_json;
-
-static void parse_prefixed_json_tool_call_array(common_chat_msg_parser & builder,
-                                                const common_regex &     prefix,
-                                                size_t                   rstrip_prefix = 0) {
-    static const std::vector<std::vector<std::string>> args_paths = { { "arguments" } };
-    if (auto res = builder.try_find_regex(prefix)) {
-        builder.move_back(rstrip_prefix);
-        auto tool_calls = builder.consume_json_with_dumped_args(args_paths);
-        if (!builder.add_tool_calls(tool_calls.value) || tool_calls.is_partial) {
-            throw common_chat_msg_partial_exception("incomplete tool call array");
-        }
-    } else {
-        builder.add_content(builder.consume_rest());
-    }
-}
-
-static std::string wrap_code_as_arguments(common_chat_msg_parser & builder, const std::string & code) {
-    std::string arguments;
-    if (builder.is_partial()) {
-        arguments = (json{
-                         { "code", code + builder.healing_marker() }
-        })
-                        .dump();
-        auto idx = arguments.find(builder.healing_marker());
-        if (idx != std::string::npos) {
-            arguments.resize(idx);
-        }
-    } else {
-        arguments = (json{
-                         { "code", code }
-        })
-                        .dump();
-    }
-    return arguments;
-}
-
-/**
- * Takes a prefix regex that must have 1 group to capture the function name, a closing suffix, and expects json parameters in between.
- * Aggregates the prefix, suffix and in-between text into the content.
- */
-static void parse_json_tool_calls(
-    common_chat_msg_parser &            builder,
-    const std::optional<common_regex> & block_open,
-    const std::optional<common_regex> & function_regex_start_only,
-    const std::optional<common_regex> & function_regex,
-    const common_regex &                close_regex,
-    const std::optional<common_regex> & block_close,
-    bool                                allow_raw_python = false,
-    const std::function<std::string(const common_chat_msg_parser::find_regex_result & fres)> & get_function_name =
-        nullptr) {
-    auto parse_tool_calls = [&]() {
-        size_t from  = std::string::npos;
-        auto   first = true;
-        while (true) {
-            auto start_pos = builder.pos();
-            auto res = function_regex_start_only && first ? builder.try_consume_regex(*function_regex_start_only) :
-                       function_regex                     ? builder.try_find_regex(*function_regex, from) :
-                                                            std::nullopt;
-
-            if (res) {
-                std::string name;
-                if (get_function_name) {
-                    name = get_function_name(*res);
-                } else {
-                    GGML_ASSERT(res->groups.size() == 2);
-                    name = builder.str(res->groups[1]);
-                }
-                first = false;
-                if (name.empty()) {
-                    // get_function_name signalled us that we should skip this match and treat it as content.
-                    from = res->groups[0].begin + 1;
-                    continue;
-                }
-                from = std::string::npos;
-
-                auto maybe_raw_python = name == "python" && allow_raw_python;
-                if (builder.input()[builder.pos()] == '{' || !maybe_raw_python) {
-                    if (auto arguments = builder.try_consume_json_with_dumped_args({ {} })) {
-                        if (!builder.add_tool_call(name, "", arguments->value) || arguments->is_partial) {
-                            throw common_chat_msg_partial_exception("incomplete tool call");
-                        }
-                        builder.consume_regex(close_regex);
-                    }
-                    continue;
-                }
-                if (maybe_raw_python) {
-                    auto arguments = wrap_code_as_arguments(builder, builder.consume_rest());
-                    if (!builder.add_tool_call(name, "", arguments)) {
-                        throw common_chat_msg_partial_exception("incomplete tool call");
-                    }
-                    return;
-                }
-                throw common_chat_msg_partial_exception("incomplete tool call");
-            } else {
-                builder.move_to(start_pos);
-            }
-            break;
-        }
-        if (block_close) {
-            builder.consume_regex(*block_close);
-        }
-        builder.consume_spaces();
-        builder.add_content(builder.consume_rest());
-    };
-    if (block_open) {
-        if (auto res = builder.try_find_regex(*block_open)) {
-            parse_tool_calls();
-        } else {
-            builder.add_content(builder.consume_rest());
-        }
-    } else {
-        parse_tool_calls();
-    }
-}
-
-common_chat_msg_parser::common_chat_msg_parser(const std::string & input, bool is_partial, const common_chat_parser_params & syntax)
-    : input_(input), is_partial_(is_partial), syntax_(syntax)
-{
-    result_.role = "assistant";
-
-    while (true) {
-        std::string id = std::to_string(std::rand());
-        if (input.find(id) == std::string::npos) {
-            healing_marker_ = id;
-            break;
-        }
-    }
-}
-
-std::string common_chat_msg_parser::str(const common_string_range & rng) const {
-    GGML_ASSERT(rng.begin <= rng.end);
-    return input_.substr(rng.begin, rng.end - rng.begin);
-}
-
-void common_chat_msg_parser::add_content(const std::string &content) {
-    result_.content += content;
-}
-
-void common_chat_msg_parser::add_reasoning_content(const std::string &reasoning_content) {
-    result_.reasoning_content += reasoning_content;
-}
-
-bool common_chat_msg_parser::add_tool_call(const std::string & name, const std::string & id, const std::string & arguments) {
-    if (name.empty()) {
-        return false;
-    }
-
-    common_chat_tool_call tool_call;
-    tool_call.name = name;
-    tool_call.arguments = arguments;
-    tool_call.id = id;
-
-    // LOG_DBG("Tool call arguments:\n\traw: %s\n\tresult: %s\n", arguments.c_str(), tool_call.arguments.c_str());
-    result_.tool_calls.emplace_back(tool_call);
-
-    return true;
-}
-bool common_chat_msg_parser::add_tool_call(const json & tool_call) {
-    std::string name = tool_call.contains("name") ? tool_call.at("name") : "";
-    std::string id = tool_call.contains("id") ? tool_call.at("id") : "";
-    std::string arguments = "";
-    if (tool_call.contains("arguments")) {
-        if (tool_call.at("arguments").is_object()) {
-            arguments = tool_call.at("arguments").dump();
-        } else {
-            arguments = tool_call.at("arguments");
-        }
-    }
-
-    return add_tool_call(name, id, arguments);
-}
-
-bool common_chat_msg_parser::add_tool_calls(const json & arr) {
-    for (const auto & item : arr) {
-        if (!add_tool_call(item)) {
-            return false;
-        }
-    }
-    return true;
-}
-
-bool common_chat_msg_parser::add_tool_call_short_form(const json & tool_call) {
-    if (!tool_call.is_object() || tool_call.size() != 1) {
-        return false;
-    }
-
-    // Get the tool name (the single key in the object)
-    auto it = tool_call.begin();
-    std::string name = it.key();
-
-    if (name.empty()) {
-        return false;
-    }
-
-    // Get the arguments (the nested object)
-    const json & args_json = it.value();
-    std::string arguments = "";
-
-    if (args_json.is_object()) {
-        arguments = args_json.dump();
-    } else if (args_json.is_string()) {
-        arguments = args_json;
-    } else if (!args_json.is_null()) {
-        // For other types, convert to string representation
-        arguments = args_json.dump();
-    }
-
-    return add_tool_call(name, "", arguments);
-}
-void common_chat_msg_parser::finish() {
-    if (!is_partial_ && pos_ != input_.size()) {
-        throw std::runtime_error("Unexpected content at end of input");// + input_.substr(pos_));
-    }
-}
-
-bool common_chat_msg_parser::consume_spaces() {
-    const auto length = input_.size();
-    auto consumed = false;
-    while (pos_ < length && std::isspace(input_[pos_])) {
-        ++pos_;
-        consumed = true;
-    }
-    return consumed;
-}
-
-bool common_chat_msg_parser::try_consume_literal(const std::string & literal) {
-    auto pos = pos_;
-    for (auto i = 0u; i < literal.size(); ++i) {
-        if (pos >= input_.size()) {
-            return false;
-        }
-        if (input_[pos] != literal[i]) {
-            return false;
-        }
-        ++pos;
-    }
-    pos_ = pos;
-    return true;
-}
-
-std::optional<common_chat_msg_parser::find_regex_result>  common_chat_msg_parser::try_find_literal(const std::string & literal) {
-    auto idx = input_.find(literal, pos_);
-    if (idx != std::string::npos) {
-        find_regex_result res;
-        res.prelude = input_.substr(pos_, idx - pos_);
-        auto end = idx + literal.size();
-        res.groups.emplace_back(common_string_range{idx, end});
-        move_to(end);
-        return res;
-    }
-    if (is_partial_) {
-        idx = string_find_partial_stop(input_, literal);
-        if (idx != std::string::npos && idx >= pos_) {
-            find_regex_result res;
-            res.prelude = input_.substr(pos_, idx - pos_);
-            auto end = input_.size();
-            res.groups.emplace_back(common_string_range{idx, end});
-            move_to(end);
-            return res;
-        }
-    }
-    return std::nullopt;
-}
-
-void common_chat_msg_parser::consume_literal(const std::string & literal) {
-    if (!try_consume_literal(literal)) {
-        throw common_chat_msg_partial_exception(literal);
-    }
-}
-
-bool common_chat_msg_parser::try_parse_reasoning(const std::string & start_think, const std::string & end_think) {
-    std::string pending_reasoning_prefix;
-
-    if (syntax_.reasoning_format == COMMON_REASONING_FORMAT_NONE) {
-        return false;
-    }
-
-    auto set_reasoning_prefix = [&](size_t prefix_pos) {
-        if (!syntax_.thinking_forced_open || syntax_.reasoning_in_content) {
-            return;
-        }
-        if (prefix_pos + start_think.size() > input_.size()) {
-            pending_reasoning_prefix.clear();
-            return;
-        }
-        // Capture the exact literal that opened the reasoning section so we can
-        // surface it back to callers. This ensures formats that force the
-        // reasoning tag open (e.g. DeepSeek R1) retain their original prefix
-        // instead of dropping it during parsing.
-        pending_reasoning_prefix = input_.substr(prefix_pos, start_think.size());
-    };
-
-    auto handle_reasoning = [&](const std::string & reasoning, bool closed) {
-        auto stripped_reasoning = string_strip(reasoning);
-        if (stripped_reasoning.empty()) {
-            return;
-        }
-        if (syntax_.reasoning_in_content) {
-            add_content(syntax_.reasoning_format == COMMON_REASONING_FORMAT_DEEPSEEK ? "<think>" : start_think);
-            add_content(stripped_reasoning);
-            if (closed) {
-                add_content(syntax_.reasoning_format == COMMON_REASONING_FORMAT_DEEPSEEK ? "</think>" : end_think);
-            }
-        } else {
-            if (!pending_reasoning_prefix.empty()) {
-                add_reasoning_content(pending_reasoning_prefix);
-                pending_reasoning_prefix.clear();
-            }
-            add_reasoning_content(stripped_reasoning);
-        }
-    };
-
-    const size_t saved_pos = pos_;
-    const size_t saved_content_size = result_.content.size();
-    const size_t saved_reasoning_size = result_.reasoning_content.size();
-
-    auto restore_state = [&]() {
-        move_to(saved_pos);
-        result_.content.resize(saved_content_size);
-        result_.reasoning_content.resize(saved_reasoning_size);
-    };
-
-    // Allow leading whitespace to be preserved as content when reasoning is present at the start
-    size_t cursor = pos_;
-    size_t whitespace_end = cursor;
-    while (whitespace_end < input_.size() && std::isspace(static_cast<unsigned char>(input_[whitespace_end]))) {
-        ++whitespace_end;
-    }
-
-    if (whitespace_end >= input_.size()) {
-        restore_state();
-        if (syntax_.thinking_forced_open) {
-            auto rest = input_.substr(saved_pos);
-            if (!rest.empty()) {
-                handle_reasoning(rest, /* closed */ !is_partial());
-            }
-            move_to(input_.size());
-            return true;
-        }
-        return false;
-    }
-
-    cursor = whitespace_end;
-    const size_t remaining = input_.size() - cursor;
-    const size_t start_prefix = std::min(start_think.size(), remaining);
-    const bool has_start_tag = input_.compare(cursor, start_prefix, start_think, 0, start_prefix) == 0;
-
-    if (has_start_tag && start_prefix < start_think.size()) {
-        move_to(input_.size());
-        return true;
-    }
-
-    if (has_start_tag) {
-        if (whitespace_end > pos_) {
-            add_content(input_.substr(pos_, whitespace_end - pos_));
-        }
-        set_reasoning_prefix(cursor);
-        cursor += start_think.size();
-    } else if (syntax_.thinking_forced_open) {
-        cursor = whitespace_end;
-    } else {
-        restore_state();
-        return false;
-    }
-    while (true) {
-        if (cursor >= input_.size()) {
-            move_to(input_.size());
-            return true;
-        }
-
-        size_t end_pos = input_.find(end_think, cursor);
-        if (end_pos == std::string::npos) {
-            std::string_view remaining_view(input_.data() + cursor, input_.size() - cursor);
-            size_t partial_off = string_find_partial_stop(remaining_view, end_think);
-            size_t reasoning_end = partial_off == std::string::npos ? input_.size() : cursor + partial_off;
-            if (reasoning_end > cursor) {
-                handle_reasoning(input_.substr(cursor, reasoning_end - cursor), /* closed */ partial_off == std::string::npos && !is_partial());
-            }
-            move_to(input_.size());
-            return true;
-        }
-
-        if (end_pos > cursor) {
-            handle_reasoning(input_.substr(cursor, end_pos - cursor), /* closed */ true);
-        } else {
-            handle_reasoning("", /* closed */ true);
-        }
-
-        cursor = end_pos + end_think.size();
-
-        while (cursor < input_.size() && std::isspace(static_cast<unsigned char>(input_[cursor]))) {
-            ++cursor;
-        }
-
-        const size_t next_remaining = input_.size() - cursor;
-        if (next_remaining == 0) {
-            move_to(cursor);
-            return true;
-        }
-
-        const size_t next_prefix = std::min(start_think.size(), next_remaining);
-        if (input_.compare(cursor, next_prefix, start_think, 0, next_prefix) == 0) {
-            if (next_prefix < start_think.size()) {
-                move_to(input_.size());
-                return true;
-            }
-            set_reasoning_prefix(cursor);
-            cursor += start_think.size();
-            continue;
-        }
-
-        move_to(cursor);
-        return true;
-    }
-}
-
-std::string common_chat_msg_parser::consume_rest() {
-    auto rest = input_.substr(pos_);
-    pos_ = input_.size();
-    return rest;
-}
-
-// Tries to find the regex, consumes it (pos right after it) and gives the prelude (right before it) and the groups to the callback.
-std::optional<common_chat_msg_parser::find_regex_result> common_chat_msg_parser::try_find_regex(const common_regex & regex, size_t from, bool add_prelude_to_content) {
-    auto m = regex.search(input_, from == std::string::npos ? pos_ : from);
-    if (m.type == COMMON_REGEX_MATCH_TYPE_NONE) {
-        return std::nullopt;
-    }
-    auto prelude = input_.substr(pos_, m.groups[0].begin - pos_);
-    pos_ = m.groups[0].end;
-
-    if (add_prelude_to_content) {
-        add_content(prelude);
-    }
-    if (m.type == COMMON_REGEX_MATCH_TYPE_PARTIAL) {
-        if (is_partial()) {
-            throw common_chat_msg_partial_exception(regex.str());
-        }
-        return std::nullopt;
-    }
-    return find_regex_result{prelude, m.groups};
-}
-
-common_chat_msg_parser::find_regex_result common_chat_msg_parser::consume_regex(const common_regex & regex) {
-    if (auto result = try_consume_regex(regex)) {
-        return *result;
-    }
-    throw common_chat_msg_partial_exception(regex.str());
-}
-
-std::optional<common_chat_msg_parser::find_regex_result> common_chat_msg_parser::try_consume_regex(const common_regex & regex) {
-    auto m = regex.search(input_, pos_);
-    if (m.type == COMMON_REGEX_MATCH_TYPE_NONE) {
-        return std::nullopt;
-    }
-    if (m.type == COMMON_REGEX_MATCH_TYPE_PARTIAL) {
-        if (is_partial()) {
-            throw common_chat_msg_partial_exception(regex.str());
-        }
-        return std::nullopt;
-    }
-    if (m.groups[0].begin != pos_) {
-        // Didn't match at the current position.
-        return std::nullopt;
-    }
-    pos_ = m.groups[0].end;
-
-    return find_regex_result {
-        /* .prelude = */ "",
-        m.groups,
-    };
-}
-
-std::optional<common_json> common_chat_msg_parser::try_consume_json() {
-    auto it = input_.cbegin() + pos_;
-    const auto end = input_.cend();
-    common_json result;
-    if (!common_json_parse(it, end, healing_marker_, result)) {
-        return std::nullopt;
-    }
-    pos_ = std::distance(input_.cbegin(), it);
-    if (result.healing_marker.marker.empty()) {
-        // No healing marker, just return the parsed json
-        return result;
-    }
-    if (!is_partial()) {
-        throw common_chat_msg_partial_exception("JSON");
-    }
-    return result;
-}
-
-common_json common_chat_msg_parser::consume_json() {
-    if (auto result = try_consume_json()) {
-        return *result;
-    }
-    throw common_chat_msg_partial_exception("JSON");
-}
-
-common_chat_msg_parser::consume_json_result common_chat_msg_parser::consume_json_with_dumped_args(
-    const std::vector<std::vector<std::string>> & args_paths,
-    const std::vector<std::vector<std::string>> & content_paths
-) {
-    if (auto result = try_consume_json_with_dumped_args(args_paths, content_paths)) {
-        return *result;
-    }
-    throw common_chat_msg_partial_exception("JSON");
-}
-
-std::optional<common_chat_msg_parser::consume_json_result> common_chat_msg_parser::try_consume_json_with_dumped_args(
-    const std::vector<std::vector<std::string>> & args_paths,
-    const std::vector<std::vector<std::string>> & content_paths
-) {
-    auto partial = try_consume_json();
-    if (!partial) {
-        return std::nullopt;
-    }
-    auto is_arguments_path = [&](const std::vector<std::string> & path) {
-        return std::find(args_paths.begin(), args_paths.end(), path) != args_paths.end();
-    };
-    auto is_content_path = [&](const std::vector<std::string> & path) {
-        return std::find(content_paths.begin(), content_paths.end(), path) != content_paths.end();
-    };
-
-    if (partial->healing_marker.marker.empty()) {
-        if (args_paths.empty()) {
-            // No arguments to dump, and JSON was parsed fully.
-            return consume_json_result {
-                partial->json,
-                /* .is_partial = */ false,
-            };
-        }
-        if (is_arguments_path({})) {
-            // Entire JSON is the arguments and was parsed fully.
-            return consume_json_result {
-                partial->json.dump(/* indent */ -1, /* indent_char */ ' ', /* ensure_ascii */ true),
-                /* .is_partial = */ false,
-            };
-        }
-    }
-
-    LOG_DBG("Parsed partial JSON: %s (json_healing_marker: %s)\n", partial->json.dump().c_str(), partial->healing_marker.json_dump_marker.c_str());
-
-    auto found_healing_marker = false;
-    std::vector<std::string> path;
-    std::function<json(const json &)> remove_unsupported_healings_and_dump_args = [&](const json & j) -> json {
-        if (is_arguments_path(path)) {
-            auto arguments = j.dump(/* indent */ -1, /* indent_char */ ' ', /* ensure_ascii */ true);
-            if (is_partial() && !partial->healing_marker.marker.empty()) {
-                auto idx = arguments.find(partial->healing_marker.json_dump_marker);
-                if (idx != std::string::npos) {
-                    arguments.resize(idx);
-                    found_healing_marker = true;
-                }
-                if (arguments == "\"") {
-                    // This happens because of completing `:"$magic` after `"arguments"`
-                    arguments = "";
-                }
-            }
-            return arguments;
-        }
-        if (is_content_path(path)) {
-            if (!j.is_string()) {
-                throw std::runtime_error("Content path must be a string");
-            }
-            std::string str = j;
-            auto idx = str.find(partial->healing_marker.marker); // not using json_dump_marker as we're inside a string
-            if (idx != std::string::npos) {
-                str.resize(idx);
-                found_healing_marker = true;
-            }
-            return str;
-        }
-        if (j.is_object()) {
-            auto obj = json::object();
-            for (const auto & p : j.items()) {
-                const auto & key = p.key();
-                const auto & value = p.value();
-                const std::string key_str = key; // NOLINT
-                auto idx = key_str.find(healing_marker_);
-                if (idx != std::string::npos) {
-                    found_healing_marker = true;
-                    break;
-                }
-                path.push_back(key_str);
-                if (value.is_string()) {
-                    const std::string value_str = value;
-                    if (value_str.find(healing_marker_) != std::string::npos) {
-                        found_healing_marker = true;
-                        if (is_content_path(path)) {
-                            if (partial->healing_marker.marker == partial->healing_marker.json_dump_marker) {
-                                // The healing occurred inside the string: good. Otherwise we just ditch the entire key/value pair.
-                                obj[key] = remove_unsupported_healings_and_dump_args(value);
-                            }
-                        }
-                        break;
-                    }
-                    obj[key] = value;
-                } else {
-                    obj[key] = remove_unsupported_healings_and_dump_args(value);
-                }
-                path.pop_back();
-            }
-            return obj;
-        }
-        if (j.is_array()) {
-            auto arr = json::array();
-            for (const auto & value : j) {
-                if (value.is_string()) {
-                    std::string str = value;
-                    auto idx = str.find(healing_marker_);
-                    if (idx != std::string::npos) {
-                        // Don't heal array values that aren't in the arguments.
-                        found_healing_marker = true;
-                        break;
-                    }
-                }
-                arr.push_back(remove_unsupported_healings_and_dump_args(value));
-            }
-            return arr;
-        }
-        return j;
-    };
-
-    auto cleaned = remove_unsupported_healings_and_dump_args(partial->json);
-    LOG_DBG("Cleaned up JSON %s to %s (json_healing_marker : '%s')\n", partial->json.dump().c_str(), cleaned.dump().c_str(), partial->healing_marker.json_dump_marker.c_str());
-    return consume_json_result {
-        cleaned,
-        /* .is_partial = */ found_healing_marker,
-    };
-}
-
-void common_chat_msg_parser::clear_tools() {
-    result_.tool_calls.clear();
-}
-
-/**
- * All common_chat_parse_* moved from chat.cpp to chat-parser.cpp below
- * to reduce incremental compile time for parser changes.
- */
-static void common_chat_parse_generic(common_chat_msg_parser & builder) {
-    if (!builder.syntax().parse_tool_calls) {
-        builder.add_content(builder.consume_rest());
-        return;
-    }
-    static const std::vector<std::vector<std::string>> content_paths = {
-        {"response"},
-    };
-    static const std::vector<std::vector<std::string>> args_paths = {
-        {"tool_call", "arguments"},
-        {"tool_calls", "arguments"},
-    };
-    auto data = builder.consume_json_with_dumped_args(args_paths, content_paths);
-    if (data.value.contains("tool_calls")) {
-        if (!builder.add_tool_calls(data.value.at("tool_calls")) || data.is_partial) {
-            throw common_chat_msg_partial_exception("incomplete tool calls");
-        }
-    } else if (data.value.contains("tool_call")) {
-        if (!builder.add_tool_call(data.value.at("tool_call")) || data.is_partial) {
-            throw common_chat_msg_partial_exception("incomplete tool call");
-        }
-    } else if (data.value.contains("response")) {
-        const auto & response = data.value.at("response");
-        builder.add_content(response.is_string() ? response.template get<std::string>() : response.dump(2));
-        if (data.is_partial) {
-            throw common_chat_msg_partial_exception("incomplete response");
-        }
-    } else {
-        throw common_chat_msg_partial_exception("Expected 'tool_call', 'tool_calls' or 'response' in JSON");
-    }
-}
-
-static void common_chat_parse_mistral_nemo(common_chat_msg_parser & builder) {
-    if (!builder.syntax().parse_tool_calls) {
-        builder.add_content(builder.consume_rest());
-        return;
-    }
-
-    static const common_regex prefix(regex_escape("[TOOL_CALLS]"));
-    parse_prefixed_json_tool_call_array(builder, prefix);
-}
-
-static void common_chat_parse_magistral(common_chat_msg_parser & builder) {
-    builder.try_parse_reasoning("[THINK]", "[/THINK]");
-
-    if (!builder.syntax().parse_tool_calls) {
-        builder.add_content(builder.consume_rest());
-        return;
-    }
-
-    static const common_regex prefix(regex_escape("[TOOL_CALLS]"));
-    parse_prefixed_json_tool_call_array(builder, prefix);
-}
-
-static void common_chat_parse_command_r7b(common_chat_msg_parser & builder) {
-    builder.try_parse_reasoning("<|START_THINKING|>", "<|END_THINKING|>");
-
-    static const common_regex start_action_regex("<\\|START_ACTION\\|>");
-    static const common_regex end_action_regex("<\\|END_ACTION\\|>");
-    static const common_regex start_response_regex("<\\|START_RESPONSE\\|>");
-    static const common_regex end_response_regex("<\\|END_RESPONSE\\|>");
-
-    if (auto res = builder.try_find_regex(start_action_regex)) {
-        // If we didn't extract thoughts, prelude includes them.
-        auto tool_calls = builder.consume_json_with_dumped_args({{"parameters"}});
-        for (const auto & tool_call : tool_calls.value) {
-            std::string name = tool_call.contains("tool_name") ? tool_call.at("tool_name") : "";
-            std::string id = tool_call.contains("tool_call_id") ? tool_call.at("tool_call_id") : "";
-            std::string arguments = tool_call.contains("parameters") ? tool_call.at("parameters") : "";
-            if (!builder.add_tool_call(name, id, arguments) || tool_calls.is_partial) {
-                throw common_chat_msg_partial_exception("incomplete tool call");
-            }
-        }
-        if (tool_calls.is_partial) {
-            throw common_chat_msg_partial_exception("incomplete tool call");
-        }
-        builder.consume_regex(end_action_regex);
-    } else if (auto res = builder.try_find_regex(start_response_regex)) {
-        if (!builder.try_find_regex(end_response_regex)) {
-            builder.add_content(builder.consume_rest());
-            throw common_chat_msg_partial_exception(end_response_regex.str());
-        }
-    } else {
-        builder.add_content(builder.consume_rest());
-    }
-}
-
-static void common_chat_parse_llama_3_1(common_chat_msg_parser & builder, bool with_builtin_tools = false) {
-    builder.try_parse_reasoning("<think>", "</think>");
-
-    if (!builder.syntax().parse_tool_calls) {
-        builder.add_content(builder.consume_rest());
-        return;
-    }
-
-    static const common_regex function_regex(
-        "\\s*\\{\\s*(?:\"type\"\\s*:\\s*\"function\"\\s*,\\s*)?\"name\"\\s*:\\s*\"([^\"]+)\"\\s*,\\s*\"parameters\"\\s*: ");
-    static const common_regex close_regex("\\}\\s*");
-
-    static const common_regex function_name_regex("\\s*(\\w+)\\s*\\.\\s*call\\(");
-    static const common_regex arg_name_regex("\\s*(\\w+)\\s*=\\s*");
-
-    if (with_builtin_tools) {
-        static const common_regex builtin_call_regex("<\\|python_tag\\|>");
-        if (auto res = builder.try_find_regex(builtin_call_regex)) {
-            auto fun_res = builder.consume_regex(function_name_regex);
-            auto function_name = builder.str(fun_res.groups[1]);
-
-            common_healing_marker healing_marker;
-            json args = json::object();
-            while (true) {
-                if (auto arg_res = builder.try_consume_regex(arg_name_regex)) {
-                    auto arg_name = builder.str(arg_res->groups[1]);
-                    auto partial = builder.consume_json();
-                    args[arg_name] = partial.json;
-                    healing_marker.marker = partial.healing_marker.marker;
-                    healing_marker.json_dump_marker = partial.healing_marker.json_dump_marker;
-                    builder.consume_spaces();
-                    if (!builder.try_consume_literal(",")) {
-                        break;
-                    }
-                } else {
-                    break;
-                }
-            }
-            builder.consume_literal(")");
-            builder.consume_spaces();
-
-            auto arguments = args.dump();
-            if (!builder.add_tool_call(function_name, "", arguments)) {
-                throw common_chat_msg_partial_exception("Incomplete tool call");
-            }
-            return;
-        }
-    }
-    parse_json_tool_calls(
-        builder,
-        /* block_open= */ std::nullopt,
-        /* function_regex_start_only= */ function_regex,
-        /* function_regex= */ std::nullopt,
-        close_regex,
-        std::nullopt);
-
-}
-
-static void common_chat_parse_deepseek_r1(common_chat_msg_parser & builder) {
-    builder.try_parse_reasoning("<think>", "</think>");
-    if (!builder.syntax().parse_tool_calls) {
-        builder.add_content(builder.consume_rest());
-        return;
-    }
-
-    static const common_regex tool_calls_begin("(?:<｜tool▁calls▁begin｜>|<｜tool_calls_begin｜>|<｜tool calls begin｜>|<｜tool\\\\_calls\\\\_begin｜>|<｜tool▁calls｜>)");
-    static const common_regex tool_calls_end("<｜tool▁calls▁end｜>");
-    static const common_regex function_regex("(?:<｜tool▁call▁begin｜>)?function<｜tool▁sep｜>([^\n]+)\n```json\n");
-    static const common_regex close_regex("```[\\s\\r\\n]*<｜tool▁call▁end｜>");
-
-    parse_json_tool_calls(
-        builder,
-        /* block_open= */ tool_calls_begin,
-        /* function_regex_start_only= */ std::nullopt,
-        function_regex,
-        close_regex,
-        tool_calls_end);
-}
-
-static void common_chat_parse_deepseek_v3_1_content(common_chat_msg_parser & builder) {
-    static const common_regex function_regex("(?:<｜tool▁call▁begin｜>)?([^\\n<]+)(?:<｜tool▁sep｜>)");
-
-    static const common_regex close_regex("(?:[\\s]*)?<｜tool▁call▁end｜>");
-    static const common_regex tool_calls_begin("(?:<｜tool▁calls▁begin｜>|<｜tool_calls_begin｜>|<｜tool calls begin｜>|<｜tool\\\\_calls\\\\_begin｜>|<｜tool▁calls｜>)");
-    static const common_regex tool_calls_end("<｜tool▁calls▁end｜>");
-
-    if (!builder.syntax().parse_tool_calls) {
-        LOG_DBG("%s: not parse_tool_calls\n", __func__);
-        builder.add_content(builder.consume_rest());
-        return;
-    }
-
-    LOG_DBG("%s: parse_tool_calls\n", __func__);
-
-    parse_json_tool_calls(
-        builder,
-        /* block_open= */ tool_calls_begin,
-        /* function_regex_start_only= */ std::nullopt,
-        function_regex,
-        close_regex,
-        tool_calls_end);
-}
-
-static void common_chat_parse_deepseek_v3_1(common_chat_msg_parser & builder) {
-    // DeepSeek V3.1 outputs reasoning content between "<think>" and "</think>" tags, followed by regular content
-    // First try to parse using the standard reasoning parsing method
-    LOG_DBG("%s: thinking_forced_open: %s\n", __func__, std::to_string(builder.syntax().thinking_forced_open).c_str());
-
-    auto start_pos = builder.pos();
-    auto found_end_think = builder.try_find_literal("</think>");
-    builder.move_to(start_pos);
-
-    if (builder.syntax().thinking_forced_open && !builder.is_partial() && !found_end_think) {
-        LOG_DBG("%s: no end_think, not partial, adding content\n", __func__);
-        common_chat_parse_deepseek_v3_1_content(builder);
-    } else if (builder.try_parse_reasoning("<think>", "</think>")) {
-        // If reasoning was parsed successfully, the remaining content is regular content
-        LOG_DBG("%s: parsed reasoning, adding content\n", __func__);
-        // </think><｜tool▁calls▁begin｜><｜tool▁call▁begin｜>function<｜tool▁sep｜>NAME\n```json\nJSON\n```<｜tool▁call▁end｜><｜tool▁calls▁end｜>
-        common_chat_parse_deepseek_v3_1_content(builder);
-    } else {
-        if (builder.syntax().reasoning_format == COMMON_REASONING_FORMAT_NONE) {
-          LOG_DBG("%s: reasoning_format none, adding content\n", __func__);
-          common_chat_parse_deepseek_v3_1_content(builder);
-          return;
-        }
-        // If no reasoning tags found, check if we should treat everything as reasoning
-        if (builder.syntax().thinking_forced_open) {
-            // If thinking is forced open but no tags found, treat everything as reasoning
-            LOG_DBG("%s: thinking_forced_open, adding reasoning content\n", __func__);
-            builder.add_reasoning_content(builder.consume_rest());
-        } else {
-            LOG_DBG("%s: no thinking_forced_open, adding content\n", __func__);
-            // <｜tool▁call▁begin｜>NAME<｜tool▁sep｜>JSON<｜tool▁call▁end｜>
-            common_chat_parse_deepseek_v3_1_content(builder);
-        }
-    }
-}
-
-static void common_chat_parse_minimax_m2(common_chat_msg_parser & builder) {
-    static const xml_tool_call_format form {
-        /* form.scope_start = */ "<minimax:tool_call>",
-        /* form.tool_start  = */ "<invoke name=\"",
-        /* form.tool_sep    = */ "\">",
-        /* form.key_start   = */ "<parameter name=\"",
-        /* form.key_val_sep = */ "\">",
-        /* form.val_end     = */ "</parameter>",
-        /* form.tool_end    = */ "</invoke>",
-        /* form.scope_end   = */ "</minimax:tool_call>",
-    };
-    builder.consume_reasoning_with_xml_tool_calls(form, "<think>", "</think>");
-}
-
-static void common_chat_parse_qwen3_coder_xml(common_chat_msg_parser & builder) {
-    static const xml_tool_call_format form = ([]() {
-        xml_tool_call_format form {};
-        form.scope_start = "<tool_call>";
-        form.tool_start  = "<function=";
-        form.tool_sep    = ">";
-        form.key_start   = "<parameter=";
-        form.key_val_sep = ">";
-        form.val_end     = "</parameter>";
-        form.tool_end    = "</function>";
-        form.scope_end   = "</tool_call>";
-        form.trim_raw_argval = true;
-        return form;
-    })();
-    builder.consume_reasoning_with_xml_tool_calls(form);
-}
-
-static void common_chat_parse_kimi_k2(common_chat_msg_parser & builder) {
-    static const xml_tool_call_format form = ([]() {
-        xml_tool_call_format form {};
-        form.scope_start = "<|tool_calls_section_begin|>";
-        form.tool_start  = "<|tool_call_begin|>";
-        form.tool_sep    = "<|tool_call_argument_begin|>{";
-        form.key_start   = "\"";
-        form.key_val_sep = "\":";
-        form.val_end     = ",";
-        form.tool_end    = "}<|tool_call_end|>";
-        form.scope_end   = "<|tool_calls_section_end|>";
-        form.raw_argval  = false;
-        form.last_val_end = "";
-        form.allow_toolcall_in_think = true;
-        return form;
-    })();
-    builder.consume_reasoning_with_xml_tool_calls(form, "<think>", "</think>");
-}
-
-static void common_chat_parse_apriel_1_5(common_chat_msg_parser & builder) {
-    static const xml_tool_call_format form = ([]() {
-        xml_tool_call_format form {};
-        form.scope_start = "<tool_calls>[";
-        form.tool_start  = "{\"name\": \"";
-        form.tool_sep    = "\", \"arguments\": {";
-        form.key_start   = "\"";
-        form.key_val_sep = "\": ";
-        form.val_end     = ", ";
-        form.tool_end    = "}, ";
-        form.scope_end   = "]</tool_calls>";
-        form.raw_argval  = false;
-        form.last_val_end = "";
-        form.last_tool_end = "}";
-        return form;
-    })();
-    builder.consume_reasoning_with_xml_tool_calls(form, "<thinking>", "</thinking>");
-}
-
-static void common_chat_parse_xiaomi_mimo(common_chat_msg_parser & builder) {
-    static const xml_tool_call_format form = ([]() {
-        xml_tool_call_format form {};
-        form.scope_start = "";
-        form.tool_start  = "<tool_call>\n{\"name\": \"";
-        form.tool_sep    = "\", \"arguments\": {";
-        form.key_start   = "\"";
-        form.key_val_sep = "\": ";
-        form.val_end     = ", ";
-        form.tool_end    = "}\n</tool_call>";
-        form.scope_end   = "";
-        form.raw_argval  = false;
-        form.last_val_end = "";
-        return form;
-    })();
-    builder.consume_reasoning_with_xml_tool_calls(form);
-}
-
-static void common_chat_parse_gpt_oss(common_chat_msg_parser & builder) {
-    static const std::string constraint = "(?: (<\\|constrain\\|>)?([a-zA-Z0-9_-]+))";
-    static const std::string recipient("(?: to=functions\\.([^<\\s]+))");
-
-    static const common_regex start_regex("<\\|start\\|>assistant");
-    static const common_regex analysis_regex("<\\|channel\\|>analysis");
-    static const common_regex final_regex("<\\|channel\\|>final" + constraint + "?");
-    static const common_regex preamble_regex("<\\|channel\\|>commentary");
-    static const common_regex tool_call1_regex(recipient + "<\\|channel\\|>(analysis|commentary)" + constraint + "?");
-    static const common_regex tool_call2_regex("<\\|channel\\|>(analysis|commentary)" + recipient + constraint + "?");
-
-    auto consume_end = [&](bool include_end = false) {
-        if (auto res = builder.try_find_literal("<|end|>")) {
-            return res->prelude + (include_end ? builder.str(res->groups[0]) : "");
-        }
-        return builder.consume_rest();
-    };
-
-    auto handle_tool_call = [&](const std::string & name) {
-        if (auto args = builder.try_consume_json_with_dumped_args({{}})) {
-            if (builder.syntax().parse_tool_calls) {
-                if (!builder.add_tool_call(name, "", args->value) || args->is_partial) {
-                    throw common_chat_msg_partial_exception("incomplete tool call");
-                }
-            } else if (args->is_partial) {
-                throw common_chat_msg_partial_exception("incomplete tool call");
-            }
-        }
-    };
-
-    auto regex_match = [](const common_regex & regex, const std::string & input) -> std::optional<common_regex_match> {
-        auto match = regex.search(input, 0, true);
-        if (match.type == COMMON_REGEX_MATCH_TYPE_FULL) {
-            return match;
-        }
-        return std::nullopt;
-    };
-
-    do {
-        auto header_start_pos = builder.pos();
-        auto content_start = builder.try_find_literal("<|message|>");
-        if (!content_start) {
-            throw common_chat_msg_partial_exception("incomplete header");
-        }
-
-        auto header = content_start->prelude;
-
-        if (auto match = regex_match(tool_call1_regex, header)) {
-            auto group = match->groups[1];
-            auto name = header.substr(group.begin, group.end - group.begin);
-            handle_tool_call(name);
-            continue;
-        }
-
-        if (auto match = regex_match(tool_call2_regex, header)) {
-            auto group = match->groups[2];
-            auto name = header.substr(group.begin, group.end - group.begin);
-            handle_tool_call(name);
-            continue;
-        }
-
-        if (regex_match(analysis_regex, header)) {
-            builder.move_to(header_start_pos);
-            if (builder.syntax().reasoning_format == COMMON_REASONING_FORMAT_NONE || builder.syntax().reasoning_in_content) {
-                builder.add_content(consume_end(true));
-            } else {
-                builder.try_parse_reasoning("<|channel|>analysis<|message|>", "<|end|>");
-            }
-            continue;
-        }
-
-        if(regex_match(final_regex, header) || regex_match(preamble_regex, header)) {
-            builder.add_content(consume_end());
-            continue;
-        }
-
-        // Possibly a malformed message, attempt to recover by rolling
-        // back to pick up the next <|start|>
-        LOG_DBG("%s: unknown header from message: %s\n", __func__, header.c_str());
-        builder.move_to(header_start_pos);
-    } while (builder.try_find_regex(start_regex, std::string::npos, false));
-
-    auto remaining = builder.consume_rest();
-    if (!remaining.empty()) {
-        LOG_DBG("%s: content after last message: %s\n", __func__, remaining.c_str());
-    }
-}
-
-static void common_chat_parse_glm_4_5(common_chat_msg_parser & builder) {
-    static const xml_tool_call_format form {
-        /* form.scope_start  = */ "",
-        /* form.tool_start   = */ "<tool_call>",
-        /* form.tool_sep     = */ "",
-        /* form.key_start    = */ "<arg_key>",
-        /* form.key_val_sep  = */ "</arg_key>",
-        /* form.val_end      = */ "</arg_value>",
-        /* form.tool_end     = */ "</tool_call>",
-        /* form.scope_end    = */ "",
-        /* form.key_val_sep2 = */ "<arg_value>",
-    };
-    builder.consume_reasoning_with_xml_tool_calls(form, "<think>", "</think>");
-}
-
-static void common_chat_parse_firefunction_v2(common_chat_msg_parser & builder) {
-    if (!builder.syntax().parse_tool_calls) {
-        builder.add_content(builder.consume_rest());
-        return;
-    }
-    static const common_regex prefix(regex_escape(" functools["));
-    parse_prefixed_json_tool_call_array(builder, prefix, /* rstrip_prefix= */ 1);
-}
-
-static void common_chat_parse_functionary_v3_2(common_chat_msg_parser & builder) {
-    static const common_regex function_regex_start_only(R"((\w+\n\{|python\n|all\n))");
-    static const common_regex function_regex(R"(>>>(\w+\n\{|python\n|all\n))");
-    static const common_regex close_regex(R"(\s*)");
-
-    parse_json_tool_calls(
-        builder,
-        std::nullopt,
-        function_regex_start_only,
-        function_regex,
-        close_regex,
-        std::nullopt,
-        /* allow_raw_python= */ true,
-        /* get_function_name= */ [&](const auto & res) -> std::string {
-            auto at_start = res.groups[0].begin == 0;
-            auto name = builder.str(res.groups[1]);
-            if (!name.empty() && name.back() == '{') {
-                // Unconsume the opening brace '{' to ensure the JSON parsing goes well.
-                builder.move_back(1);
-            }
-            auto idx = name.find_last_not_of("\n{");
-            name = name.substr(0, idx + 1);
-            if (at_start && name == "all") {
-                return "";
-            }
-            return name;
-        });
-}
-
-static void common_chat_parse_functionary_v3_1_llama_3_1(common_chat_msg_parser & builder) {
-    if (!builder.syntax().parse_tool_calls) {
-        builder.add_content(builder.consume_rest());
-        return;
-    }
-    // This version of Functionary still supports the llama 3.1 tool call format for the python tool.
-    static const common_regex python_tag_regex(regex_escape("<|python_tag|>"));
-
-    static const common_regex function_regex(R"(<function=(\w+)>)");
-    static const common_regex close_regex(R"(</function>)");
-
-    parse_json_tool_calls(
-        builder,
-        /* block_open= */ std::nullopt,
-        /* function_regex_start_only= */ std::nullopt,
-        function_regex,
-        close_regex,
-        std::nullopt);
-
-    if (auto res = builder.try_find_regex(python_tag_regex)) {
-        auto arguments = wrap_code_as_arguments(builder, builder.consume_rest());
-        builder.add_tool_call("python", "", arguments);
-        return;
-    }
-}
-
-static void common_chat_parse_hermes_2_pro(common_chat_msg_parser & builder) {
-    builder.try_parse_reasoning("<think>", "</think>");
-    if (!builder.syntax().parse_tool_calls) {
-        builder.add_content(builder.consume_rest());
-        return;
-    }
-
-    static const common_regex open_regex(
-        "(?:"
-            "(```(?:xml|json)?\\n\\s*)?" // match 1 (block_start)
-            "("                          // match 2 (open_tag)
-                "<tool_call>"
-                "|<function_call>"
-                "|<tool>"
-                "|<tools>"
-                "|<response>"
-                "|<json>"
-                "|<xml>"
-                "|<JSON>"
-            ")?"
-            "(\\s*\\{\\s*\"name\")" // match 3 (named tool call)
-        ")"
-        "|<function=([^>]+)>"            // match 4 (function name)
-        "|<function name=\"([^\"]+)\">"  // match 5 (function name again)
-    );
-
-    while (auto res = builder.try_find_regex(open_regex)) {
-        const auto & block_start = res->groups[1];
-        std::string block_end = block_start.empty() ? "" : "```";
-
-        const auto & open_tag = res->groups[2];
-        std::string close_tag;
-
-        if (!res->groups[3].empty()) {
-            builder.move_to(res->groups[3].begin);
-            close_tag = open_tag.empty() ? "" : "</" + builder.str(open_tag).substr(1);
-
-            if (auto tool_call = builder.try_consume_json_with_dumped_args({{"arguments"}})) {
-                if (!builder.add_tool_call(tool_call->value) || tool_call->is_partial) {
-                    throw common_chat_msg_partial_exception("incomplete tool call");
-                }
-                builder.consume_spaces();
-                builder.consume_literal(close_tag);
-                builder.consume_spaces();
-                if (!block_end.empty()) {
-                    builder.consume_literal(block_end);
-                    builder.consume_spaces();
-                }
-            } else {
-                throw common_chat_msg_partial_exception("failed to parse tool call");
-            }
-        } else {
-            auto function_name = builder.str(res->groups[4]);
-            if (function_name.empty()) {
-                function_name = builder.str(res->groups[5]);
-            }
-            GGML_ASSERT(!function_name.empty());
-
-            close_tag = "</function>";
-
-            if (auto arguments = builder.try_consume_json_with_dumped_args({{}})) {
-                if (!builder.add_tool_call(function_name, "", arguments->value) || arguments->is_partial) {
-                    throw common_chat_msg_partial_exception("incomplete tool call");
-                }
-                builder.consume_spaces();
-                builder.consume_literal(close_tag);
-                builder.consume_spaces();
-                if (!block_end.empty()) {
-                    builder.consume_literal(block_end);
-                    builder.consume_spaces();
-                }
-            }
-        }
-    }
-
-    builder.add_content(builder.consume_rest());
-}
-
-static void common_chat_parse_granite(common_chat_msg_parser & builder) {
-    // Parse thinking tags
-    static const common_regex start_think_regex(regex_escape("<think>"));
-    static const common_regex end_think_regex(regex_escape("</think>"));
-    // Granite models output partial tokens such as "<" and "<think".
-    // By leveraging try_consume_regex()/try_find_regex() throwing
-    // common_chat_msg_partial_exception for these partial tokens,
-    // processing is interrupted and the tokens are not passed to add_content().
-    if (auto res = builder.try_consume_regex(start_think_regex)) {
-        // Restore position for try_parse_reasoning()
-        builder.move_to(res->groups[0].begin);
-        builder.try_find_regex(end_think_regex, std::string::npos, false);
-        // Restore position for try_parse_reasoning()
-        builder.move_to(res->groups[0].begin);
-    }
-    builder.try_parse_reasoning("<think>", "</think>");
-
-    // Parse response tags
-    static const common_regex start_response_regex(regex_escape("<response>"));
-    static const common_regex end_response_regex(regex_escape("</response>"));
-    // Granite models output partial tokens such as "<" and "<response".
-    // Same hack as reasoning parsing.
-    if (builder.try_consume_regex(start_response_regex)) {
-        builder.try_find_regex(end_response_regex);
-    }
-
-    if (!builder.syntax().parse_tool_calls) {
-        builder.add_content(builder.consume_rest());
-        return;
-    }
-
-    // Look for tool calls
-    static const common_regex tool_call_regex(regex_escape("<|tool_call|>"));
-    if (auto res = builder.try_find_regex(tool_call_regex)) {
-        builder.move_to(res->groups[0].end);
-
-        // Expect JSON array of tool calls
-        if (auto tool_call = builder.try_consume_json_with_dumped_args({{{"arguments"}}})) {
-            if (!builder.add_tool_calls(tool_call->value) || tool_call->is_partial) {
-                throw common_chat_msg_partial_exception("incomplete tool call");
-            }
-        }
-    } else {
-        builder.add_content(builder.consume_rest());
-    }
-}
-
-static void common_chat_parse_nemotron_v2(common_chat_msg_parser & builder) {
-    // Parse thinking tags
-    builder.try_parse_reasoning("<think>", "</think>");
-    if (!builder.syntax().parse_tool_calls) {
-        builder.add_content(builder.consume_rest());
-        return;
-    }
-
-    // Look for tool calls
-    static const common_regex tool_call_regex(regex_escape("<TOOLCALL>"));
-    if (auto res = builder.try_find_regex(tool_call_regex)) {
-        builder.move_to(res->groups[0].end);
-
-        // Expect JSON array of tool calls
-        auto tool_calls_data = builder.consume_json();
-        if (tool_calls_data.json.is_array()) {
-            if (!builder.try_consume_literal("</TOOLCALL>")) {
-                throw common_chat_msg_partial_exception("Incomplete tool call");
-            }
-            builder.add_tool_calls(tool_calls_data.json);
-        } else {
-            throw common_chat_msg_partial_exception("Incomplete tool call");
-        }
-    }
-    builder.add_content(builder.consume_rest());
-}
-
-static void common_chat_parse_apertus(common_chat_msg_parser & builder) {
-    // Parse thinking tags
-    builder.try_parse_reasoning("<|inner_prefix|>", "<|inner_suffix|>");
-    if (!builder.syntax().parse_tool_calls) {
-        builder.add_content(builder.consume_rest());
-        return;
-    }
-
-    // Look for tool calls
-    static const common_regex tool_call_regex(regex_escape("<|tools_prefix|>"));
-    if (auto res = builder.try_find_regex(tool_call_regex)) {
-        builder.move_to(res->groups[0].end);
-
-        auto tool_calls_data = builder.consume_json();
-        if (tool_calls_data.json.is_array()) {
-            builder.consume_spaces();
-            if (!builder.try_consume_literal("<|tools_suffix|>")) {
-                throw common_chat_msg_partial_exception("Incomplete tool call");
-            }
-            for (const auto & value : tool_calls_data.json) {
-                if (value.is_object()) {
-                    builder.add_tool_call_short_form(value);
-                }
-            }
-        } else {
-            throw common_chat_msg_partial_exception("Incomplete tool call");
-        }
-    }
-    builder.add_content(builder.consume_rest());
-}
-
-
-static void common_chat_parse_lfm2(common_chat_msg_parser & builder) {
-    if (!builder.syntax().parse_tool_calls) {
-        builder.add_content(builder.consume_rest());
-        return;
-    }
-
-    // LFM2 format: <|tool_call_start|>[{"name": "get_current_time", "arguments": {"location": "Paris"}}]<|tool_call_end|>
-    static const common_regex tool_call_start_regex(regex_escape("<|tool_call_start|>"));
-    static const common_regex tool_call_end_regex(regex_escape("<|tool_call_end|>"));
-
-    // Loop through all tool calls
-    while (auto res = builder.try_find_regex(tool_call_start_regex, std::string::npos, /* add_prelude_to_content= */ true)) {
-        builder.move_to(res->groups[0].end);
-
-        // Parse JSON array format: [{"name": "...", "arguments": {...}}]
-        auto tool_calls_data = builder.consume_json();
-
-        // Consume end marker
-        builder.consume_spaces();
-        if (!builder.try_consume_regex(tool_call_end_regex)) {
-            throw common_chat_msg_partial_exception("Expected <|tool_call_end|>");
-        }
-
-        // Process each tool call in the array
-        if (tool_calls_data.json.is_array()) {
-            for (const auto & tool_call : tool_calls_data.json) {
-                if (!tool_call.is_object()) {
-                    throw common_chat_msg_partial_exception("Tool call must be an object");
-                }
-
-                if (!tool_call.contains("name")) {
-                    throw common_chat_msg_partial_exception("Tool call missing 'name' field");
-                }
-
-                std::string function_name = tool_call.at("name");
-                std::string arguments = "{}";
-
-                if (tool_call.contains("arguments")) {
-                    if (tool_call.at("arguments").is_object()) {
-                        arguments = tool_call.at("arguments").dump();
-                    } else if (tool_call.at("arguments").is_string()) {
-                        arguments = tool_call.at("arguments");
-                    }
-                }
-
-                if (!builder.add_tool_call(function_name, "", arguments)) {
-                    throw common_chat_msg_partial_exception("Incomplete tool call");
-                }
-            }
-        } else {
-            throw common_chat_msg_partial_exception("Expected JSON array for tool calls");
-        }
-
-        // Consume any trailing whitespace after this tool call
-        builder.consume_spaces();
-    }
-
-    // Consume any remaining content after all tool calls
-    auto remaining = builder.consume_rest();
-    if (!string_strip(remaining).empty()) {
-        builder.add_content(remaining);
-    }
-}
-
-static void common_chat_parse_seed_oss(common_chat_msg_parser & builder) {
-    static const xml_tool_call_format form {
-        /* form.scope_start = */ "<seed:tool_call>",
-        /* form.tool_start  = */ "<function=",
-        /* form.tool_sep    = */ ">",
-        /* form.key_start   = */ "<parameter=",
-        /* form.key_val_sep = */ ">",
-        /* form.val_end     = */ "</parameter>",
-        /* form.tool_end    = */ "</function>",
-        /* form.scope_end   = */ "</seed:tool_call>",
-    };
-    builder.consume_reasoning_with_xml_tool_calls(form, "<seed:think>", "</seed:think>");
-}
-
-static void common_chat_parse_solar_open(common_chat_msg_parser & builder) {
-    builder.try_parse_reasoning("<|think|>", "<|end|><|begin|>assistant<|content|>");
-
-    // TODO: Tool calling
-
-    builder.add_content(builder.consume_rest());
-}
-
-static void common_chat_parse_exaone_moe_content(common_chat_msg_parser & builder) {
-    // 1) <tool_call>{ "name": "...", "arguments": {...} }</tool_call>
-    // 2) <tool_call>{ "id": "...", "type": "function", "function": { "name": "...", "arguments": {...} } }</tool_call>
-    static const common_regex tool_call_open(R"(<tool_call[^>]*>)");
-
-    if (!builder.syntax().parse_tool_calls) {
-        LOG_DBG("%s: not parse_tool_calls\n", __func__);
-        builder.add_content(builder.consume_rest());
-        return;
-    }
-
-    LOG_DBG("%s: parse_tool_calls\n", __func__);
-
-    // Find all <tool_call></tool_call> blocks
-    while (auto first = builder.try_find_regex(tool_call_open, std::string::npos, /* add_prelude_to_content= */ true)) {
-        builder.move_to(first->groups[0].end);
-        builder.consume_spaces();
-
-        builder.try_consume_literal("```json");
-        builder.try_consume_literal("```");
-        builder.consume_spaces();
-
-        // Consume JSON object
-        auto data = builder.consume_json();
-
-        builder.consume_spaces();
-        builder.try_consume_literal("```");
-        builder.consume_spaces();
-
-        if (!builder.try_consume_literal("</tool_call>")) {
-            throw common_chat_msg_partial_exception("incomplete tool call");
-        }
-        builder.consume_spaces();
-
-        // Extract name and arguments
-        std::string name;
-        std::string id;
-        nlohmann::ordered_json arguments;
-
-        const auto extract_args = [&](const nlohmann::ordered_json & obj) -> bool {
-            if (!obj.contains("name") || !obj.contains("arguments")) {
-                return false;
-            }
-            name = obj.at("name").get<std::string>();
-            arguments = obj.at("arguments");
-            if (obj.contains("id") && obj.at("id").is_string()) {
-                id = obj.at("id").get<std::string>();
-            }
-            return true;
-        };
-
-        if (!extract_args(data.json)) {
-            if (data.json.contains("function") && data.json.at("function").is_object()) {
-                auto fn = data.json.at("function");
-                extract_args(fn);
-                if (id.empty() && data.json.contains("id") && data.json.at("id").is_string()) {
-                    id = data.json.at("id").get<std::string>();
-                }
-            }
-        }
-
-        // If name is empty, treat the JSON object as content
-        if (name.empty()) {
-            LOG_DBG("%s: tool call missing name, treating as content\n", __func__);
-            builder.add_content(data.json.dump());
-            continue;
-        }
-
-        std::string args_str = arguments.dump();
-        if (!builder.add_tool_call(name, id, args_str)) {
-            throw common_chat_msg_partial_exception("incomplete tool call");
-        }
-    }
-
-    builder.add_content(builder.consume_rest());
-}
-
-static void common_chat_parse_exaone_moe(common_chat_msg_parser & builder) {
-    LOG_DBG("%s: parsing exaone_moe\n", __func__);
-    // EXAONE MoE outputs reasoning content between "<think>" and "</think>" tags, followed by regular content
-    // First try to parse using the standard reasoning parsing method
-    LOG_DBG("%s: thinking_forced_open: %s\n", __func__, std::to_string(builder.syntax().thinking_forced_open).c_str());
-
-    auto start_pos = builder.pos();
-    auto found_end_think = builder.try_find_literal("</think>");
-    builder.move_to(start_pos);
-
-    if (builder.syntax().thinking_forced_open && !builder.is_partial() && !found_end_think) {
-        LOG_DBG("%s: no end_think, not partial, adding content\n", __func__);
-        common_chat_parse_exaone_moe_content(builder);
-    } else if (builder.try_parse_reasoning("<think>", "</think>")) {
-        // If reasoning was parsed successfully, the remaining content is regular content
-        LOG_DBG("%s: parsed reasoning, adding content\n", __func__);
-        common_chat_parse_exaone_moe_content(builder);
-    } else {
-        if (builder.syntax().reasoning_format == COMMON_REASONING_FORMAT_NONE) {
-          LOG_DBG("%s: reasoning_format none, adding content\n", __func__);
-          common_chat_parse_exaone_moe_content(builder);
-          return;
-        }
-        // If no reasoning tags found, check if we should treat everything as reasoning
-        if (builder.syntax().thinking_forced_open) {
-            // If thinking is forced open but no tags found, treat everything as reasoning
-            LOG_DBG("%s: thinking_forced_open, adding reasoning content\n", __func__);
-            builder.add_reasoning_content(builder.consume_rest());
-        } else {
-            LOG_DBG("%s: no thinking_forced_open, adding content\n", __func__);
-            common_chat_parse_exaone_moe_content(builder);
-        }
-    }
-}
-
-static void common_chat_parse_content_only(common_chat_msg_parser & builder) {
-    builder.try_parse_reasoning("<think>", "</think>");
-    builder.add_content(builder.consume_rest());
-}
-
-static void common_chat_parse(common_chat_msg_parser & builder) {
-    LOG_DBG("Parsing input with format %s: %s\n", common_chat_format_name(builder.syntax().format), builder.input().c_str());
-
-    switch (builder.syntax().format) {
-        case COMMON_CHAT_FORMAT_CONTENT_ONLY:
-            common_chat_parse_content_only(builder);
-            break;
-        case COMMON_CHAT_FORMAT_GENERIC:
-            common_chat_parse_generic(builder);
-            break;
-        case COMMON_CHAT_FORMAT_MISTRAL_NEMO:
-            common_chat_parse_mistral_nemo(builder);
-            break;
-        case COMMON_CHAT_FORMAT_MAGISTRAL:
-            common_chat_parse_magistral(builder);
-            break;
-        case COMMON_CHAT_FORMAT_LLAMA_3_X:
-            common_chat_parse_llama_3_1(builder);
-            break;
-        case COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS:
-            common_chat_parse_llama_3_1(builder, /* with_builtin_tools= */ true);
-            break;
-        case COMMON_CHAT_FORMAT_DEEPSEEK_R1:
-            common_chat_parse_deepseek_r1(builder);
-            break;
-        case COMMON_CHAT_FORMAT_DEEPSEEK_V3_1:
-            common_chat_parse_deepseek_v3_1(builder);
-            break;
-        case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2:
-            common_chat_parse_functionary_v3_2(builder);
-            break;
-        case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1:
-            common_chat_parse_functionary_v3_1_llama_3_1(builder);
-            break;
-        case COMMON_CHAT_FORMAT_HERMES_2_PRO:
-            common_chat_parse_hermes_2_pro(builder);
-            break;
-        case COMMON_CHAT_FORMAT_FIREFUNCTION_V2:
-            common_chat_parse_firefunction_v2(builder);
-            break;
-        case COMMON_CHAT_FORMAT_COMMAND_R7B:
-            common_chat_parse_command_r7b(builder);
-            break;
-        case COMMON_CHAT_FORMAT_GRANITE:
-            common_chat_parse_granite(builder);
-            break;
-        case COMMON_CHAT_FORMAT_GPT_OSS:
-            common_chat_parse_gpt_oss(builder);
-            break;
-        case COMMON_CHAT_FORMAT_SEED_OSS:
-            common_chat_parse_seed_oss(builder);
-            break;
-        case COMMON_CHAT_FORMAT_NEMOTRON_V2:
-            common_chat_parse_nemotron_v2(builder);
-            break;
-        case COMMON_CHAT_FORMAT_APERTUS:
-            common_chat_parse_apertus(builder);
-            break;
-        case COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS:
-            common_chat_parse_lfm2(builder);
-            break;
-        case COMMON_CHAT_FORMAT_MINIMAX_M2:
-            common_chat_parse_minimax_m2(builder);
-            break;
-        case COMMON_CHAT_FORMAT_GLM_4_5:
-            common_chat_parse_glm_4_5(builder);
-            break;
-        case COMMON_CHAT_FORMAT_KIMI_K2:
-            common_chat_parse_kimi_k2(builder);
-            break;
-        case COMMON_CHAT_FORMAT_QWEN3_CODER_XML:
-            common_chat_parse_qwen3_coder_xml(builder);
-            break;
-        case COMMON_CHAT_FORMAT_APRIEL_1_5:
-            common_chat_parse_apriel_1_5(builder);
-            break;
-        case COMMON_CHAT_FORMAT_XIAOMI_MIMO:
-            common_chat_parse_xiaomi_mimo(builder);
-            break;
-        case COMMON_CHAT_FORMAT_SOLAR_OPEN:
-            common_chat_parse_solar_open(builder);
-            break;
-        case COMMON_CHAT_FORMAT_EXAONE_MOE:
-            common_chat_parse_exaone_moe(builder);
-            break;
-        default:
-            throw std::runtime_error(std::string("Unsupported format: ") + common_chat_format_name(builder.syntax().format));
-    }
-    builder.finish();
-}
-
-common_chat_msg common_chat_parse(const std::string & input, bool is_partial, const common_chat_parser_params & syntax) {
-    if (syntax.format == COMMON_CHAT_FORMAT_PEG_SIMPLE ||
-        syntax.format == COMMON_CHAT_FORMAT_PEG_NATIVE ||
-        syntax.format == COMMON_CHAT_FORMAT_PEG_CONSTRUCTED) {
-        return common_chat_peg_parse(syntax.parser, input, is_partial, syntax);
-    }
-    common_chat_msg_parser builder(input, is_partial, syntax);
-    try {
-        common_chat_parse(builder);
-    } catch (const common_chat_msg_partial_exception & ex) {
-        LOG_DBG("Partial parse: %s\n", ex.what());
-        if (!is_partial) {
-            builder.clear_tools();
-            builder.move_to(0);
-            common_chat_parse_content_only(builder);
-        }
-    }
-    auto msg = builder.result();
-    if (!is_partial) {
-        LOG_DBG("Parsed message: %s\n", common_chat_msgs_to_json_oaicompat({msg}).at(0).dump().c_str());
-    }
-    return msg;
-}
-
-common_chat_msg common_chat_peg_parse(const common_peg_arena & parser, const std::string & input, bool is_partial, const common_chat_parser_params & syntax) {
-    if (parser.empty()) {
-        throw std::runtime_error("Failed to parse due to missing parser definition.");
-    }
-
-    LOG_DBG("Parsing input with format %s: %s\n", common_chat_format_name(syntax.format), input.c_str());
-
-    common_peg_parse_context ctx(input, is_partial);
-    auto result = parser.parse(ctx);
-    if (result.fail()) {
-        throw std::runtime_error(std::string("Failed to parse input at pos ") + std::to_string(result.end));
-    }
-
-    common_chat_msg msg;
-    msg.role = "assistant";
-
-    if (syntax.format == COMMON_CHAT_FORMAT_PEG_NATIVE) {
-        auto mapper = common_chat_peg_native_mapper(msg);
-        mapper.from_ast(ctx.ast, result);
-    } else if (syntax.format == COMMON_CHAT_FORMAT_PEG_CONSTRUCTED) {
-        auto mapper = common_chat_peg_constructed_mapper(msg);
-        mapper.from_ast(ctx.ast, result);
-    } else {
-        // Generic mapper
-        auto mapper = common_chat_peg_mapper(msg);
-        mapper.from_ast(ctx.ast, result);
-    }
-    if (!is_partial) {
-        LOG_DBG("Parsed message: %s\n", common_chat_msgs_to_json_oaicompat({msg}).at(0).dump().c_str());
-    }
-    return msg;
-}
diff --git a/common/chat-parser.h b/common/chat-parser.h
deleted file mode 100644
index 3ed9c30a2b..0000000000
--- a/common/chat-parser.h
+++ /dev/null
@@ -1,133 +0,0 @@
-#pragma once
-
-#include "chat.h"
-#include "chat-parser-xml-toolcall.h"
-#include "json-partial.h"
-#include "regex-partial.h"
-
-#include <nlohmann/json_fwd.hpp>
-
-#include <optional>
-#include <string>
-#include <vector>
-
-class common_chat_msg_partial_exception : public std::runtime_error {
-  public:
-    common_chat_msg_partial_exception(const std::string & message) : std::runtime_error(message) {}
-};
-
-class common_chat_msg_parser {
-    std::string input_;
-    bool is_partial_;
-    common_chat_parser_params syntax_; // TODO: rename to params
-    std::string healing_marker_;
-
-    size_t pos_ = 0;
-    common_chat_msg result_;
-
-  public:
-    common_chat_msg_parser(const std::string & input, bool is_partial, const common_chat_parser_params & syntax);
-    const std::string & input() const { return input_; }
-    size_t pos() const { return pos_; }
-    const std::string & healing_marker() const { return healing_marker_; }
-    const bool & is_partial() const { return is_partial_; }
-    const common_chat_msg & result() const { return result_; }
-    const common_chat_parser_params & syntax() const { return syntax_; }
-
-    void move_to(size_t pos) {
-        if (pos > input_.size()) {
-            throw std::runtime_error("Invalid position!");
-        }
-        pos_ = pos;
-    }
-    void move_back(size_t n) {
-        if (pos_ < n) {
-            throw std::runtime_error("Can't move back that far!");
-        }
-        pos_ -= n;
-    }
-
-    // Get the substring of the input at the given range
-    std::string str(const common_string_range & rng) const;
-
-    // Appends to the result.content field
-    void add_content(const std::string & content);
-
-    // Appends to the result.reasoning_content field
-    void add_reasoning_content(const std::string & reasoning_content);
-
-    // Adds a tool call to the result. If the tool call is too incomplete (e.g. name empty), it won't add anything.
-    bool add_tool_call(const std::string & name, const std::string & id, const std::string & arguments);
-
-    // Adds a tool call using the "name", "id" and "arguments" fields of the json object
-    bool add_tool_call(const nlohmann::ordered_json & tool_call);
-
-    // Adds an array of tool calls using their "name", "id" and "arguments" fields.
-    bool add_tool_calls(const nlohmann::ordered_json & arr);
-
-    // Adds a tool call using the short form: { "tool_name": { "arg1": val, "arg2": val } }
-    bool add_tool_call_short_form(const nlohmann::ordered_json & tool_call);
-
-    void finish();
-
-    bool consume_spaces();
-
-    void consume_literal(const std::string & literal);
-
-    bool try_parse_reasoning(const std::string & start_think, const std::string & end_think);
-
-    std::string consume_rest();
-
-    struct find_regex_result {
-        std::string prelude;
-        std::vector<common_string_range> groups;
-    };
-
-    std::optional<find_regex_result> try_find_regex(const common_regex & regex, size_t from = std::string::npos, bool add_prelude_to_content = true);
-
-    bool try_consume_literal(const std::string & literal);
-
-    std::optional<find_regex_result> try_find_literal(const std::string & literal);
-
-    find_regex_result consume_regex(const common_regex & regex);
-
-    std::optional<find_regex_result> try_consume_regex(const common_regex & regex);
-
-    std::optional<common_json> try_consume_json();
-    common_json consume_json();
-
-    struct consume_json_result {
-        nlohmann::ordered_json value;
-        bool is_partial;
-    };
-
-    /*
-        Consume (possibly partial) json and converts specific subtrees to (possibly truncated) JSON strings.
-
-        By default, object keys can't be truncated, nor can string values (their corresponding key is removed,
-        e.g. `{"foo": "bar", "baz": "b` -> `{"foo": "bar"}`
-
-        But one can allow subpaths to be kept truncated, and possibly json-dumped to truncated json strings
-        - with `content_paths={{"foo"}}` -> `{"foo": "b` -> {"foo": "b"}`
-        - with `args_paths={{"foo"}}` -> `{"foo": {"b` -> `{"foo": "{b"}`
-    */
-    consume_json_result consume_json_with_dumped_args(
-        const std::vector<std::vector<std::string>> & args_paths = {},
-        const std::vector<std::vector<std::string>> & content_paths = {}
-    );
-    std::optional<consume_json_result> try_consume_json_with_dumped_args(
-        const std::vector<std::vector<std::string>> & args_paths = {},
-        const std::vector<std::vector<std::string>> & content_paths = {}
-    );
-
-    /**
-     * Parse XML-Style tool call for given xml_tool_call_format. Return false for invalid syntax and get the position untouched.
-     * form.scope_start, form.tool_sep and form.scope_end can be empty.
-     */
-    bool try_consume_xml_tool_calls(const struct xml_tool_call_format & form);
-
-    // Parse content uses reasoning and XML-Style tool call
-    void consume_reasoning_with_xml_tool_calls(const struct xml_tool_call_format & form, const std::string & start_think = "<think>", const std::string & end_think = "</think>");
-
-    void clear_tools();
-};
diff --git a/common/chat-peg-parser.cpp b/common/chat-peg-parser.cpp
index 1bcba9cd86..ba49ecf29b 100644
--- a/common/chat-peg-parser.cpp
+++ b/common/chat-peg-parser.cpp
@@ -1,13 +1,16 @@
 #include "chat-peg-parser.h"
 
+#include "chat-auto-parser.h"
+#include "ggml.h"
+
 #include <nlohmann/json.hpp>
 
-using json = nlohmann::json;
+using json = nlohmann::ordered_json;
 
 static std::string_view trim_trailing_space(std::string_view sv, int max = -1) {
     int count = 0;
     while (!sv.empty() && std::isspace(static_cast<unsigned char>(sv.back()))) {
-        if (max != -1 && count <= max) {
+        if (max != -1 && count >= max) {
             break;
         }
         sv.remove_suffix(1);
@@ -16,109 +19,966 @@ static std::string_view trim_trailing_space(std::string_view sv, int max = -1) {
     return sv;
 }
 
+static std::string_view trim_leading_space(std::string_view sv, int max = -1) {
+    int count = 0;
+    while (!sv.empty() && std::isspace(static_cast<unsigned char>(sv.front()))) {
+        if (max != -1 && count >= max) {
+            break;
+        }
+        sv.remove_prefix(1);
+        count++;
+    }
+    return sv;
+}
+
+static std::string_view trim(std::string_view sv) {
+    return trim_trailing_space(trim_leading_space(sv, 1));
+}
+
+// Convert Python-style single-quoted strings to JSON double-quoted strings
+// Only converts outer string delimiters, properly handling escape sequences:
+// - {'key': 'value'} -> {"key": "value"}
+// - {'code': 'print(\'hello\')'} -> {"code": "print('hello')"}
+// - {'msg': 'He said "hi"'} -> {"msg": "He said \"hi\""}
+static std::string normalize_quotes_to_json(const std::string & input) {
+    std::string result;
+    result.reserve(input.size() + 16);  // May need extra space for escaping
+
+    bool in_single_quoted = false;
+    bool in_double_quoted = false;
+
+    for (size_t i = 0; i < input.size(); ++i) {
+        char c = input[i];
+
+        // Handle escape sequences
+        if (c == '\\' && i + 1 < input.size()) {
+            char next = input[i + 1];
+
+            if (in_single_quoted) {
+                // Inside a single-quoted string being converted to double quotes
+                if (next == '\'') {
+                    // \' -> ' (escaped single quote becomes unescaped in double-quoted string)
+                    result += '\'';
+                    ++i;
+                    continue;
+                }
+                if (next == '"') {
+                    // \" stays as \" (already escaped, works in double-quoted string)
+                    result += "\\\"";
+                    ++i;
+                    continue;
+                }
+                // Other escapes (\n, \\, etc.): pass through both characters
+                result += c;
+                result += next;
+                ++i;
+                continue;
+            }
+
+            if (in_double_quoted) {
+                // Inside a double-quoted string - pass through escape sequences as-is
+                result += c;
+                result += next;
+                ++i;
+                continue;
+            }
+
+            // Outside any string - just pass through the backslash
+            result += c;
+            continue;
+        }
+
+        // Handle quote characters
+        if (c == '"') {
+            if (in_single_quoted) {
+                // Unescaped double quote inside single-quoted string -> must escape for JSON
+                result += "\\\"";
+            } else {
+                // Double quote as string delimiter or outside strings
+                in_double_quoted = !in_double_quoted;
+                result += c;
+            }
+        } else if (c == '\'') {
+            if (in_double_quoted) {
+                // Single quote inside double-quoted string -> pass through
+                result += c;
+            } else if (in_single_quoted) {
+                // Closing single quote -> convert to double quote
+                in_single_quoted = false;
+                result += '"';
+            } else {
+                // Opening single quote -> convert to double quote
+                in_single_quoted = true;
+                result += '"';
+            }
+        } else {
+            result += c;
+        }
+    }
+
+    return result;
+}
+
 void common_chat_peg_mapper::from_ast(const common_peg_ast_arena & arena, const common_peg_parse_result & result) {
-    arena.visit(result, [this](const common_peg_ast_node & node) {
-        map(node);
-    });
+    arena.visit(result, [this](const common_peg_ast_node & node) { map(node); });
 }
 
 void common_chat_peg_mapper::map(const common_peg_ast_node & node) {
     bool is_reasoning = node.tag == common_chat_peg_builder::REASONING;
-    bool is_content = node.tag == common_chat_peg_builder::CONTENT;
+    bool is_content   = node.tag == common_chat_peg_builder::CONTENT;
 
-    if (is_reasoning) {
-        result.reasoning_content = std::string(trim_trailing_space(node.text));
+    if (is_reasoning) { // GPT OSS can have more than 1 reasoning block, so concatenate here
+        result.reasoning_content += std::string(trim_trailing_space(node.text));
     }
 
     if (is_content) {
-        result.content = std::string(trim_trailing_space(node.text));
+        // Concatenate content from multiple content nodes (e.g., when reasoning markers
+        // are preserved before content markers in reasoning_format=NONE mode)
+        result.content += std::string(trim_trailing_space(node.text));
     }
 }
 
-void common_chat_peg_native_mapper::map(const common_peg_ast_node & node) {
+common_peg_parser common_chat_peg_builder::tag_with_safe_content(const std::string &       tag_name,
+                                                                 const std::string &       marker,
+                                                                 const common_peg_parser & p) {
+    if (marker.empty()) {
+        return zero_or_more(choice({ p, rule(tag_name, content(any())) }));
+    }
+    auto content_chunk = rule(tag_name, content(negate(literal(marker)) + any() + until(marker)));
+    return zero_or_more(choice({ p, content_chunk }));
+}
+
+common_peg_parser common_chat_peg_unified_builder::build_reasoning_block(const content_structure & cs,
+                                                                         common_reasoning_format   reasoning_format,
+                                                                         bool thinking_forced_open) {
+    // If reasoning is explicitly disabled, return empty
+    if (reasoning_format == COMMON_REASONING_FORMAT_NONE) {
+        return eps();
+    }
+
+    // Get reasoning markers - use from content_structure or fallback for DEEPSEEK format
+    std::string reason_start = cs.reasoning_start;
+    std::string reason_end   = cs.reasoning_end;
+
+    // If DEEPSEEK format is specified but markers weren't detected, use fallback markers
+    if ((reasoning_format == COMMON_REASONING_FORMAT_DEEPSEEK ||
+         reasoning_format == COMMON_REASONING_FORMAT_DEEPSEEK_LEGACY) &&
+        (reason_start.empty() || reason_end.empty())) {
+        // Try standard DeepSeek markers
+        if (reason_start.empty()) {
+            reason_start = "<think>";
+        }
+        if (reason_end.empty()) {
+            reason_end = "</think>";
+        }
+    }
+
+    // If still no markers, return empty
+    // But allow empty start marker if thinking is forced open (implicit start)
+    if ((reason_start.empty() && !thinking_forced_open) || reason_end.empty()) {
+        return eps();
+    }
+
+    if (thinking_forced_open) {
+        // Mandatory reasoning: parse from current position to end marker
+        auto parser = reasoning(until(reason_end)) + literal(reason_end);
+        return rule("reasoning", reasoning_block(parser));
+    }
+    // Optional reasoning: may or may not appear
+    // Also try <|START_THINKING|> style markers if standard markers don't match
+    auto standard_reasoning =
+        reasoning_block(literal(reason_start) + reasoning(until(reason_end)) + literal(reason_end));
+
+    // For templates that use <|START_THINKING|> style markers
+    if (reason_start == "<think>" && reason_end == "</think>") {
+        auto alt_reasoning = reasoning_block(literal("<|START_THINKING|>") + reasoning(until("<|END_THINKING|>")) +
+                                             literal("<|END_THINKING|>"));
+        return optional(rule("reasoning", choice({ standard_reasoning, alt_reasoning })));
+    }
+
+    return optional(rule("reasoning", standard_reasoning));
+}
+
+common_peg_parser common_chat_peg_unified_builder::build_content_block(const content_structure & cs,
+                                                                       common_reasoning_format   reasoning_format,
+                                                                       const std::string &       tool_section_start) {
+    GGML_UNUSED(tool_section_start);  // leaving for now just in case
+    std::string content_start = cs.content_start;
+    std::string content_end   = cs.content_end;
+
+    // Add fallback content markers for DEEPSEEK format if not detected
+    // Some templates use <response> tags for content when reasoning is enabled
+    if ((reasoning_format == COMMON_REASONING_FORMAT_DEEPSEEK ||
+         reasoning_format == COMMON_REASONING_FORMAT_DEEPSEEK_LEGACY) &&
+        (content_start.empty() || content_end.empty())) {
+        content_start = "<response>";
+        content_end   = "</response>";
+    }
+
+    // Handle content markers with both start and end
+    if (cs.content_mode != content_structure::CONTENT_PLAIN && !cs.content_start.empty() && !cs.content_end.empty()) {
+        // Content is wrapped in markers
+        if (reasoning_format == COMMON_REASONING_FORMAT_NONE) {
+            // When reasoning_format=NONE, preserve any content before the content start marker
+            // (this may include reasoning/thinking markers that the model generates).
+            // This applies even if reasoning markers weren't detected by the analyzer.
+            auto with_markers = content(until(cs.content_start)) + literal(cs.content_start) +
+                                content(until(cs.content_end)) + literal(cs.content_end);
+            // Fallback: content wrapped in end marker only (start marker might be in prompt)
+            auto implicit_markers = content(until(cs.content_end)) + literal(cs.content_end);
+            auto without_markers  = content(rest());
+            return choice({ with_markers, implicit_markers, without_markers });
+        }  // When reasoning is parsed separately, content starts directly after reasoning block
+        auto with_markers     = literal(cs.content_start) + content(until(cs.content_end)) + literal(cs.content_end);
+        auto implicit_markers = content(until(cs.content_end)) + literal(cs.content_end);
+        auto without_markers  = content(rest());
+        return choice({ with_markers, implicit_markers, without_markers });
+    }
+
+    // Handle content with only start marker (no end marker)
+    // This is for formats like recipient-based (Functionary v3.2) where content is prefixed with
+    // a marker but has no explicit closing marker - content ends at end of message or before tool calls
+    if (cs.content_mode != content_structure::CONTENT_PLAIN && !cs.content_start.empty() && cs.content_end.empty()) {
+        if (reasoning_format == COMMON_REASONING_FORMAT_NONE) {
+            // Preserve any content before the start marker, then consume the marker and capture rest
+            auto with_start_marker = content(until(cs.content_start)) + literal(cs.content_start) + content(rest());
+            auto without_markers   = content(rest());
+            return choice({ with_start_marker, without_markers });
+        }  // Content starts directly after reasoning block
+        auto with_start_marker = literal(cs.content_start) + content(rest());
+        auto without_markers   = content(rest());
+        return choice({ with_start_marker, without_markers });
+    }
+
+    // For DEEPSEEK format, try fallback content markers even if not detected
+    if (!content_start.empty() && !content_end.empty()) {
+        auto with_markers    = literal(content_start) + content(until(content_end)) + literal(content_end);
+        auto without_markers = content(rest());
+        return choice({ with_markers, without_markers });
+    }
+
+    // Plain content - capture rest
+    return content(rest());
+}
+
+common_peg_parser common_chat_peg_unified_builder::build_tool_section(const tool_call_structure & ts,
+                                                                      const nlohmann::json &      tools,
+                                                                      bool                        parallel_tool_calls,
+                                                                      bool                        force_tool_calls) {
+    if (!ts.supports_tools || !tools.is_array() || tools.empty()) {
+        return eps();
+    }
+
+    // Build tool choices based on function format
+    auto tool_choices = choice();
+
+    for (const auto & tool_def : tools) {
+        if (!tool_def.contains("function")) {
+            continue;
+        }
+        const auto &   function = tool_def.at("function");
+        std::string    name     = function.at("name");
+        nlohmann::json params = function.contains("parameters") ? function.at("parameters") : nlohmann::json::object();
+
+        tool_choices |= rule("tool-" + name, build_function(ts, name, params));
+    }
+
+    // Build the section with or without markers
+    auto build_section = [&]() -> common_peg_parser {
+        // Markdown code block format (Cohere Command-R Plus):
+        // Action:\n```json\n[{...}]\n```
+        if (ts.function_format == tool_call_structure::FUNC_MARKDOWN_CODE_BLOCK) {
+            // Build the opening: "Action:\n```json"
+            std::string code_fence_open = "```";
+            if (!ts.code_block_language.empty()) {
+                code_fence_open += ts.code_block_language;
+            }
+
+            auto opening = literal(ts.code_block_marker) + literal("\n") + literal(code_fence_open) + literal("\n");
+            auto closing = literal("\n") + literal(ts.tool_section_end);  // "\n```"
+
+            // Build the JSON array of tool calls
+            // Don't use trigger_rule here since we're nested inside a sequence
+            auto tools_array = literal("[") + space();
+            if (parallel_tool_calls) {
+                tools_array = tools_array + tool_choices;
+                tools_array = tools_array + zero_or_more(space() + literal(",") + space() + tool_choices);
+            } else {
+                tools_array = tools_array + optional(tool_choices);
+            }
+            tools_array = tools_array + space() + literal("]");
+
+            // Full section: Action:\n```json\n[{...}]\n```
+            return trigger_rule("tool-call", opening + tools_array + closing);
+        }
+
+        // Recipient-based format (Functionary v3.2): >>>function_name\n{arguments}
+        // Uses tool_section_start as delimiter, but no array wrapper or section markers
+        if (ts.function_format == tool_call_structure::FUNC_RECIPIENT_BASED) {
+            auto tool_call = trigger_rule("tool-call", tool_choices);
+            if (parallel_tool_calls) {
+                // Multiple tool calls: each starts with >>>
+                return one_or_more(tool_call + space());
+            }
+            return tool_call;
+        }
+
+        if (!ts.tool_section_start.empty() && !ts.tool_section_end.empty()) {
+            // Check if this format has SEPARATE section markers and per-call markers.
+            // This happens when:
+            // - Section markers wrap the ENTIRE section (e.g., <tool_calls_begin>...<tool_calls_end>)
+            // - Function prefix contains its own per-call marker (e.g., <tool_call_begin>...)
+            // Example: DeepSeek R1 with section and call markers, Kimi-K2 with prefixed-indexed format
+            // We detect this by checking if function_prefix contains a per-call START marker
+            // (indicated by words like "call_begin", "call_start", or similar patterns)
+            bool has_separate_section_and_call_markers = false;
+
+            // FUNC_PREFIXED_INDEXED and FUNC_BRACKET_TAG always have separate section and per-call markers
+            if (ts.function_format == tool_call_structure::FUNC_PREFIXED_INDEXED ||
+                ts.function_format == tool_call_structure::FUNC_BRACKET_TAG) {
+                has_separate_section_and_call_markers = true;
+            } else if (ts.function_format == tool_call_structure::FUNC_NAME_AS_KEY) {
+                // FUNC_NAME_AS_KEY uses comma-separated JSON objects in an array
+                // Format: [{"func1": args}, {"func2": args}]
+                // The brackets are included in section markers
+                auto tool_call  = trigger_rule("tool-call", tool_choices);
+                auto tool_calls = tool_call;
+                if (parallel_tool_calls) {
+                    tool_calls = tool_call + zero_or_more(space() + literal(",") + space() + tool_call);
+                }
+                return literal(ts.tool_section_start) + space() + tool_calls + space() + literal(ts.tool_section_end);
+            } else if (ts.function_format == tool_call_structure::FUNC_TAG_WITH_NAME && !ts.function_prefix.empty()) {
+                // Check if function_prefix contains a per-call marker like "<tool_call_begin>"
+                // This differentiates DeepSeek R1 (where function_prefix has its own call marker)
+                // from Nemotron (where function_prefix is just "<function=")
+                // DeepSeek pattern: function_prefix = "<｜tool▁call▁begin｜>function<｜tool▁sep｜>"
+                // Nemotron pattern: function_prefix = "<function="
+                bool prefix_has_call_marker = ts.function_prefix.find("call") != std::string::npos &&
+                                              (ts.function_prefix.find("begin") != std::string::npos ||
+                                               ts.function_prefix.find("start") != std::string::npos);
+                if (prefix_has_call_marker) {
+                    has_separate_section_and_call_markers = true;
+                }
+            }
+            if (has_separate_section_and_call_markers) {
+                // Section markers wrap all calls, per-call markers are in function_prefix/close
+                // Format: <section_start> <call1> <call2> ... <section_end>
+                auto tool_call  = trigger_rule("tool-call", tool_choices);
+                auto tool_calls = parallel_tool_calls ? one_or_more(tool_call + space()) : tool_call;
+                return literal(ts.tool_section_start) + space() + tool_calls + space() + literal(ts.tool_section_end);
+            }  // Each tool call has its own wrapper: <tool_call>tool</tool_call>
+            auto single_tool_section =
+                trigger_rule("tool-call", literal(ts.tool_section_start) + space() + tool_choices + space() +
+                                              literal(ts.tool_section_end));
+            if (parallel_tool_calls) {
+                // Multiple wrapped tool calls
+                return one_or_more(single_tool_section + space());
+            }
+            return single_tool_section;
+        }
+        if (!ts.tool_section_start.empty()) {
+            // Start marker only (no end marker) - e.g., <|tool_call|>[...]
+            // Wrap all tool calls in an array after the start marker
+            auto tools_array = literal("[") + space();
+            if (parallel_tool_calls) {
+                tools_array = tools_array + tool_choices;
+                tools_array = tools_array + zero_or_more(space() + literal(",") + space() + tool_choices);
+            } else {
+                tools_array = tools_array + optional(tool_choices);
+            }
+            tools_array = tools_array + space() + literal("]");
+
+            return trigger_rule("tool-call", literal(ts.tool_section_start) + tools_array);
+        }  // No section markers (raw JSON format, e.g., Llama 3.1)
+        // Use trigger rule since tool calls are identified by regex trigger on the grammar
+        if (parallel_tool_calls) {
+            return trigger_rule("tool-call", one_or_more(tool_choices + space()));
+        }
+        return trigger_rule("tool-call", tool_choices);
+    };
+
+    auto section = build_section();
+    if (!force_tool_calls) {
+        section = optional(section);
+    }
+
+    return section;
+}
+
+common_peg_parser common_chat_peg_unified_builder::build_function(const tool_call_structure & ts,
+                                                                  const std::string &         name,
+                                                                  const nlohmann::json &      schema) {
+    auto args = build_arguments(ts, schema);
+
+    switch (ts.function_format) {
+        case tool_call_structure::FUNC_JSON_OBJECT:
+            {
+                // Build JSON object parser that accepts id field in either position:
+                // - Before name: {"id": "...", "name": "X", "arguments": {...}} (R7B style)
+                // - After args:  {"name": "X", "arguments": {...}, "id": "..."} (Mistral style)
+                auto tool_name_ = json_member(ts.name_field, "\"" + tool_name(literal(name)) + "\"");
+                auto tool_args_ = json_member(ts.args_field, tool_args(args));
+
+                // id can appear before name or after args
+                auto id_member = json_member(ts.id_field, tool_id(json_string()));
+                auto id_before = ts.id_field.empty() ? eps() : optional(id_member << space() << "," << space());
+                auto id_after  = ts.id_field.empty() ? eps() : optional(space() << "," << space() << id_member);
+
+                return tool(tool_open(literal("{")) << space() << id_before  // optional id before name (R7B style)
+                                                    << tool_name_ << space() << "," << space() << tool_args_
+                                                    << id_after              // optional id after args (Mistral style)
+                                                    << zero_or_more(space() << "," << space() << json_string()
+                                                                            << space() << ":" << space() << json())
+                                                    << space() << "}");
+            }
+
+        case tool_call_structure::FUNC_TAG_WITH_NAME:
+            {
+                // Build tag parser: <function=X>{...}</function>
+                // Combine prefix + name + suffix into tool_open to ensure the tool is only created
+                // when the FULL opening tag is confirmed. This prevents partial name matches during
+                // incremental parsing (e.g., matching "special_function" when input is "special_function_")
+                auto opening = literal(ts.function_prefix) + tool_name(literal(name)) + literal(ts.function_suffix);
+                // Note: No space() before tool_close because function_close may start with newline
+                // (e.g., "\n```<close_tag>") and space() would consume it, preventing the literal match
+                return tool(tool_open(opening) + space() + tool_args(args) + tool_close(literal(ts.function_close)));
+            }
+
+        case tool_call_structure::FUNC_TAG_NAME_ONLY:
+            {
+                // Build tag parser: <X>...</X>
+                // Combine < + name + > into tool_open to prevent partial matches
+                auto opening = literal("<") + tool_name(literal(name)) + literal(">");
+                return tool(tool_open(opening) + space() + tool_args(args) + space() +
+                            tool_close(literal("</" + name + ">")));
+            }
+
+        case tool_call_structure::FUNC_PREFIXED_INDEXED:
+            {
+                // Build prefixed-indexed parser (e.g., Kimi-K2):
+                // <|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>{...}<|tool_call_end|>
+                // The index number after : is ignored (we use zero_or_more(digit) to skip it)
+                auto opening = literal(ts.per_call_start) + literal(ts.function_namespace) + tool_name(literal(name)) +
+                               literal(":") + zero_or_more(chars("0-9", 1, 1)) +  // Skip the index
+                               literal(ts.args_marker);
+                return tool(tool_open(opening) + space() + tool_args(args) + space() +
+                            tool_close(literal(ts.per_call_end)));
+            }
+
+        case tool_call_structure::FUNC_NAME_AS_KEY:
+            {
+                // Build name-as-key parser (e.g., Apertus):
+                // {"function_name": {...arguments...}}
+                // The function name IS the JSON key, and arguments are the value directly
+                auto opening = literal("{\"") + tool_name(literal(name)) + literal("\":");
+                return tool(tool_open(opening) + space() + tool_args(args) + space() + literal("}"));
+            }
+
+        case tool_call_structure::FUNC_BRACKET_TAG:
+            {
+                // Build bracket-tag parser (e.g., Mistral Small 3.2):
+                // [TOOL_CALLS]function_name[CALL_ID]call_id[ARGS]{...}
+                // per_call_start = "[TOOL_CALLS]"
+                // id_marker = "[CALL_ID]"
+                // args_marker = "[ARGS]"
+                auto opening = literal(ts.per_call_start) + tool_name(literal(name));
+                if (!ts.id_marker.empty()) {
+                    // Add id_marker + id value (captured as tool_id)
+                    opening = opening + literal(ts.id_marker) + tool_id(until(ts.args_marker));
+                }
+                if (!ts.args_marker.empty()) {
+                    opening = opening + literal(ts.args_marker);
+                }
+                // No explicit closer for this format (EOS terminates)
+                return tool(tool_open(opening) + space() + tool_args(args));
+            }
+
+        case tool_call_structure::FUNC_RECIPIENT_BASED:
+            {
+                // Build recipient-based parser (e.g., Functionary v3.2):
+                // >>>function_name
+                // {'param1': 'value1', 'param2': 'value2'}
+                // tool_section_start = ">>>"
+                // Function name directly follows ">>>" with newline, arguments are Python dict (parse as JSON)
+                auto opening = literal(ts.tool_section_start) + tool_name(literal(name));
+                // No explicit closer (newline + arguments, then EOS or next >>>)
+                return tool(tool_open(opening) + space() + tool_args(args));
+            }
+
+        case tool_call_structure::FUNC_MARKDOWN_CODE_BLOCK:
+            {
+                // Build markdown code block parser (e.g., Cohere Command-R Plus):
+                // Action:
+                // ```json
+                // [
+                //     {
+                //         "tool_name": "function_name",
+                //         "parameters": {...}
+                //     }
+                // ]
+                // ```
+                // The individual function is a JSON object within the array
+                auto tool_name_ = json_member(ts.name_field, "\"" + tool_name(literal(name)) + "\"");
+                auto tool_args_ = json_member(ts.args_field, tool_args(args));
+
+                // Build the JSON object: {"tool_name": "...", "parameters": {...}}
+                // Use same pattern as FUNC_JSON_OBJECT: tool_open with atomic wrapper
+                return tool(tool_open(literal("{")) << space() << tool_name_ << space() << "," << space() << tool_args_
+                                                    << zero_or_more(space() << "," << space() << json_string()
+                                                                            << space() << ":" << space() << json())
+                                                    << space() << "}");
+            }
+    }
+
+    return eps();
+}
+
+common_peg_parser common_chat_peg_unified_builder::build_arguments(const tool_call_structure & ts,
+                                                                   const nlohmann::json &      params) {
+    switch (ts.argument_format) {
+        case tool_call_structure::ARGS_JSON:
+            {
+                // Standard JSON object arguments
+                if (params.is_object()) {
+                    return schema(json(), "args", params);
+                }
+                return json();
+            }
+
+        case tool_call_structure::ARGS_TAGGED:
+            {
+                // Tagged arguments: <param=key>value</param>
+                if (!params.contains("properties") || params.at("properties").empty()) {
+                    return eps();
+                }
+
+                auto arg_choice = choice();
+                for (const auto & el : params.at("properties").items()) {
+                    const std::string & prop_name   = el.key();
+                    const auto &        prop_schema = el.value();
+
+                    // Check if the schema declares this as a string type
+                    bool is_string_type = prop_schema.contains("type") && prop_schema.at("type") == "string";
+
+                    auto arg_name_parser = choice(
+                        { literal(prop_name), literal("\"" + prop_name + "\""), literal("'" + prop_name + "'") });
+
+                    // Use tool_arg_string_value for string types to prevent treating "[..." as JSON array
+                    auto value_parser = is_string_type ? tool_arg_string_value(until(ts.arg_close))
+                                                       : tool_arg_value(until(ts.arg_close));
+
+                    auto arg_rule = tool_arg(tool_arg_open(literal(ts.arg_prefix)) + tool_arg_name(arg_name_parser) +
+                                             literal(ts.arg_suffix) + value_parser +
+                                             tool_arg_close(literal(ts.arg_close)) +
+                                             (ts.arg_separator.empty() ? eps() : optional(literal(ts.arg_separator))));
+                    arg_choice |= arg_rule;
+                }
+                return zero_or_more(arg_choice + space());
+            }
+
+        case tool_call_structure::ARGS_KEY_VALUE_TAGS:
+            {
+                // Key-value tag arguments (GLM-4.6 style):
+                // <arg_key>key</arg_key>
+                // <arg_value>value</arg_value>
+                if (!params.contains("properties") || params.at("properties").empty()) {
+                    return eps();
+                }
+
+                auto arg_choice = choice();
+                for (const auto & el : params.at("properties").items()) {
+                    const std::string & prop_name   = el.key();
+                    const auto &        prop_schema = el.value();
+
+                    // Check if the schema declares this as a string type
+                    bool is_string_type = prop_schema.contains("type") && prop_schema.at("type") == "string";
+
+                    // Parse: <arg_key>key</arg_key>\n<arg_value>value</arg_value>
+                    // ts.arg_prefix = "<arg_key>", ts.arg_suffix = "</arg_key>", ts.arg_close = "</arg_value>"
+                    // Use tool_arg_string_value for string types to prevent treating "[..." as JSON array
+                    auto value_parser = is_string_type ? tool_arg_string_value(until(ts.arg_close))
+                                                       : tool_arg_value(until(ts.arg_close));
+
+                    auto arg_rule = tool_arg(tool_arg_open(literal(ts.arg_prefix)) + tool_arg_name(literal(prop_name)) +
+                                             literal(ts.arg_suffix) +  // </arg_key>
+                                             space() + literal("<arg_value>") + value_parser +
+                                             tool_arg_close(literal(ts.arg_close)));
+                    arg_choice |= arg_rule;
+                }
+                return zero_or_more(arg_choice + space());
+            }
+    }
+
+    return eps();
+}
+
+common_peg_parser common_chat_peg_unified_builder::standard_json_tools(const std::string &    section_start,
+                                                                       const std::string &    section_end,
+                                                                       const nlohmann::json & tools,
+                                                                       bool                   parallel_tool_calls,
+                                                                       bool                   force_tool_calls) {
+    if (!tools.is_array() || tools.empty()) {
+        return eps();
+    }
+
+    // Build tool choices for JSON format
+    auto tool_choices = choice();
+
+    for (const auto & tool_def : tools) {
+        if (!tool_def.contains("function")) {
+            continue;
+        }
+        const auto &   function = tool_def.at("function");
+        std::string    name     = function.at("name");
+        nlohmann::json params = function.contains("parameters") ? function.at("parameters") : nlohmann::json::object();
+
+        // Build JSON object parser: {"name": "X", "arguments": {...}}
+        auto tool_name_ = json_member("name", "\"" + tool_name(literal(name)) + "\"");
+        auto tool_args_ = json_member("arguments", tool_args(schema(json(), "tool-" + name + "-schema", params)));
+
+        auto tool_parser =
+            tool(tool_open(literal("{")) << space() << tool_name_ << space() << "," << space() << tool_args_
+                                         << zero_or_more(space() << "," << space() << json_string() << space() << ":"
+                                                                 << space() << json())
+                                         << space() << "}");
+
+        tool_choices |= rule("tool-" + name, tool_parser);
+    }
+
+    // Build the section with markers
+    auto tool_calls = tool_choices;
+    if (parallel_tool_calls) {
+        tool_calls = tool_calls + zero_or_more(space() + literal(",") + space() + tool_choices);
+    }
+
+    auto section =
+        trigger_rule("tool-call", literal(section_start) + space() + tool_calls + space() + literal(section_end));
+
+    return force_tool_calls ? section : optional(section);
+}
+
+common_peg_parser common_chat_peg_unified_builder::standard_constructed_tools(
+    const std::map<std::string, std::string> & markers,
+    const nlohmann::json &                     tools,
+    bool                                       parallel_tool_calls,
+    bool                                       force_tool_calls) {
+    if (!tools.is_array() || tools.empty()) {
+        return eps();
+    }
+
+    // Extract markers with defaults
+    auto get_marker = [&markers](const std::string & key, const std::string & default_val = "") -> std::string {
+        auto it = markers.find(key);
+        return it != markers.end() ? it->second : default_val;
+    };
+
+    std::string section_start    = get_marker("tool_call_start_marker", "<tool_call>");
+    std::string section_end      = get_marker("tool_call_end_marker", "</tool_call>");
+    std::string func_opener      = get_marker("function_opener", "<function=");
+    std::string func_name_suffix = get_marker("function_name_suffix", ">");
+    std::string func_closer      = get_marker("function_closer", "</function>");
+    std::string param_key_prefix = get_marker("parameter_key_prefix", "<param=");
+    std::string param_key_suffix = get_marker("parameter_key_suffix", ">");
+    std::string param_closer     = get_marker("parameter_closer", "</param>");
+
+    // Build tool choices for tagged format
+    auto tool_choices = choice();
+
+    for (const auto & tool_def : tools) {
+        if (!tool_def.contains("function")) {
+            continue;
+        }
+        const auto &   function = tool_def.at("function");
+        std::string    name     = function.at("name");
+        nlohmann::json params = function.contains("parameters") ? function.at("parameters") : nlohmann::json::object();
+
+        // Build argument parsers
+        auto args = eps();
+        if (params.contains("properties") && !params["properties"].empty()) {
+            auto arg_choice = choice();
+            for (const auto & el : params["properties"].items()) {
+                const std::string & prop_name = el.key();
+
+                auto arg_name_parser =
+                    choice({ literal(prop_name), literal("\"" + prop_name + "\""), literal("'" + prop_name + "'") });
+
+                auto arg_rule = tool_arg(tool_arg_open(literal(param_key_prefix)) + tool_arg_name(arg_name_parser) +
+                                         literal(param_key_suffix) + tool_arg_value(until(param_closer)) +
+                                         tool_arg_close(literal(param_closer)));
+                arg_choice |= arg_rule;
+            }
+            args = zero_or_more(arg_choice + space());
+        }
+
+        // Build function parser: <function=name>args</function>
+        auto tool_parser = tool(tool_open(literal(func_opener) + tool_name(literal(name)) + literal(func_name_suffix)) +
+                                space() + tool_args(args) + space() + tool_close(literal(func_closer)));
+
+        tool_choices |= rule("tool-" + name, tool_parser);
+    }
+
+    // Build the section with markers
+    auto section =
+        parallel_tool_calls ?
+            trigger_rule("tool-call", literal(section_start) + space() + one_or_more(tool_choices + space()) +
+                                          literal(section_end)) :
+            trigger_rule("tool-call", literal(section_start) + space() + tool_choices + space() + literal(section_end));
+
+    return force_tool_calls ? section : optional(section);
+}
+
+void common_chat_peg_unified_mapper::from_ast(const common_peg_ast_arena &    arena,
+                                              const common_peg_parse_result & parse_result_arg) {
+    // Call base class to visit all nodes
+    common_chat_peg_mapper::from_ast(arena, parse_result_arg);
+
+    // Flush any pending tool call that was started but never got a name
+    // This happens during partial parsing when the tool call is incomplete
+    if (pending_tool_call.has_value()) {
+        // Transfer any buffered arguments
+        if (!args_buffer.empty()) {
+            pending_tool_call->arguments = args_buffer;
+        }
+        // Close any open quotes in buffered args
+        if (buffer_needs_closing_quote && !pending_tool_call->arguments.empty()) {
+            pending_tool_call->arguments += "\"";
+        }
+        // Add the incomplete tool call to results
+        result.tool_calls.push_back(pending_tool_call.value());
+        pending_tool_call.reset();
+    }
+}
+
+void common_chat_peg_unified_mapper::map(const common_peg_ast_node & node) {
+    // First call base class for reasoning/content handling
     common_chat_peg_mapper::map(node);
 
-    bool is_tool_open = node.tag == common_chat_peg_native_builder::TOOL_OPEN;
-    bool is_tool_name = node.tag == common_chat_peg_native_builder::TOOL_NAME;
-    bool is_tool_id = node.tag == common_chat_peg_native_builder::TOOL_ID;
-    bool is_tool_args = node.tag == common_chat_peg_native_builder::TOOL_ARGS;
+    // Handle tool-related tags (unified version supporting both JSON and tagged formats)
+    bool is_tool_open  = node.tag == common_chat_peg_unified_builder::TOOL_OPEN;
+    bool is_tool_close = node.tag == common_chat_peg_unified_builder::TOOL_CLOSE;
+    bool is_tool_name  = node.tag == common_chat_peg_unified_builder::TOOL_NAME;
+    bool is_tool_id    = node.tag == common_chat_peg_unified_builder::TOOL_ID;
+    bool is_tool_args  = node.tag == common_chat_peg_unified_builder::TOOL_ARGS;
+    bool is_arg_open   = node.tag == common_chat_peg_unified_builder::TOOL_ARG_OPEN;
+    bool is_arg_close  = node.tag == common_chat_peg_unified_builder::TOOL_ARG_CLOSE;
+    bool is_arg_name         = node.tag == common_chat_peg_unified_builder::TOOL_ARG_NAME;
+    bool is_arg_value        = node.tag == common_chat_peg_unified_builder::TOOL_ARG_VALUE;
+    bool is_arg_string_value = node.tag == common_chat_peg_unified_builder::TOOL_ARG_STRING_VALUE;
 
     if (is_tool_open) {
-        result.tool_calls.emplace_back();
-        current_tool = &result.tool_calls.back();
+        // Don't create tool call yet - wait for name to be known
+        // This prevents sending incomplete tool calls in streaming mode
+        pending_tool_call = common_chat_tool_call();
+        current_tool      = &pending_tool_call.value();
+        arg_count         = 0;
+        // Clear the arguments buffer for the new tool
+        args_buffer.clear();
+        needs_closing_quote        = false;
+        buffer_needs_closing_quote = false;
     }
 
     if (is_tool_id && current_tool) {
-        current_tool->id = std::string(trim_trailing_space(node.text));
+        auto text = trim_trailing_space(node.text);
+        if (text.size() >= 2 && text.front() == '"' && text.back() == '"') {
+            text = text.substr(1, text.size() - 2);
+        }
+        current_tool->id = std::string(text);
     }
 
     if (is_tool_name && current_tool) {
         current_tool->name = std::string(trim_trailing_space(node.text));
+        // Now that we have the name, we can populate the arguments from the buffer
+        if (!args_buffer.empty()) {
+            current_tool->arguments = args_buffer;
+            args_buffer.clear();
+        } else if (current_tool->arguments.empty()) {
+            // Initialize arguments if we're using tagged format and no buffered args
+            current_tool->arguments = "{";
+        }
+        // Now that we have the name, add the tool call to the result
+        if (pending_tool_call.has_value()) {
+            result.tool_calls.push_back(pending_tool_call.value());
+            pending_tool_call.reset();
+            current_tool = &result.tool_calls.back();
+        }
     }
 
     if (is_tool_args && current_tool) {
-        current_tool->arguments = std::string(trim_trailing_space(node.text));
-    }
-}
-
-void common_chat_peg_constructed_mapper::map(const common_peg_ast_node & node) {
-    common_chat_peg_mapper::map(node);
-
-    bool is_tool_open = node.tag == common_chat_peg_constructed_builder::TOOL_OPEN;
-    bool is_tool_name = node.tag == common_chat_peg_constructed_builder::TOOL_NAME;
-    bool is_tool_close = node.tag == common_chat_peg_constructed_builder::TOOL_CLOSE;
-    bool is_arg_open = node.tag == common_chat_peg_constructed_builder::TOOL_ARG_OPEN;
-    bool is_arg_close = node.tag == common_chat_peg_constructed_builder::TOOL_ARG_CLOSE;
-    bool is_arg_name = node.tag == common_chat_peg_constructed_builder::TOOL_ARG_NAME;
-    bool is_arg_string = node.tag == common_chat_peg_constructed_builder::TOOL_ARG_STRING_VALUE;
-    bool is_arg_json = node.tag == common_chat_peg_constructed_builder::TOOL_ARG_JSON_VALUE;
-
-    if (is_tool_open) {
-        result.tool_calls.emplace_back();
-        current_tool = &result.tool_calls.back();
-        arg_count = 0;
-    }
-
-    if (is_tool_name) {
-        current_tool->name = std::string(node.text);
-        current_tool->arguments = "{";
+        // For JSON format, the arguments come as a complete JSON object
+        // For tagged format, we build up arguments from individual arg_name/arg_value nodes
+        // Check if this looks like JSON (starts with {) vs tagged format (starts with <)
+        auto text = trim_trailing_space(node.text);
+        if (!text.empty() && text.front() == '{') {
+            // If we have the tool name, populate directly; otherwise buffer
+            if (!current_tool->name.empty()) {
+                current_tool->arguments = std::string(text);
+            } else {
+                args_buffer = std::string(text);
+            }
+        }
+        // If it's tagged format, we ignore this and let arg_name/arg_value build up the JSON
     }
 
     if (is_arg_open) {
-        needs_closing_quote = false;
+        // Reset for new argument
+        if (!current_tool->name.empty()) {
+            needs_closing_quote = false;
+        } else {
+            buffer_needs_closing_quote = false;
+        }
     }
 
     if (is_arg_name && current_tool) {
+        std::string arg_entry;
         if (arg_count > 0) {
-            current_tool->arguments += ",";
+            arg_entry = ",";
         }
-        current_tool->arguments += json(trim_trailing_space(node.text)).dump() + ":";
+        arg_entry += json(trim(node.text)).dump() + ":";
         ++arg_count;
+
+        // If we have the tool name, add directly; otherwise buffer
+        if (!current_tool->name.empty()) {
+            current_tool->arguments += arg_entry;
+        } else {
+            if (args_buffer.empty()) {
+                args_buffer = "{";
+            }
+            args_buffer += arg_entry;
+        }
     }
 
-    if (is_arg_string && current_tool) {
-        // Serialize to JSON, but exclude the end quote
-        std::string dumped = json(trim_trailing_space(node.text)).dump();
-        current_tool->arguments += dumped.substr(0, dumped.size() - 1);
-        needs_closing_quote = true;
+    if ((is_arg_value || is_arg_string_value) && current_tool) {
+        std::string value_content = std::string(trim_trailing_space(trim_leading_space(node.text, 1), 1));
+
+        std::string value_to_add;
+        if (!value_content.empty()) {
+            // For potential containers, normalize Python-style single quotes to JSON double quotes first
+            // This ensures consistent output during both partial and final parsing
+            // Note: is_arg_string_value means the schema explicitly declares this as a string type,
+            // so we should NOT treat it as a potential container even if it starts with [ or {
+            bool is_potential_container = !is_arg_string_value &&
+                (value_content[0] == '[' || value_content[0] == '{');
+            if (is_potential_container) {
+                value_content = normalize_quotes_to_json(value_content);
+            }
+
+            // Try to parse as JSON value (number, bool, null, object, array)
+            // For strings, we need special handling to support incremental parsing
+            try {
+                json parsed = json::parse(value_content);
+                if (parsed.is_string()) {
+                    // For string values, don't add closing quote yet (added by arg_close)
+                    // This ensures incremental parsing produces monotonic arguments
+                    std::string escaped = parsed.dump();
+                    // Remove the trailing quote
+                    if (!escaped.empty() && escaped.back() == '"') {
+                        escaped.pop_back();
+                    }
+                    value_to_add = escaped;
+                    if (!current_tool->name.empty()) {
+                        needs_closing_quote = true;
+                    } else {
+                        buffer_needs_closing_quote = true;
+                    }
+                } else {
+                    // For non-string values (number, bool, null, object, array), add raw value content
+                    // Using raw content instead of dump() ensures monotonicity for streaming
+                    // (prevents issues with spaces being removed by dump())
+                    value_to_add = value_content;
+                }
+            } catch (...) {
+                // JSON parsing failed - content is either incomplete (partial) or not valid JSON
+                // Note: potential containers were already normalized above, so value_content
+                // already has double quotes if it started with [ or {
+
+                if (node.is_partial && is_potential_container) {
+                    // During incremental parsing, if it looks like a JSON container, don't wrap in quotes yet
+                    // and don't escape. Just pass through the (already normalized) content.
+                    value_to_add = value_content;
+                } else {
+                    // Not valid JSON and NOT a potential partial container - treat as string value
+                    // Add opening quote if not already in a string
+                    if (!current_tool->name.empty()) {
+                        if (!needs_closing_quote) {
+                            value_to_add        = "\"";
+                            needs_closing_quote = true;
+                        }
+                    } else {
+                        if (!buffer_needs_closing_quote) {
+                            value_to_add               = "\"";
+                            buffer_needs_closing_quote = true;
+                        }
+                    }
+                    // Escape special characters in the string content
+                    std::string escaped = json(value_content).dump();
+                    // Remove the surrounding quotes from the escaped string
+                    if (escaped.size() >= 2 && escaped.front() == '"' && escaped.back() == '"') {
+                        escaped = escaped.substr(1, escaped.size() - 2);
+                    }
+                    value_to_add += escaped;
+                }
+            }
+        }
+
+        // If we have the tool name, add directly; otherwise buffer
+        if (!current_tool->name.empty()) {
+            current_tool->arguments += value_to_add;
+        } else {
+            if (args_buffer.empty()) {
+                args_buffer = "{";
+            }
+            args_buffer += value_to_add;
+        }
     }
 
     if (is_arg_close && current_tool) {
-        if (needs_closing_quote) {
-            current_tool->arguments += "\"";
-            needs_closing_quote = false;
+        if (!current_tool->name.empty()) {
+            if (needs_closing_quote) {
+                current_tool->arguments += "\"";
+                needs_closing_quote = false;
+            }
+        } else {
+            if (buffer_needs_closing_quote) {
+                if (args_buffer.empty()) {
+                    args_buffer = "{";
+                }
+                args_buffer += "\"";
+                buffer_needs_closing_quote = false;
+            }
         }
     }
 
-    if (is_arg_json && current_tool) {
-        current_tool->arguments += std::string(trim_trailing_space(node.text));
-    }
-
     if (is_tool_close && current_tool) {
-        if (needs_closing_quote) {
-            current_tool->arguments += "\"";
-            needs_closing_quote = false;
+        if (!current_tool->name.empty()) {
+            if (needs_closing_quote) {
+                current_tool->arguments += "\"";
+                needs_closing_quote = false;
+            }
+            // Close the arguments object if using tagged format
+            if (!current_tool->arguments.empty() && current_tool->arguments.back() != '}') {
+                current_tool->arguments += "}";
+            }
+            // If we have a pending tool call that wasn't added yet, add it now
+            if (pending_tool_call.has_value()) {
+                result.tool_calls.push_back(pending_tool_call.value());
+                pending_tool_call.reset();
+            }
+        } else {
+            // We're closing a tool without a name - flush the buffer
+            if (!args_buffer.empty()) {
+                current_tool->arguments = args_buffer;
+                args_buffer.clear();
+            }
+            if (buffer_needs_closing_quote) {
+                current_tool->arguments += "\"";
+                buffer_needs_closing_quote = false;
+            }
+            // Close the arguments object if using tagged format
+            if (!current_tool->arguments.empty() && current_tool->arguments.back() != '}') {
+                current_tool->arguments += "}";
+            }
+            // Don't add to result if no name - this prevents incomplete tool calls
+            pending_tool_call.reset();
         }
-        current_tool->arguments += "}";
     }
 }
diff --git a/common/chat-peg-parser.h b/common/chat-peg-parser.h
index b84cbed206..920d5cffd4 100644
--- a/common/chat-peg-parser.h
+++ b/common/chat-peg-parser.h
@@ -3,18 +3,28 @@
 #include "chat.h"
 #include "peg-parser.h"
 
+#include <map>
+#include <optional>
+
 class common_chat_peg_builder : public common_peg_parser_builder {
   public:
     static constexpr const char * REASONING_BLOCK = "reasoning-block";
-    static constexpr const char * REASONING = "reasoning";
-    static constexpr const char * CONTENT = "content";
+    static constexpr const char * REASONING       = "reasoning";
+    static constexpr const char * CONTENT         = "content";
 
     common_peg_parser reasoning_block(const common_peg_parser & p) { return tag(REASONING_BLOCK, p); }
+
     common_peg_parser reasoning(const common_peg_parser & p) { return tag(REASONING, p); }
+
     common_peg_parser content(const common_peg_parser & p) { return tag(CONTENT, p); }
+
+    common_peg_parser tag_with_safe_content(const std::string &       tag_name,
+                                            const std::string &       marker,
+                                            const common_peg_parser & p);
 };
 
-inline common_peg_arena build_chat_peg_parser(const std::function<common_peg_parser(common_chat_peg_builder & builder)> & fn) {
+inline common_peg_arena build_chat_peg_parser(
+    const std::function<common_peg_parser(common_chat_peg_builder & builder)> & fn) {
     common_chat_peg_builder builder;
     builder.set_root(fn(builder));
     return builder.build();
@@ -26,80 +36,119 @@ class common_chat_peg_mapper {
 
     common_chat_peg_mapper(common_chat_msg & msg) : result(msg) {}
 
+    virtual ~common_chat_peg_mapper() = default;
+
     virtual void from_ast(const common_peg_ast_arena & arena, const common_peg_parse_result & result);
     virtual void map(const common_peg_ast_node & node);
 };
 
-class common_chat_peg_native_builder : public common_chat_peg_builder {
+struct content_structure;
+struct tool_call_structure;
+
+class common_chat_peg_unified_builder : public common_chat_peg_builder {
   public:
-    static constexpr const char * TOOL = "tool";
-    static constexpr const char * TOOL_OPEN = "tool-open";
-    static constexpr const char * TOOL_CLOSE = "tool-close";
-    static constexpr const char * TOOL_ID = "tool-id";
-    static constexpr const char * TOOL_NAME = "tool-name";
-    static constexpr const char * TOOL_ARGS = "tool-args";
-
-    common_peg_parser tool(const common_peg_parser & p) { return tag(TOOL, p); }
-    common_peg_parser tool_open(const common_peg_parser & p) { return atomic(tag(TOOL_OPEN, p)); }
-    common_peg_parser tool_close(const common_peg_parser & p) { return atomic(tag(TOOL_CLOSE, p)); }
-    common_peg_parser tool_id(const common_peg_parser & p) { return atomic(tag(TOOL_ID, p)); }
-    common_peg_parser tool_name(const common_peg_parser & p) { return atomic(tag(TOOL_NAME, p)); }
-    common_peg_parser tool_args(const common_peg_parser & p) { return tag(TOOL_ARGS, p); }
-};
-
-class common_chat_peg_native_mapper : public common_chat_peg_mapper {
-    common_chat_tool_call * current_tool;
-
-  public:
-    common_chat_peg_native_mapper(common_chat_msg & msg) : common_chat_peg_mapper(msg) {}
-
-    void map(const common_peg_ast_node & node) override;
-};
-
-inline common_peg_arena build_chat_peg_native_parser(const std::function<common_peg_parser(common_chat_peg_native_builder & builder)> & fn) {
-    common_chat_peg_native_builder builder;
-    builder.set_root(fn(builder));
-    return builder.build();
-}
-
-class common_chat_peg_constructed_builder : public common_chat_peg_builder {
-  public:
-    static constexpr const char * TOOL = "tool";
-    static constexpr const char * TOOL_OPEN = "tool-open";
-    static constexpr const char * TOOL_CLOSE = "tool-close";
-    static constexpr const char * TOOL_NAME = "tool-name";
-    static constexpr const char * TOOL_ARG = "tool-arg";
-    static constexpr const char * TOOL_ARG_OPEN = "tool-arg-open";
+    // Tag constants
+    static constexpr const char * TOOL           = "tool";
+    static constexpr const char * TOOL_OPEN      = "tool-open";
+    static constexpr const char * TOOL_CLOSE     = "tool-close";
+    static constexpr const char * TOOL_ID        = "tool-id";
+    static constexpr const char * TOOL_NAME      = "tool-name";
+    static constexpr const char * TOOL_ARGS      = "tool-args";
+    static constexpr const char * TOOL_ARG       = "tool-arg";
+    static constexpr const char * TOOL_ARG_OPEN  = "tool-arg-open";
     static constexpr const char * TOOL_ARG_CLOSE = "tool-arg-close";
-    static constexpr const char * TOOL_ARG_NAME = "tool-arg-name";
-    static constexpr const char * TOOL_ARG_STRING_VALUE = "tool-arg-string-value";
-    static constexpr const char * TOOL_ARG_JSON_VALUE = "tool-arg-json-value";
+    static constexpr const char * TOOL_ARG_NAME         = "tool-arg-name";
+    static constexpr const char * TOOL_ARG_VALUE        = "tool-arg-value";
+    static constexpr const char * TOOL_ARG_STRING_VALUE = "tool-arg-string-value";  // For schema-declared string types
 
+    // Low-level tag methods
     common_peg_parser tool(const common_peg_parser & p) { return tag(TOOL, p); }
+
     common_peg_parser tool_open(const common_peg_parser & p) { return atomic(tag(TOOL_OPEN, p)); }
+
     common_peg_parser tool_close(const common_peg_parser & p) { return atomic(tag(TOOL_CLOSE, p)); }
+
+    common_peg_parser tool_id(const common_peg_parser & p) { return atomic(tag(TOOL_ID, p)); }
+
     common_peg_parser tool_name(const common_peg_parser & p) { return atomic(tag(TOOL_NAME, p)); }
+
+    common_peg_parser tool_args(const common_peg_parser & p) { return tag(TOOL_ARGS, p); }
+
     common_peg_parser tool_arg(const common_peg_parser & p) { return tag(TOOL_ARG, p); }
+
     common_peg_parser tool_arg_open(const common_peg_parser & p) { return atomic(tag(TOOL_ARG_OPEN, p)); }
+
     common_peg_parser tool_arg_close(const common_peg_parser & p) { return atomic(tag(TOOL_ARG_CLOSE, p)); }
+
     common_peg_parser tool_arg_name(const common_peg_parser & p) { return atomic(tag(TOOL_ARG_NAME, p)); }
+
+    common_peg_parser tool_arg_value(const common_peg_parser & p) { return tag(TOOL_ARG_VALUE, p); }
+
+    // Use for schema-declared string types - won't be treated as potential JSON container
     common_peg_parser tool_arg_string_value(const common_peg_parser & p) { return tag(TOOL_ARG_STRING_VALUE, p); }
-    common_peg_parser tool_arg_json_value(const common_peg_parser & p) { return tag(TOOL_ARG_JSON_VALUE, p); }
+
+    common_peg_parser tool_arg_json_value(const common_peg_parser & p) { return tag(TOOL_ARG_VALUE, p); }
+
+    // High-level building methods
+
+    // Build reasoning block based on ContentStructure
+    common_peg_parser build_reasoning_block(const content_structure & cs,
+                                            common_reasoning_format  reasoning_format,
+                                            bool                     thinking_forced_open);
+
+    // Build content block based on ContentStructure
+    common_peg_parser build_content_block(const content_structure & cs,
+                                         common_reasoning_format reasoning_format,
+                                         const std::string &    tool_section_start = "");
+
+    // Build complete tool section based on ToolCallStructure
+    common_peg_parser build_tool_section(const tool_call_structure & ts,
+                                         const nlohmann::json &    tools,
+                                         bool                      parallel_tool_calls,
+                                         bool                      force_tool_calls);
+
+    // Build single function parser based on ToolCallStructure
+    common_peg_parser build_function(const tool_call_structure & ts,
+                                     const std::string &       name,
+                                     const nlohmann::json &    schema);
+
+    // Build arguments parser based on ToolCallStructure
+    common_peg_parser build_arguments(const tool_call_structure & ts, const nlohmann::json & params);
+
+    // Legacy-compatible helper for building standard JSON tool calls
+    // Used by tests and manual parsers
+    common_peg_parser standard_json_tools(const std::string &    section_start,
+                                          const std::string &    section_end,
+                                          const nlohmann::json & tools,
+                                          bool                   parallel_tool_calls,
+                                          bool                   force_tool_calls);
+
+    // Legacy-compatible helper for building XML/tagged style tool calls
+    // Used by tests and manual parsers
+    common_peg_parser standard_constructed_tools(const std::map<std::string, std::string> & markers,
+                                                 const nlohmann::json &                     tools,
+                                                 bool                                       parallel_tool_calls,
+                                                 bool                                       force_tool_calls);
 };
 
-class common_chat_peg_constructed_mapper : public common_chat_peg_mapper {
-    common_chat_tool_call * current_tool;
-    int arg_count = 0;
-    bool needs_closing_quote = false;
-
-  public:
-    common_chat_peg_constructed_mapper(common_chat_msg & msg) : common_chat_peg_mapper(msg) {}
-
-    void map(const common_peg_ast_node & node) override;
-};
-
-inline common_peg_arena build_chat_peg_constructed_parser(const std::function<common_peg_parser(common_chat_peg_constructed_builder & builder)> & fn) {
-    common_chat_peg_constructed_builder builder;
+inline common_peg_arena build_chat_peg_unified_parser(
+    const std::function<common_peg_parser(common_chat_peg_unified_builder & builder)> & fn) {
+    common_chat_peg_unified_builder builder;
     builder.set_root(fn(builder));
     return builder.build();
 }
+
+class common_chat_peg_unified_mapper : public common_chat_peg_mapper {
+    std::optional<common_chat_tool_call> pending_tool_call;  // Tool call waiting for name
+    common_chat_tool_call *              current_tool        = nullptr;
+    int                                  arg_count           = 0;
+    bool                                 needs_closing_quote = false;
+    std::string                          args_buffer;  // Buffer to delay arguments until tool name is known
+    bool                                 buffer_needs_closing_quote = false;  // Track quote state for buffered args
+
+  public:
+    common_chat_peg_unified_mapper(common_chat_msg & msg) : common_chat_peg_mapper(msg) {}
+
+    void from_ast(const common_peg_ast_arena & arena, const common_peg_parse_result & parse_result_arg) override;
+    void map(const common_peg_ast_node & node) override;
+};
diff --git a/common/chat.cpp b/common/chat.cpp
index 9f398eb4a6..1ab77ee518 100644
--- a/common/chat.cpp
+++ b/common/chat.cpp
@@ -1,8 +1,10 @@
 #include "chat.h"
-#include "chat-parser.h"
+
+#include "chat-auto-parser-helpers.h"
+#include "chat-auto-parser.h"
 #include "chat-peg-parser.h"
 #include "common.h"
-#include "json-partial.h"
+#include "ggml.h"
 #include "json-schema-to-grammar.h"
 #include "log.h"
 #include "regex-partial.h"
@@ -12,13 +14,13 @@
 #include "jinja/runtime.h"
 #include "jinja/caps.h"
 
-#include <algorithm>
 #include <cstdio>
-#include <cctype>
+#include <cstdlib>
 #include <exception>
 #include <functional>
-#include <iostream>
+
 #include <optional>
+#include <sstream>
 #include <stdexcept>
 #include <string>
 #include <vector>
@@ -26,14 +28,26 @@
 using json = nlohmann::ordered_json;
 
 static std::string format_time(const std::chrono::system_clock::time_point & now, const std::string & format) {
-    auto time = std::chrono::system_clock::to_time_t(now);
-    auto local_time = *std::localtime(&time);
+    auto               time       = std::chrono::system_clock::to_time_t(now);
+    auto               local_time = *std::localtime(&time);
     std::ostringstream ss;
     ss << std::put_time(&local_time, format.c_str());
     auto res = ss.str();
     return res;
 }
 
+static json safe_args_parse(const std::string & to_parse) {
+    std::string stripped = to_parse;
+    if (to_parse.at(0) == '"' && to_parse.at(to_parse.length() - 1) == '"') {
+        stripped = to_parse.substr(1, to_parse.length() - 1);
+    }
+    try {
+        return json::parse(stripped);
+    } catch (json::exception & e) {
+        return stripped;
+    }
+}
+
 static std::string string_diff(const std::string & last, const std::string & current) {
     if (last.empty()) {
         return current;
@@ -122,7 +136,8 @@ json common_chat_msg::to_json_oaicompat(bool concat_typed_text) const {
     return jmsg;
 }
 
-std::vector<common_chat_msg_diff> common_chat_msg_diff::compute_diffs(const common_chat_msg & msg_prv, const common_chat_msg & msg_new) {
+std::vector<common_chat_msg_diff> common_chat_msg_diff::compute_diffs(const common_chat_msg & msg_prv,
+                                                                      const common_chat_msg & msg_new) {
     std::vector<common_chat_msg_diff> diffs;
     if (msg_new.tool_calls.size() > msg_prv.tool_calls.size()) {
         diffs.reserve(msg_new.tool_calls.size() - msg_prv.tool_calls.size() + 3);
@@ -132,38 +147,56 @@ std::vector<common_chat_msg_diff> common_chat_msg_diff::compute_diffs(const comm
 
     // TODO: these can become expensive for long messages - how to optimize?
     if (msg_prv.reasoning_content != msg_new.reasoning_content) {
-        auto & diff = diffs.emplace_back();
+        auto & diff                  = diffs.emplace_back();
         diff.reasoning_content_delta = string_diff(msg_prv.reasoning_content, msg_new.reasoning_content);
     }
     if (msg_prv.content != msg_new.content) {
-        auto & diff = diffs.emplace_back();
+        auto & diff        = diffs.emplace_back();
         diff.content_delta = string_diff(msg_prv.content, msg_new.content);
     }
 
     if (msg_new.tool_calls.size() < msg_prv.tool_calls.size()) {
-        throw std::runtime_error("Invalid diff: now finding less tool calls!");
+        std::string err = "Invalid diff: now finding less tool calls!\n";
+        err += "  Previous (" + std::to_string(msg_prv.tool_calls.size()) + "):\n";
+        for (const auto & tc : msg_prv.tool_calls) {
+            err += "    - name: '" + tc.name + "', args: '" + tc.arguments + "'\n";
+        }
+        err += "  Current (" + std::to_string(msg_new.tool_calls.size()) + "):\n";
+        for (const auto & tc : msg_new.tool_calls) {
+            err += "    - name: '" + tc.name + "', args: '" + tc.arguments + "'\n";
+        }
+        err += "  Current msg text content:\n" + msg_new.content + "\n";
+        throw std::runtime_error(err);
     }
 
     if (!msg_prv.tool_calls.empty()) {
-        const auto idx = msg_prv.tool_calls.size() - 1;
+        const auto   idx  = msg_prv.tool_calls.size() - 1;
         const auto & pref = msg_prv.tool_calls[idx];
         const auto & newf = msg_new.tool_calls[idx];
-        if (pref.name != newf.name) {
-            throw std::runtime_error("Invalid diff: tool call mismatch!");
+        // Allow tool name to change during incremental parsing:
+        // - empty -> non-empty (initial discovery)
+        // - prefix -> longer string (name grows as more input is parsed)
+        if (pref.name != newf.name && !pref.name.empty() && !newf.name.empty()) {
+            // Check if one is a prefix of the other (for incremental parsing where names grow or shrink)
+            bool is_prefix = (newf.name.rfind(pref.name, 0) == 0);
+            if (!is_prefix) {
+                LOG_ERR("Tool call mismatch: prev='%s' new='%s'\n", pref.name.c_str(), newf.name.c_str());
+                throw std::runtime_error("Invalid diff: tool call mismatch!");
+            }
         }
         const auto args_diff = string_diff(pref.arguments, newf.arguments);
-        if (!args_diff.empty() || pref.id != newf.id) {
-            auto & diff = diffs.emplace_back();
+        if (!args_diff.empty() || pref.id != newf.id || pref.name != newf.name) {
+            auto & diff          = diffs.emplace_back();
             diff.tool_call_index = idx;
-            if (pref.id != newf.id) {
-                diff.tool_call_delta.id = newf.id;
+            if (pref.id != newf.id || pref.name != newf.name) {
+                diff.tool_call_delta.id   = newf.id;
                 diff.tool_call_delta.name = newf.name;
             }
             diff.tool_call_delta.arguments = args_diff;
         }
     }
     for (size_t idx = msg_prv.tool_calls.size(); idx < msg_new.tool_calls.size(); ++idx) {
-        auto & diff = diffs.emplace_back();
+        auto & diff          = diffs.emplace_back();
         diff.tool_call_index = idx;
         diff.tool_call_delta = msg_new.tool_calls[idx];
     }
@@ -173,94 +206,14 @@ std::vector<common_chat_msg_diff> common_chat_msg_diff::compute_diffs(const comm
 
 using chat_template_caps = jinja::caps;
 
-struct common_chat_template {
-    jinja::program prog;
-    std::string bos_tok;
-    std::string eos_tok;
-    std::string src;
-    chat_template_caps caps;
-
-    common_chat_template(const std::string & src, const std::string & bos_token, const std::string & eos_token) {
-        jinja::lexer lexer;
-        auto lexer_res = lexer.tokenize(src);
-        this->prog = jinja::parse_from_tokens(lexer_res);
-
-        this->src = lexer_res.source;
-        this->bos_tok = bos_token;
-        this->eos_tok = eos_token;
-
-        this->caps = jinja::caps_get(prog);
-        // LOG_INF("%s: caps:\n%s\n", __func__, this->caps.to_string().c_str());
-    }
-
-    const std::string & source() const { return src; }
-    const std::string & bos_token() const { return bos_tok; }
-    const std::string & eos_token() const { return eos_tok; }
-
-    // TODO: this is ugly, refactor it somehow
-    json add_system(const json & messages, const std::string & system_prompt) const {
-        GGML_ASSERT(messages.is_array());
-        auto msgs_copy = messages;
-        if (!caps.supports_system_role) {
-            if (msgs_copy.empty()) {
-                msgs_copy.insert(msgs_copy.begin(), json{
-                    {"role", "user"},
-                    {"content", system_prompt}
-                });
-            } else {
-                auto & first_msg = msgs_copy[0];
-                if (!first_msg.contains("content")) {
-                    first_msg["content"] = "";
-                }
-                first_msg["content"] = system_prompt + "\n\n"
-                    + first_msg["content"].get<std::string>();
-            }
-        } else {
-            if (msgs_copy.empty() || msgs_copy[0].at("role") != "system") {
-                msgs_copy.insert(msgs_copy.begin(), json{
-                    {"role", "system"},
-                    {"content", system_prompt}
-                });
-            } else if (msgs_copy[0].at("role") == "system") {
-                msgs_copy[0]["content"] = system_prompt;
-            }
-        }
-        return msgs_copy;
-    }
-
-    chat_template_caps original_caps() const {
-        return caps;
-    }
-
-};
-
 struct common_chat_templates {
     bool add_bos;
     bool add_eos;
-    bool has_explicit_template; // Model had builtin template or template overridde was specified.
-    std::unique_ptr<common_chat_template> template_default; // always set (defaults to chatml)
+    bool has_explicit_template;  // Model had builtin template or template overridde was specified.
+    std::unique_ptr<common_chat_template> template_default;  // always set (defaults to chatml)
     std::unique_ptr<common_chat_template> template_tool_use;
 };
 
-struct templates_params {
-    json messages;
-    json tools;
-    common_chat_tool_choice tool_choice;
-    json json_schema;
-    bool parallel_tool_calls;
-    common_reasoning_format reasoning_format;
-    bool stream;
-    std::string grammar;
-    bool add_generation_prompt = true;
-    bool enable_thinking = true;
-    std::chrono::system_clock::time_point now = std::chrono::system_clock::now();
-    json extra_context;
-    bool add_bos;
-    bool add_eos;
-    bool is_inference = true;
-    bool mark_input = true; // whether to mark input strings in the jinja context
-};
-
 common_chat_tool_choice common_chat_tool_choice_parse_oaicompat(const std::string & tool_choice) {
     if (tool_choice == "auto") {
         return COMMON_CHAT_TOOL_CHOICE_AUTO;
@@ -276,13 +229,13 @@ common_chat_tool_choice common_chat_tool_choice_parse_oaicompat(const std::strin
 
 bool common_chat_templates_support_enable_thinking(const common_chat_templates * chat_templates) {
     common_chat_templates_inputs dummy_inputs;
-    common_chat_msg msg;
-    msg.role = "user";
-    msg.content = "test";
-    dummy_inputs.messages = {msg};
-    dummy_inputs.enable_thinking = false;
-    const auto rendered_no_thinking = common_chat_templates_apply(chat_templates, dummy_inputs);
-    dummy_inputs.enable_thinking = true;
+    common_chat_msg              msg;
+    msg.role                          = "user";
+    msg.content                       = "test";
+    dummy_inputs.messages             = { msg };
+    dummy_inputs.enable_thinking      = false;
+    const auto rendered_no_thinking   = common_chat_templates_apply(chat_templates, dummy_inputs);
+    dummy_inputs.enable_thinking      = true;
     const auto rendered_with_thinking = common_chat_templates_apply(chat_templates, dummy_inputs);
     return rendered_no_thinking.prompt != rendered_with_thinking.prompt;
 }
@@ -291,7 +244,6 @@ std::vector<common_chat_msg> common_chat_msgs_parse_oaicompat(const json & messa
     std::vector<common_chat_msg> msgs;
 
     try {
-
         if (!messages.is_array()) {
             throw std::invalid_argument("Expected 'messages' to be an array, got " + messages.dump());
         }
@@ -307,7 +259,7 @@ std::vector<common_chat_msg> common_chat_msgs_parse_oaicompat(const json & messa
             }
             msg.role = message.at("role");
 
-            auto has_content = message.contains("content");
+            auto has_content    = message.contains("content");
             auto has_tool_calls = message.contains("tool_calls");
             if (has_content) {
                 const auto & content = message.at("content");
@@ -328,7 +280,9 @@ std::vector<common_chat_msg> common_chat_msgs_parse_oaicompat(const json & messa
                         msg.content_parts.push_back(msg_part);
                     }
                 } else if (!content.is_null()) {
-                    throw std::invalid_argument("Invalid 'content' type: expected string or array, got " + content.dump() + " (ref: https://github.com/ggml-org/llama.cpp/issues/8367)");
+                    throw std::invalid_argument("Invalid 'content' type: expected string or array, got " +
+                                                content.dump() +
+                                                " (ref: https://github.com/ggml-org/llama.cpp/issues/8367)");
                 }
             }
             if (has_tool_calls) {
@@ -348,8 +302,13 @@ std::vector<common_chat_msg> common_chat_msgs_parse_oaicompat(const json & messa
                     if (!fc.contains("name")) {
                         throw std::invalid_argument("Missing tool call name: " + tool_call.dump());
                     }
-                    tc.name = fc.at("name");
-                    tc.arguments = fc.at("arguments");
+                    tc.name           = fc.at("name");
+                    const auto & args = fc.at("arguments");
+                    if (args.is_string()) {
+                        tc.arguments = args;
+                    } else {
+                        tc.arguments = args.dump();
+                    }
                     if (tool_call.contains("id")) {
                         tc.id = tool_call.at("id");
                     }
@@ -357,7 +316,9 @@ std::vector<common_chat_msg> common_chat_msgs_parse_oaicompat(const json & messa
                 }
             }
             if (!has_content && !has_tool_calls) {
-                throw std::invalid_argument("Expected 'content' or 'tool_calls' (ref: https://github.com/ggml-org/llama.cpp/issues/8367 & https://github.com/ggml-org/llama.cpp/issues/12279)");
+                throw std::invalid_argument(
+                    "Expected 'content' or 'tool_calls' (ref: https://github.com/ggml-org/llama.cpp/issues/8367 & "
+                    "https://github.com/ggml-org/llama.cpp/issues/12279)");
             }
             if (message.contains("reasoning_content")) {
                 msg.reasoning_content = message.at("reasoning_content");
@@ -432,12 +393,13 @@ json common_chat_tools_to_json_oaicompat(const std::vector<common_chat_tool> & t
     auto result = json::array();
     for (const auto & tool : tools) {
         result.push_back({
-            {"type", "function"},
-            {"function", {
-                {"name", tool.name},
-                {"description", tool.description},
-                {"parameters", json::parse(tool.parameters)},
-            }},
+            { "type",     "function" },
+            { "function",
+             {
+                  { "name", tool.name },
+                  { "description", tool.description },
+                  { "parameters", json::parse(tool.parameters) },
+              }                      },
         });
     }
     return result;
@@ -455,16 +417,20 @@ json common_chat_msg_diff_to_json_oaicompat(const common_chat_msg_diff & diff) {
         json tool_call;
         tool_call["index"] = diff.tool_call_index;
         if (!diff.tool_call_delta.id.empty()) {
-            tool_call["id"] = diff.tool_call_delta.id;
+            tool_call["id"]   = diff.tool_call_delta.id;
             tool_call["type"] = "function";
         }
-        json function = json::object();
-        if (!diff.tool_call_delta.name.empty()) {
-            function["name"] = diff.tool_call_delta.name;
+        if (!diff.tool_call_delta.name.empty() || !diff.tool_call_delta.arguments.empty()) {
+            json function = json::object();
+            if (!diff.tool_call_delta.name.empty()) {
+                function["name"] = diff.tool_call_delta.name;
+            }
+            if (!diff.tool_call_delta.arguments.empty()) {
+                function["arguments"] = diff.tool_call_delta.arguments;
+            }
+            tool_call["function"] = function;
         }
-        function["arguments"] = diff.tool_call_delta.arguments;
-        tool_call["function"] = function;
-        delta["tool_calls"] = json::array({tool_call});
+        delta["tool_calls"] = json::array({ tool_call });
     }
     return delta;
 }
@@ -473,13 +439,13 @@ bool common_chat_verify_template(const std::string & tmpl, bool use_jinja) {
     if (use_jinja) {
         try {
             common_chat_msg msg;
-            msg.role = "user";
+            msg.role    = "user";
             msg.content = "test";
 
             auto tmpls = common_chat_templates_init(/* model= */ nullptr, tmpl);
 
             common_chat_templates_inputs inputs;
-            inputs.messages = {msg};
+            inputs.messages = { msg };
 
             common_chat_templates_apply(tmpls.get(), inputs);
             return true;
@@ -488,28 +454,28 @@ bool common_chat_verify_template(const std::string & tmpl, bool use_jinja) {
             return false;
         }
     }
-    llama_chat_message chat[] = {{"user", "test"}};
+    llama_chat_message chat[] = {
+        { "user", "test" }
+    };
     const int res = llama_chat_apply_template(tmpl.c_str(), chat, 1, true, nullptr, 0);
     return res >= 0;
 }
 
-std::string common_chat_format_single(
-        const struct common_chat_templates * tmpls,
-        const std::vector<common_chat_msg> & past_msg,
-        const common_chat_msg & new_msg,
-        bool add_ass,
-        bool use_jinja) {
-
+std::string common_chat_format_single(const struct common_chat_templates * tmpls,
+                                      const std::vector<common_chat_msg> & past_msg,
+                                      const common_chat_msg &              new_msg,
+                                      bool                                 add_ass,
+                                      bool                                 use_jinja) {
     common_chat_templates_inputs inputs;
     inputs.use_jinja = use_jinja;
-    inputs.add_bos = tmpls->add_bos;
-    inputs.add_eos = tmpls->add_eos;
+    inputs.add_bos   = tmpls->add_bos;
+    inputs.add_eos   = tmpls->add_eos;
 
     std::string fmt_past_msg;
     if (!past_msg.empty()) {
-        inputs.messages = past_msg;
+        inputs.messages              = past_msg;
         inputs.add_generation_prompt = false;
-        fmt_past_msg = common_chat_templates_apply(tmpls, inputs).prompt;
+        fmt_past_msg                 = common_chat_templates_apply(tmpls, inputs).prompt;
     }
     std::ostringstream ss;
     // if the past_msg ends with a newline, we must preserve it in the formatted version
@@ -519,37 +485,39 @@ std::string common_chat_format_single(
     // format chat with new_msg
     inputs.messages.push_back(new_msg);
     inputs.add_generation_prompt = add_ass;
-    auto fmt_new_msg = common_chat_templates_apply(tmpls, inputs).prompt;
+    auto fmt_new_msg             = common_chat_templates_apply(tmpls, inputs).prompt;
     // get the diff part
     ss << fmt_new_msg.substr(fmt_past_msg.size(), fmt_new_msg.size() - fmt_past_msg.size());
     return ss.str();
 }
 
-std::string common_chat_format_example(const struct common_chat_templates * tmpls, bool use_jinja, const std::map<std::string, std::string> & chat_template_kwargs) {
+std::string common_chat_format_example(const struct common_chat_templates *       tmpls,
+                                       bool                                       use_jinja,
+                                       const std::map<std::string, std::string> & chat_template_kwargs) {
     common_chat_templates_inputs inputs;
-    inputs.use_jinja = use_jinja;
-    inputs.add_bos = tmpls->add_bos;
-    inputs.add_eos = tmpls->add_eos;
+    inputs.use_jinja            = use_jinja;
+    inputs.add_bos              = tmpls->add_bos;
+    inputs.add_eos              = tmpls->add_eos;
     inputs.chat_template_kwargs = chat_template_kwargs;
-    auto add_simple_msg = [&](auto role, auto content) {
+    auto add_simple_msg         = [&](auto role, auto content) {
         common_chat_msg msg;
-        msg.role = role;
+        msg.role    = role;
         msg.content = content;
         inputs.messages.push_back(msg);
     };
-    add_simple_msg("system",    "You are a helpful assistant");
-    add_simple_msg("user",      "Hello");
+    add_simple_msg("system", "You are a helpful assistant");
+    add_simple_msg("user", "Hello");
     add_simple_msg("assistant", "Hi there");
-    add_simple_msg("user",      "How are you?");
+    add_simple_msg("user", "How are you?");
     return common_chat_templates_apply(tmpls, inputs).prompt;
 }
 
-#define CHATML_TEMPLATE_SRC \
-    "{%- for message in messages -%}\n" \
+#define CHATML_TEMPLATE_SRC                                                               \
+    "{%- for message in messages -%}\n"                                                   \
     "  {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>\n' -}}\n" \
-    "{%- endfor -%}\n" \
-    "{%- if add_generation_prompt -%}\n" \
-    "  {{- '<|im_start|>assistant\n' -}}\n" \
+    "{%- endfor -%}\n"                                                                    \
+    "{%- if add_generation_prompt -%}\n"                                                  \
+    "  {{- '<|im_start|>assistant\n' -}}\n"                                               \
     "{%- endif -%}"
 
 void common_chat_templates_free(struct common_chat_templates * tmpls) {
@@ -567,19 +535,16 @@ std::string common_chat_templates_source(const struct common_chat_templates * tm
                 return tmpls->template_tool_use->source();
             }
             return "";
-        } else {
-            LOG_DBG("%s: unknown template variant: %s\n", __func__, variant.c_str());
         }
+        LOG_DBG("%s: unknown template variant: %s\n", __func__, variant.c_str());
     }
     return tmpls->template_default->source();
 }
 
-common_chat_templates_ptr common_chat_templates_init(
-    const struct llama_model * model,
-    const std::string & chat_template_override,
-    const std::string & bos_token_override,
-    const std::string & eos_token_override)
-{
+common_chat_templates_ptr common_chat_templates_init(const struct llama_model * model,
+                                                     const std::string &        chat_template_override,
+                                                     const std::string &        bos_token_override,
+                                                     const std::string &        eos_token_override) {
     std::string default_template_src;
     std::string template_tool_use_src;
 
@@ -588,7 +553,7 @@ common_chat_templates_ptr common_chat_templates_init(
         GGML_ASSERT(model != nullptr);
         const auto * str = llama_model_chat_template(model, /* name */ nullptr);
         if (str) {
-            default_template_src = str;
+            default_template_src  = str;
             has_explicit_template = true;
         }
         str = llama_model_chat_template(model, /* name */ "tool_use");
@@ -610,34 +575,40 @@ common_chat_templates_ptr common_chat_templates_init(
     // TODO @ngxson : this is a temporary hack to prevent chat template from throwing an error
     // Ref: https://github.com/ggml-org/llama.cpp/pull/15230#issuecomment-3173959633
     if (default_template_src.find("<|channel|>") != std::string::npos
-            // search for the error message and patch it
-            && default_template_src.find("in message.content or") != std::string::npos) {
+        // search for the error message and patch it
+        && default_template_src.find("in message.content or") != std::string::npos) {
         string_replace_all(default_template_src,
-            "{%- if \"<|channel|>analysis<|message|>\" in message.content or \"<|channel|>final<|message|>\" in message.content %}",
-            "{%- if false %}");
+                           "{%- if \"<|channel|>analysis<|message|>\" in message.content or "
+                           "\"<|channel|>final<|message|>\" in message.content %}",
+                           "{%- if false %}");
     }
 
     // TODO @aldehir : this is a temporary fix, pending Minja changes
     // Ref: https://github.com/ggml-org/llama.cpp/pull/17713#issuecomment-3631342664
     if (default_template_src.find("[TOOL_CALLS]") != std::string::npos
-            // search for the error message and patch it
-            && default_template_src.find("if (message['content'] is none or") != std::string::npos) {
+        // search for the error message and patch it
+        && default_template_src.find("if (message['content'] is none or") != std::string::npos) {
         string_replace_all(default_template_src,
-            "{%- if (message['content'] is none or message['content'] == '' or message['content']|length == 0) and (message['tool_calls'] is not defined or message['tool_calls'] is none or message['tool_calls']|length == 0) %}",
-            "{%- if false %}");
+                           "{%- if (message['content'] is none or message['content'] == '' or "
+                           "message['content']|length == 0) and (message['tool_calls'] is not defined or "
+                           "message['tool_calls'] is none or message['tool_calls']|length == 0) %}",
+                           "{%- if false %}");
     }
 
     std::string token_bos = bos_token_override;
     std::string token_eos = eos_token_override;
-    bool add_bos = false;
-    bool add_eos = false;
+    bool        add_bos   = false;
+    bool        add_eos   = false;
     if (model) {
-        const auto * vocab = llama_model_get_vocab(model);
-        const auto get_token = [&](llama_token token, const char * name, const char * jinja_variable_name) {
+        const auto * vocab     = llama_model_get_vocab(model);
+        const auto   get_token = [&](llama_token token, const char * name, const char * jinja_variable_name) {
             if (token == LLAMA_TOKEN_NULL) {
-                if (default_template_src.find(jinja_variable_name) != std::string::npos
-                    || template_tool_use_src.find(jinja_variable_name) != std::string::npos) {
-                    LOG_WRN("common_chat_templates_init: warning: vocab does not have a %s token, jinja template won't work as intended.\n", name);
+                if (default_template_src.find(jinja_variable_name) != std::string::npos ||
+                    template_tool_use_src.find(jinja_variable_name) != std::string::npos) {
+                    LOG_WRN(
+                        "common_chat_templates_init: warning: vocab does not have a %s token, jinja template won't "
+                          "work as intended.\n",
+                        name);
                 }
                 return std::string();
             }
@@ -645,13 +616,13 @@ common_chat_templates_ptr common_chat_templates_init(
         };
         token_bos = get_token(llama_vocab_bos(vocab), "BOS", "bos_token");
         token_eos = get_token(llama_vocab_eos(vocab), "EOS", "eos_token");
-        add_bos = llama_vocab_get_add_bos(vocab);
-        add_eos = llama_vocab_get_add_eos(vocab);
+        add_bos   = llama_vocab_get_add_bos(vocab);
+        add_eos   = llama_vocab_get_add_eos(vocab);
     }
     common_chat_templates_ptr tmpls(new common_chat_templates());
     tmpls->has_explicit_template = has_explicit_template;
-    tmpls->add_bos = add_bos;
-    tmpls->add_eos = add_eos;
+    tmpls->add_bos               = add_bos;
+    tmpls->add_eos               = add_eos;
     try {
         tmpls->template_default = std::make_unique<common_chat_template>(default_template_src, token_bos, token_eos);
     } catch (const std::exception & e) {
@@ -672,36 +643,12 @@ common_chat_templates_ptr common_chat_templates_init(
 
 const char * common_chat_format_name(common_chat_format format) {
     switch (format) {
-        case COMMON_CHAT_FORMAT_CONTENT_ONLY: return "Content-only";
-        case COMMON_CHAT_FORMAT_GENERIC: return "Generic";
-        case COMMON_CHAT_FORMAT_MISTRAL_NEMO: return "Mistral Nemo";
-        case COMMON_CHAT_FORMAT_MAGISTRAL: return "Magistral";
-        case COMMON_CHAT_FORMAT_LLAMA_3_X: return "Llama 3.x";
-        case COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS: return "Llama 3.x with builtin tools";
-        case COMMON_CHAT_FORMAT_DEEPSEEK_R1: return "DeepSeek R1";
-        case COMMON_CHAT_FORMAT_FIREFUNCTION_V2: return "FireFunction v2";
-        case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2: return "Functionary v3.2";
-        case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1: return "Functionary v3.1 Llama 3.1";
-        case COMMON_CHAT_FORMAT_DEEPSEEK_V3_1: return "DeepSeek V3.1";
-        case COMMON_CHAT_FORMAT_HERMES_2_PRO: return "Hermes 2 Pro";
-        case COMMON_CHAT_FORMAT_COMMAND_R7B: return "Command R7B";
-        case COMMON_CHAT_FORMAT_GRANITE: return "Granite";
-        case COMMON_CHAT_FORMAT_GPT_OSS: return "GPT-OSS";
-        case COMMON_CHAT_FORMAT_SEED_OSS: return "Seed-OSS";
-        case COMMON_CHAT_FORMAT_NEMOTRON_V2: return "Nemotron V2";
-        case COMMON_CHAT_FORMAT_APERTUS: return "Apertus";
-        case COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS: return "LFM2 with JSON tools";
-        case COMMON_CHAT_FORMAT_MINIMAX_M2: return "MiniMax-M2";
-        case COMMON_CHAT_FORMAT_GLM_4_5: return "GLM 4.5";
-        case COMMON_CHAT_FORMAT_KIMI_K2: return "Kimi K2";
-        case COMMON_CHAT_FORMAT_QWEN3_CODER_XML: return "Qwen3 Coder";
-        case COMMON_CHAT_FORMAT_APRIEL_1_5: return "Apriel 1.5";
-        case COMMON_CHAT_FORMAT_XIAOMI_MIMO: return "Xiaomi MiMo";
-        case COMMON_CHAT_FORMAT_SOLAR_OPEN: return "Solar Open";
-        case COMMON_CHAT_FORMAT_EXAONE_MOE: return "EXAONE MoE";
-        case COMMON_CHAT_FORMAT_PEG_SIMPLE: return "peg-simple";
-        case COMMON_CHAT_FORMAT_PEG_NATIVE: return "peg-native";
-        case COMMON_CHAT_FORMAT_PEG_CONSTRUCTED: return "peg-constructed";
+        case COMMON_CHAT_FORMAT_CONTENT_ONLY:
+            return "Content-only";
+        case COMMON_CHAT_FORMAT_PEG_SIMPLE:
+            return "peg-simple";
+        case COMMON_CHAT_FORMAT_PEG_NATIVE:
+            return "peg-native";
         default:
             throw std::runtime_error("Unknown chat format");
     }
@@ -709,10 +656,14 @@ const char * common_chat_format_name(common_chat_format format) {
 
 const char * common_reasoning_format_name(common_reasoning_format format) {
     switch (format) {
-        case COMMON_REASONING_FORMAT_NONE:     return "none";
-        case COMMON_REASONING_FORMAT_AUTO:     return "auto";
-        case COMMON_REASONING_FORMAT_DEEPSEEK: return "deepseek";
-        case COMMON_REASONING_FORMAT_DEEPSEEK_LEGACY: return "deepseek-legacy";
+        case COMMON_REASONING_FORMAT_NONE:
+            return "none";
+        case COMMON_REASONING_FORMAT_AUTO:
+            return "auto";
+        case COMMON_REASONING_FORMAT_DEEPSEEK:
+            return "deepseek";
+        case COMMON_REASONING_FORMAT_DEEPSEEK_LEGACY:
+            return "deepseek-legacy";
         default:
             throw std::runtime_error("Unknown reasoning format");
     }
@@ -721,11 +672,14 @@ const char * common_reasoning_format_name(common_reasoning_format format) {
 common_reasoning_format common_reasoning_format_from_name(const std::string & format) {
     if (format == "none") {
         return COMMON_REASONING_FORMAT_NONE;
-    } else if (format == "auto") {
+    }
+    if (format == "auto") {
         return COMMON_REASONING_FORMAT_AUTO;
-    } else if (format == "deepseek") {
+    }
+    if (format == "deepseek") {
         return COMMON_REASONING_FORMAT_DEEPSEEK;
-    } else if (format == "deepseek-legacy") {
+    }
+    if (format == "deepseek-legacy") {
         return COMMON_REASONING_FORMAT_DEEPSEEK_LEGACY;
     }
     throw std::runtime_error("Unknown reasoning format: " + format);
@@ -741,7 +695,8 @@ static void foreach_function(const json & tools, const std::function<void(const
     }
 }
 
-static void foreach_parameter(const json & function, const std::function<void(const std::string &, const json &, bool)> & fn) {
+static void foreach_parameter(const json &                                                         function,
+                              const std::function<void(const std::string &, const json &, bool)> & fn) {
     if (!function.contains("parameters") || !function.at("parameters").is_object()) {
         return;
     }
@@ -749,7 +704,7 @@ static void foreach_parameter(const json & function, const std::function<void(co
     if (!params.contains("properties") || !params.at("properties").is_object()) {
         return;
     }
-    const auto & props = params.at("properties");
+    const auto &          props = params.at("properties");
     std::set<std::string> required;
     if (params.contains("required") && params.at("required").is_array()) {
         params.at("required").get_to(required);
@@ -760,19 +715,19 @@ static void foreach_parameter(const json & function, const std::function<void(co
     }
 }
 
-static std::string apply(
+std::string common_chat_template_direct_apply(
     const common_chat_template & tmpl,
     const struct templates_params & inputs,
-    const std::optional<json> & messages_override = std::nullopt,
-    const std::optional<json> & tools_override = std::nullopt,
-    const std::optional<json> & additional_context = std::nullopt)
-{
+    const std::optional<json> & messages_override,
+    const std::optional<json> & tools_override,
+    const std::optional<json> & additional_context) {
     jinja::context ctx(tmpl.source());
 
     nlohmann::ordered_json inp = nlohmann::ordered_json{
         {"messages", messages_override.has_value() ? *messages_override : inputs.messages},
         {"bos_token", tmpl.bos_token()},
         {"eos_token", tmpl.eos_token()},
+        {"enable_thinking", inputs.enable_thinking},
     };
     if (tools_override.has_value() || !inputs.tools.empty()) {
         inp["tools"] = tools_override.has_value() ? *tools_override : inputs.tools;
@@ -798,7 +753,7 @@ static std::string apply(
     // render
     jinja::runtime runtime(ctx);
     const jinja::value results = runtime.execute(tmpl.prog);
-    auto parts = runtime.gather_string_parts(results);
+    auto parts = jinja::runtime::gather_string_parts(results);
 
     std::string result = parts->as_string().str();
 
@@ -812,265 +767,8 @@ static std::string apply(
     return result;
 }
 
-static common_chat_params common_chat_params_init_generic(const common_chat_template & tmpl, const struct templates_params & inputs) {
-    common_chat_params data;
-
-    auto tool_call_schemas = json::array();
-    foreach_function(inputs.tools, [&](const json & tool) {
-        const auto & function = tool.at("function");
-        auto tool_schema = json {
-            {"type", "object"},
-            {"properties", {
-                {"name", {
-                    {"type", "string"},
-                    {"const", function.at("name")},
-                }},
-                {"arguments", function.at("parameters")},
-            }},
-            {"required", json::array({"name", "arguments"})},
-        };
-        if (function.contains("description")) {
-            tool_schema["description"] = function.at("description");
-        }
-        if (inputs.parallel_tool_calls) {
-            tool_schema.at("properties")["id"] = {
-                {"type", "string"},
-                {"minLength", 4},
-            };
-            tool_schema.at("required").push_back("id");
-        }
-        tool_call_schemas.emplace_back(tool_schema);
-    });
-    const auto tool_call =
-        inputs.parallel_tool_calls
-            ? json {
-                {"type", "object"},
-                {"properties", {
-                    {"tool_calls", {
-                        {"type", "array"},
-                        {"items", tool_call_schemas.size() == 1 ? tool_call_schemas[0] : json {
-                            {"anyOf", tool_call_schemas},
-                        }},
-                        {"minItems", 1},
-                    }},
-                }},
-                {"required", json::array({"tool_calls"})},
-            }
-            : json {
-                {"type", "object"},
-                {"properties", {
-                    {"tool_call", tool_call_schemas.size() == 1 ? tool_call_schemas[0] : json {
-                        {"anyOf", tool_call_schemas},
-                    }},
-                }},
-                {"required", json::array({"tool_call"})},
-            };
-    const auto schema =
-        inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED
-            ? json {
-                {"anyOf", json::array({
-                    tool_call,
-                    {
-                        {"type", "object"},
-                        {"properties", {
-                            {"response", inputs.json_schema.is_null()
-                                ? json {{"type", "string"}}
-                                : inputs.json_schema
-                            },
-                        }},
-                        {"required", json::array({"response"})},
-                    },
-                })}
-            }
-            : tool_call;
-
-    data.grammar_lazy = false;
-    data.grammar = build_grammar([&](const common_grammar_builder & builder) {
-        builder.add_schema("root", schema);
-    });
-
-    auto tweaked_messages = tmpl.add_system(
-        inputs.messages,
-        "Respond in JSON format, either with `tool_call` (a request to call tools) or with `response` reply to the user's request");
-
-    // ensure all messages has "content" field
-    for (auto & message : tweaked_messages) {
-        if (!message.contains("content") || message["content"].is_null()) {
-            message["content"] = "";
-        }
-    }
-
-    data.prompt = apply(tmpl, inputs, /* messages_override= */ tweaked_messages);
-    data.format = COMMON_CHAT_FORMAT_GENERIC;
-    return data;
-}
-
-static common_chat_params common_chat_params_init_mistral_nemo(const common_chat_template & tmpl, const struct templates_params & inputs) {
-    common_chat_params data;
-    data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
-    data.grammar = build_grammar([&](const common_grammar_builder & builder) {
-        auto schemas = json::array();
-        foreach_function(inputs.tools, [&](const json & tool) {
-            const auto & function = tool.at("function");
-            schemas.push_back({
-                {"type", "object"},
-                {"properties", {
-                    // Important note: the model is probably trained to take a JSON stringified arguments value.
-                    // It's hard to constrain that for now (while reusing the JSON schema conversion), so we're just expecting a plain object.
-                    {"name", {
-                        {"type", "string"},
-                        {"const", function.at("name")},
-                    }},
-                    {"arguments", function.at("parameters")},
-                    {"id", {
-                        {"type", "string"},
-                        // Nemo's template expects a 9-character alphanumeric ID.
-                        {"pattern", "^[a-zA-Z0-9]{9}$"},
-                    }},
-                }},
-                {"required", json::array({"name", "arguments", "id"})},
-            });
-        });
-        auto schema = json {
-            {"type", "array"},
-            {"items", schemas.size() == 1 ? schemas[0] : json {{"anyOf", schemas}}},
-            {"minItems", 1},
-        };
-        if (!inputs.parallel_tool_calls) {
-            schema["maxItems"] = 1;
-        }
-        builder.add_rule("root", "\"[TOOL_CALLS]\" " + builder.add_schema("tool_calls", schema));
-    });
-    data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "[TOOL_CALLS]"});
-    data.preserved_tokens = {
-        "[TOOL_CALLS]",
-    };
-    data.prompt = apply(tmpl, inputs);
-    data.format = COMMON_CHAT_FORMAT_MISTRAL_NEMO;
-    return data;
-}
-
-
-// Case-insensitive find
-static size_t ifind_string(const std::string & haystack, const std::string & needle, size_t pos = 0) {
-    auto it = std::search(
-        haystack.begin() + pos, haystack.end(),
-        needle.begin(), needle.end(),
-        [](char a, char b) { return std::tolower(a) == std::tolower(b); }
-    );
-    return (it == haystack.end()) ? std::string::npos : std::distance(haystack.begin(), it);
-}
-
-static common_chat_params common_chat_params_init_lfm2(const common_chat_template & tmpl, const struct templates_params & inputs) {
-    common_chat_params data;
-    const auto is_json_schema_provided = !inputs.json_schema.is_null();
-    const auto is_grammar_provided = !inputs.grammar.empty();
-    const auto are_tools_provided = inputs.tools.is_array() && !inputs.tools.empty();
-
-    // the logic requires potentially modifying the messages
-    auto tweaked_messages = inputs.messages;
-
-    auto replace_json_schema_marker = [](json & messages) -> bool {
-        static std::string marker1 = "force json schema.\n";
-        static std::string marker2 = "force json schema.";
-
-        if (messages.empty() || messages.at(0).at("role") != "system") {
-            return false;
-        }
-
-        std::string content = messages.at(0).at("content");
-
-        for (const auto & marker : {marker1, marker2}) {
-            const auto pos = ifind_string(content, marker);
-            if (pos != std::string::npos) {
-                content.replace(pos, marker.length(), "");
-                // inject modified content back into the messages
-                messages.at(0).at("content") = content;
-                return true;
-            }
-        }
-
-        return false;
-    };
-
-    // Lfm2 model does not natively work with json, but can generally understand the tools structure
-    //
-    // Example of the pytorch dialog structure:
-    //     <|startoftext|><|im_start|>system
-    //     List of tools: <|tool_list_start|>[{"name": "get_candidate_status", "description": "Retrieves the current status of a candidate in the recruitment process", "parameters": {"type": "object", "properties": {"candidate_id": {"type": "string", "description": "Unique identifier for the candidate"}}, "required": ["candidate_id"]}}]<|tool_list_end|><|im_end|>
-    //     <|im_start|>user
-    //     What is the current status of candidate ID 12345?<|im_end|>
-    //     <|im_start|>assistant
-    //     <|tool_call_start|>[get_candidate_status(candidate_id="12345")]<|tool_call_end|>Checking the current status of candidate ID 12345.<|im_end|>
-    //     <|im_start|>tool
-    //     <|tool_response_start|>{"candidate_id": "12345", "status": "Interview Scheduled", "position": "Clinical Research Associate", "date": "2023-11-20"}<|tool_response_end|><|im_end|>
-    //     <|im_start|>assistant
-    //     The candidate with ID 12345 is currently in the "Interview Scheduled" stage for the position of Clinical Research Associate, with an interview date set for 2023-11-20.<|im_end|>
-    //
-    // For the llama server compatibility with json tools semantic,
-    // the client can add "Follow json schema." line into the system message prompt to force the json output.
-    //
-    if (are_tools_provided && (is_json_schema_provided || is_grammar_provided)) {
-        // server/utils.hpp prohibits that branch for the custom grammar anyways
-        throw std::runtime_error("Tools call must not use \"json_schema\" or \"grammar\", use non-tool invocation if you want to use custom grammar");
-    } else if (are_tools_provided && replace_json_schema_marker(tweaked_messages)) {
-        LOG_INF("%s: Using tools to build a grammar\n", __func__);
-
-        data.grammar = build_grammar([&](const common_grammar_builder & builder) {
-            auto schemas = json::array();
-            foreach_function(inputs.tools, [&](const json & tool) {
-                const auto & function = tool.at("function");
-                schemas.push_back({
-                    {"type", "object"},
-                    {"properties", {
-                        {"name", {
-                            {"type", "string"},
-                            {"const", function.at("name")},
-                        }},
-                        {"arguments", function.at("parameters")},
-                    }},
-                    {"required", json::array({"name", "arguments", "id"})},
-                });
-            });
-            auto schema = json {
-                {"type", "array"},
-                {"items", schemas.size() == 1 ? schemas[0] : json {{"anyOf", schemas}}},
-                {"minItems", 1},
-            };
-            if (!inputs.parallel_tool_calls) {
-                schema["maxItems"] = 1;
-            }
-
-            builder.add_rule("root", "\"<|tool_call_start|>\"" + builder.add_schema("tool_calls", schema) + "\"<|tool_call_end|>\"");
-        });
-        // model has no concept of tool selection mode choice,
-        // if the system prompt rendered correctly it will produce a tool call
-        // the grammar goes inside the tool call body
-        data.grammar_lazy = true;
-        data.grammar_triggers = {{COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL, "\\s*<\\|tool_call_start\\|>\\s*\\["}};
-        data.preserved_tokens = {"<|tool_call_start|>", "<|tool_call_end|>"};
-        data.format = COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS;
-    } else if (are_tools_provided && (!is_json_schema_provided && !is_grammar_provided)) {
-        LOG_INF("%s: Using tools without json schema or grammar\n", __func__);
-        // output those tokens
-        data.preserved_tokens = {"<|tool_call_start|>", "<|tool_call_end|>"};
-    } else if (is_json_schema_provided) {
-        LOG_INF("%s: Using provided json schema to build a grammar\n", __func__);
-        data.grammar = json_schema_to_grammar(inputs.json_schema);
-    } else if (is_grammar_provided) {
-        LOG_INF("%s: Using provided grammar\n", __func__);
-        data.grammar = inputs.grammar;
-    } else {
-        LOG_INF("%s: Using content relying on the template\n", __func__);
-    }
-
-    data.prompt = apply(tmpl, inputs, /* messages_override= */ tweaked_messages);
-    LOG_DBG("%s: Prompt: %s\n", __func__, data.prompt.c_str());
-
-    return data;
-}
-
-static common_chat_params common_chat_params_init_ministral_3(const common_chat_template & tmpl, const struct templates_params & inputs) {
+static common_chat_params common_chat_params_init_ministral_3(const common_chat_template &    tmpl,
+                                                              const struct templates_params & inputs) {
     common_chat_params data;
 
     // Build up messages to follow the format: https://huggingface.co/mistralai/Ministral-3-14B-Reasoning-2512/blob/main/chat_template.jinja
@@ -1088,8 +786,8 @@ static common_chat_params common_chat_params_init_ministral_3(const common_chat_
         // If message contains `reasoning_content`, add it as a block of type `thinking`
         if (msg.contains("reasoning_content") && msg.at("reasoning_content").is_string()) {
             content.push_back({
-                {"type", "thinking"},
-                {"thinking", msg.at("reasoning_content").get<std::string>()},
+                { "type",     "thinking"                                     },
+                { "thinking", msg.at("reasoning_content").get<std::string>() },
             });
         }
 
@@ -1097,8 +795,8 @@ static common_chat_params common_chat_params_init_ministral_3(const common_chat_
         if (msg.contains("content")) {
             if (msg.at("content").is_string()) {
                 content.push_back({
-                    {"type", "text"},
-                    {"text", msg.at("content").get<std::string>()},
+                    { "type", "text"                               },
+                    { "text", msg.at("content").get<std::string>() },
                 });
             } else if (msg.at("content").is_array()) {
                 auto blocks = msg.at("content");
@@ -1106,18 +804,18 @@ static common_chat_params common_chat_params_init_ministral_3(const common_chat_
             }
         }
 
-        auto adjusted = msg;
+        auto adjusted       = msg;
         adjusted["content"] = content;
         adjusted.erase("reasoning_content");
         adjusted_messages.push_back(adjusted);
     }
 
-    auto has_tools = inputs.tools.is_array() && !inputs.tools.empty();
+    auto has_tools         = inputs.tools.is_array() && !inputs.tools.empty();
     auto extract_reasoning = inputs.reasoning_format != COMMON_REASONING_FORMAT_NONE;
-    auto include_grammar = true;
+    auto include_grammar   = true;
 
-    data.prompt = apply(tmpl, inputs, /* messages_override = */ adjusted_messages);
-    data.format = COMMON_CHAT_FORMAT_PEG_NATIVE;
+    data.prompt           = common_chat_template_direct_apply(tmpl, inputs, /* messages_override = */ adjusted_messages);
+    data.format           = COMMON_CHAT_FORMAT_PEG_NATIVE;
     data.preserved_tokens = {
         "[THINK]",
         "[/THINK]",
@@ -1125,13 +823,15 @@ static common_chat_params common_chat_params_init_ministral_3(const common_chat_
         "[ARGS]",
     };
 
-    auto parser = build_chat_peg_native_parser([&](common_chat_peg_native_builder & p) {
-        auto reasoning = extract_reasoning ? p.optional("[THINK]" + p.reasoning(p.until("[/THINK]")) + "[/THINK]") : p.eps();
+    auto parser = build_chat_peg_unified_parser([&](common_chat_peg_unified_builder & p) {
+        auto reasoning =
+            extract_reasoning ? p.optional("[THINK]" + p.reasoning(p.until("[/THINK]")) + "[/THINK]") : p.eps();
 
         // Response format parser
         if (inputs.json_schema.is_object() && !inputs.json_schema.empty()) {
             // Ministral wants to emit json surrounded by code fences
-            return reasoning << "```json" << p.content(p.schema(p.json(), "response-format", inputs.json_schema)) << "```";
+            return reasoning << "```json" << p.content(p.schema(p.json(), "response-format", inputs.json_schema))
+                             << "```";
         }
 
         // Tool call parser
@@ -1139,17 +839,16 @@ static common_chat_params common_chat_params_init_ministral_3(const common_chat_
             auto tool_choice = p.choice();
             foreach_function(inputs.tools, [&](const json & tool) {
                 const auto & function = tool.at("function");
-                std::string name = function.at("name");
-                const auto & schema = function.at("parameters");
+                std::string  name     = function.at("name");
+                const auto & schema   = function.at("parameters");
 
-                tool_choice |= p.rule("tool-" + name,
-                    p.tool_open(p.tool_name(p.literal(name)) + "[ARGS]")
-                    + p.tool_args(p.schema(p.json(), "tool-" + name + "-schema", schema))
-                );
+                tool_choice |=
+                    p.rule("tool-" + name, p.tool_open(p.tool_name(p.literal(name)) + "[ARGS]") +
+                                               p.tool_args(p.schema(p.json(), "tool-" + name + "-schema", schema)));
             });
 
-            auto min_calls = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED ? 1 : 0;
-            auto max_calls = inputs.parallel_tool_calls ? -1 : 1;
+            auto min_calls  = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED ? 1 : 0;
+            auto max_calls  = inputs.parallel_tool_calls ? -1 : 1;
             auto tool_calls = p.trigger_rule("tool-call", p.repeat("[TOOL_CALLS]" + tool_choice, min_calls, max_calls));
 
             return reasoning << p.content(p.until("[TOOL_CALLS]")) << tool_calls;
@@ -1168,838 +867,32 @@ static common_chat_params common_chat_params_init_ministral_3(const common_chat_
         data.grammar = build_grammar([&](const common_grammar_builder & builder) {
             foreach_function(inputs.tools, [&](const json & tool) {
                 const auto & function = tool.at("function");
-                auto schema = function.at("parameters");
+                auto         schema   = function.at("parameters");
                 builder.resolve_refs(schema);
             });
             parser.build_grammar(builder, data.grammar_lazy);
         });
 
         data.grammar_triggers = {
-            {COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "[TOOL_CALLS]"}
+            { COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "[TOOL_CALLS]" }
         };
     }
 
     return data;
 }
 
-static common_chat_params common_chat_params_init_magistral(const common_chat_template & tmpl, const struct templates_params & inputs) {
-    common_chat_params data;
-    data.prompt = apply(tmpl, inputs);
-    data.format = COMMON_CHAT_FORMAT_MAGISTRAL;
-    data.preserved_tokens = {
-        "[THINK]",
-        "[/THINK]",
-    };
-
-    if (inputs.tools.is_array() && !inputs.tools.empty()) {
-        data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
-        data.grammar = build_grammar([&](const common_grammar_builder & builder) {
-            auto schemas = json::array();
-            foreach_function(inputs.tools, [&](const json & tool) {
-                const auto & function = tool.at("function");
-                schemas.push_back({
-                    {"type", "object"},
-                    {"properties", {
-                        {"name", {
-                            {"type", "string"},
-                            {"const", function.at("name")},
-                        }},
-                        {"arguments", function.at("parameters")},
-                        {"id", {
-                            {"type", "string"},
-                            {"pattern", "^[a-zA-Z0-9]{9}$"},
-                        }},
-                    }},
-                    {"required", json::array({"name", "arguments", "id"})},
-                });
-            });
-            auto schema = json {
-                {"type", "array"},
-                {"items", schemas.size() == 1 ? schemas[0] : json {{"anyOf", schemas}}},
-                {"minItems", 1},
-            };
-            if (!inputs.parallel_tool_calls) {
-                schema["maxItems"] = 1;
-            }
-            builder.add_rule("root", "\"[TOOL_CALLS]\" " + builder.add_schema("tool_calls", schema));
-        });
-        data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "[TOOL_CALLS]"});
-        data.preserved_tokens.push_back("[TOOL_CALLS]");
-    } else {
-        data.grammar_lazy = false;
-        if (!inputs.json_schema.is_null()) {
-            if (!inputs.grammar.empty()) {
-                throw std::runtime_error("Either \"json_schema\" or \"grammar\" can be specified, but not both");
-            }
-            data.grammar = json_schema_to_grammar(inputs.json_schema);
-        } else {
-            data.grammar = inputs.grammar;
-        }
-    }
-
-    return data;
-}
-
-static common_chat_params common_chat_params_init_command_r7b(const common_chat_template & tmpl, const struct templates_params & inputs) {
-    common_chat_params data;
-
-    auto adjusted_messages = json::array();
-    for (const auto & msg : inputs.messages) {
-        auto has_reasoning_content = msg.contains("reasoning_content") && msg.at("reasoning_content").is_string();
-        auto has_tool_calls = msg.contains("tool_calls") && msg.at("tool_calls").is_array();
-        if (has_reasoning_content && has_tool_calls) {
-            auto adjusted_message = msg;
-            adjusted_message["tool_plan"] = msg.at("reasoning_content");
-            adjusted_message.erase("reasoning_content");
-            adjusted_messages.push_back(adjusted_message);
-        } else {
-            adjusted_messages.push_back(msg);
-        }
-    }
-    data.prompt = apply(tmpl, inputs, /* messages_override= */ adjusted_messages);
-    data.format = COMMON_CHAT_FORMAT_COMMAND_R7B;
-    if (string_ends_with(data.prompt, "<|START_THINKING|>")) {
-        if (!inputs.enable_thinking) {
-            data.prompt += "<|END_THINKING|>";
-        } else {
-            data.thinking_forced_open = true;
-        }
-    } else if (!inputs.enable_thinking && string_ends_with(data.prompt, "<|CHATBOT_TOKEN|>")) {
-        data.prompt += "<|START_THINKING|><|END_THINKING|>";
-    }
-
-    data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
-    data.grammar = build_grammar([&](const common_grammar_builder & builder) {
-        auto schemas = json::array();
-        foreach_function(inputs.tools, [&](const json & tool) {
-            const auto & function = tool.at("function");
-            schemas.push_back({
-                {"type", "object"},
-                {"properties", {
-                    {"tool_call_id", {
-                        {"type", "string"},
-                        // Command-R's template expects an integer string.
-                        {"pattern", "^[0-9]{1,10}$"},
-                    }},
-                    {"tool_name", {
-                        {"type", "string"},
-                        {"const", function.at("name")},
-                    }},
-                    {"parameters", function.at("parameters")},
-                }},
-                {"required", json::array({"tool_call_id", "tool_name", "parameters"})},
-            });
-        });
-        auto schema = json {
-            {"type", "array"},
-            {"items", schemas.size() == 1 ? schemas[0] : json {{"anyOf", schemas}}},
-            {"minItems", 1},
-        };
-        if (!inputs.parallel_tool_calls) {
-            schema["maxItems"] = 1;
-        }
-        builder.add_rule("root",
-            std::string(data.thinking_forced_open ? "( \"<|END_THINKING|>\" space )? " : "") +
-            "\"<|START_ACTION|>\" " + builder.add_schema("tool_calls", schema) + " \"<|END_ACTION|>\"");
-    });
-    data.grammar_triggers.push_back({
-        COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
-        // If thinking_forced_open, then we capture the </think> tag in the grammar,
-        // (important for required tool choice) and in the trigger's first capture (decides what is sent to the grammar)
-        std::string(data.thinking_forced_open ? "[\\s\\S]*?(<\\|END_THINKING\\|>\\s*)" : "(?:<\\|START_THINKING\\|>[\\s\\S]*?<\\|END_THINKING\\|>\\s*)?") +
-            "(<\\|START_ACTION\\|>)[\\s\\S]*"
-    });
-    data.preserved_tokens = {
-        "<|START_ACTION|>",
-        "<|END_ACTION|>",
-        "<|START_RESPONSE|>",
-        "<|END_RESPONSE|>",
-        "<|START_THINKING|>",
-        "<|END_THINKING|>",
-    };
-    return data;
-}
-
-static void expect_tool_parameters(const std::string & name, const json & parameters, const std::vector<std::string> & expected_properties) {
-    if (!parameters.is_object() || !parameters.contains("type") || parameters.at("type") != "object" || !parameters.contains("properties") || !parameters.contains("required")) {
-        throw std::runtime_error("Parameters of tool " + name + " must be an object w/ required properties");
-    }
-    const auto & parameters_properties = parameters.at("properties");
-    const auto & parameters_required = parameters.at("required");
-    for (const auto & prop : expected_properties) {
-        if (!parameters_properties.contains(prop)) {
-            throw std::runtime_error("Parameters of tool " + name + " is missing property: " + prop); // NOLINT
-        }
-        if (std::find(parameters_required.begin(), parameters_required.end(), json(prop)) == parameters_required.end()) {
-            throw std::runtime_error("Parameters of tool " + name + " must have property marked as required: " + prop); // NOLINT
-        }
-    }
-    if (parameters_properties.size() != expected_properties.size()) {
-        throw std::runtime_error("Parameters of tool " + name + " must only have these properties:" + string_join(expected_properties, ", "));
-    }
-}
-
-static common_chat_params common_chat_params_init_llama_3_x(const common_chat_template & tmpl, const struct templates_params & inputs, bool allow_python_tag_builtin_tools) {
-    auto builtin_tools = json::array();
-    common_chat_params data;
-    if (!inputs.tools.is_null()) {
-        data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
-        data.grammar = build_grammar([&](const common_grammar_builder & builder) {
-            std::vector<std::string> tool_rules;
-
-            auto handle_builtin_tool = [&](const std::string & name, const json & parameters) {
-                if (name == "wolfram_alpha" || name == "web_search" || name == "brave_search") {
-                    // https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py
-                    // https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py
-                    expect_tool_parameters(name, parameters, {"query"});
-                } else if (name == "python" || name == "code_interpreter") {
-                    // https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/inline/tool_runtime/code_interpreter/code_interpreter.py
-                    expect_tool_parameters(name, parameters, {"code"});
-                } else {
-                    return false;
-                }
-
-                std::vector<std::string> kvs;
-                for (const auto & [key, value] : parameters.at("properties").items()) {
-                    kvs.push_back("\"" + key + "=\" " + builder.add_schema(name + "-args-" + key, value)); // NOLINT
-                }
-
-                tool_rules.push_back(
-                    builder.add_rule(
-                        name + "-call",
-                        "\"<|python_tag|>" + name + ".call(\" " + string_join(kvs, " \", \" ") + " \")\""));
-                builtin_tools.push_back(name);
-
-                return true;
-            };
-
-            foreach_function(inputs.tools, [&](const json & tool) {
-                const auto & function = tool.at("function");
-                std::string name = function.at("name");
-                auto parameters = function.at("parameters");
-                builder.resolve_refs(parameters);
-
-                // https://github.com/meta-llama/llama-stack/tree/main/llama_stack/providers/remote/tool_runtime
-                if (allow_python_tag_builtin_tools) {
-                    handle_builtin_tool(name, parameters);
-                }
-                tool_rules.push_back(
-                    builder.add_rule(
-                        name + "-call",
-                        "\"{\" space "
-                        "( \"\\\"type\\\"\"       space \":\" space \"\\\"function\\\"\"     space \",\" space )? "
-                        "  \"\\\"name\\\"\"       space \":\" space \"\\\"" + name + "\\\"\" space \",\" space "
-                        "  \"\\\"parameters\\\"\" space \":\" space " + builder.add_schema(name + "-args", parameters) + " "
-                        "\"}\" space"));
-            });
-            // Small models may hallucinate function names so we match anything (*at the start*) that looks like the JSON of a function call, regardless of the name.
-            data.grammar_triggers.push_back({
-                COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
-                "(\\{\\s*(?:\"type\"\\s*:\\s*\"function\"\\s*,\\s*)?\"name\"\\s*:\\s*\")[\\s\\S]*", // + name + "\"[\\s\\S]*",
-            });
-            if (!builtin_tools.empty()) {
-                data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<|python_tag|>"});
-                data.preserved_tokens.push_back("<|python_tag|>");
-            }
-            // Allow a few empty lines on top of the usual constrained json schema space rule.
-            builder.add_rule("root", string_join(tool_rules, " | "));
-            data.additional_stops.push_back("<|eom_id|>");
-        });
-        data.format = allow_python_tag_builtin_tools && !builtin_tools.empty()
-            ? COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS
-            : COMMON_CHAT_FORMAT_LLAMA_3_X;
-    } else {
-        data.format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
-    }
-    data.prompt = apply(tmpl, inputs, /* messages_override =*/ std::nullopt, /* tools_override= */ std::nullopt, json {
-        {"date_string", format_time(inputs.now, "%d %b %Y")},
-        {"tools_in_user_message", false},
-        {"builtin_tools", builtin_tools},
-    });
-    return data;
-}
-
-static common_chat_params common_chat_params_init_nemotron_v2(const common_chat_template & tmpl, const struct templates_params & inputs) {
-    common_chat_params data;
-
-    // Generate the prompt using the apply() function with the template
-    data.prompt = apply(tmpl, inputs);
-    data.format = COMMON_CHAT_FORMAT_NEMOTRON_V2;
-
-    // Handle thinking tags appropriately based on inputs.enable_thinking
-    if (string_ends_with(data.prompt, "<think>\n")) {
-        if (!inputs.enable_thinking) {
-            data.prompt += "</think>";
-        } else {
-            data.thinking_forced_open = true;
-        }
-    }
-
-    // When tools are present, build grammar for the <TOOLCALL> format, similar to CommandR, but without tool call ID
-    if (!inputs.tools.is_null() && inputs.tools.is_array() && !inputs.tools.empty()) {
-        data.grammar_lazy = true;
-        data.grammar      = build_grammar([&](const common_grammar_builder & builder) {
-            auto schemas = json::array();
-            foreach_function(inputs.tools, [&](const json & tool) {
-                const auto & function = tool.at("function");
-                schemas.push_back({
-                    { "type",       "object"                                                   },
-                    { "properties",
-                        {
-                            { "name",
-                            {
-                                { "type", "string" },
-                                { "const", function.at("name") },
-                            } },
-                            { "arguments", function.at("parameters") },
-                        }                                                                        },
-                    { "required",   json::array({ "name", "arguments" }) },
-                });
-            });
-            auto schema = json{
-                        { "type",     "array"                                                         },
-                        { "items",    schemas.size() == 1 ? schemas[0] : json{ { "anyOf", schemas } } },
-                        { "minItems", 1                                                               },
-            };
-            if (!inputs.parallel_tool_calls) {
-                schema["maxItems"] = 1;
-            }
-            builder.add_rule("root",
-                                std::string(data.thinking_forced_open ? "( \"</think>\" space )? " : "") +
-                                    "\"<TOOLCALL>\" " + builder.add_schema("tool_calls", schema) +
-                                    " \"</TOOLCALL>\"");
-        });
-        data.grammar_triggers.push_back({ COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
-            // If thinking_forced_open, then we capture the </think> tag in the grammar,
-            // (important for required tool choice) and in the trigger's first capture (decides what is sent to the grammar)
-            std::string(data.thinking_forced_open ?
-                            "[\\s\\S]*?(</think>\\s*)" :
-                            "(?:<think>[\\s\\S]*?</think>\\s*)?") +
-                "(<TOOLCALL>)[\\s\\S]*" });
-    }
-    return data;
-}
-
-static common_chat_params common_chat_params_init_nemotron_v3(const common_chat_template & tmpl, const struct templates_params & inputs) {
-    common_chat_params data;
-
-    data.prompt = apply(tmpl, inputs);
-    data.format = COMMON_CHAT_FORMAT_PEG_CONSTRUCTED;
-
-    // Handle thinking tags appropriately based on inputs.enable_thinking
-    if (string_ends_with(data.prompt, "<think>\n")) {
-        if (!inputs.enable_thinking) {
-            data.prompt += "</think>";
-        } else {
-            data.thinking_forced_open = true;
-        }
-    }
-
-    data.preserved_tokens = {
-        "<think>",
-        "</think>",
-        "<tool_call>",
-        "</tool_call>",
-    };
-
-    auto has_tools = inputs.tools.is_array() && !inputs.tools.empty();
-    auto extract_reasoning = inputs.reasoning_format != COMMON_REASONING_FORMAT_NONE;
-    auto include_grammar = true;
-
-    auto parser = build_chat_peg_constructed_parser([&](auto & p) {
-        auto reasoning = p.eps();
-        if (inputs.enable_thinking && extract_reasoning) {
-            auto reasoning_content = p.reasoning(p.until("</think>")) + ("</think>" | p.end());
-            if (data.thinking_forced_open) {
-                reasoning = reasoning_content;
-            }
-        }
-
-        // Response format parser
-        if (inputs.json_schema.is_object() && !inputs.json_schema.empty()) {
-            return reasoning << p.content(p.schema(p.json(), "response-format", inputs.json_schema));
-        }
-
-        // Tool call parser
-        if (has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE) {
-            auto tool_choice = p.choice();
-            foreach_function(inputs.tools, [&](const json & tool) {
-                const auto & function = tool.at("function");
-                std::string name = function.at("name");
-                auto parameters = function.at("parameters");
-
-                auto schema_info = common_schema_info();
-                schema_info.resolve_refs(parameters);
-
-                auto tool_open = "<function=" + p.tool_name(p.literal(name)) + ">\n";
-                auto tool_close = p.literal("</function>\n");
-                auto args = p.sequence();
-                auto arg_string = p.rule("xml-arg-string", p.until_one_of({
-                    "\n</parameter>",
-                    "\n<parameter=",
-                    "\n</function>"
-                }));
-
-                foreach_parameter(function, [&](const auto & param_name, const json & param_schema, bool is_required) {
-                    auto rule_name = "tool-" + name + "-arg-" + param_name;
-
-                    auto arg_open = "<parameter=" + p.tool_arg_name(p.literal(param_name)) + ">\n";
-                    auto arg_close = p.literal("</parameter>\n");
-                    auto arg_value = p.eps();
-
-                    if (schema_info.resolves_to_string(param_schema)) {
-                        arg_value = p.tool_arg_string_value(arg_string) + "\n";
-                    } else {
-                        arg_value = p.tool_arg_json_value(p.schema(p.json(), rule_name + "-schema", param_schema));
-                    }
-
-                    // Model may or my not close with </parameter>
-                    auto arg_rule = p.rule(rule_name, p.tool_arg_open(arg_open) + arg_value + p.optional(p.tool_arg_close(arg_close)));
-                    args += p.repeat(arg_rule, /* min = */ is_required ? 1 : 0, /* max = */ 1);
-                });
-
-                tool_choice |= p.rule("tool-" + name, p.tool_open(tool_open) + args + p.tool_close(tool_close));
-            });
-
-            auto min_calls = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED ? 1 : 0;
-            auto max_calls = inputs.parallel_tool_calls ? -1 : 1;
-            auto tool_call = p.rule("tool-call", "<tool_call>\n" + tool_choice + "</tool_call>" + p.space());
-            auto tool_calls = p.trigger_rule("tool-call-root", p.repeat(tool_call, /* min = */ min_calls, /* max = */ max_calls));
-
-            return reasoning << p.content(p.until("<tool_call>")) << tool_calls;
-        }
-
-        // Content only parser
-        include_grammar = false;
-        return reasoning << p.content(p.rest());
-    });
-
-    data.parser = parser.save();
-
-    if (include_grammar) {
-        data.grammar_lazy = has_tools && inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_AUTO;
-
-        data.grammar = build_grammar([&](const common_grammar_builder & builder) {
-            foreach_function(inputs.tools, [&](const json & tool) {
-                const auto & function = tool.at("function");
-                auto schema = function.at("parameters");
-                builder.resolve_refs(schema);
-            });
-            parser.build_grammar(builder, data.grammar_lazy);
-        });
-
-        data.grammar_triggers = {
-            {COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<tool_call>"}
-        };
-    }
-
-    return data;
-}
-
-
-static common_chat_params common_chat_params_init_apertus(const common_chat_template & tmpl, const struct templates_params & inputs) {
-    common_chat_params data;
-
-    // Generate the prompt using the apply() function with the template
-    data.prompt = apply(tmpl, inputs);
-    data.format = COMMON_CHAT_FORMAT_APERTUS;
-
-    // Handle thinking tags appropriately based on inputs.enable_thinking
-    if (string_ends_with(data.prompt, "<|inner_prefix|>")) {
-        if (!inputs.enable_thinking) {
-            data.prompt += "<|inner_suffix|>";
-        } else {
-            data.thinking_forced_open = true;
-        }
-    }
-
-    // When tools are present, build grammar for the <|tools_prefix|> format
-    if (!inputs.tools.is_null() && inputs.tools.is_array() && !inputs.tools.empty()) {
-        data.grammar_lazy = true;
-        data.grammar      = build_grammar([&](const common_grammar_builder & builder) {
-            auto schemas = json::array();
-            foreach_function(inputs.tools, [&](const json & tool) {
-                const auto & function = tool.at("function");
-                schemas.push_back({
-                    { "type",       "object"                                                   },
-                    { "properties",
-                        {
-                            { function.at("name"), function.at("parameters") }
-                        }                                                                        },
-                    { "required",   json::array({ function.at("name") }) },
-                });
-            });
-            auto schema = json{
-                        { "type",     "array"                                                         },
-                        { "items",    schemas.size() == 1 ? schemas[0] : json{ { "anyOf", schemas } } },
-                        { "minItems", 1                                                               },
-            };
-            if (!inputs.parallel_tool_calls) {
-                schema["maxItems"] = 1;
-            }
-            builder.add_rule("root",
-                                std::string(data.thinking_forced_open ? "( \"<|inner_suffix|>\" space )? " : "") +
-                                    "\"<|tools_prefix|>\"" + builder.add_schema("tool_calls", schema) + "\"<|tools_suffix|>\"");
-                            });
-        data.grammar_triggers.push_back({ COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
-            // If thinking_forced_open, then we capture the <|inner_suffix|> tag in the grammar,
-            // (important for required tool choice) and in the trigger's first capture (decides what is sent to the grammar)
-            std::string(data.thinking_forced_open ?
-                            "[\\s\\S]*?(<\\|inner_suffix\\|>\\s*)" :
-                            "(?:<\\|inner_prefix\\|>[\\s\\S]*?<\\|inner_suffix\\|>\\s*)?") +
-                "(<\\|tools_prefix\\|>)[\\s\\S]*" });
-        data.preserved_tokens = {
-            "<|system_start|>",
-            "<|system_end|>",
-            "<|developer_start|>",
-            "<|developer_end|>",
-            "<|user_start|>",
-            "<|user_end|>",
-            "<|assistant_start|>",
-            "<|assistant_end|>",
-            "<|inner_prefix|>",
-            "<|inner_suffix|>",
-            "<|tools_prefix|>",
-            "<|tools_suffix|>",
-        };
-    }
-    return data;
-}
-
-static common_chat_params common_chat_params_init_deepseek_r1(const common_chat_template & tmpl, const struct templates_params & inputs) {
-    common_chat_params data;
-    auto prompt = apply(tmpl, inputs);
-
-    // Hacks to fix the official (broken) prompt.
-    // It is advisable to use --chat-template-file models/templates/llama-cpp-deepseek-r1.jinja instead,
-    // until the official template is fixed.
-    if (tmpl.source().find("{% if ns.is_tool %}{{'<｜tool▁outputs▁end｜>'}}") != std::string::npos) {
-        // Don't leave the chat dangling after tool results
-        if (string_ends_with(prompt, "<｜tool▁outputs▁end｜>")) {
-            prompt += "<｜end▁of▁sentence｜>";
-            if (inputs.add_generation_prompt) {
-                prompt += "<｜Assistant｜>";
-            }
-        }
-        // Fix up tool call delta example added by Minja
-        prompt = std::regex_replace(
-            prompt,
-            std::regex("(<｜tool▁call▁end｜>)[\\s\\r\\n]*(<｜tool▁outputs▁begin｜>|<｜User｜>)"),
-            "$1<｜tool▁calls▁end｜><｜end▁of▁sentence｜>$2");
-    }
-    data.prompt = prompt;
-    data.format = COMMON_CHAT_FORMAT_DEEPSEEK_R1;
-    if (string_ends_with(data.prompt, "<think>\n")) {
-        if (!inputs.enable_thinking) {
-            data.prompt += "</think>";
-        } else {
-            data.thinking_forced_open = true;
-        }
-    }
-
-    if (inputs.tools.is_array() && !inputs.tools.empty()) {
-        data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED && inputs.json_schema.is_null();
-        data.grammar = build_grammar([&](const common_grammar_builder & builder) {
-            std::vector<std::string> tool_rules;
-            foreach_function(inputs.tools, [&](const json & tool) {
-                const auto & function = tool.at("function");
-                std::string name = function.at("name");
-                auto parameters = function.at("parameters");
-                builder.resolve_refs(parameters);
-                tool_rules.push_back(builder.add_rule(name + "-call",
-                    "( \"<｜tool▁call▁begin｜>\" )? \"function<｜tool▁sep｜>" + name + "\\n"
-                    "```json\\n\" " + builder.add_schema(name + "-args", parameters) + " "
-                    "\"```<｜tool▁call▁end｜>\""));
-            });
-            // Distill Qwen 7B & 32B models seem confused re/ syntax of their tool call opening tag,
-            // so we accept common variants (then it's all constrained)
-            builder.add_rule("root",
-                std::string(data.thinking_forced_open ? "( \"</think>\" space )? " : "") +
-                "( \"<｜tool▁calls▁begin｜>\" | \"<｜tool_calls_begin｜>\" | \"<｜tool calls begin｜>\" | \"<｜tool\\\\_calls\\\\_begin｜>\" | \"<｜tool▁calls｜>\" ) "
-                "(" + string_join(tool_rules, " | ") + ")" + (inputs.parallel_tool_calls ? "*" : "") + " "
-                "\"<｜tool▁calls▁end｜>\""
-                " space");
-            data.grammar_triggers.push_back({
-                COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
-                // If thinking_forced_open, then we capture the </think> tag in the grammar,
-                // (important for required tool choice) and in the trigger's first capture (decides what is sent to the grammar)
-                std::string(data.thinking_forced_open ? "[\\s\\S]*?(</think>\\s*)" : "(?:<think>[\\s\\S]*?</think>\\s*)?") +
-                    "(<｜tool▁calls▁begin｜>|<｜tool_calls_begin｜>|<｜tool calls begin｜>|<｜tool\\\\_calls\\\\_begin｜>|<｜tool▁calls｜>)[\\s\\S]*"
-            });
-            data.preserved_tokens = {
-                "<think>",
-                "</think>",
-                "<｜tool▁calls▁begin｜>",
-                "<｜tool▁call▁begin｜>",
-                "<｜tool▁sep｜>",
-                "<｜tool▁call▁end｜>",
-                "<｜tool▁calls▁end｜",
-            };
-        });
-    }
-    return data;
-}
-
-static common_chat_params common_chat_params_init_deepseek_v3_1(const common_chat_template & tmpl, const struct templates_params & inputs) {
-    common_chat_params data;
-
-    // Pass thinking context for DeepSeek V3.1 template
-    json additional_context = {
-        {"thinking", inputs.enable_thinking},
-    };
-
-    auto prompt = apply(tmpl, inputs,
-                       /* messages_override= */ inputs.messages,
-                       /* tools_override= */ std::nullopt,
-                       additional_context);
-    data.prompt = prompt;
-    data.format = COMMON_CHAT_FORMAT_DEEPSEEK_V3_1;
-    if (string_ends_with(data.prompt, "<think>")) {
-        if (!inputs.enable_thinking) {
-            data.prompt += "</think>";
-        } else {
-            data.thinking_forced_open = true;
-        }
-    }
-    if (inputs.tools.is_array() && !inputs.tools.empty()) {
-        data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED && inputs.json_schema.is_null();
-        data.grammar = build_grammar([&](const common_grammar_builder & builder) {
-            std::vector<std::string> tool_rules;
-            foreach_function(inputs.tools, [&](const json & tool) {
-                const auto & function = tool.at("function");
-                std::string name = function.at("name");
-                auto parameters = function.at("parameters");
-                builder.resolve_refs(parameters);
-                tool_rules.push_back(builder.add_rule(name + "-call",
-                    "( \"<｜tool▁call▁begin｜>\" )? \"" + name + "<｜tool▁sep｜>"
-                    "\" " + builder.add_schema(name + "-args", parameters) + " "
-                    "\"<｜tool▁call▁end｜>\""));
-            });
-            // Distill Qwen 7B & 32B models seem confused re/ syntax of their tool call opening tag,
-            // so we accept common variants (then it's all constrained)
-            builder.add_rule("root",
-                std::string(data.thinking_forced_open ? "( \"</think>\" space )? " : "") +
-                "( \"<｜tool▁calls▁begin｜>\" | \"<｜tool_calls_begin｜>\" | \"<｜tool calls begin｜>\" | \"<｜tool\\\\_calls\\\\_begin｜>\" | \"<｜tool▁calls｜>\" ) "
-                "(" + string_join(tool_rules, " | ") + ")" + (inputs.parallel_tool_calls ? "*" : "") + " "
-                "\"<｜tool▁calls▁end｜>\""
-                " space");
-            data.grammar_triggers.push_back({
-                COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
-                // If thinking_forced_open, then we capture the </think> tag in the grammar,
-                // (important for required tool choice) and in the trigger's first capture (decides what is sent to the grammar)
-                std::string(data.thinking_forced_open ? "[\\s\\S]*?(</think>\\s*)" : "(?:<think>[\\s\\S]*?</think>\\s*)?") +
-                    "(<｜tool▁calls▁begin｜>|<｜tool_calls_begin｜>|<｜tool calls begin｜>|<｜tool\\\\_calls\\\\_begin｜>|<｜tool▁calls｜>)[\\s\\S]*"
-            });
-            data.preserved_tokens = {
-                "<think>",
-                "</think>",
-                "<｜tool▁calls▁begin｜>",
-                "<｜tool▁call▁begin｜>",
-                "<｜tool▁sep｜>",
-                "<｜tool▁call▁end｜>",
-                "<｜tool▁calls▁end｜>",
-            };
-        });
-    }
-    return data;
-}
-
-static common_chat_params common_chat_params_init_minimax_m2(const common_chat_template & tmpl, const struct templates_params & params) {
-    common_chat_params data;
-    data.grammar_lazy = params.tools.is_array() && !params.tools.empty() && params.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
-
-    data.prompt = apply(tmpl, params);
-    data.format = COMMON_CHAT_FORMAT_MINIMAX_M2;
-
-    // Handle thinking tags based on prompt ending
-    if (string_ends_with(data.prompt, "<think>\n")) {
-        if (!params.enable_thinking) {
-            // Close the thinking tag immediately if thinking is disabled
-            data.prompt += "</think>\n\n";
-        } else {
-            // Mark thinking as forced open (template started with <think>)
-            data.thinking_forced_open = true;
-        }
-    }
-
-    // Preserve MiniMax-M2 special tokens
-    data.preserved_tokens = {
-        "<think>",
-        "</think>",
-        "<minimax:tool_call>",
-        "</minimax:tool_call>",
-    };
-
-    // build grammar for tool call
-    static const xml_tool_call_format form {
-        /* form.scope_start = */ "<minimax:tool_call>\n",
-        /* form.tool_start  = */ "<invoke name=\"",
-        /* form.tool_sep    = */ "\">\n",
-        /* form.key_start   = */ "<parameter name=\"",
-        /* form.key_val_sep = */ "\">",
-        /* form.val_end     = */ "</parameter>\n",
-        /* form.tool_end    = */ "</invoke>\n",
-        /* form.scope_end   = */ "</minimax:tool_call>",
-    };
-    build_grammar_xml_tool_call(data, params.tools, form);
-
-    return data;
-}
-
-static common_chat_params common_chat_params_init_qwen3_coder_xml(const common_chat_template & tmpl, const struct templates_params & params) {
-    common_chat_params data;
-    data.grammar_lazy = params.tools.is_array() && !params.tools.empty() && params.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
-
-    data.prompt = apply(tmpl, params);
-    data.format = COMMON_CHAT_FORMAT_QWEN3_CODER_XML;
-
-    data.preserved_tokens = {
-        "<tool_call>",
-        "</tool_call>",
-        "<function=",
-        "</function>",
-        "<parameter=",
-        "</parameter>",
-    };
-
-    // build grammar for tool call
-    static const xml_tool_call_format form {
-        /* form.scope_start = */ "<tool_call>\n",
-        /* form.tool_start  = */ "<function=",
-        /* form.tool_sep    = */ ">\n",
-        /* form.key_start   = */ "<parameter=",
-        /* form.key_val_sep = */ ">\n",
-        /* form.val_end     = */ "\n</parameter>\n",
-        /* form.tool_end    = */ "</function>\n",
-        /* form.scope_end   = */ "</tool_call>",
-    };
-    build_grammar_xml_tool_call(data, params.tools, form);
-
-    return data;
-}
-
-static common_chat_params common_chat_params_init_kimi_k2(const common_chat_template & tmpl, const struct templates_params & params) {
-    common_chat_params data;
-    data.grammar_lazy = params.tools.is_array() && !params.tools.empty() && params.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
-
-    data.prompt = apply(tmpl, params);
-    data.format = COMMON_CHAT_FORMAT_KIMI_K2;
-
-    data.preserved_tokens = {
-        "<think>",
-        "</think>",
-        "<|tool_calls_section_begin|>",
-        "<|tool_call_begin|>",
-        "<|tool_call_argument_begin|>",
-        "<|tool_call_end|>",
-        "<|tool_calls_section_end|>",
-        "<|im_end|>",
-        "<|im_system|>",
-        "<|im_middle|>",
-    };
-
-    data.additional_stops.insert(data.additional_stops.end(), {
-        "<|im_end|>",
-        "<|im_middle|>"
-    });
-    // build grammar for tool call
-    static const xml_tool_call_format form = ([]() {
-        xml_tool_call_format form {};
-        form.scope_start = "<|tool_calls_section_begin|>";
-        form.tool_start  = "<|tool_call_begin|>";
-        form.tool_sep    = "<|tool_call_argument_begin|>{";
-        form.key_start   = "\"";
-        form.key_val_sep = "\": ";
-        form.val_end     = ", ";
-        form.tool_end    = "}<|tool_call_end|>";
-        form.scope_end   = "<|tool_calls_section_end|>";
-        form.raw_argval  = false;
-        form.last_val_end = "";
-        return form;
-    })();
-    build_grammar_xml_tool_call(data, params.tools, form);
-
-    return data;
-}
-
-static common_chat_params common_chat_params_init_apriel_1_5(const common_chat_template & tmpl, const struct templates_params & params) {
-    common_chat_params data;
-    data.grammar_lazy = params.tools.is_array() && !params.tools.empty() && params.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
-
-    data.prompt = apply(tmpl, params);
-    data.format = COMMON_CHAT_FORMAT_APRIEL_1_5;
-
-    data.preserved_tokens = {
-        "<thinking>",
-        "</thinking>",
-        "<tool_calls>",
-        "</tool_calls>",
-    };
-
-    // build grammar for tool call
-    static const xml_tool_call_format form = ([]() {
-        xml_tool_call_format form {};
-        form.scope_start = "<tool_calls>[";
-        form.tool_start  = "{\"name\": \"";
-        form.tool_sep    = "\", \"arguments\": {";
-        form.key_start   = "\"";
-        form.key_val_sep = "\": ";
-        form.val_end     = ", ";
-        form.tool_end    = "}, ";
-        form.scope_end   = "]</tool_calls>";
-        form.raw_argval  = false;
-        form.last_val_end = "";
-        form.last_tool_end = "}";
-        return form;
-    })();
-    build_grammar_xml_tool_call(data, params.tools, form);
-
-    return data;
-}
-
-static common_chat_params common_chat_params_init_xiaomi_mimo(const common_chat_template & tmpl, const struct templates_params & params) {
-    common_chat_params data;
-    data.grammar_lazy = params.tools.is_array() && !params.tools.empty() && params.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
-
-    data.prompt = apply(tmpl, params);
-    data.format = COMMON_CHAT_FORMAT_XIAOMI_MIMO;
-
-    data.preserved_tokens = {
-        "<tool_call>",
-        "</tool_call>",
-    };
-
-    // build grammar for tool call
-    static const xml_tool_call_format form = ([]() {
-        xml_tool_call_format form {};
-        form.scope_start = "\n";
-        form.tool_start  = "<tool_call>\n{\"name\": \"";
-        form.tool_sep    = "\", \"arguments\": {";
-        form.key_start   = "\"";
-        form.key_val_sep = "\": ";
-        form.val_end     = ", ";
-        form.tool_end    = "}\n</tool_call>";
-        form.scope_end   = "";
-        form.raw_argval  = false;
-        form.last_val_end = "";
-        return form;
-    })();
-    build_grammar_xml_tool_call(data, params.tools, form);
-
-    return data;
-}
-
-static common_chat_params common_chat_params_init_gpt_oss(const common_chat_template & tmpl, const struct templates_params & inputs) {
+static common_chat_params common_chat_params_init_gpt_oss(const common_chat_template &    tmpl,
+                                                          const struct templates_params & inputs) {
     common_chat_params data;
 
     // Copy reasoning to the "thinking" field as expected by the gpt-oss template
     auto adjusted_messages = json::array();
     for (const auto & msg : inputs.messages) {
         auto has_reasoning_content = msg.contains("reasoning_content") && msg.at("reasoning_content").is_string();
-        auto has_tool_calls = msg.contains("tool_calls") && msg.at("tool_calls").is_array();
+        auto has_tool_calls        = msg.contains("tool_calls") && msg.at("tool_calls").is_array();
 
         if (has_reasoning_content && has_tool_calls) {
-            auto adjusted_message = msg;
+            auto adjusted_message        = msg;
             adjusted_message["thinking"] = msg.at("reasoning_content");
             adjusted_messages.push_back(adjusted_message);
         } else {
@@ -2007,7 +900,7 @@ static common_chat_params common_chat_params_init_gpt_oss(const common_chat_temp
         }
     }
 
-    auto prompt = apply(tmpl, inputs, /* messages_override= */ adjusted_messages);
+    auto prompt = common_chat_template_direct_apply(tmpl, inputs, /* messages_override= */ adjusted_messages);
 
     // Check if we need to replace the return token with end token during
     // inference and without generation prompt. For more details see:
@@ -2021,895 +914,118 @@ static common_chat_params common_chat_params_init_gpt_oss(const common_chat_temp
     }
 
     data.prompt = prompt;
-    data.format = COMMON_CHAT_FORMAT_GPT_OSS;
+    data.format = COMMON_CHAT_FORMAT_PEG_NATIVE;
 
     // These special tokens are required to parse properly, so we include them
     // even if parse_tool_calls is false.
     data.preserved_tokens = {
-        "<|channel|>",
-        "<|constrain|>",
-        "<|message|>",
-        "<|start|>",
-        "<|end|>",
+        "<|channel|>", "<|constrain|>", "<|message|>", "<|start|>", "<|end|>",
     };
 
-    if (!inputs.json_schema.is_null()) {
-        data.grammar_lazy = false;
-        data.grammar = build_grammar([&](const common_grammar_builder & builder) {
-            auto schema = inputs.json_schema;
-            builder.resolve_refs(schema);
+    auto has_tools         = inputs.tools.is_array() && !inputs.tools.empty();
+    auto extract_reasoning = inputs.reasoning_format != COMMON_REASONING_FORMAT_NONE;
+    auto include_grammar   = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE && has_tools;
 
-            auto not_end = builder.add_rule("not-end",
-                "[^<] | \"<\" [^|] | \"<|\" [^e] | \"<|e\" [^n] | \"<|en\" [^d] | \"<|end\" [^|] | \"<|end|\" [^>]");
-            auto analysis = builder.add_rule("analysis",
-                "\"<|channel|>analysis<|message|>\" ( " + not_end + " )* \"<|end|>\"");
-            auto constraint = builder.add_rule("constraint", "\"<|constrain|>\"? [a-zA-Z0-9_-]+");
-            auto final = builder.add_rule("final",
-                "\"<|channel|>final\" ( \" \" " + constraint + " )? \"<|message|>\" " +
-                builder.add_schema("response", schema)
-            );
+    auto parser = build_chat_peg_unified_parser([&](common_chat_peg_unified_builder & p) {
+        const std::string END                = "<|end|>";
+        const std::string START              = "<|start|>";
+        const std::string MESSAGE            = "<|message|>";
+        const std::string CHANNEL            = "<|channel|>";
+        const std::string CONSTRAIN          = "<|constrain|>";
+        const std::string START_ASSISTANT    = START + "assistant";
+        const std::string CHANNEL_ANALYSIS   = CHANNEL + "analysis";
+        const std::string CHANNEL_COMMENTARY = CHANNEL + "commentary";
+        const std::string CHANNEL_FINAL      = CHANNEL + "final";
 
-            builder.add_rule("root", "( " + analysis + " \"<|start|>assistant\" )? " + final);
-        });
-    }
+        auto the_end = END | p.end();
 
-    if (inputs.tools.is_array() && !inputs.tools.empty()) {
-        data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
-        data.grammar = build_grammar([&](const common_grammar_builder & builder) {
-            // tool calls can appear in commentary or analysis channels
-            auto channel = builder.add_rule("channel", "\"<|channel|>\" ( \"commentary\" | \"analysis\" )");
+        const std::string analysis_header  = CHANNEL_ANALYSIS + MESSAGE;
+        auto              segment_content  = p.until(END);
+        auto              analysis_segment = extract_reasoning ?
+                                                 p.literal(analysis_header) + p.reasoning(segment_content) + p.until(END) + the_end :
+                                                 p.content(analysis_header + p.until(END) + the_end);
 
-            std::vector<std::string> tool_rules_recipient_in_role;
-            std::vector<std::string> tool_rules_recipient_in_channel;
-            foreach_function(inputs.tools, [&](const json & tool) {
-                const auto & function = tool.at("function");
-                std::string name = function.at("name");
-                auto parameters = function.at("parameters");
-                builder.resolve_refs(parameters);
+        auto channel_header_content = p.until_one_of({ " to=functions.", MESSAGE });
+        auto content_header         = p.choice({ p.literal(CHANNEL_COMMENTARY), p.literal(CHANNEL_FINAL) });
+        auto content_segment        = p.rule("content-segment", content_header + channel_header_content + MESSAGE +
+                                                                    p.content(segment_content) + the_end);
 
-                tool_rules_recipient_in_role.push_back(
-                    builder.add_rule(name + "-call",
-                        "\"" + name + "\"" + channel + " \" <|constrain|>json\"? \"<|message|>\" " +
-                        builder.add_schema(name + "-args", parameters)
-                    )
-                );
-
-                tool_rules_recipient_in_channel.push_back(
-                    builder.add_rule(name + "-call",
-                        "\"" + name + "\"" + " \" <|constrain|>json\"? \"<|message|>\" " +
-                        builder.add_schema(name + "-args", parameters)
-                    )
-                );
-            });
-
-            auto recipient_in_channel = builder.add_rule("recipient_in_channel",
-                channel + " \" to=functions.\" ( " +
-                string_join(tool_rules_recipient_in_channel, " | ") + " )"
-            );
-
-            if (data.grammar_lazy) {
-                auto recipient_in_role = builder.add_rule("recipient_in_role",
-                    "\"<|start|>assistant\"? \" to=functions.\" ( " +
-                    string_join(tool_rules_recipient_in_role, " | ") + " )"
-                );
-
-                builder.add_rule("root", recipient_in_role + " | " + recipient_in_channel);
-            } else {
-                auto not_end = builder.add_rule("not-end",
-                    "[^<] | \"<\" [^|] | \"<|\" [^e] | \"<|e\" [^n] | \"<|en\" [^d] | \"<|end\" [^|] | \"<|end|\" [^>]");
-                auto analysis = builder.add_rule("analysis",
-                    "\"<|channel|>analysis<|message|>\" ( " + not_end + " )* \"<|end|>\"");
-                auto commentary = builder.add_rule("commentary",
-                    "\"<|channel|>commentary<|message|>\" ( " + not_end + " )* \"<|end|>\"");
-
-                auto recipient_in_role = builder.add_rule("recipient_in_role",
-                    "\" to=functions.\" ( " + string_join(tool_rules_recipient_in_role, " | ") + " )"
-                );
-
-                builder.add_rule("root",
-                    "( " + analysis + " \"<|start|>assistant\" )? " +
-                    "( " + commentary + " \"<|start|>assistant\" )? " +
-                    "( " + recipient_in_role + " | " + recipient_in_channel + " )"
-                );
-            }
-
-            // Trigger on tool calls that appear in the commentary channel
-            data.grammar_triggers.push_back({
-                COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN,
-                "<\\|channel\\|>(?:commentary|analysis) to"
-            });
-
-            // Trigger tool calls that appear in the role section, either at the
-            // start or in the middle.
-            data.grammar_triggers.push_back({
-                COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
-                "^ to"
-            });
-
-            data.grammar_triggers.push_back({
-                COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN,
-                "<\\|start\\|>assistant to"
-            });
-        });
-    }
-
-    return data;
-}
-
-static common_chat_params common_chat_params_init_glm_4_5(const common_chat_template & tmpl, const struct templates_params & inputs) {
-    common_chat_params data;
-    data.grammar_lazy = inputs.tools.is_array() && !inputs.tools.empty() && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
-
-    std::string prompt = apply(tmpl, inputs);
-
-    // match the existing trimming behavior
-    if (inputs.add_bos && string_starts_with(prompt, tmpl.bos_token())) {
-        prompt.erase(0, tmpl.bos_token().size());
-    }
-    if (inputs.add_eos && string_ends_with(prompt, tmpl.eos_token())) {
-        prompt.erase(prompt.size() - tmpl.eos_token().size());
-    }
-    if (string_ends_with(prompt, "<think>")) {
-        if (!inputs.enable_thinking) {
-            prompt += "</think>";
-        } else {
-            data.thinking_forced_open = true;
-        }
-    }
-
-    // add GLM preserved tokens
-    data.preserved_tokens = {
-        "<|endoftext|>",
-        "[MASK]",
-        "[gMASK]",
-        "[sMASK]",
-        "<sop>",
-        "<eop>",
-        "<|system|>",
-        "<|user|>",
-        "<|assistant|>",
-        "<|observation|>",
-        "<|begin_of_image|>",
-        "<|end_of_image|>",
-        "<|begin_of_video|>",
-        "<|end_of_video|>",
-        "<|begin_of_audio|>",
-        "<|end_of_audio|>",
-        "<|begin_of_transcription|>",
-        "<|end_of_transcription|>",
-        "<|code_prefix|>",
-        "<|code_middle|>",
-        "<|code_suffix|>",
-        "/nothink",
-        "<think>",
-        "</think>",
-        "<tool_call>",
-        "</tool_call>",
-        "<arg_key>",
-        "</arg_key>",
-        "<arg_value>",
-        "</arg_value>"
-    };
-
-    // extra GLM 4.5 stop word
-    data.additional_stops.insert(data.additional_stops.end(), {
-        "<|user|>",
-        "<|observation|>"
-    });
-
-    // build grammar for tool call
-    static const xml_tool_call_format form {
-        /* form.scope_start = */ "",
-        /* form.tool_start  = */ "\n<tool_call>",
-        /* form.tool_sep    = */ "\n",
-        /* form.key_start   = */ "<arg_key>",
-        /* form.key_val_sep = */ "</arg_key>\n<arg_value>",
-        /* form.val_end     = */ "</arg_value>\n",
-        /* form.tool_end    = */ "</tool_call>\n",
-        /* form.scope_end   = */ "",
-    };
-    build_grammar_xml_tool_call(data, inputs.tools, form);
-
-    data.prompt = prompt;
-    data.format = COMMON_CHAT_FORMAT_GLM_4_5;
-    return data;
-}
-
-static common_chat_params common_chat_params_init_firefunction_v2(const common_chat_template & tmpl, const struct templates_params & inputs) {
-    LOG_DBG("%s\n", __func__);
-    common_chat_params data;
-    const std::optional<json> additional_context = json {
-        {"datetime", format_time(inputs.now, "%b %d %Y %H:%M:%S GMT")},
-        {"functions", json(inputs.tools.empty() ? "" : inputs.tools.dump(2))},
-    };
-    data.prompt = apply(tmpl, inputs, /* messages_override =*/ std::nullopt, /* tools_override =*/ std::nullopt, additional_context);
-    if (inputs.tools.is_array() && !inputs.tools.empty()) {
-        data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
-        data.grammar = build_grammar([&](const common_grammar_builder & builder) {
-            auto schemas = json::array();
-            foreach_function(inputs.tools, [&](const json & tool) {
-                const auto & function = tool.at("function");
-                schemas.push_back({
-                    {"type", "object"},
-                    {"properties", {
-                        {"name", {
-                            {"type", "string"},
-                            {"const", function.at("name")},
-                        }},
-                        {"arguments", function.at("parameters")},
-                    }},
-                    {"required", json::array({"name", "arguments", "id"})},
-                });
-            });
-            auto schema = json {
-                {"type", "array"},
-                {"items", schemas.size() == 1 ? schemas[0] : json {{"anyOf", schemas}}},
-                {"minItems", 1},
-            };
-            if (!inputs.parallel_tool_calls) {
-                schema["maxItems"] = 1;
-            }
-            builder.add_rule("root", "\" functools\"? " + builder.add_schema("tool_calls", schema));
-        });
-        data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, " functools["});
-        data.preserved_tokens = {
-            " functools[",
-        };
-        data.format = COMMON_CHAT_FORMAT_FIREFUNCTION_V2;
-    } else {
-        data.format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
-    }
-    return data;
-}
-
-static common_chat_params common_chat_params_init_functionary_v3_2(const common_chat_template & tmpl, const struct templates_params & inputs) {
-    // >>>all\nlet's call functions>>>fn1\n{"arg1": 1...}\n>>>fn2\n{"arg1": 1...}...
-    // Using ">>>f1\n", ">>>f2\n"... as trigger words for the grammar
-    // If the function is python, we also allow raw python code (if the line after `python\n` doesn't start w/ opening `{`), which the model seems to prefer for multiline code.
-    common_chat_params data;
-    data.prompt = apply(tmpl, inputs);
-    data.format = COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2;
-    if (inputs.tools.is_array() && !inputs.tools.empty()) {
-        data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
-        data.grammar = build_grammar([&](const common_grammar_builder & builder) {
-            std::vector<std::string> first_tool_rules;
-            std::vector<std::string> subsequent_tool_rules;
-            foreach_function(inputs.tools, [&](const json & tool) {
-                const auto & function = tool.at("function");
-                std::string name = function.at("name");
-                auto parameters = function.at("parameters");
-                builder.resolve_refs(parameters);
-                std::string args_pattern = "[\\s\\S]*";
-                auto args_rule = builder.add_schema(name + "-args", parameters);
-                if (name == "python") {
-                    args_rule = builder.add_rule(name + "-maybe-raw-args", args_rule + " | [^{] .*");
-                } else {
-                    args_pattern = "\\{" + args_pattern;
-                }
-                auto call_rule = builder.add_rule(name + "-call", "\"" + name + "\\n\" " + args_rule);
-                first_tool_rules.push_back(call_rule);
-                if (inputs.parallel_tool_calls) {
-                    subsequent_tool_rules.push_back(builder.add_rule(name + "-call2", "\">>>\" " + call_rule));
-                }
-                data.grammar_triggers.push_back({
-                    COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
-                    "((?:[\\s\\S]+?>>>)?" + regex_escape(name) + "\n)" + args_pattern,
-                });
-            });
-            data.preserved_tokens = {
-                "<|end_header_id|>",
-            };
-            auto first_rule = first_tool_rules.empty() ? "" : builder.add_rule("first_tool_call", string_join(first_tool_rules, " | ")) + " space";
-            if (inputs.parallel_tool_calls) {
-                auto subsequent_rule = builder.add_rule("subsequent_tool_call", string_join(subsequent_tool_rules, " | ")) + " space";
-                builder.add_rule("root", first_rule + " (" + subsequent_rule + ")*");
-            } else {
-                builder.add_rule("root", first_rule);
-            }
-
-        });
-    }
-    return data;
-}
-
-static common_chat_params common_chat_params_init_functionary_v3_1_llama_3_1(const common_chat_template & tmpl, const struct templates_params & inputs) {
-    // https://github.com/MeetKai/functionary/blob/main/tests/prompt_test_v3-llama3.1.txt
-    common_chat_params data;
-
-    if (!inputs.tools.is_null()) {
-        std::string python_code_argument_name;
-        auto has_raw_python = false;
-
-        data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
-        data.grammar = build_grammar([&](const common_grammar_builder & builder) {
-            std::vector<std::string> tool_rules;
-            foreach_function(inputs.tools, [&](const json & tool) {
-                const auto & function = tool.at("function");
-                const auto & parameters = function.at("parameters");
-                std::string name = function.at("name");
-                if (name == "python" || name == "ipython") {
-                    if (!parameters.contains("type")) {
-                        throw std::runtime_error("Missing type in python tool");
-                    }
-                    has_raw_python = true;
-                    const auto & type = parameters.at("type");
-                    if (type == "object") {
-                        auto properties = parameters.at("properties");
-                        for (auto it = properties.begin(); it != properties.end(); ++it) {
-                            if (it.value().at("type") == "string") {
-                                if (!python_code_argument_name.empty()) {
-                                    throw std::runtime_error("Multiple string arguments found in python tool");
-                                }
-                                python_code_argument_name = it.key();
-                            }
-                        }
-                        if (python_code_argument_name.empty()) {
-                            throw std::runtime_error("No string argument found in python tool");
-                        }
-                    } else if (type != "string") {
-                        throw std::runtime_error("Invalid type in python tool: " + type.dump());
-                    }
-                }
-                tool_rules.push_back(builder.add_rule(name + "-call", "\"<function=" + name + ">\" " + builder.add_schema(name + "-args", parameters) + " \"</function>\" space"));
-            });
-            if (has_raw_python) {
-                tool_rules.push_back(builder.add_rule("python-call", "\"<|python_tag|>\" .*"));
-                data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<|python_tag|>"});
-                data.preserved_tokens.push_back("<|python_tag|>");
-            }
-            auto tool_call = builder.add_rule("tool_call", string_join(tool_rules, " | ")) + " space";
-            builder.add_rule("root", inputs.parallel_tool_calls ? "(" + tool_call + ")+" : tool_call);
-            data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<function="});
-        });
-        data.format = COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1;
-    } else {
-        data.format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
-    }
-
-    data.prompt = apply(tmpl, inputs);
-    // TODO: if (has_raw_python)
-    return data;
-}
-
-static common_chat_params common_chat_params_init_hermes_2_pro(const common_chat_template & tmpl, const struct templates_params & inputs) {
-    common_chat_params data;
-
-    json extra_context = json {
-        {"enable_thinking", inputs.enable_thinking},
-    };
-    extra_context.update(inputs.extra_context);
-
-    data.prompt = apply(tmpl, inputs, /* messages_override =*/ std::nullopt, /* tools_override= */ std::nullopt, extra_context);
-    data.format = COMMON_CHAT_FORMAT_HERMES_2_PRO;
-    if (string_ends_with(data.prompt, "<think>\n")) {
-        if (!extra_context["enable_thinking"]) {
-            data.prompt += "</think>";
-        } else {
-            data.thinking_forced_open = true;
-        }
-    }
-
-    if (!inputs.tools.is_null()) {
-        // (content)?(<tool_call>{"name": "foo", "arguments": {"a": 1}}</tool_call>)*
-        data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
-        data.grammar = build_grammar([&](const common_grammar_builder & builder) {
-            std::vector<std::string> tool_rules;
-            std::vector<std::string> tool_call_alts;
-            std::vector<std::string> escaped_names;
-            foreach_function(inputs.tools, [&](const json & tool) {
-                const auto & function = tool.at("function");
-                std::string name = function.at("name");
-                auto parameters = function.at("parameters");
-                builder.resolve_refs(parameters);
-                tool_rules.push_back(builder.add_schema(name + "-call", {
-                    {"type", "object"},
-                    {"properties", json {
-                        {"name", json {{"const", name}}},
-                        {"arguments", parameters},
-                    }},
-                    {"required", json::array({"name", "arguments"})},
-                }));
-                tool_call_alts.push_back(builder.add_rule(
-                    name + "-function-tag",
-                    "\"<function\" ( \"=" + name + "\" | \" name=\\\"" + name + "\\\"\" ) \">\" space " +
-                    builder.add_schema(name + "-args", parameters) + " "
-                    "\"</function>\" space"));
-
-                data.grammar_triggers.push_back({
-                    COMMON_GRAMMAR_TRIGGER_TYPE_WORD,
-                    "<function=" + name + ">",
-                });
-                auto escaped_name = regex_escape(name);
-                data.grammar_triggers.push_back({
-                    COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN,
-                    "<function\\s+name\\s*=\\s*\"" + escaped_name + "\"",
-                });
-                escaped_names.push_back(escaped_name);
-            });
-            auto any_tool_call = builder.add_rule("any_tool_call", "( " + string_join(tool_rules, " | ") + " ) space");
-            std::vector<std::string> alt_tags {
-                any_tool_call,
-                "\"<tool_call>\" space "     + any_tool_call + " \"</tool_call>\"",
-                // The rest is just to accommodate common "good bad" outputs.
-                "\"<function_call>\" space " + any_tool_call + " \"</function_call>\"",
-                "\"<response>\"  space "     + any_tool_call + " \"</response>\"",
-                "\"<tools>\"     space "     + any_tool_call + " \"</tools>\"",
-                "\"<json>\"      space "     + any_tool_call + " \"</json>\"",
-                "\"<xml>\"      space "     + any_tool_call + " \"</xml>\"",
-                "\"<JSON>\"      space "     + any_tool_call + " \"</JSON>\"",
-            };
-            auto wrappable_tool_call = builder.add_rule("wrappable_tool_call", "( " + string_join(alt_tags, " | ") + " ) space");
-            tool_call_alts.push_back(wrappable_tool_call);
-            tool_call_alts.push_back(
-                "( \"```\\n\" | \"```json\\n\" | \"```xml\\n\" ) space " + wrappable_tool_call + " space \"```\" space ");
-            auto tool_call = builder.add_rule("tool_call", string_join(tool_call_alts, " | "));
-            builder.add_rule("root",
-                std::string(data.thinking_forced_open ? "( \"</think>\" space )? " : "") +
-                (inputs.parallel_tool_calls ? "(" + tool_call + ")+" : tool_call));
-            // Trigger on some common known "good bad" outputs (only from the start and with a json that's about a specific argument name to avoid false positives)
-            data.grammar_triggers.push_back({
-                COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN,
-                // If thinking_forced_open, then we capture the </think> tag in the grammar,
-                // (important for required tool choice) and in the trigger's first capture (decides what is sent to the grammar)
-                std::string(data.thinking_forced_open ? "(</think>\\s*)" : "") + (
-                    "\\s*("
-                    "(?:<tool_call>"
-                    "|<function"
-                    "|(?:```(?:json|xml)?\n\\s*)?(?:<function_call>|<tools>|<xml><json>|<response>)?"
-                    "\\s*\\{\\s*\"name\"\\s*:\\s*\"(?:" + string_join(escaped_names, "|") + ")\""
-                    ")"
-                    ")"
-                ),
-            });
-            data.preserved_tokens = {
-                "<think>",
-                "</think>",
-                "<tool_call>",
-                "</tool_call>",
-                "<function",
-                "<tools>",
-                "</tools>",
-                "<response>",
-                "</response>",
-                "<function_call>",
-                "</function_call>",
-                "<json>",
-                "</json>",
-                "<JSON>",
-                "</JSON>",
-                "```",
-                "```json",
-                "```xml",
-            };
-        });
-    }
-
-    return data;
-}
-
-static common_chat_params common_chat_params_init_granite(const common_chat_template & tmpl, const struct templates_params & inputs) {
-    common_chat_params data;
-
-    // Pass thinking context for Granite template
-    json additional_context = {
-        {"thinking", inputs.enable_thinking},
-    };
-
-    data.prompt = apply(tmpl, inputs, /* messages_override= */ std::nullopt, /* tools_override= */ std::nullopt, additional_context);
-    data.format = COMMON_CHAT_FORMAT_GRANITE;
-
-    if (string_ends_with(data.prompt, "<think>\n") || string_ends_with(data.prompt, "<think>")) {
-        if (!inputs.enable_thinking) {
-            data.prompt += "</think>";
-        } else {
-            data.thinking_forced_open = true;
-        }
-    }
-
-    if (!inputs.tools.is_null()) {
-        // Granite uses <|tool_call|> followed by JSON list
-        data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
-        data.grammar = build_grammar([&](const common_grammar_builder & builder) {
-            std::vector<std::string> tool_rules;
-            foreach_function(inputs.tools, [&](const json & tool) {
-                const auto & function = tool.at("function");
-                std::string name = function.at("name");
-                auto parameters = function.at("parameters");
-                builder.resolve_refs(parameters);
-                tool_rules.push_back(builder.add_rule(name + "-call", builder.add_schema(name +
-"-args", {
-                    {"type", "object"},
-                    {"properties", {
-                        {"name", {{"const", name}}},
-                        {"arguments", parameters},
-                    }},
-                    {"required", json::array({"name", "arguments"})},
-                })));
-            });
-
-            auto tool_call = builder.add_rule("tool_call", string_join(tool_rules, " | "));
-            auto tool_list = builder.add_rule("tool_list", "\"[\" space " + tool_call + " (\",\" space " + tool_call + ")* space \"]\"");
-
-            if (data.thinking_forced_open) {
-                builder.add_rule("root", "\"</think>\" space \"<response>\" space [^<]* \"</response>\" space \"<|tool_call|>\" space " + tool_list);
-            } else {
-                builder.add_rule("root", "\"<|tool_call|>\" space " + tool_list);
-            }
-
-            data.grammar_triggers.push_back({
-                COMMON_GRAMMAR_TRIGGER_TYPE_WORD,
-                "<|tool_call|>"
-            });
-
-            data.preserved_tokens = {
-                "<think>",
-                "</think>",
-                "<response>",
-                "</response>",
-                "<|tool_call|>",
-            };
-        });
-    } else {
-        // Handle thinking tags for non-tool responses
-        if (data.thinking_forced_open && inputs.enable_thinking) {
-            data.grammar_lazy = false;
-            data.grammar = build_grammar([&](const common_grammar_builder & builder) {
-                builder.add_rule("root", "\"</think>\" space \"<response>\" space .* \"</response>\" space");
-            });
-            data.preserved_tokens = {
-                "<think>",
-                "</think>",
-                "<response>",
-                "</response>",
-            };
-        }
-    }
-
-    return data;
-}
-
-static common_chat_params common_chat_params_init_solar_open(const common_chat_template & tmpl, const struct templates_params & inputs) {
-    common_chat_params data;
-
-    // Copy `reasoning_content` to `reasoning`
-    auto adjusted_messages = json::array();
-    for (const auto & msg : inputs.messages) {
-        if (msg.contains("reasoning_content") && msg.at("reasoning_content").is_string()) {
-            auto adjusted_message = msg;
-            adjusted_message["reasoning"] = msg.at("reasoning_content");
-            adjusted_message.erase("reasoning_content");
-            adjusted_messages.push_back(adjusted_message);
-        } else {
-            adjusted_messages.push_back(msg);
-        }
-    }
-
-    auto has_tools = inputs.tools.is_array() && !inputs.tools.empty();
-    auto include_grammar = true;
-
-    auto prompt = apply(tmpl, inputs, /* messages_override= */ adjusted_messages);
-
-    // Check if we need to replace the flush token with end token during inference and without generation prompt.
-    if (inputs.is_inference && !inputs.add_generation_prompt) {
-        static constexpr std::string_view return_token = "<|flush|>";
-        static constexpr std::string_view end_token    = "<|end|>";
-        if (size_t pos = prompt.rfind(return_token); pos != std::string::npos) {
-            prompt.replace(pos, return_token.length(), end_token);
-        }
-    }
-
-    data.prompt = prompt;
-    data.format = COMMON_CHAT_FORMAT_PEG_NATIVE;
-    data.preserved_tokens = {
-        "<|think|>",
-        "<|content|>",
-        "<|begin|>",
-        "<|end|>",
-        "<|tool_calls|>",
-        "<|tool_call:begin|>",
-        "<|tool_call:end|>",
-        "<|tool_call:name|>",
-        "<|tool_call:args|>",
-    };
-
-    auto parser = build_chat_peg_native_parser([&](common_chat_peg_native_builder & p) {
-        auto lit_think = p.atomic(p.literal("<|think|>"));
-        auto lit_assistant_begin = p.atomic(p.literal("<|begin|>assistant"));
-        auto lit_content = p.atomic(p.literal("<|content|>"));
-        auto lit_end = p.atomic(p.literal("<|end|>"));
-        auto parser_until_end = p.until("<|end|>");
-
-        // reasoning <- "<|think|>" (!"<|end|>" .)*
-        auto parser_reasoning = p.rule("reasoning", lit_think + p.reasoning(parser_until_end));
-
-        // content <- "<|content|>" (!"<|end|>" .)*
-        auto parser_content = p.rule("content", lit_content + p.content(parser_until_end));
-
-        // wrap_choice(items) <- item-choice wrapped*
-        // item-choice        <- items[0] / ... / items[n]
-        // wrapped            <- "<|end|><|begin|>assistant" item-choice
-        auto wrap_choice = [&](const std::vector<common_peg_parser> & items) {
-            auto choice = p.choice(items);
-            return choice + p.zero_or_more(lit_end + lit_assistant_begin + choice);
-        };
-
-        // wrap_seq(items) <- item[0] "<|end|><|begin|>assistant" item[1] ...
-        auto wrap_seq = [&](const std::vector<common_peg_parser> & items) {
-            auto seq = p.sequence();
-            for (auto i = 0u; i < items.size(); i++) {
-                if (i == 0) {
-                    seq += items[i];
-                    continue;
-                }
-                seq += lit_end + lit_assistant_begin + items[i];
-            }
-            return seq;
-        };
-
-        // Response format parser
-        if (inputs.json_schema.is_object() && !inputs.json_schema.empty()) {
-            auto parser_response_format = lit_content + p.content(p.schema(p.json(), "response-format", inputs.json_schema));
-            return p.choice({
-                wrap_seq({parser_reasoning, parser_response_format}),
-                wrap_seq({parser_response_format})
-            });
+        if (!inputs.json_schema.is_null()) {
+            auto final_header = p.literal(CHANNEL_FINAL);
+            auto constraint   = p.optional(p.space() + p.literal(CONSTRAIN) + channel_header_content);
+            return p.optional(analysis_segment) + final_header + constraint + MESSAGE +
+                   p.content(p.schema(p.json(), "response-format", inputs.json_schema));
         }
 
-        auto lit_tool_call_begin = p.literal("<|tool_call:begin|>");
-        auto lit_tool_call_name = p.literal("<|tool_call:name|>");
-        auto lit_tool_call_args = p.literal("<|tool_call:args|>");
-        auto lit_tool_call_end = p.literal("<|tool_call:end|>");
+        auto segment  = p.optional(START_ASSISTANT + p.space()) + p.choice({ content_segment, analysis_segment });
+        auto contents = p.optional(segment + p.repeat(p.optional(p.space()) + segment, 0, -1)) + p.end();
 
         // Tool call parser
         if (has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE) {
-            auto parser_tool_call = p.choice();
+            auto tool_choice = p.choice();
+
             foreach_function(inputs.tools, [&](const json & tool) {
                 const auto & function = tool.at("function");
-                std::string name = function.at("name");
-                const auto & schema = function.at("parameters");
+                std::string  name     = function.at("name");
+                const auto & params   = function.at("parameters");
 
-                // tool(name, schema) <- name "<|tool_call:args|>" schema
-                parser_tool_call |= p.rule("tool-" + name,
-                    p.atomic(p.tool_name(p.literal(name)) + lit_tool_call_args)
-                    + p.tool_args(p.schema(p.json(), "tool-" + name + "-schema", schema)));
+                // Tool call can appear as:
+                // 1. In role header: " to=functions.NAME<|channel|>..."
+                // 2. In channel: "<|channel|>(analysis|commentary) to=functions.NAME..."
+                auto func_name = p.literal(" to=functions.") + p.tool_name(p.literal(name));
+
+                auto channel    = p.literal(CHANNEL_COMMENTARY) | p.literal(CHANNEL_ANALYSIS);
+                auto constraint = p.space() + p.optional(p.literal(CONSTRAIN) + channel_header_content);
+                auto args       = p.tool_args(p.schema(p.json(), "tool-" + name + "-schema", params));
+
+                // Pattern 1: recipient in role header
+                // " to=functions.NAME<|channel|>(analysis|commentary)[constraint]<|message|>ARGS"
+                auto tool_in_role = p.tool(p.tool_open(func_name + channel) + constraint + MESSAGE + args);
+
+                // Pattern 2: recipient in channel header
+                // "<|channel|>(analysis|commentary) to=functions.NAME[constraint]<|message|>ARGS"
+
+                auto tool_in_channel = p.tool(channel + p.tool_open(func_name + constraint + MESSAGE) + args);
+
+                tool_choice |= p.trigger_rule("tool-" + name, tool_in_role | tool_in_channel);
             });
 
             auto min_calls = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED ? 1 : 0;
             auto max_calls = inputs.parallel_tool_calls ? -1 : 1;
 
-            // tool-calls  <- "<|tool_calls|>" tool-call+
-            // tool-call   <- "<|tool_call:begin|> call-id "<|tool_call:name|>" &([^<]+ "<|tool_call:args|>") tool-choice "<|tool_call:end|>"
-            // call-id     <- [a-zA-Z0-9_-]+
-            // tool-choice <- tool(t[0].name, t[0].schema) / ... / tool(t[n].name, t[n].schema)
-            auto parser_tool_calls = p.trigger_rule("tool-calls",
-                p.atomic(p.literal("<|tool_calls|>"))
-                + p.repeat(
-                    p.tool_open(
-                        lit_tool_call_begin
-                        + p.tool_id(p.chars("[a-zA-Z0-9_-]", 1, -1))
-                        + lit_tool_call_name
-                        + p.peek(p.chars("[^<]", 1, -1) + lit_tool_call_args))
-                    + parser_tool_call
-                    + p.tool_close(lit_tool_call_end),
-                /* min = */ 1,
-                /* max = */ max_calls));
+            auto role_start = p.optional(p.space() + p.literal(START_ASSISTANT));
+            auto tool_call  = p.rule("tool-call", p.repeat(role_start + tool_choice, min_calls, max_calls) + p.end());
 
-            if (min_calls == 1) {
-                // If required, then try any combination of the reasoning, content, and tool call
-                return p.choice({
-                    wrap_seq({parser_reasoning, parser_content, parser_tool_calls}),
-                    wrap_seq({parser_reasoning, parser_tool_calls}),
-                    wrap_seq({parser_content, parser_tool_calls}),
-                    wrap_seq({parser_tool_calls})
-                });
-            }
-
-            return wrap_choice({parser_reasoning, parser_content, parser_tool_calls});
+            return p.choice({ tool_call, p.one_or_more(segment) + tool_call });
         }
 
-        // Content only parser
-        include_grammar = false;
-        return wrap_choice({parser_reasoning, parser_content});
+        return contents;
     });
 
     data.parser = parser.save();
 
     if (include_grammar) {
         data.grammar_lazy = has_tools && inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_AUTO;
-
-        data.grammar = build_grammar([&](const common_grammar_builder & builder) {
+        data.grammar      = build_grammar([&](const common_grammar_builder & builder) {
             foreach_function(inputs.tools, [&](const json & tool) {
                 const auto & function = tool.at("function");
-                auto schema = function.at("parameters");
+                auto         schema   = function.at("parameters");
                 builder.resolve_refs(schema);
             });
             parser.build_grammar(builder, data.grammar_lazy);
         });
 
         data.grammar_triggers = {
-            {COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<|tool_calls|>"}
+            { COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN, "^(?:<\\|start\\|>assistant\\s*)?(\\s+to=functions)"               },
+            { COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN, "(?:<\\|end\\|>)(?:<\\|start\\|>assistant\\s*)?(\\s+to=functions)" },
+            { COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN,
+             "(?:<\\|start\\|>assistant\\s*)?(<\\|channel\\|>(?:commentary|analysis)\\s+to=functions)"                }
         };
     }
 
     return data;
 }
 
-static common_chat_params common_chat_params_init_exaone_moe(const common_chat_template & tmpl, const struct templates_params & inputs) {
-    common_chat_params data;
-
-    data.prompt = apply(tmpl, inputs);
-    data.format = COMMON_CHAT_FORMAT_EXAONE_MOE;
-    if (string_ends_with(data.prompt, "<think>\n")) {
-        if (!inputs.enable_thinking) {
-            data.prompt += "</think>\n\n";
-        } else {
-            data.thinking_forced_open = true;
-        }
-    }
-
-    if (inputs.tools.is_array() && !inputs.tools.empty()) {
-        data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED && inputs.json_schema.is_null();
-        data.grammar = build_grammar([&](const common_grammar_builder & builder) {
-            std::vector<std::string> tool_rules;
-            foreach_function(inputs.tools, [&](const json & tool) {
-                const auto & function = tool.at("function");
-                std::string name = function.at("name");
-                auto parameters = function.at("parameters");
-                builder.resolve_refs(parameters);
-                // Expect: <tool_call>{"name": "<name>", "arguments": {...}}</tool_call>
-                tool_rules.push_back(builder.add_rule(
-                    name + "-call",
-                    "\"<tool_call>\" space " +
-                        builder.add_schema(name + "-obj", json{
-                            {"type", "object"},
-                            {"properties", {
-                                {"name",      json{{"const", name}}},
-                                {"arguments", parameters},
-                            }},
-                            {"required", json::array({"name", "arguments"})},
-                        }) +
-                    " space \"</tool_call>\" space"));
-            });
-
-            auto tool_call = builder.add_rule("tool_call", string_join(tool_rules, " | "));
-            builder.add_rule("root",
-                std::string(data.thinking_forced_open ? "( \"</think>\" space )? " : "") +
-                (inputs.parallel_tool_calls ? "(" + tool_call + ")+" : tool_call));
-
-            data.grammar_triggers.push_back({
-                COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
-                std::string(data.thinking_forced_open ? "[\\s\\S]*?(</think>\\s*)?" : "") +
-                    "(<tool_call>)[\\s\\S]*"
-            });
-            data.preserved_tokens = {
-                "<think>",
-                "</think>",
-                "<tool_call>",
-                "</tool_call>",
-            };
-        });
-    }
-
-    return data;
-}
-
-static common_chat_params common_chat_params_init_translate_gemma(const common_chat_template & tmpl, const struct templates_params & inputs) {
-    common_chat_params data;
-
-    // This template does not support tools or reasoning
-    // we just need to transform the messages into the correct schema
-
-    templates_params inputs_new = inputs;
-    json & messages = inputs_new.messages;
-
-    // default to chat_template_kwargs, or en-GB if not specified
-    std::string default_src_lang = inputs.extra_context.value("source_lang_code", "en-GB");
-    std::string default_tgt_lang = inputs.extra_context.value("target_lang_code", "en-GB");
-
-    GGML_ASSERT(messages.is_array());
-    for (auto & message : messages) {
-        if (message.contains("role") && message["role"].get<std::string>() != "user") {
-            continue;
-        }
-        if (!message.contains("content")) {
-            message["content"] = json::array();
-        }
-        if (message.contains("content") && !message["content"].is_array()) {
-            auto content_str = message["content"].get<std::string>();
-            // default to en-GB if not specified (to make common_chat_format_example works)
-            auto src_lang = message.contains("source_lang_code")
-                        ? message["source_lang_code"].get<std::string>() : default_src_lang;
-            auto tgt_lang = message.contains("target_lang_code")
-                        ? message["target_lang_code"].get<std::string>() : default_tgt_lang;
-            message["content"] = json::array({
-                json{
-                    {"type", "text"},
-                    {"text", content_str},
-                    {"source_lang_code", src_lang},
-                    {"target_lang_code", tgt_lang},
-                }
-            });
-        }
-    }
-
-    data.prompt = apply(tmpl, inputs_new, std::nullopt, std::nullopt);
-    data.format = COMMON_CHAT_FORMAT_GENERIC;
-
-    return data;
-}
-
-static common_chat_params common_chat_params_init_without_tools(const common_chat_template & tmpl, const struct templates_params & inputs) {
-    common_chat_params data;
-    data.prompt = apply(tmpl, inputs);
-    data.format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
-    data.grammar_lazy = false;
-    if (!inputs.json_schema.is_null()) {
-        if (!inputs.grammar.empty()) {
-            throw std::runtime_error("Either \"json_schema\" or \"grammar\" can be specified, but not both");
-        }
-        data.grammar = json_schema_to_grammar(inputs.json_schema);
-    } else {
-        data.grammar = inputs.grammar;
-    }
-    return data;
-}
-
-static common_chat_params common_chat_params_init_seed_oss(
-    const common_chat_template         & tmpl,
-    templates_params                   & params,
-    const common_chat_templates_inputs & inputs)
-{
-    common_chat_params data;
-    data.prompt = apply(tmpl, params);
-    data.format = COMMON_CHAT_FORMAT_SEED_OSS;
-    if (string_ends_with(data.prompt, "<seed:think>")) {
-        if (!inputs.enable_thinking) {
-            data.prompt += "</seed:think>";
-        } else {
-            data.thinking_forced_open = true;
-        }
-    }
-
-    if (params.tools.is_array() && !params.tools.empty()) {
-        data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
-        data.grammar      = build_grammar([&](const common_grammar_builder & builder) {
-            std::vector<std::string> tool_rules;
-            foreach_function(params.tools, [&](const json & tool) {
-                const auto & function   = tool.at("function");
-                std::string  name       = function.at("name");
-                auto         parameters = function.at("parameters");
-                builder.resolve_refs(parameters);
-
-                // Create rule for Seed-OSS function call format
-                std::string param_rules;
-                if (parameters.contains("properties")) {
-                    for (const auto & [key, value] : parameters.at("properties").items()) {
-                        param_rules += "\"<parameter=" + key + ">\"" + builder.add_schema(name + "-arg-" + key, value) +
-                                       "\"</parameter>\"";
-                    }
-                }
-
-                tool_rules.push_back(builder.add_rule(name + "-call",
-                                                      "\"<seed:tool_call>\" space \"<function=" + name + ">\" space " +
-                                                          param_rules +
-                                                          " \"</function>\" space \"</seed:tool_call>\""));
-            });
-
-            data.grammar_triggers.push_back({ COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<seed:tool_call>" });
-
-            data.preserved_tokens = {
-                "<seed:think>", "</seed:think>", "<seed:tool_call>", "</seed:tool_call>",
-                "<function=",   "</function>",   "<parameter=",      "</parameter>",
-            };
-
-            builder.add_rule("root", string_join(tool_rules, " | "));
-        });
-    }
-    return data;
-}
-
-// various workarounds for known issues with certain templates or model behaviors
-// TODO @ngxson : improve this (how?)
 namespace workaround {
 
 // if first message is system and template does not support it, merge it with next message
@@ -2958,70 +1074,8 @@ static void func_args_not_string(json & messages) {
     }
 }
 
-static void move_tool_calls_to_content(json & messages, int indent_spaces = 2) {
-    GGML_ASSERT(messages.is_array());
-    for (auto & message : messages) {
-        if (message.contains("tool_calls")) {
-            auto tool_calls_new = json{
-                {"tool_calls", message.at("tool_calls")}
-            };
-            message.erase("tool_calls");
-            auto content = message.at("content");
-            std::string content_new = content.is_null() ? "" : content.get<std::string>();
-            message["content"] = content_new + tool_calls_new.dump(indent_spaces, ' ', false, json::error_handler_t::replace);
-        }
-    }
-}
-
-// TODO @ngxson : we may remove support for generic schema in the future
-static void use_generic_schema(json & messages) {
-    GGML_ASSERT(messages.is_array());
-    for (auto & message : messages) {
-        if (message.contains("tool_calls") && message.at("tool_calls").is_array()) {
-            auto & tool_calls = message.at("tool_calls");
-            for (auto & tool_call : tool_calls) {
-                if (tool_call.contains("type") && tool_call.at("type") == "function" &&
-                    tool_call.contains("function") && tool_call.at("function").is_object()) {
-                    // Copy values before erasing to avoid use-after-free
-                    json name_value;
-                    json arguments_value;
-                    json id_value;
-                    const auto & function = tool_call.at("function");
-                    if (function.contains("name")) {
-                        name_value = function.at("name");
-                    }
-                    if (function.contains("arguments")) {
-                        arguments_value = function.at("arguments");
-                    }
-                    if (tool_call.contains("id")) {
-                        id_value = tool_call.at("id");
-                    }
-                    // Now safely erase and assign in the correct order
-                    tool_call.erase("type");
-                    tool_call.erase("function");
-                    tool_call.erase("id");
-                    // Reassign in desired order: name, arguments, id
-                    if (!name_value.is_null()) {
-                        tool_call["name"] = name_value;
-                    }
-                    if (!arguments_value.is_null()) {
-                        tool_call["arguments"] = arguments_value;
-                    }
-                    if (!id_value.is_null()) {
-                        tool_call["id"] = id_value;
-                    }
-                }
-            }
-        }
-    }
-}
-
-} // namespace workaround
-
-static common_chat_params common_chat_templates_apply_jinja(
-    const struct common_chat_templates        * tmpls,
-    const struct common_chat_templates_inputs & inputs)
-{
+static common_chat_params common_chat_templates_apply_jinja(const struct common_chat_templates *        tmpls,
+                                                            const struct common_chat_templates_inputs & inputs) {
     templates_params params;
     params.tools = common_chat_tools_to_json_oaicompat(inputs.tools);
     const auto & tmpl = params.tools.is_array() && tmpls->template_tool_use
@@ -3056,235 +1110,56 @@ static common_chat_params common_chat_templates_apply_jinja(
         params.json_schema = json::parse(inputs.json_schema);
     }
 
-    if (inputs.parallel_tool_calls && !tmpl.original_caps().supports_parallel_tool_calls) {
-        LOG_DBG("Disabling parallel_tool_calls because the template does not support it\n");
-        params.parallel_tool_calls = false;
-    } else {
-        params.parallel_tool_calls = inputs.parallel_tool_calls;
-    }
+    // if (inputs.parallel_tool_calls && !tmpl.original_caps().supports_parallel_tool_calls) {
+    //     LOG_DBG("Disabling parallel_tool_calls because the template does not support it\n");
+    //     params.parallel_tool_calls = false;
+    // } else {
+    params.parallel_tool_calls = inputs.parallel_tool_calls;
+    //}
 
     if (params.tools.is_array()) {
         if (params.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE && !params.grammar.empty()) {
             throw std::runtime_error("Cannot specify grammar with tools");
         }
         if (caps.supports_tool_calls && !caps.supports_tools) {
-            LOG_WRN("Template supports tool calls but does not natively describe tools. The fallback behaviour used may produce bad results, inspect prompt w/ --verbose & consider overriding the template.\n");
+            LOG_WRN(
+                "Template supports tool calls but does not natively describe tools. The fallback behaviour used may "
+                "produce bad results, inspect prompt w/ --verbose & consider overriding the template.\n");
         }
     }
 
-    // DeepSeek V3.1: detect based on specific patterns in the template
-    if (src.find("message['prefix'] is defined and message['prefix'] and thinking") != std::string::npos &&
-        params.json_schema.is_null()) {
-        return common_chat_params_init_deepseek_v3_1(tmpl, params);
-    }
-
-    // DeepSeek R1: use handler in all cases except json schema (thinking / tools).
-    if (src.find("<｜tool▁calls▁begin｜>") != std::string::npos && params.json_schema.is_null()) {
-        return common_chat_params_init_deepseek_r1(tmpl, params);
-    }
-
-    // Command R7B: : use handler in all cases except json schema (thinking / tools).
-    if (src.find("<|END_THINKING|><|START_ACTION|>") != std::string::npos && params.json_schema.is_null()) {
-        workaround::func_args_not_string(params.messages);
-        return common_chat_params_init_command_r7b(tmpl, params);
-    }
-
-    // Granite (IBM) - detects thinking / tools support
-    if (src.find("elif thinking") != std::string::npos && src.find("<|tool_call|>") != std::string::npos) {
-        workaround::func_args_not_string(params.messages);
-        workaround::use_generic_schema(params.messages);
-        workaround::move_tool_calls_to_content(params.messages);
-        return common_chat_params_init_granite(tmpl, params);
-    }
-
-    // GLM 4.5: detect by <arg_key> and <arg_value> tags (check before Hermes since both use <tool_call>)
-    if (src.find("[gMASK]<sop>") != std::string::npos &&
-        src.find("<arg_key>") != std::string::npos &&
-        src.find("<arg_value>") != std::string::npos &&
-        params.json_schema.is_null()) {
-        workaround::func_args_not_string(params.messages);
-        if (!params.extra_context.contains("clear_thinking")) {
-            // by default, do not clear reasoning_content (added since GLM-4.7)
-            params.extra_context["clear_thinking"] = false;
-        }
-        return common_chat_params_init_glm_4_5(tmpl, params);
-    }
-
-    // Qwen3-Coder XML format detection (must come before Hermes 2 Pro)
-    // Detect via explicit XML markers unique to Qwen3-Coder to avoid false positives in other templates.
-    // Require presence of <tool_call>, <function=...>, and <parameter=...> blocks.
-    if (src.find("<tool_call>") != std::string::npos &&
-        src.find("<function>") != std::string::npos &&
-        src.find("<function=") != std::string::npos &&
-        src.find("<parameters>") != std::string::npos &&
-        src.find("<parameter=") != std::string::npos) {
-        workaround::func_args_not_string(params.messages);
-        // Nemotron 3 Nano 30B A3B
-        if (src.find("<think>") != std::string::npos) {
-            return common_chat_params_init_nemotron_v3(tmpl, params);
-        }
-        return common_chat_params_init_qwen3_coder_xml(tmpl, params);
-    }
-
-    // Xiaomi MiMo format detection (must come before Hermes 2 Pro)
-    if (src.find("<tools>") != std::string::npos &&
-        src.find("# Tools") != std::string::npos &&
-        src.find("</tools>") != std::string::npos &&
-        src.find("<tool_calls>") != std::string::npos &&
-        src.find("</tool_calls>") != std::string::npos &&
-        src.find("<tool_response>") != std::string::npos) {
-        return common_chat_params_init_xiaomi_mimo(tmpl, params);
-    }
-
-    // EXAONE MoE format detection
-    if (src.find("<tool_call>") != std::string::npos &&
-        src.find("<tool_result>") != std::string::npos &&
-        src.find("<|tool_declare|>") != std::string::npos) {
-        return common_chat_params_init_exaone_moe(tmpl, params);
-    }
-
-    // Hermes 2/3 Pro, Qwen 2.5 Instruct (w/ tools)
-    if (src.find("<tool_call>") != std::string::npos && params.json_schema.is_null()) {
-        return common_chat_params_init_hermes_2_pro(tmpl, params);
-    }
-
-    // GPT-OSS
-    if (src.find("<|channel|>") != std::string::npos) {
-        return common_chat_params_init_gpt_oss(tmpl, params);
-    }
-
-    // Seed-OSS
-    if (src.find("<seed:think>") != std::string::npos) {
-        workaround::func_args_not_string(params.messages);
-        return common_chat_params_init_seed_oss(tmpl, params, inputs);
-    }
-
-    // Nemotron v2
-    if (src.find("<SPECIAL_10>") != std::string::npos) {
-        return common_chat_params_init_nemotron_v2(tmpl, params);
-    }
-
-    // Apertus format detection
-    if (src.find("<|system_start|>") != std::string::npos && src.find("<|tools_prefix|>") != std::string::npos) {
-        return common_chat_params_init_apertus(tmpl, params);
-    }
-
-    // LFM2 (w/ tools)
-    if (src.find("List of tools: <|tool_list_start|>[") != std::string::npos &&
-        src.find("]<|tool_list_end|>") != std::string::npos) {
-        return common_chat_params_init_lfm2(tmpl, params);
-    }
-
-    // MiniMax-M2 format detection
-    if (src.find("]~!b[") != std::string::npos && src.find("]~b]") != std::string::npos) {
-        workaround::func_args_not_string(params.messages);
-        return common_chat_params_init_minimax_m2(tmpl, params);
-    }
-
-    // Kimi K2 format detection
-    if (src.find("<|im_system|>tool_declare<|im_middle|>") != std::string::npos &&
-        src.find("<|tool_calls_section_begin|>") != std::string::npos &&
-        src.find("## Return of") != std::string::npos) {
-        return common_chat_params_init_kimi_k2(tmpl, params);
-    }
-
-    // Apriel 1.5 format detection
-    if (src.find("<thinking>") != std::string::npos &&
-        src.find("</thinking>") != std::string::npos &&
-        src.find("<available_tools>") != std::string::npos &&
-        src.find("<|assistant|>") != std::string::npos &&
-        src.find("<|tool_result|>") != std::string::npos &&
-        src.find("<tool_calls>[") != std::string::npos &&
-        src.find("]</tool_calls>") != std::string::npos) {
-        return common_chat_params_init_apriel_1_5(tmpl, params);
-    }
-
-    // Solar Open
-    if (src.find("<|tool_response:begin|>") != std::string::npos &&
-        src.find("<|tool_response:name|>") != std::string::npos &&
-        src.find("<|tool_response:result|>") != std::string::npos) {
-        return common_chat_params_init_solar_open(tmpl, params);
-    }
-
-    // Use generic handler when mixing tools + JSON schema.
-    // TODO: support that mix in handlers below.
-    if ((params.tools.is_array() && params.json_schema.is_object())) {
-        return common_chat_params_init_generic(tmpl, params);
-    }
-
-    // Functionary prepends "all\n" to plain content outputs, so we use its handler in all cases.
-    if (src.find(">>>all") != std::string::npos) {
-        return common_chat_params_init_functionary_v3_2(tmpl, params);
-    }
-
-    // Firefunction v2 requires datetime and functions in the context even w/o tools, so we also use its handler in all cases.
-    if (src.find(" functools[") != std::string::npos) {
-        return common_chat_params_init_firefunction_v2(tmpl, params);
-    }
-
-    // Functionary v3.1 (w/ tools)
-    if (src.find("<|start_header_id|>") != std::string::npos
-        && src.find("<function=") != std::string::npos) {
-        return common_chat_params_init_functionary_v3_1_llama_3_1(tmpl, params);
-    }
-
-    // Llama 3.1, 3.2, 3.3 (also requires date_string so using it even w/o tools)
-    if (src.find("<|start_header_id|>ipython<|end_header_id|>") != std::string::npos) {
-        auto allow_python_tag_builtin_tools = src.find("<|python_tag|>") != std::string::npos;
-        workaround::func_args_not_string(params.messages);
-        return common_chat_params_init_llama_3_x(tmpl, params, allow_python_tag_builtin_tools);
-    }
-
-    // Ministral/Mistral Large 3
-    if (src.find("[SYSTEM_PROMPT]") != std::string::npos &&
-        src.find("[TOOL_CALLS]") != std::string::npos &&
-        src.find("[ARGS]") != std::string::npos) {
+    // Ministral/Mistral Large 3 - uses special reasoning structure fixes, can't use autoparser
+    // Note: Mistral Small 3.2 uses [CALL_ID] which Ministral doesn't have, so we can distinguish them
+    if (src.find("[SYSTEM_PROMPT]") != std::string::npos && src.find("[TOOL_CALLS]") != std::string::npos &&
+        src.find("[ARGS]") != std::string::npos && src.find("[CALL_ID]") == std::string::npos) {
+        LOG_INF("Using specialized template: Ministral/Magistral Large 3\n");
         return common_chat_params_init_ministral_3(tmpl, params);
     }
 
-    if (src.find("[THINK]") != std::string::npos && src.find("[/THINK]") != std::string::npos) {
-        return common_chat_params_init_magistral(tmpl, params);
+    // GPT-OSS - has unique channel-based structure that needs dedicated handler
+    if (src.find("<|channel|>") != std::string::npos) {
+        LOG_INF("Using specialized template: GPT-OSS\n");
+        return common_chat_params_init_gpt_oss(tmpl, params);
     }
 
-    // Solar Open
-    if (src.find("<|tool_response:begin|>") != std::string::npos &&
-        src.find("<|tool_response:name|>") != std::string::npos &&
-        src.find("<|tool_response:result|>") != std::string::npos) {
-        return common_chat_params_init_solar_open(tmpl, params);
+    try {
+        LOG_INF("Using autoparser for template analysis\n");
+        template_analysis_result analysis    = template_analyzer::analyze_template(tmpl);
+        auto                     auto_params = universal_peg_generator::generate_parser(analysis, tmpl, params);
+        return auto_params;
+    } catch (const std::exception & e) {
+        LOG_WRN("Automatic parser generation failed: %s\n", e.what());
     }
 
-    // TranslateGemma
-    if (src.find("[source_lang_code]") != std::string::npos &&
-        src.find("[target_lang_code]") != std::string::npos) {
-        return common_chat_params_init_translate_gemma(tmpl, params);
-    }
-
-    // Plain handler (no tools)
-    if (params.tools.is_null() || inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_NONE) {
-        return common_chat_params_init_without_tools(tmpl, params);
-    }
-
-    // Mistral Nemo (w/ tools)
-    if (src.find("[TOOL_CALLS]") != std::string::npos) {
-        workaround::func_args_not_string(params.messages);
-        return common_chat_params_init_mistral_nemo(tmpl, params);
-    }
-
-    // Generic fallback
-    workaround::func_args_not_string(params.messages);
-    workaround::use_generic_schema(params.messages);
-    workaround::move_tool_calls_to_content(params.messages);
-    return common_chat_params_init_generic(tmpl, params);
+    GGML_ABORT("Unable to generate parser for this template.");
 }
 
 // Legacy template route (adhoc C++ implementation of known templates), forward to llama_chat_apply_template.
-static common_chat_params common_chat_templates_apply_legacy(
-    const struct common_chat_templates * tmpls,
-    const struct common_chat_templates_inputs & inputs)
-{
-    size_t alloc_size = 0;
+static common_chat_params common_chat_templates_apply_legacy(const struct common_chat_templates *        tmpls,
+                                                             const struct common_chat_templates_inputs & inputs) {
+    size_t                          alloc_size = 0;
     std::vector<llama_chat_message> chat;
-    std::vector<std::string> contents;
+    std::vector<std::string>        contents;
 
     for (const auto & msg : inputs.messages) {
         auto content = msg.content;
@@ -3294,25 +1169,27 @@ static common_chat_params common_chat_templates_apply_legacy(
                 continue;
             }
             if (!content.empty()) {
-                content += "\n";;
+                content += "\n";
+                ;
             }
             content += part.text;
         }
         contents.emplace_back(std::move(content));
     }
     for (size_t i = 0; i < contents.size(); ++i) {
-        const auto & msg = inputs.messages[i];
+        const auto & msg     = inputs.messages[i];
         const auto & content = contents[i];
-        chat.push_back({msg.role.c_str(), content.c_str()});
+        chat.push_back({ msg.role.c_str(), content.c_str() });
         size_t msg_size = msg.role.size() + content.size();
-        alloc_size += msg_size + (msg_size / 4); // == msg_size * 1.25 but avoiding float ops
+        alloc_size += msg_size + (msg_size / 4);  // == msg_size * 1.25 but avoiding float ops
     }
 
     std::vector<char> buf(alloc_size);
 
     // run the first time to get the total output length
     const auto & src = tmpls->template_default->source();
-    int32_t res = llama_chat_apply_template(src.c_str(), chat.data(), chat.size(), inputs.add_generation_prompt, buf.data(), buf.size());
+    int32_t      res = llama_chat_apply_template(src.c_str(), chat.data(), chat.size(), inputs.add_generation_prompt,
+                                                 buf.data(), buf.size());
 
     // error: chat template is not supported
     if (res < 0) {
@@ -3324,7 +1201,8 @@ static common_chat_params common_chat_templates_apply_legacy(
     // if it turns out that our buffer is too small, we resize it
     if ((size_t) res > buf.size()) {
         buf.resize(res);
-        res = llama_chat_apply_template(src.c_str(), chat.data(), chat.size(), inputs.add_generation_prompt, buf.data(), buf.size());
+        res = llama_chat_apply_template(src.c_str(), chat.data(), chat.size(), inputs.add_generation_prompt, buf.data(),
+                                        buf.size());
     }
 
     // for safety, we check the result again
@@ -3342,14 +1220,75 @@ static common_chat_params common_chat_templates_apply_legacy(
     return params;
 }
 
-common_chat_params common_chat_templates_apply(
-    const struct common_chat_templates * tmpls,
-    const struct common_chat_templates_inputs & inputs)
-{
+common_chat_params common_chat_templates_apply(const struct common_chat_templates *        tmpls,
+                                               const struct common_chat_templates_inputs & inputs) {
     GGML_ASSERT(tmpls != nullptr);
-    return inputs.use_jinja
-        ? common_chat_templates_apply_jinja(tmpls, inputs)
-        : common_chat_templates_apply_legacy(tmpls, inputs);
+    return inputs.use_jinja ? common_chat_templates_apply_jinja(tmpls, inputs) :
+                              common_chat_templates_apply_legacy(tmpls, inputs);
+}
+
+common_chat_msg common_chat_parse(const std::string & input, bool is_partial, const common_chat_syntax & syntax) {
+    return common_chat_peg_parse(syntax.parser, input, is_partial, syntax);
+}
+
+common_chat_msg common_chat_peg_parse(const common_peg_arena &   parser,
+                                      const std::string &        input,
+                                      bool                       is_partial,
+                                      const common_chat_syntax & syntax) {
+    if (parser.empty()) {
+        throw std::runtime_error("Failed to parse due to missing parser definition.");
+    }
+
+    LOG_DBG("Parsing PEG input with format %s: %s\n", common_chat_format_name(syntax.format), input.c_str());
+
+    common_peg_parse_context ctx(input, is_partial);
+    ctx.debug   = syntax.debug;
+    auto result = parser.parse(ctx);
+
+    if (result.fail()) {
+        // During partial parsing, return partial results if any AST nodes were captured
+        // This allows streaming to work correctly for formats like FUNC_MARKDOWN_CODE_BLOCK
+        if (is_partial && result.end > 0) {
+            // Try to extract any partial results from what was successfully parsed
+            common_chat_msg msg;
+            msg.role = "assistant";
+            if (syntax.format == COMMON_CHAT_FORMAT_PEG_NATIVE) {
+                auto mapper = common_chat_peg_unified_mapper(msg);
+                mapper.from_ast(ctx.ast, result);
+            } else {
+                auto mapper = common_chat_peg_mapper(msg);
+                mapper.from_ast(ctx.ast, result);
+            }
+            if (ctx.debug) {
+                fprintf(stderr, "\nAST for partial parse (fail):\n%s\n", ctx.ast.dump().c_str());
+                fflush(stderr);
+            }
+            return msg;
+        }
+        throw std::runtime_error(std::string("Failed to parse input at pos ") + std::to_string(result.end) + ": " +
+                                 input.substr(result.end));
+    }
+
+    common_chat_msg msg;
+    msg.role = "assistant";
+
+    if (syntax.format == COMMON_CHAT_FORMAT_PEG_NATIVE) {
+        auto mapper = common_chat_peg_unified_mapper(msg);
+        mapper.from_ast(ctx.ast, result);
+    } else {
+        // Generic mapper
+        auto mapper = common_chat_peg_mapper(msg);
+        mapper.from_ast(ctx.ast, result);
+    }
+    if (ctx.debug) {
+        fprintf(stderr, "\nAST for %s parse:\n%s\n", is_partial ? "partial" : "full", ctx.ast.dump().c_str());
+        fflush(stderr);
+    }
+
+    if (!is_partial) {
+        LOG_DBG("Parsed message: %s\n", common_chat_msgs_to_json_oaicompat<json>({ msg }).at(0).dump().c_str());
+    }
+    return msg;
 }
 
 std::map<std::string, bool> common_chat_templates_get_caps(const common_chat_templates * chat_templates) {
diff --git a/common/chat.h b/common/chat.h
index 24aa4aab5c..feaebedce7 100644
--- a/common/chat.h
+++ b/common/chat.h
@@ -3,12 +3,21 @@
 #pragma once
 
 #include "common.h"
+#include "jinja/parser.h"
+#include "nlohmann/json_fwd.hpp"
 #include "peg-parser.h"
-#include <functional>
+#include "jinja/runtime.h"
+#include "jinja/caps.h"
+#include "nlohmann/json.hpp"
+
 #include <chrono>
+#include <functional>
+#include <map>
 #include <string>
 #include <vector>
-#include <map>
+
+using chat_template_caps = jinja::caps;
+using json = nlohmann::ordered_json;
 
 #include <nlohmann/json_fwd.hpp>
 
@@ -38,21 +47,85 @@ struct common_chat_msg_content_part {
     }
 };
 
+struct common_chat_template {
+    jinja::program prog;
+    std::string bos_tok;
+    std::string eos_tok;
+    std::string src;
+    chat_template_caps caps;
+
+    common_chat_template(const std::string & src, const std::string & bos_token, const std::string & eos_token) {
+        jinja::lexer lexer;
+        auto lexer_res = lexer.tokenize(src);
+        this->prog = jinja::parse_from_tokens(lexer_res);
+
+        this->src = lexer_res.source;
+        this->bos_tok = bos_token;
+        this->eos_tok = eos_token;
+
+        this->caps = jinja::caps_get(prog);
+        // LOG_INF("%s: caps:\n%s\n", __func__, this->caps.to_string().c_str());
+    }
+
+    const std::string & source() const { return src; }
+    const std::string & bos_token() const { return bos_tok; }
+    const std::string & eos_token() const { return eos_tok; }
+
+    // TODO: this is ugly, refactor it somehow
+    json add_system(const json & messages, const std::string & system_prompt) const {
+        GGML_ASSERT(messages.is_array());
+        auto msgs_copy = messages;
+        if (!caps.supports_system_role) {
+            if (msgs_copy.empty()) {
+                msgs_copy.insert(msgs_copy.begin(), json{
+                    {"role", "user"},
+                    {"content", system_prompt}
+                });
+            } else {
+                auto & first_msg = msgs_copy[0];
+                if (!first_msg.contains("content")) {
+                    first_msg["content"] = "";
+                }
+                first_msg["content"] = system_prompt + "\n\n"
+                    + first_msg["content"].get<std::string>();
+            }
+        } else {
+            if (msgs_copy.empty() || msgs_copy[0].at("role") != "system") {
+                msgs_copy.insert(msgs_copy.begin(), json{
+                    {"role", "system"},
+                    {"content", system_prompt}
+                });
+            } else if (msgs_copy[0].at("role") == "system") {
+                msgs_copy[0]["content"] = system_prompt;
+            }
+        }
+        return msgs_copy;
+    }
+
+    chat_template_caps original_caps() const {
+        return caps;
+    }
+
+};
+
 struct common_chat_msg {
-    std::string role;
-    std::string content;
+    std::string                               role;
+    std::string                               content;
     std::vector<common_chat_msg_content_part> content_parts;
-    std::vector<common_chat_tool_call> tool_calls;
-    std::string reasoning_content;
-    std::string tool_name;
-    std::string tool_call_id;
+    std::vector<common_chat_tool_call>        tool_calls;
+    std::string                               reasoning_content;
+    std::string                               tool_name;
+    std::string                               tool_call_id;
 
     nlohmann::ordered_json to_json_oaicompat(bool concat_typed_text = false) const;
 
     bool empty() const {
-        return content.empty() && content_parts.empty() && tool_calls.empty() && reasoning_content.empty() && tool_name.empty() && tool_call_id.empty();
+        return content.empty() && content_parts.empty() && tool_calls.empty() && reasoning_content.empty() &&
+               tool_name.empty() && tool_call_id.empty();
     }
-    void set_tool_call_ids(std::vector<std::string> & ids_cache, const std::function<std::string()> & gen_tool_call_id) {
+
+    void set_tool_call_ids(std::vector<std::string> &           ids_cache,
+                           const std::function<std::string()> & gen_tool_call_id) {
         for (auto i = 0u; i < tool_calls.size(); i++) {
             if (ids_cache.size() <= i) {
                 auto id = tool_calls[i].id;
@@ -64,32 +137,28 @@ struct common_chat_msg {
             tool_calls[i].id = ids_cache[i];
         }
     }
+
     bool operator==(const common_chat_msg & other) const {
-        return role == other.role
-            && content == other.content
-            && content_parts == other.content_parts
-            && tool_calls == other.tool_calls
-            && reasoning_content == other.reasoning_content
-            && tool_name == other.tool_name
-            && tool_call_id == other.tool_call_id;
-    }
-    bool operator!=(const common_chat_msg & other) const {
-        return !(*this == other);
+        return role == other.role && content == other.content && content_parts == other.content_parts &&
+               tool_calls == other.tool_calls && reasoning_content == other.reasoning_content &&
+               tool_name == other.tool_name && tool_call_id == other.tool_call_id;
     }
+
+    bool operator!=(const common_chat_msg & other) const { return !(*this == other); }
 };
 
 struct common_chat_msg_diff {
-    std::string reasoning_content_delta;
-    std::string content_delta;
-    size_t tool_call_index = std::string::npos;
+    std::string           reasoning_content_delta;
+    std::string           content_delta;
+    size_t                tool_call_index = std::string::npos;
     common_chat_tool_call tool_call_delta;
 
-    static std::vector<common_chat_msg_diff> compute_diffs(const common_chat_msg & msg_prv, const common_chat_msg & msg_new);
+    static std::vector<common_chat_msg_diff> compute_diffs(const common_chat_msg & msg_prv,
+                                                           const common_chat_msg & msg_new);
 
     bool operator==(const common_chat_msg_diff & other) const {
-        return content_delta == other.content_delta
-        && tool_call_index == other.tool_call_index
-        && tool_call_delta == other.tool_call_delta;
+        return content_delta == other.content_delta && tool_call_index == other.tool_call_index &&
+               tool_call_delta == other.tool_call_delta;
     }
 };
 
@@ -107,64 +176,37 @@ enum common_chat_tool_choice {
 
 enum common_chat_format {
     COMMON_CHAT_FORMAT_CONTENT_ONLY,
-    COMMON_CHAT_FORMAT_GENERIC,
-    COMMON_CHAT_FORMAT_MISTRAL_NEMO,
-    COMMON_CHAT_FORMAT_MAGISTRAL,
-    COMMON_CHAT_FORMAT_LLAMA_3_X,
-    COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS,
-    COMMON_CHAT_FORMAT_DEEPSEEK_R1,
-    COMMON_CHAT_FORMAT_FIREFUNCTION_V2,
-    COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2,
-    COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1,
-    COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
-    COMMON_CHAT_FORMAT_HERMES_2_PRO,
-    COMMON_CHAT_FORMAT_COMMAND_R7B,
-    COMMON_CHAT_FORMAT_GRANITE,
-    COMMON_CHAT_FORMAT_GPT_OSS,
-    COMMON_CHAT_FORMAT_SEED_OSS,
-    COMMON_CHAT_FORMAT_NEMOTRON_V2,
-    COMMON_CHAT_FORMAT_APERTUS,
-    COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS,
-    COMMON_CHAT_FORMAT_GLM_4_5,
-    COMMON_CHAT_FORMAT_MINIMAX_M2,
-    COMMON_CHAT_FORMAT_KIMI_K2,
-    COMMON_CHAT_FORMAT_QWEN3_CODER_XML,
-    COMMON_CHAT_FORMAT_APRIEL_1_5,
-    COMMON_CHAT_FORMAT_XIAOMI_MIMO,
-    COMMON_CHAT_FORMAT_SOLAR_OPEN,
-    COMMON_CHAT_FORMAT_EXAONE_MOE,
 
     // These are intended to be parsed by the PEG parser
     COMMON_CHAT_FORMAT_PEG_SIMPLE,
     COMMON_CHAT_FORMAT_PEG_NATIVE,
-    COMMON_CHAT_FORMAT_PEG_CONSTRUCTED,
 
-    COMMON_CHAT_FORMAT_COUNT, // Not a format, just the # formats
+    COMMON_CHAT_FORMAT_COUNT,  // Not a format, just the # formats
 };
 
 struct common_chat_templates_inputs {
-    std::vector<common_chat_msg> messages;
-    std::string grammar;
-    std::string json_schema;
-    bool add_generation_prompt = true;
-    bool use_jinja = true;
+    std::vector<common_chat_msg>          messages;
+    std::string                           grammar;
+    std::string                           json_schema;
+    bool                                  add_generation_prompt = true;
+    bool                                  use_jinja             = true;
     // Parameters below only supported when use_jinja is true
-    std::vector<common_chat_tool> tools;
-    common_chat_tool_choice tool_choice = COMMON_CHAT_TOOL_CHOICE_AUTO;
-    bool parallel_tool_calls = false;
+    std::vector<common_chat_tool>         tools;
+    common_chat_tool_choice               tool_choice         = COMMON_CHAT_TOOL_CHOICE_AUTO;
+    bool                                  parallel_tool_calls = false;
     common_reasoning_format reasoning_format = COMMON_REASONING_FORMAT_NONE; // TODO: refactor this to "bool enable_thinking"
-    bool enable_thinking = true;
-    std::chrono::system_clock::time_point now = std::chrono::system_clock::now();
-    std::map<std::string, std::string> chat_template_kwargs;
-    bool add_bos = false;
-    bool add_eos = false;
+    bool                                  enable_thinking     = true;
+    std::chrono::system_clock::time_point now                 = std::chrono::system_clock::now();
+    std::map<std::string, std::string>    chat_template_kwargs;
+    bool                                  add_bos = false;
+    bool                                  add_eos = false;
 };
 
 struct common_chat_params {
     common_chat_format                  format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
     std::string                         prompt;
     std::string                         grammar;
-    bool                                grammar_lazy = false;
+    bool                                grammar_lazy         = false;
     bool                                thinking_forced_open = false;
     std::vector<common_grammar_trigger> grammar_triggers;
     std::vector<std::string>            preserved_tokens;
@@ -175,13 +217,14 @@ struct common_chat_params {
 // per-message parsing syntax
 // should be derived from common_chat_params
 struct common_chat_parser_params {
-    common_chat_format       format                = COMMON_CHAT_FORMAT_CONTENT_ONLY;
+    common_chat_format      format               = COMMON_CHAT_FORMAT_CONTENT_ONLY;
     common_reasoning_format  reasoning_format      = COMMON_REASONING_FORMAT_NONE; // TODO: refactor this to "bool parse_reasoning"
     // Whether reasoning_content should be inlined in the content (e.g. for reasoning_format=deepseek in stream mode)
-    bool                     reasoning_in_content  = false;
-    bool                     thinking_forced_open  = false;
-    bool                     parse_tool_calls      = true;
-    common_peg_arena         parser                = {};
+    bool                    reasoning_in_content = false;
+    bool                    thinking_forced_open = false;
+    bool                    parse_tool_calls     = true;
+    bool                    debug                = false;  // Enable debug output for PEG parser
+    common_peg_arena        parser               = {};
     common_chat_parser_params() = default;
     common_chat_parser_params(const common_chat_params & chat_params) {
         format               = chat_params.format;
@@ -194,45 +237,47 @@ bool common_chat_verify_template(const std::string & tmpl, bool use_jinja);
 
 void common_chat_templates_free(struct common_chat_templates * tmpls);
 
-struct common_chat_templates_deleter { void operator()(common_chat_templates * tmpls) { common_chat_templates_free(tmpls); } };
+struct common_chat_templates_deleter {
+    void operator()(common_chat_templates * tmpls) { common_chat_templates_free(tmpls); }
+};
 
 typedef std::unique_ptr<struct common_chat_templates, common_chat_templates_deleter> common_chat_templates_ptr;
 
-common_chat_templates_ptr common_chat_templates_init(
-                                    const struct llama_model * model,
-                                           const std::string & chat_template_override,
-                                           const std::string & bos_token_override = "",
-                                           const std::string & eos_token_override = "");
+common_chat_templates_ptr common_chat_templates_init(const struct llama_model * model,
+                                                     const std::string &        chat_template_override,
+                                                     const std::string &        bos_token_override = "",
+                                                     const std::string &        eos_token_override = "");
 
 bool         common_chat_templates_was_explicit(const struct common_chat_templates * tmpls);
 std::string  common_chat_templates_source(const struct common_chat_templates * tmpls, const std::string & variant = "");
 
-
-struct common_chat_params      common_chat_templates_apply(
-    const struct common_chat_templates * tmpls,
-    const struct common_chat_templates_inputs & inputs);
+struct common_chat_params common_chat_templates_apply(const struct common_chat_templates *        tmpls,
+                                                      const struct common_chat_templates_inputs & inputs);
 
 // Format single message, while taking into account the position of that message in chat history
-std::string common_chat_format_single(
-        const struct common_chat_templates * tmpls,
-        const std::vector<common_chat_msg> & past_msg,
-        const common_chat_msg & new_msg,
-        bool add_ass,
-        bool use_jinja);
+std::string common_chat_format_single(const struct common_chat_templates * tmpls,
+                                      const std::vector<common_chat_msg> & past_msg,
+                                      const common_chat_msg &              new_msg,
+                                      bool                                 add_ass,
+                                      bool                                 use_jinja);
 
 // Returns an example of formatted chat
-std::string common_chat_format_example(
-    const struct common_chat_templates * tmpls,
-    bool use_jinja,
-    const std::map<std::string, std::string> & chat_template_kwargs);
+std::string common_chat_format_example(const struct common_chat_templates *       tmpls,
+                                       bool                                       use_jinja,
+                                       const std::map<std::string, std::string> & chat_template_kwargs);
 
-const char*               common_chat_format_name(common_chat_format format);
+const char *            common_chat_format_name(common_chat_format format);
 common_chat_msg           common_chat_parse(const std::string & input, bool is_partial, const common_chat_parser_params & syntax);
 common_chat_msg           common_chat_peg_parse(const common_peg_arena & parser, const std::string & input, bool is_partial, const common_chat_parser_params & syntax);
 
 // used by arg and server
-const char *             common_reasoning_format_name(common_reasoning_format format);
-common_reasoning_format  common_reasoning_format_from_name(const std::string & format);
+const char *            common_reasoning_format_name(common_reasoning_format format);
+common_reasoning_format common_reasoning_format_from_name(const std::string & format);
+common_chat_msg common_chat_parse(const std::string & input, bool is_partial, const common_chat_syntax & syntax);
+common_chat_msg common_chat_peg_parse(const common_peg_arena &   parser,
+                                      const std::string &        input,
+                                      bool                       is_partial,
+                                      const common_chat_syntax & syntax);
 
 common_chat_tool_choice common_chat_tool_choice_parse_oaicompat(const std::string & tool_choice);
 
@@ -249,3 +294,10 @@ nlohmann::ordered_json common_chat_msg_diff_to_json_oaicompat(const common_chat_
 
 // get template caps, useful for reporting to server /props endpoint
 std::map<std::string, bool> common_chat_templates_get_caps(const common_chat_templates * chat_templates);
+
+std::string common_chat_template_direct_apply(
+    const common_chat_template & tmpl,
+    const struct templates_params & inputs,
+    const std::optional<json> & messages_override = std::nullopt,
+    const std::optional<json> & tools_override = std::nullopt,
+    const std::optional<json> & additional_context = std::nullopt);
diff --git a/common/jinja/value.cpp b/common/jinja/value.cpp
index 2aa156b177..17d7eae764 100644
--- a/common/jinja/value.cpp
+++ b/common/jinja/value.cpp
@@ -428,6 +428,22 @@ const func_builtins & global_builtins() {
             bool res = it != builtins.end();
             return mk_val<value_bool>(res);
         }},
+        {"test_is_in", [](const func_args & args) -> value {
+            args.ensure_count(2, 2);
+            value val_needle = args.get_pos(0);
+            value val_haystack = args.get_pos(1);
+            const auto & haystack = is_val<value_array>(val_haystack) ? val_haystack->as_array() : std::vector<value>(1, val_haystack);
+            for (auto it = haystack.cbegin(); it != haystack.cend(); it++) {
+                if ((*it)->type() == val_needle->type()) {
+                    if (is_val<value_string>(val_haystack) ?
+                        (*it)->as_string().str().find(val_needle->as_string().str()) != std::string::npos :
+                        value_compare(*it, val_needle, value_compare_op::eq)) {
+                        return mk_val<value_bool>(true);
+                    }
+                }
+            }
+            return mk_val<value_bool>(false);
+        }},
         {"test_is_sameas", [](const func_args & args) -> value {
             // Check if an object points to the same memory address as another object
             (void)args;
@@ -715,8 +731,26 @@ const func_builtins & value_string_t::get_builtins() const {
             return args.get_pos(0);
         }},
         {"tojson", tojson},
-        {"indent", [](const func_args &) -> value {
-            throw not_implemented_exception("String indent builtin not implemented");
+        {"indent", [](const func_args &args) -> value {
+            // no support for "first" as that would require us to somehow access generation context
+            args.ensure_count(2, 4);
+            args.ensure_vals<value_string, value_int, value_bool, value_bool>(true, true, false, false);
+
+            auto input = args.get_pos(0);
+            auto arg0 = args.get_pos(1);
+
+            int count = arg0->as_int();
+            if (count <= 0) {
+                throw raised_exception("indent must be a positive number");
+            }
+            std::string indented;
+            for (int i = 0; i < count; i++) {
+                indented.append(" ");
+            }
+            indented.append(input->as_string().str());
+            auto res = mk_val<value_string>(indented);
+            res->val_str.mark_input_based_on(input->as_string());
+            return res;
         }},
         {"join", [](const func_args &) -> value {
             throw not_implemented_exception("String join builtin not implemented");
diff --git a/common/jinja/value.h b/common/jinja/value.h
index 1c04760a08..0425bda5e3 100644
--- a/common/jinja/value.h
+++ b/common/jinja/value.h
@@ -617,6 +617,8 @@ struct value_undefined_t : public value_t {
     value_undefined_t(const std::string & h = "") : hint(h) {}
     virtual std::string type() const override { return hint.empty() ? "Undefined" : "Undefined (hint: '" + hint + "')"; }
     virtual bool is_undefined() const override { return true; }
+    // note: some templates use "is none" as equivalent to "is undefined"
+    virtual bool is_none() const override { return true; }
     virtual bool as_bool() const override { return false; }
     virtual std::string as_repr() const override { return type(); }
     virtual const func_builtins & get_builtins() const override;
diff --git a/common/peg-parser.cpp b/common/peg-parser.cpp
index f2fc84500f..80dd105246 100644
--- a/common/peg-parser.cpp
+++ b/common/peg-parser.cpp
@@ -1,28 +1,32 @@
-#include "common.h"
 #include "peg-parser.h"
-#include "json-schema-to-grammar.h"
-#include "unicode.h"
 
-#include <nlohmann/json.hpp>
+#include "common.h"
+#include "json-schema-to-grammar.h"
+#include "log.h"
+#include "unicode.h"
 
 #include <algorithm>
 #include <initializer_list>
 #include <map>
 #include <memory>
+#include <nlohmann/json.hpp>
 #include <regex>
 #include <stdexcept>
 #include <unordered_set>
 
 // Trick to catch missing branches
-template <typename T>
-inline constexpr bool is_always_false_v = false;
+template <typename T> inline constexpr bool is_always_false_v = false;
 
 const char * common_peg_parse_result_type_name(common_peg_parse_result_type type) {
     switch (type) {
-        case COMMON_PEG_PARSE_RESULT_FAIL:            return "fail";
-        case COMMON_PEG_PARSE_RESULT_SUCCESS:         return "success";
-        case COMMON_PEG_PARSE_RESULT_NEED_MORE_INPUT: return "need_more_input";
-        default:                                      return "unknown";
+        case COMMON_PEG_PARSE_RESULT_FAIL:
+            return "fail";
+        case COMMON_PEG_PARSE_RESULT_SUCCESS:
+            return "success";
+        case COMMON_PEG_PARSE_RESULT_NEED_MORE_INPUT:
+            return "need_more_input";
+        default:
+            return "unknown";
     }
 }
 
@@ -34,81 +38,88 @@ static bool is_hex_digit(const char c) {
 // This is used in common_peg_until_parser and to build a GBNF exclusion grammar
 struct trie {
     struct node {
-        size_t depth = 0;
-        std::map<unsigned char, size_t> children;
-        bool is_word;
+        std::map<uint32_t, size_t> children;
+        bool                       is_word = false;
     };
 
     std::vector<node> nodes;
 
     trie(const std::vector<std::string> & words) {
-      create_node(); // root node
-      for (const auto & w : words) {
-          insert(w);
-      }
+        create_node();  // root node
+        for (const auto & w : words) {
+            insert(w);
+        }
     }
 
     enum match_result { NO_MATCH, PARTIAL_MATCH, COMPLETE_MATCH };
 
     // Check if a delimiter starts at the given position
     match_result check_at(std::string_view sv, size_t start_pos) const {
-        size_t current = 0; // Start at root
-        size_t pos = start_pos;
+        size_t current = 0;  // Start at root
+        size_t pos     = start_pos;
+
+        // LOG_DBG("%s: checking at pos %zu, sv='%s'\n", __func__, start_pos, std::string(sv).c_str());
 
         while (pos < sv.size()) {
-            auto it = nodes[current].children.find(sv[pos]);
+            auto result = parse_utf8_codepoint(sv, pos);
+            if (result.status != utf8_parse_result::SUCCESS) {
+                break;
+            }
+
+            auto it = nodes[current].children.find(result.codepoint);
             if (it == nodes[current].children.end()) {
                 // Can't continue matching
-                return match_result{match_result::NO_MATCH};
+                return match_result{ match_result::NO_MATCH };
             }
 
             current = it->second;
-            pos++;
+            pos += result.bytes_consumed;
 
             // Check if we've matched a complete word
             if (nodes[current].is_word) {
-                return match_result{match_result::COMPLETE_MATCH};
+                // LOG_DBG("%s: complete match found at pos %zu\n", __func__, pos);
+                return match_result{ match_result::COMPLETE_MATCH };
             }
         }
 
         // Reached end of input while still in the trie (not at root)
         if (current != 0) {
             // We're in the middle of a potential match
-            return match_result{match_result::PARTIAL_MATCH};
+            return match_result{ match_result::PARTIAL_MATCH };
         }
 
         // Reached end at root (no match)
-        return match_result{match_result::NO_MATCH};
+        return match_result{ match_result::NO_MATCH };
     }
 
     struct prefix_and_next {
-        std::string prefix;
-        std::string next_chars;
+        std::vector<uint32_t> prefix;
+        std::vector<uint32_t> next_chars;
     };
 
     std::vector<prefix_and_next> collect_prefix_and_next() {
-        std::string prefix;
+        std::vector<uint32_t>        prefix;
         std::vector<prefix_and_next> result;
         collect_prefix_and_next(0, prefix, result);
         return result;
     }
 
   private:
-    void collect_prefix_and_next(size_t index, std::string & prefix, std::vector<prefix_and_next> & out) {
+    void collect_prefix_and_next(size_t index, std::vector<uint32_t> & prefix, std::vector<prefix_and_next> & out) {
         if (!nodes[index].is_word) {
             if (!nodes[index].children.empty()) {
-                std::string chars;
+                std::vector<uint32_t> chars;
                 chars.reserve(nodes[index].children.size());
                 for (const auto & p : nodes[index].children) {
                     chars.push_back(p.first);
                 }
-                out.emplace_back(prefix_and_next{prefix, chars});
+                out.emplace_back(prefix_and_next{ prefix, chars });
             }
         }
 
         for (const auto & p : nodes[index].children) {
-            unsigned char ch = p.first;
-            auto child = p.second;
+            uint32_t ch    = p.first;
+            auto     child = p.second;
             prefix.push_back(ch);
             collect_prefix_and_next(child, prefix, out);
             prefix.pop_back();
@@ -123,13 +134,21 @@ struct trie {
 
     void insert(const std::string & word) {
         size_t current = 0;
-        for (unsigned char ch : word) {
+        size_t pos     = 0;
+        while (pos < word.length()) {
+            auto result = parse_utf8_codepoint(word, pos);
+            if (result.status != utf8_parse_result::SUCCESS) {
+                break;
+            }
+
+            uint32_t ch = result.codepoint;
+            pos += result.bytes_consumed;
+
             auto it = nodes[current].children.find(ch);
             if (it == nodes[current].children.end()) {
-                size_t child = create_node();
-                nodes[child].depth = nodes[current].depth + 1;
+                size_t child                = create_node();
                 nodes[current].children[ch] = child;
-                current = child;
+                current                     = child;
             } else {
                 current = it->second;
             }
@@ -140,14 +159,14 @@ struct trie {
 
 static std::pair<uint32_t, size_t> parse_hex_escape(const std::string & str, size_t pos, int hex_count) {
     if (pos + hex_count > str.length()) {
-        return {0, 0};
+        return { 0, 0 };
     }
 
     uint32_t value = 0;
     for (int i = 0; i < hex_count; i++) {
         char c = str[pos + i];
         if (!is_hex_digit(c)) {
-            return {0, 0};
+            return { 0, 0 };
         }
         value <<= 4;
         if ('a' <= c && c <= 'f') {
@@ -160,53 +179,64 @@ static std::pair<uint32_t, size_t> parse_hex_escape(const std::string & str, siz
             break;
         }
     }
-    return {value, static_cast<size_t>(hex_count)};
+    return { value, static_cast<size_t>(hex_count) };
 }
 
 static std::pair<uint32_t, size_t> parse_char_class_char(const std::string & content, size_t pos) {
     if (content[pos] == '\\' && pos + 1 < content.length()) {
         switch (content[pos + 1]) {
-            case 'x': {
-                auto result = parse_hex_escape(content, pos + 2, 2);
-                if (result.second > 0) {
-                    return {result.first, 2 + result.second};
+            case 'x':
+                {
+                    auto result = parse_hex_escape(content, pos + 2, 2);
+                    if (result.second > 0) {
+                        return { result.first, 2 + result.second };
+                    }
+                    // Invalid escape, treat as literal 'x'
+                    return { static_cast<uint32_t>('x'), 2 };
                 }
-                // Invalid escape, treat as literal 'x'
-                return {static_cast<uint32_t>('x'), 2};
-            }
-            case 'u': {
-                auto result = parse_hex_escape(content, pos + 2, 4);
-                if (result.second > 0) {
-                    return {result.first, 2 + result.second};
+            case 'u':
+                {
+                    auto result = parse_hex_escape(content, pos + 2, 4);
+                    if (result.second > 0) {
+                        return { result.first, 2 + result.second };
+                    }
+                    // Invalid escape, treat as literal 'u'
+                    return { static_cast<uint32_t>('u'), 2 };
                 }
-                // Invalid escape, treat as literal 'u'
-                return {static_cast<uint32_t>('u'), 2};
-            }
-            case 'U': {
-                auto result = parse_hex_escape(content, pos + 2, 8);
-                if (result.second > 0) {
-                    return {result.first, 2 + result.second};
+            case 'U':
+                {
+                    auto result = parse_hex_escape(content, pos + 2, 8);
+                    if (result.second > 0) {
+                        return { result.first, 2 + result.second };
+                    }
+                    // Invalid escape, treat as literal 'U'
+                    return { static_cast<uint32_t>('U'), 2 };
                 }
-                // Invalid escape, treat as literal 'U'
-                return {static_cast<uint32_t>('U'), 2};
-            }
-            case 'n':  return {'\n', 2};
-            case 't':  return {'\t', 2};
-            case 'r':  return {'\r', 2};
-            case '\\': return {'\\', 2};
-            case ']':  return {']', 2};
-            case '[':  return {'[', 2};
-            default:   return {static_cast<uint32_t>(content[pos + 1]), 2};
+            case 'n':
+                return { '\n', 2 };
+            case 't':
+                return { '\t', 2 };
+            case 'r':
+                return { '\r', 2 };
+            case '\\':
+                return { '\\', 2 };
+            case ']':
+                return { ']', 2 };
+            case '[':
+                return { '[', 2 };
+            default:
+                return { static_cast<uint32_t>(content[pos + 1]), 2 };
         }
     }
 
     // Regular character - return as codepoint
-    return {static_cast<uint32_t>(static_cast<unsigned char>(content[pos])), 1};
+    return { static_cast<uint32_t>(static_cast<unsigned char>(content[pos])), 1 };
 }
 
-static std::pair<std::vector<common_peg_chars_parser::char_range>, bool> parse_char_classes(const std::string & classes) {
+static std::pair<std::vector<common_peg_chars_parser::char_range>, bool> parse_char_classes(
+    const std::string & classes) {
     std::vector<common_peg_chars_parser::char_range> ranges;
-    bool negated = false;
+    bool                                             negated = false;
 
     std::string content = classes;
     if (content.front() == '[') {
@@ -231,14 +261,14 @@ static std::pair<std::vector<common_peg_chars_parser::char_range>, bool> parse_c
         if (i + 1 < content.length() && content[i] == '-') {
             // Range detected
             auto [end, end_len] = parse_char_class_char(content, i + 1);
-            ranges.push_back(common_peg_chars_parser::char_range{start, end});
+            ranges.push_back(common_peg_chars_parser::char_range{ start, end });
             i += 1 + end_len;
         } else {
-            ranges.push_back(common_peg_chars_parser::char_range{start, start});
+            ranges.push_back(common_peg_chars_parser::char_range{ start, start });
         }
     }
 
-    return {ranges, negated};
+    return { ranges, negated };
 }
 
 void common_peg_ast_arena::visit(common_peg_ast_id id, const common_peg_ast_visitor & visitor) const {
@@ -279,29 +309,53 @@ common_peg_parser_id common_peg_arena::get_rule(const std::string & name) const
 }
 
 struct parser_executor {
-    const common_peg_arena & arena;
+    const common_peg_arena &   arena;
     common_peg_parse_context & ctx;
-    size_t start_pos;
+    size_t                     start_pos;
 
-    parser_executor(const common_peg_arena & arena, common_peg_parse_context & ctx, size_t start)
-        : arena(arena), ctx(ctx), start_pos(start) {}
+    parser_executor(const common_peg_arena & arena, common_peg_parse_context & ctx, size_t start) :
+        arena(arena),
+        ctx(ctx),
+        start_pos(start) {}
+
+    std::string debug_indent() const { return std::string(ctx.parse_depth * 2, ' '); }
+
+    std::string debug_input_snippet(size_t pos, size_t len = 60) const {
+        if (pos >= ctx.input.size()) {
+            return "<EOF>";
+        }
+        auto        snippet = ctx.input.substr(pos, len);
+        // Escape newlines for display
+        std::string result;
+        for (char c : snippet) {
+            if (c == '\n') {
+                result += "\\n";
+            } else if (c == '\r') {
+                result += "\\r";
+            } else if (c == '\t') {
+                result += "\\t";
+            } else {
+                result += c;
+            }
+        }
+        if (pos + len < ctx.input.size()) {
+            result += "...";
+        }
+        return result;
+    }
 
     common_peg_parse_result operator()(const common_peg_epsilon_parser & /* p */) const {
         return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_SUCCESS, start_pos);
     }
 
     common_peg_parse_result operator()(const common_peg_start_parser & /* p */) const {
-        return common_peg_parse_result(
-            start_pos == 0 ? COMMON_PEG_PARSE_RESULT_SUCCESS : COMMON_PEG_PARSE_RESULT_FAIL,
-            start_pos
-        );
+        return common_peg_parse_result(start_pos == 0 ? COMMON_PEG_PARSE_RESULT_SUCCESS : COMMON_PEG_PARSE_RESULT_FAIL,
+                                       start_pos);
     }
 
     common_peg_parse_result operator()(const common_peg_end_parser & /* p */) const {
         return common_peg_parse_result(
-            start_pos >= ctx.input.size() ? COMMON_PEG_PARSE_RESULT_SUCCESS : COMMON_PEG_PARSE_RESULT_FAIL,
-            start_pos
-        );
+            start_pos >= ctx.input.size() ? COMMON_PEG_PARSE_RESULT_SUCCESS : COMMON_PEG_PARSE_RESULT_FAIL, start_pos);
     }
 
     common_peg_parse_result operator()(const common_peg_literal_parser & p) {
@@ -323,12 +377,39 @@ struct parser_executor {
     }
 
     common_peg_parse_result operator()(const common_peg_sequence_parser & p) {
-        auto pos = start_pos;
+        if (ctx.debug) {
+            LOG_DBG("%sSEQ start at %zu '%s' (%zu children)\n", debug_indent().c_str(), start_pos,
+                    debug_input_snippet(start_pos).c_str(), p.children.size());
+        }
+        ctx.parse_depth++;
+
+        auto                           pos = start_pos;
         std::vector<common_peg_ast_id> nodes;
 
-        for (const auto & child_id : p.children) {
+        for (size_t i = 0; i < p.children.size(); i++) {
+            const auto & child_id = p.children[i];
+            if (ctx.debug) {
+                fprintf(stderr, "%sSEQ child %zu: %s\n", debug_indent().c_str(), i, arena.dump(child_id).c_str());
+            }
             auto result = arena.parse(child_id, ctx, pos);
+
+            if (ctx.debug) {
+                fprintf(stderr, "%sSEQ child %zu: %s at %zu->%zu\n", debug_indent().c_str(), i,
+                        common_peg_parse_result_type_name(result.type), result.start, result.end);
+            }
+
             if (result.fail()) {
+                ctx.parse_depth--;
+                if (ctx.is_partial && result.end >= ctx.input.size()) {
+                    if (ctx.debug) {
+                        fprintf(stderr, "%sSEQ -> NEED_MORE (child failed at end)\n", debug_indent().c_str());
+                    }
+                    return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_NEED_MORE_INPUT, start_pos, result.end,
+                                                   std::move(nodes));
+                }
+                if (ctx.debug) {
+                    fprintf(stderr, "%sSEQ -> FAIL\n", debug_indent().c_str());
+                }
                 return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_FAIL, start_pos, result.end);
             }
 
@@ -337,43 +418,93 @@ struct parser_executor {
             }
 
             if (result.need_more_input()) {
-                return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_NEED_MORE_INPUT, start_pos, result.end, std::move(nodes));
+                ctx.parse_depth--;
+                if (ctx.debug) {
+                    fprintf(stderr, "%sSEQ -> NEED_MORE\n", debug_indent().c_str());
+                }
+                return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_NEED_MORE_INPUT, start_pos, result.end,
+                                               std::move(nodes));
             }
 
             pos = result.end;
         }
 
+        ctx.parse_depth--;
+        if (ctx.debug) {
+            fprintf(stderr, "%sSEQ -> SUCCESS at %zu->%zu\n", debug_indent().c_str(), start_pos, pos);
+        }
         return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_SUCCESS, start_pos, pos, std::move(nodes));
     }
 
     common_peg_parse_result operator()(const common_peg_choice_parser & p) {
+        if (ctx.debug) {
+            fprintf(stderr, "%sCHOICE start at %zu '%s' (%zu options)\n", debug_indent().c_str(), start_pos,
+                    debug_input_snippet(start_pos).c_str(), p.children.size());
+        }
+        ctx.parse_depth++;
+
         auto pos = start_pos;
-        for (const auto & child_id : p.children) {
+        for (size_t i = 0; i < p.children.size(); i++) {
+            const auto & child_id = p.children[i];
+            if (ctx.debug) {
+                fprintf(stderr, "%sCHOICE option %zu: %s\n", debug_indent().c_str(), i, arena.dump(child_id).c_str());
+            }
             auto result = arena.parse(child_id, ctx, pos);
+            if (ctx.debug) {
+                fprintf(stderr, "%sCHOICE option %zu: %s\n", debug_indent().c_str(), i,
+                        common_peg_parse_result_type_name(result.type));
+            }
             if (!result.fail()) {
+                ctx.parse_depth--;
+                if (ctx.debug) {
+                    fprintf(stderr, "%sCHOICE -> %s (option %zu)\n", debug_indent().c_str(),
+                            common_peg_parse_result_type_name(result.type), i);
+                }
                 return result;
             }
         }
 
+        ctx.parse_depth--;
+        if (ctx.debug) {
+            fprintf(stderr, "%sCHOICE -> FAIL (no options matched)\n", debug_indent().c_str());
+        }
         return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_FAIL, start_pos);
     }
 
     common_peg_parse_result operator()(const common_peg_repetition_parser & p) {
-        auto pos = start_pos;
-        int match_count = 0;
+        if (ctx.debug) {
+            fprintf(stderr, "%sREPEAT start at %zu '%s' (min=%d, max=%d)\n", debug_indent().c_str(), start_pos,
+                    debug_input_snippet(start_pos).c_str(), p.min_count, p.max_count);
+        }
+        ctx.parse_depth++;
+
+        auto                           pos         = start_pos;
+        int                            match_count = 0;
         std::vector<common_peg_ast_id> nodes;
 
         // Try to match up to max_count times (or unlimited if max_count is -1)
         while (p.max_count == -1 || match_count < p.max_count) {
             if (pos >= ctx.input.size()) {
+                if (ctx.debug) {
+                    fprintf(stderr, "%sREPEAT: at end of input, count=%d\n", debug_indent().c_str(), match_count);
+                }
                 break;
             }
 
             auto result = arena.parse(p.child, ctx, pos);
 
+            if (ctx.debug) {
+                fprintf(stderr, "%sREPEAT iter %d: %s at %zu->%zu, nodes=%zu\n", debug_indent().c_str(), match_count,
+                        common_peg_parse_result_type_name(result.type), result.start, result.end, result.nodes.size());
+                fprintf(stderr, "%sREPEAT CHILD: %s\n", debug_indent().c_str(), arena.dump(p.child).c_str());
+            }
+
             if (result.success()) {
                 // Prevent infinite loop on empty matches
                 if (result.end == pos) {
+                    if (ctx.debug) {
+                        fprintf(stderr, "%s  REPEAT: empty match, stopping\n", debug_indent().c_str());
+                    }
                     break;
                 }
 
@@ -391,21 +522,45 @@ struct parser_executor {
                     nodes.insert(nodes.end(), result.nodes.begin(), result.nodes.end());
                 }
 
-                return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_NEED_MORE_INPUT, start_pos, result.end, std::move(nodes));
+                ctx.parse_depth--;
+                if (ctx.debug) {
+                    fprintf(stderr, "%sREPEAT -> NEED_MORE (count=%d, nodes=%zu)\n", debug_indent().c_str(),
+                            match_count, nodes.size());
+                }
+                return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_NEED_MORE_INPUT, start_pos, result.end,
+                                               std::move(nodes));
             }
 
             // Child failed - stop trying
+            if (ctx.debug) {
+                fprintf(stderr, "%sREPEAT: child failed, stopping\n", debug_indent().c_str());
+            }
             break;
         }
 
         // Check if we got enough matches
         if (p.min_count > 0 && match_count < p.min_count) {
+            ctx.parse_depth--;
             if (pos >= ctx.input.size() && ctx.is_partial) {
-                return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_NEED_MORE_INPUT, start_pos, pos, std::move(nodes));
+                if (ctx.debug) {
+                    fprintf(stderr, "%sREPEAT -> NEED_MORE (not enough matches: %d < %d)\n", debug_indent().c_str(),
+                            match_count, p.min_count);
+                }
+                return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_NEED_MORE_INPUT, start_pos, pos,
+                                               std::move(nodes));
+            }
+            if (ctx.debug) {
+                fprintf(stderr, "%sREPEAT -> FAIL (not enough matches: %d < %d)\n", debug_indent().c_str(), match_count,
+                        p.min_count);
             }
             return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_FAIL, start_pos, pos);
         }
 
+        ctx.parse_depth--;
+        if (ctx.debug) {
+            fprintf(stderr, "%sREPEAT -> SUCCESS (count=%d, nodes=%zu)\n", debug_indent().c_str(), match_count,
+                    nodes.size());
+        }
         return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_SUCCESS, start_pos, pos, std::move(nodes));
     }
 
@@ -463,8 +618,8 @@ struct parser_executor {
     }
 
     common_peg_parse_result operator()(const common_peg_chars_parser & p) const {
-        auto pos = start_pos;
-        int match_count = 0;
+        auto pos         = start_pos;
+        int  match_count = 0;
 
         // Try to match up to max_count times (or unlimited if max_count is -1)
         while (p.max_count == -1 || match_count < p.max_count) {
@@ -527,7 +682,7 @@ struct parser_executor {
     }
 
     static common_peg_parse_result handle_escape_sequence(common_peg_parse_context & ctx, size_t start, size_t & pos) {
-        ++pos; // consume '\'
+        ++pos;  // consume '\'
         if (pos >= ctx.input.size()) {
             if (!ctx.is_partial) {
                 return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_FAIL, start);
@@ -555,7 +710,7 @@ struct parser_executor {
     }
 
     static common_peg_parse_result handle_unicode_escape(common_peg_parse_context & ctx, size_t start, size_t & pos) {
-        ++pos; // consume 'u'
+        ++pos;  // consume 'u'
         for (int i = 0; i < 4; ++i) {
             if (pos >= ctx.input.size()) {
                 if (!ctx.is_partial) {
@@ -617,7 +772,7 @@ struct parser_executor {
         trie matcher(p.delimiters);
 
         // Scan input and check for delimiters
-        size_t pos = start_pos;
+        size_t pos            = start_pos;
         size_t last_valid_pos = start_pos;
 
         while (pos < ctx.input.size()) {
@@ -638,16 +793,12 @@ struct parser_executor {
                 return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_FAIL, start_pos);
             }
 
-            // Check if a delimiter starts at this position
             auto match = matcher.check_at(ctx.input, pos);
-
             if (match == trie::COMPLETE_MATCH) {
-                // Found a complete delimiter, return everything before it
                 return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_SUCCESS, start_pos, pos);
             }
 
             if (match == trie::PARTIAL_MATCH) {
-                // Found a partial match extending to end of input, return everything before it
                 return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_SUCCESS, start_pos, pos);
             }
 
@@ -673,18 +824,12 @@ struct parser_executor {
         if (!result.fail()) {
             std::string_view text;
             if (result.start < ctx.input.size()) {
-                text = std::string_view(ctx.input).substr(result.start, result.end - result.start);
+                text = std::string_view(ctx.input).substr(
+                    result.start, std::min(result.end - result.start, ctx.input.size() - result.start));
             }
 
-            auto node_id = ctx.ast.add_node(
-                p.name,
-                "",
-                result.start,
-                result.end,
-                text,
-                std::move(result.nodes),
-                result.need_more_input()
-            );
+            auto node_id = ctx.ast.add_node(p.name, "", result.start, result.end, text, std::move(result.nodes),
+                                            result.need_more_input());
 
             return common_peg_parse_result(result.type, result.start, result.end, { node_id });
         }
@@ -694,6 +839,9 @@ struct parser_executor {
 
     common_peg_parse_result operator()(const common_peg_tag_parser & p) {
         // Parse the child
+        if (ctx.debug) {
+            fprintf(stderr, "%sTAG: %s\n", debug_indent().c_str(), p.tag.c_str());
+        }
         auto result = arena.parse(p.child, ctx, start_pos);
 
         if (!result.fail()) {
@@ -702,15 +850,8 @@ struct parser_executor {
                 text = std::string_view(ctx.input).substr(result.start, result.end - result.start);
             }
 
-            auto node_id = ctx.ast.add_node(
-                "",
-                p.tag,
-                result.start,
-                result.end,
-                text,
-                std::move(result.nodes),
-                result.need_more_input()
-            );
+            auto node_id = ctx.ast.add_node("", p.tag, result.start, result.end, text, std::move(result.nodes),
+                                            result.need_more_input());
 
             return common_peg_parse_result(result.type, result.start, result.end, { node_id });
         }
@@ -740,60 +881,89 @@ common_peg_parse_result common_peg_arena::parse(common_peg_parse_context & ctx,
     return parse(root_, ctx, start);
 }
 
-common_peg_parse_result common_peg_arena::parse(common_peg_parser_id id, common_peg_parse_context & ctx, size_t start) const {
+common_peg_parse_result common_peg_arena::parse(common_peg_parser_id       id,
+                                                common_peg_parse_context & ctx,
+                                                size_t                     start) const {
     // Execute parser
-    const auto & parser = parsers_.at(id);
+    const auto &    parser = parsers_.at(id);
     parser_executor exec(*this, ctx, start);
     return std::visit(exec, parser);
 }
 
 common_peg_parser_id common_peg_arena::resolve_ref(common_peg_parser_id id) {
     const auto & parser = parsers_.at(id);
-    if (auto ref = std::get_if<common_peg_ref_parser>(&parser)) {
+    if (const auto *ref = std::get_if<common_peg_ref_parser>(&parser)) {
         return get_rule(ref->name);
     }
     return id;
 }
 
+static void bfs_node(common_peg_ast_arena &arena, std::ostringstream & oss, const common_peg_ast_node & node, int indent) {
+    for (int i = 0; i < indent; i++) {
+        oss << "  ";
+    }
+    oss << "NODE " << node.id;
+    if (!node.rule.empty()) {
+        oss << " (rule " << node.rule << ")";
+    }
+    if (!node.tag.empty()) {
+        oss << " (tag " << node.tag << ")";
+    }
+    oss << " ['" << node.text << "']\n";
+    for (const auto child : node.children) {
+        bfs_node(arena, oss, arena.get(child), indent + 1);
+    }
+}
+
+std::string common_peg_ast_arena::dump() {
+    std::ostringstream oss;
+    for (auto & node : nodes_) {
+        bfs_node(*this, oss, node, 0);
+    }
+    return oss.str();
+}
+
 void common_peg_arena::resolve_refs() {
     // Walk through all parsers and replace refs with their corresponding rule IDs
     for (auto & parser : parsers_) {
-        std::visit([this](auto & p) {
-            using T = std::decay_t<decltype(p)>;
+        std::visit(
+            [this](auto & p) {
+                using T = std::decay_t<decltype(p)>;
 
-            if constexpr (std::is_same_v<T, common_peg_sequence_parser>) {
-                for (auto & child : p.children) {
-                    child = resolve_ref(child);
+                if constexpr (std::is_same_v<T, common_peg_sequence_parser>) {
+                    for (auto & child : p.children) {
+                        child = resolve_ref(child);
+                    }
+                } else if constexpr (std::is_same_v<T, common_peg_choice_parser>) {
+                    for (auto & child : p.children) {
+                        child = resolve_ref(child);
+                    }
+                } else if constexpr (std::is_same_v<T, common_peg_repetition_parser> ||
+                                     std::is_same_v<T, common_peg_and_parser> ||
+                                     std::is_same_v<T, common_peg_not_parser> ||
+                                     std::is_same_v<T, common_peg_tag_parser> ||
+                                     std::is_same_v<T, common_peg_atomic_parser>) {
+                    p.child = resolve_ref(p.child);
+                } else if constexpr (std::is_same_v<T, common_peg_rule_parser>) {
+                    p.child = resolve_ref(p.child);
+                } else if constexpr (std::is_same_v<T, common_peg_schema_parser>) {
+                    p.child = resolve_ref(p.child);
+                } else if constexpr (std::is_same_v<T, common_peg_epsilon_parser> ||
+                                     std::is_same_v<T, common_peg_start_parser> ||
+                                     std::is_same_v<T, common_peg_end_parser> ||
+                                     std::is_same_v<T, common_peg_ref_parser> ||
+                                     std::is_same_v<T, common_peg_until_parser> ||
+                                     std::is_same_v<T, common_peg_literal_parser> ||
+                                     std::is_same_v<T, common_peg_json_string_parser> ||
+                                     std::is_same_v<T, common_peg_chars_parser> ||
+                                     std::is_same_v<T, common_peg_any_parser> ||
+                                     std::is_same_v<T, common_peg_space_parser>) {
+                    // These rules do not have children
+                } else {
+                    static_assert(is_always_false_v<T>);
                 }
-            } else if constexpr (std::is_same_v<T, common_peg_choice_parser>) {
-                for (auto & child : p.children) {
-                    child = resolve_ref(child);
-                }
-            } else if constexpr (std::is_same_v<T, common_peg_repetition_parser> ||
-                                 std::is_same_v<T, common_peg_and_parser> ||
-                                 std::is_same_v<T, common_peg_not_parser> ||
-                                 std::is_same_v<T, common_peg_tag_parser> ||
-                                 std::is_same_v<T, common_peg_atomic_parser>) {
-                p.child = resolve_ref(p.child);
-            } else if constexpr (std::is_same_v<T, common_peg_rule_parser>) {
-                p.child = resolve_ref(p.child);
-            } else if constexpr (std::is_same_v<T, common_peg_schema_parser>) {
-                p.child = resolve_ref(p.child);
-            } else if constexpr (std::is_same_v<T, common_peg_epsilon_parser> ||
-                                 std::is_same_v<T, common_peg_start_parser> ||
-                                 std::is_same_v<T, common_peg_end_parser> ||
-                                 std::is_same_v<T, common_peg_ref_parser> ||
-                                 std::is_same_v<T, common_peg_until_parser> ||
-                                 std::is_same_v<T, common_peg_literal_parser> ||
-                                 std::is_same_v<T, common_peg_json_string_parser> ||
-                                 std::is_same_v<T, common_peg_chars_parser> ||
-                                 std::is_same_v<T, common_peg_any_parser> ||
-                                 std::is_same_v<T, common_peg_space_parser>) {
-                // These rules do not have children
-            } else {
-                static_assert(is_always_false_v<T>);
-            }
-        }, parser);
+            },
+            parser);
     }
 
     // Also flatten root if it's a ref
@@ -803,63 +973,86 @@ void common_peg_arena::resolve_refs() {
 }
 
 std::string common_peg_arena::dump(common_peg_parser_id id) const {
+    std::unordered_set<common_peg_parser_id> visited;
+    return dump_impl(id, visited);
+}
+
+std::string common_peg_arena::dump_impl(common_peg_parser_id                       id,
+                                        std::unordered_set<common_peg_parser_id> & visited) const {
+    // Check for cycles
+    if (visited.count(id)) {
+        return "[cycle]";
+    }
+    visited.insert(id);
+
     const auto & parser = parsers_.at(id);
 
-    return std::visit([this](const auto & p) -> std::string {
-        using T = std::decay_t<decltype(p)>;
+    return std::visit(
+        [this, &visited](const auto & p) -> std::string {
+            using T = std::decay_t<decltype(p)>;
 
-        if constexpr (std::is_same_v<T, common_peg_epsilon_parser>) {
-            return "Epsilon";
-        } else if constexpr (std::is_same_v<T, common_peg_start_parser>) {
-            return "Start";
-        } else if constexpr (std::is_same_v<T, common_peg_end_parser>) {
-            return "End";
-        } else if constexpr (std::is_same_v<T, common_peg_literal_parser>) {
-            return "Literal(" + p.literal + ")";
-        } else if constexpr (std::is_same_v<T, common_peg_sequence_parser>) {
-            std::vector<std::string> parts;
-            for (const auto & child : p.children) {
-                parts.push_back(dump(child));
+            if constexpr (std::is_same_v<T, common_peg_epsilon_parser>) {
+                return "Epsilon";
+            } else if constexpr (std::is_same_v<T, common_peg_start_parser>) {
+                return "Start";
+            } else if constexpr (std::is_same_v<T, common_peg_end_parser>) {
+                return "End";
+            } else if constexpr (std::is_same_v<T, common_peg_literal_parser>) {
+                return "Literal(" + p.literal + ")";
+            } else if constexpr (std::is_same_v<T, common_peg_sequence_parser>) {
+                std::vector<std::string> parts;
+                for (const auto & child : p.children) {
+                    parts.push_back(dump_impl(child, visited));
+                }
+                return "Sequence(" + string_join(parts, ", ") + ")";
+            } else if constexpr (std::is_same_v<T, common_peg_choice_parser>) {
+                std::vector<std::string> parts;
+                for (const auto & child : p.children) {
+                    parts.push_back(dump_impl(child, visited));
+                }
+                return "Choice(" + string_join(parts, ", ") + ")";
+            } else if constexpr (std::is_same_v<T, common_peg_repetition_parser>) {
+                if (p.max_count == -1) {
+                    return "Repetition(" + dump_impl(p.child, visited) + ", " + std::to_string(p.min_count) +
+                           ", unbounded)";
+                }
+                return "Repetition(" + dump_impl(p.child, visited) + ", " + std::to_string(p.min_count) + ", " +
+                       std::to_string(p.max_count) + ")";
+            } else if constexpr (std::is_same_v<T, common_peg_and_parser>) {
+                return "And(" + dump_impl(p.child, visited) + ")";
+            } else if constexpr (std::is_same_v<T, common_peg_not_parser>) {
+                return "Not(" + dump_impl(p.child, visited) + ")";
+            } else if constexpr (std::is_same_v<T, common_peg_atomic_parser>) {
+                return "Atomic(" + dump_impl(p.child, visited) + ")";
+            } else if constexpr (std::is_same_v<T, common_peg_any_parser>) {
+                return "Any";
+            } else if constexpr (std::is_same_v<T, common_peg_space_parser>) {
+                return "Space";
+            } else if constexpr (std::is_same_v<T, common_peg_chars_parser>) {
+                if (p.max_count == -1) {
+                    return "CharRepeat(" + p.pattern + ", " + std::to_string(p.min_count) + ", unbounded)";
+                }
+                return "CharRepeat(" + p.pattern + ", " + std::to_string(p.min_count) + ", " +
+                       std::to_string(p.max_count) + ")";
+            } else if constexpr (std::is_same_v<T, common_peg_json_string_parser>) {
+                return "JsonString()";
+            } else if constexpr (std::is_same_v<T, common_peg_until_parser>) {
+                return "Until(" + string_join(p.delimiters, " | ") + ")";
+            } else if constexpr (std::is_same_v<T, common_peg_schema_parser>) {
+                return "Schema(" + dump_impl(p.child, visited) + ", " + (p.schema ? p.schema->dump() : "null") + ")";
+            } else if constexpr (std::is_same_v<T, common_peg_rule_parser>) {
+                return "Rule(" + p.name + ", " + dump_impl(p.child, visited) + ")";
+            } else if constexpr (std::is_same_v<T, common_peg_ref_parser>) {
+                return "Ref(" + p.name + ")";
+            } else if constexpr (std::is_same_v<T, common_peg_tag_parser>) {
+                return "Tag(" + p.tag + ", " + dump(p.child) + ")";
+            } else if constexpr (std::is_same_v<T, common_peg_atomic_parser>) {
+                return "Atomic(" + dump(p.child) + ")";
+            } else {
+                return "Unknown";
             }
-            return "Sequence(" + string_join(parts, ", ") + ")";
-        } else if constexpr (std::is_same_v<T, common_peg_choice_parser>) {
-            std::vector<std::string> parts;
-            for (const auto & child : p.children) {
-                parts.push_back(dump(child));
-            }
-            return "Choice(" + string_join(parts, ", ") + ")";
-        } else if constexpr (std::is_same_v<T, common_peg_repetition_parser>) {
-            if (p.max_count == -1) {
-                return "Repetition(" + dump(p.child) + ", " + std::to_string(p.min_count) + ", unbounded)";
-            }
-            return "Repetition(" + dump(p.child) + ", " + std::to_string(p.min_count) + ", " + std::to_string(p.max_count) + ")";
-        } else if constexpr (std::is_same_v<T, common_peg_and_parser>) {
-            return "And(" + dump(p.child) + ")";
-        } else if constexpr (std::is_same_v<T, common_peg_not_parser>) {
-            return "Not(" + dump(p.child) + ")";
-        } else if constexpr (std::is_same_v<T, common_peg_any_parser>) {
-            return "Any";
-        } else if constexpr (std::is_same_v<T, common_peg_space_parser>) {
-            return "Space";
-        } else if constexpr (std::is_same_v<T, common_peg_chars_parser>) {
-            if (p.max_count == -1) {
-                return "CharRepeat(" + p.pattern + ", " + std::to_string(p.min_count) + ", unbounded)";
-            }
-            return "CharRepeat(" + p.pattern + ", " + std::to_string(p.min_count) + ", " + std::to_string(p.max_count) + ")";
-        } else if constexpr (std::is_same_v<T, common_peg_json_string_parser>) {
-            return "JsonString()";
-        } else if constexpr (std::is_same_v<T, common_peg_until_parser>) {
-            return "Until(" + string_join(p.delimiters, " | ") + ")";
-        } else if constexpr (std::is_same_v<T, common_peg_schema_parser>) {
-            return "Schema(" + dump(p.child) + ", " + (p.schema ? p.schema->dump() : "null") + ")";
-        } else if constexpr (std::is_same_v<T, common_peg_rule_parser>) {
-            return "Rule(" + p.name + ", " + dump(p.child) + ")";
-        } else if constexpr (std::is_same_v<T, common_peg_ref_parser>) {
-            return "Ref(" + p.name + ")";
-        } else {
-            return "Unknown";
-        }
-    }, parser);
+        },
+        parser);
 }
 
 common_peg_parser & common_peg_parser::operator=(const common_peg_parser & other) {
@@ -868,25 +1061,25 @@ common_peg_parser & common_peg_parser::operator=(const common_peg_parser & other
 }
 
 common_peg_parser & common_peg_parser::operator+=(const common_peg_parser & other) {
-    id_ = builder_.sequence({id_, other.id_});
+    id_ = builder_.sequence({ id_, other.id_ });
     return *this;
 }
 
 common_peg_parser & common_peg_parser::operator|=(const common_peg_parser & other) {
-    id_ = builder_.choice({id_, other.id_});
+    id_ = builder_.choice({ id_, other.id_ });
     return *this;
 }
 
 common_peg_parser common_peg_parser::operator+(const common_peg_parser & other) const {
-    return builder_.sequence({id_, other.id_});
+    return builder_.sequence({ id_, other.id_ });
 }
 
 common_peg_parser common_peg_parser::operator|(const common_peg_parser & other) const {
-    return builder_.choice({id_, other.id_});
+    return builder_.choice({ id_, other.id_ });
 }
 
 common_peg_parser common_peg_parser::operator<<(const common_peg_parser & other) const {
-    return builder_.sequence({id_, builder_.space(), other.id_});
+    return builder_.sequence({ id_, builder_.space(), other.id_ });
 }
 
 common_peg_parser common_peg_parser::operator+(const char * str) const {
@@ -955,7 +1148,7 @@ common_peg_parser common_peg_parser_builder::sequence(const std::vector<common_p
             flattened.push_back(p);
         }
     }
-    return wrap(arena_.add_parser(common_peg_sequence_parser{flattened}));
+    return wrap(arena_.add_parser(common_peg_sequence_parser{ flattened }));
 }
 
 common_peg_parser common_peg_parser_builder::sequence(const std::vector<common_peg_parser> & parsers) {
@@ -987,7 +1180,7 @@ common_peg_parser common_peg_parser_builder::choice(const std::vector<common_peg
             flattened.push_back(p);
         }
     }
-    return wrap(arena_.add_parser(common_peg_choice_parser{flattened}));
+    return wrap(arena_.add_parser(common_peg_choice_parser{ flattened }));
 }
 
 common_peg_parser common_peg_parser_builder::choice(const std::vector<common_peg_parser> & parsers) {
@@ -1010,36 +1203,42 @@ common_peg_parser common_peg_parser_builder::choice(std::initializer_list<common
 
 common_peg_parser common_peg_parser_builder::chars(const std::string & classes, int min, int max) {
     auto [ranges, negated] = parse_char_classes(classes);
-    return wrap(arena_.add_parser(common_peg_chars_parser{classes, ranges, negated, min, max}));
+    return wrap(arena_.add_parser(common_peg_chars_parser{ classes, ranges, negated, min, max }));
 }
 
-common_peg_parser common_peg_parser_builder::schema(const common_peg_parser & p, const std::string & name, const nlohmann::ordered_json & schema, bool raw) {
-    return wrap(arena_.add_parser(common_peg_schema_parser{p.id(), name, std::make_shared<nlohmann::ordered_json>(schema), raw}));
+common_peg_parser common_peg_parser_builder::schema(const common_peg_parser &      p,
+                                                    const std::string &            name,
+                                                    const nlohmann::ordered_json & schema,
+                                                    bool                           raw) {
+    return wrap(arena_.add_parser(
+        common_peg_schema_parser{ p.id(), name, std::make_shared<nlohmann::ordered_json>(schema), raw }));
 }
 
 common_peg_parser common_peg_parser_builder::rule(const std::string & name, const common_peg_parser & p, bool trigger) {
     auto clean_name = rule_name(name);
-    auto rule_id = arena_.add_parser(common_peg_rule_parser{clean_name, p.id(), trigger});
+    auto rule_id    = arena_.add_parser(common_peg_rule_parser{ clean_name, p.id(), trigger });
     arena_.add_rule(clean_name, rule_id);
     return ref(clean_name);
 }
 
-common_peg_parser common_peg_parser_builder::rule(const std::string & name, const std::function<common_peg_parser()> & builder_fn, bool trigger) {
+common_peg_parser common_peg_parser_builder::rule(const std::string &                        name,
+                                                  const std::function<common_peg_parser()> & builder_fn,
+                                                  bool                                       trigger) {
     auto clean_name = rule_name(name);
     if (arena_.has_rule(clean_name)) {
         return ref(clean_name);
     }
 
     // Create placeholder rule to allow recursive references
-    auto placeholder = any();  // Temporary placeholder
-    auto placeholder_rule_id = arena_.add_parser(common_peg_rule_parser{clean_name, placeholder.id(), trigger});
+    auto placeholder         = any();  // Temporary placeholder
+    auto placeholder_rule_id = arena_.add_parser(common_peg_rule_parser{ clean_name, placeholder.id(), trigger });
     arena_.add_rule(clean_name, placeholder_rule_id);
 
     // Build the actual parser
     auto parser = builder_fn();
 
     // Replace placeholder with actual rule
-    auto rule_id = arena_.add_parser(common_peg_rule_parser{clean_name, parser.id(), trigger});
+    auto rule_id              = arena_.add_parser(common_peg_rule_parser{ clean_name, parser.id(), trigger });
     arena_.rules_[clean_name] = rule_id;
 
     return ref(clean_name);
@@ -1056,77 +1255,49 @@ common_peg_arena common_peg_parser_builder::build() {
 
 // JSON parsers
 common_peg_parser common_peg_parser_builder::json_number() {
-   return rule("json-number", [this]() {
+    return rule("json-number", [this]() {
         auto digit1_9 = chars("[1-9]", 1, 1);
-        auto digits = chars("[0-9]");
-        auto int_part = choice({literal("0"), sequence({digit1_9, chars("[0-9]", 0, -1)})});
-        auto frac = sequence({literal("."), digits});
-        auto exp = sequence({choice({literal("e"), literal("E")}), optional(chars("[+-]", 1, 1)), digits});
-        return sequence({optional(literal("-")), int_part, optional(frac), optional(exp), space()});
+        auto digits   = chars("[0-9]");
+        auto int_part = choice({ literal("0"), sequence({ digit1_9, chars("[0-9]", 0, -1) }) });
+        auto frac     = sequence({ literal("."), digits });
+        auto exp      = sequence({ choice({ literal("e"), literal("E") }), optional(chars("[+-]", 1, 1)), digits });
+        return sequence({ optional(literal("-")), int_part, optional(frac), optional(exp), space() });
     });
 }
 
 common_peg_parser common_peg_parser_builder::json_string() {
-    return rule("json-string", [this]() {
-        return sequence({literal("\""), json_string_content(), literal("\""), space()});
-    });
+    return rule("json-string",
+                [this]() { return sequence({ literal("\""), json_string_content(), literal("\""), space() }); });
 }
 
 common_peg_parser common_peg_parser_builder::json_bool() {
-    return rule("json-bool", [this]() {
-        return sequence({choice({literal("true"), literal("false")}), space()});
-    });
+    return rule("json-bool", [this]() { return sequence({ choice({ literal("true"), literal("false") }), space() }); });
 }
 
 common_peg_parser common_peg_parser_builder::json_null() {
-    return rule("json-null", [this]() {
-        return sequence({literal("null"), space()});
-    });
+    return rule("json-null", [this]() { return sequence({ literal("null"), space() }); });
 }
 
 common_peg_parser common_peg_parser_builder::json_object() {
     return rule("json-object", [this]() {
-        auto ws = space();
-        auto member = sequence({json_string(), ws, literal(":"), ws, json()});
-        auto members = sequence({member, zero_or_more(sequence({ws, literal(","), ws, member}))});
-        return sequence({
-            literal("{"),
-            ws,
-            choice({
-                literal("}"),
-                sequence({members, ws, literal("}")})
-            }),
-            ws
-        });
+        auto ws      = space();
+        auto member  = sequence({ json_string(), ws, literal(":"), ws, json() });
+        auto members = sequence({ member, zero_or_more(sequence({ ws, literal(","), ws, member })) });
+        return sequence({ literal("{"), ws, choice({ literal("}"), sequence({ members, ws, literal("}") }) }) });
     });
 }
 
 common_peg_parser common_peg_parser_builder::json_array() {
     return rule("json-array", [this]() {
-        auto ws = space();
-        auto elements = sequence({json(), zero_or_more(sequence({literal(","), ws, json()}))});
-        return sequence({
-            literal("["),
-            ws,
-            choice({
-                literal("]"),
-                sequence({elements, ws, literal("]")})
-            }),
-            ws
-        });
+        auto ws       = space();
+        auto elements = sequence({ json(), zero_or_more(sequence({ literal(","), ws, json() })) });
+        return sequence({ literal("["), ws, choice({ literal("]"), sequence({ elements, ws, literal("]") }) }) });
     });
 }
 
 common_peg_parser common_peg_parser_builder::json() {
     return rule("json-value", [this]() {
-        return choice({
-            json_object(),
-            json_array(),
-            json_string(),
-            json_number(),
-            json_bool(),
-            json_null()
-        });
+        return choice({ json_object(), json_array(), json_string(), json_number(), json_bool(), json_null() });
     });
 }
 
@@ -1145,17 +1316,76 @@ common_peg_parser common_peg_parser_builder::json_member(const std::string & key
     });
 }
 
-
-static std::string gbnf_escape_char_class(char c) {
-    switch (c) {
-        case '\n': return "\\n";
-        case '\t': return "\\t";
-        case '\r': return "\\r";
-        case '\\': return "\\\\";
-        case ']':  return "\\]";
-        case '[':  return "\\[";
-        default:   return std::string(1, c);
+static std::string gbnf_escape_char_class(uint32_t c) {
+    if (c == '-' || c == ']' || c == '[' || c == '\\') {
+        return "\\" + std::string(1, (char) c);
     }
+    // Escape whitespace control characters
+    if (c == '\n') {
+        return "\\n";
+    }
+    if (c == '\t') {
+        return "\\t";
+    }
+    if (c == '\r') {
+        return "\\r";
+    }
+
+    // Printable ASCII
+    if (c >= 0x20 && c <= 0x7E) {
+        return std::string(1, (char) c);
+    }
+
+    // Hex escape
+    char         buf[16];
+    const char * hex = "0123456789ABCDEF";
+
+    if (c <= 0xFF) {
+        buf[0] = '\\';
+        buf[1] = 'x';
+        buf[2] = hex[(c >> 4) & 0xF];
+        buf[3] = hex[c & 0xF];
+        buf[4] = '\0';
+    } else if (c <= 0xFFFF) {
+        buf[0] = '\\';
+        buf[1] = 'u';
+        buf[2] = hex[(c >> 12) & 0xF];
+        buf[3] = hex[(c >> 8) & 0xF];
+        buf[4] = hex[(c >> 4) & 0xF];
+        buf[5] = hex[c & 0xF];
+        buf[6] = '\0';
+    } else {
+        buf[0] = '\\';
+        buf[1] = 'U';
+        for (int i = 0; i < 8; i++) {
+            buf[2 + i] = hex[(c >> ((7 - i) * 4)) & 0xF];
+        }
+        buf[10] = '\0';
+    }
+
+    return std::string(buf);
+}
+
+static std::string codepoints_to_utf8(const std::vector<uint32_t> & cps) {
+    std::string s;
+    for (uint32_t cp : cps) {
+        if (cp < 0x80) {
+            s += (char) cp;
+        } else if (cp < 0x800) {
+            s += (char) (0xC0 | (cp >> 6));
+            s += (char) (0x80 | (cp & 0x3F));
+        } else if (cp < 0x10000) {
+            s += (char) (0xE0 | (cp >> 12));
+            s += (char) (0x80 | ((cp >> 6) & 0x3F));
+            s += (char) (0x80 | (cp & 0x3F));
+        } else {
+            s += (char) (0xF0 | (cp >> 18));
+            s += (char) (0x80 | ((cp >> 12) & 0x3F));
+            s += (char) (0x80 | ((cp >> 6) & 0x3F));
+            s += (char) (0x80 | (cp & 0x3F));
+        }
+    }
+    return s;
 }
 
 static std::string gbnf_excluding_pattern(const std::vector<std::string> & strings) {
@@ -1168,17 +1398,17 @@ static std::string gbnf_excluding_pattern(const std::vector<std::string> & strin
             pattern += " | ";
         }
 
-        const auto & pre = pieces[i].prefix;
+        const auto & pre   = pieces[i].prefix;
         const auto & chars = pieces[i].next_chars;
 
         std::string cls;
-        cls.reserve(chars.size());
-        for (const auto & ch : chars) {
+        cls.reserve(chars.size() * 4);
+        for (uint32_t ch : chars) {
             cls += gbnf_escape_char_class(ch);
         }
 
         if (!pre.empty()) {
-            pattern += gbnf_format_literal(pre) + " [^" + cls + "]";
+            pattern += gbnf_format_literal(codepoints_to_utf8(pre)) + " [^" + cls + "]";
         } else {
             pattern += "[^" + cls + "]";
         }
@@ -1187,58 +1417,56 @@ static std::string gbnf_excluding_pattern(const std::vector<std::string> & strin
     return "(" + pattern + ")*";
 }
 
-static std::unordered_set<std::string> collect_reachable_rules(
-    const common_peg_arena & arena,
-    const common_peg_parser_id & rule
-) {
+static std::unordered_set<std::string> collect_reachable_rules(const common_peg_arena &     arena,
+                                                               const common_peg_parser_id & rule) {
     std::unordered_set<std::string> reachable;
     std::unordered_set<std::string> visited;
 
     std::function<void(common_peg_parser_id)> visit = [&](common_peg_parser_id id) {
         const auto & parser = arena.get(id);
 
-        std::visit([&](const auto & p) {
-            using T = std::decay_t<decltype(p)>;
+        std::visit(
+            [&](const auto & p) {
+                using T = std::decay_t<decltype(p)>;
 
-            if constexpr (std::is_same_v<T, common_peg_epsilon_parser> ||
-                          std::is_same_v<T, common_peg_start_parser> ||
-                          std::is_same_v<T, common_peg_end_parser> ||
-                          std::is_same_v<T, common_peg_until_parser> ||
-                          std::is_same_v<T, common_peg_literal_parser> ||
-                          std::is_same_v<T, common_peg_chars_parser> ||
-                          std::is_same_v<T, common_peg_space_parser> ||
-                          std::is_same_v<T, common_peg_any_parser> ||
-                          std::is_same_v<T, common_peg_json_string_parser>) {
-                // These parsers do not have any children
-            } else if constexpr (std::is_same_v<T, common_peg_sequence_parser>) {
-                for (auto child : p.children) {
-                    visit(child);
-                }
-            } else if constexpr (std::is_same_v<T, common_peg_choice_parser>) {
-                for (auto child : p.children) {
-                    visit(child);
-                }
-            } else if constexpr (std::is_same_v<T, common_peg_repetition_parser> ||
-                                 std::is_same_v<T, common_peg_and_parser> ||
-                                 std::is_same_v<T, common_peg_not_parser> ||
-                                 std::is_same_v<T, common_peg_tag_parser> ||
-                                 std::is_same_v<T, common_peg_atomic_parser> ||
-                                 std::is_same_v<T, common_peg_schema_parser>) {
-                visit(p.child);
-            } else if constexpr (std::is_same_v<T, common_peg_rule_parser>) {
-                if (visited.find(p.name) == visited.end()) {
-                    visited.insert(p.name);
-                    reachable.insert(p.name);
+                if constexpr (std::is_same_v<T, common_peg_epsilon_parser> ||
+                              std::is_same_v<T, common_peg_start_parser> || std::is_same_v<T, common_peg_end_parser> ||
+                              std::is_same_v<T, common_peg_until_parser> ||
+                              std::is_same_v<T, common_peg_literal_parser> ||
+                              std::is_same_v<T, common_peg_chars_parser> ||
+                              std::is_same_v<T, common_peg_space_parser> || std::is_same_v<T, common_peg_any_parser> ||
+                              std::is_same_v<T, common_peg_json_string_parser>) {
+                    // These parsers do not have any children
+                } else if constexpr (std::is_same_v<T, common_peg_sequence_parser>) {
+                    for (auto child : p.children) {
+                        visit(child);
+                    }
+                } else if constexpr (std::is_same_v<T, common_peg_choice_parser>) {
+                    for (auto child : p.children) {
+                        visit(child);
+                    }
+                } else if constexpr (std::is_same_v<T, common_peg_repetition_parser> ||
+                                     std::is_same_v<T, common_peg_and_parser> ||
+                                     std::is_same_v<T, common_peg_not_parser> ||
+                                     std::is_same_v<T, common_peg_tag_parser> ||
+                                     std::is_same_v<T, common_peg_atomic_parser> ||
+                                     std::is_same_v<T, common_peg_schema_parser>) {
                     visit(p.child);
+                } else if constexpr (std::is_same_v<T, common_peg_rule_parser>) {
+                    if (visited.find(p.name) == visited.end()) {
+                        visited.insert(p.name);
+                        reachable.insert(p.name);
+                        visit(p.child);
+                    }
+                } else if constexpr (std::is_same_v<T, common_peg_ref_parser>) {
+                    // Traverse rules so we pick up everything
+                    auto referenced_rule = arena.get_rule(p.name);
+                    visit(referenced_rule);
+                } else {
+                    static_assert(is_always_false_v<T>);
                 }
-            } else if constexpr (std::is_same_v<T, common_peg_ref_parser>) {
-                // Traverse rules so we pick up everything
-                auto referenced_rule = arena.get_rule(p.name);
-                visit(referenced_rule);
-            } else {
-                static_assert(is_always_false_v<T>);
-            }
-        }, parser);
+            },
+            parser);
     };
 
     visit(rule);
@@ -1251,129 +1479,136 @@ void common_peg_arena::build_grammar(const common_grammar_builder & builder, boo
     std::function<std::string(common_peg_parser_id)> to_gbnf = [&](common_peg_parser_id id) -> std::string {
         const auto & parser = parsers_.at(id);
 
-        return std::visit([&](const auto & p) -> std::string {
-            using T = std::decay_t<decltype(p)>;
+        return std::visit(
+            [&](const auto & p) -> std::string {
+                using T = std::decay_t<decltype(p)>;
 
-            if constexpr (std::is_same_v<T, common_peg_epsilon_parser> ||
-                          std::is_same_v<T, common_peg_start_parser> ||
-                          std::is_same_v<T, common_peg_end_parser>) {
-                return "";
-            } else if constexpr (std::is_same_v<T, common_peg_literal_parser>) {
-                return gbnf_format_literal(p.literal);
-            } else if constexpr (std::is_same_v<T, common_peg_sequence_parser>) {
-                std::string s;
-                for (const auto & child : p.children) {
-                    if (!s.empty()) {
-                        s += " ";
+                if constexpr (std::is_same_v<T, common_peg_epsilon_parser> ||
+                              std::is_same_v<T, common_peg_start_parser> || std::is_same_v<T, common_peg_end_parser>) {
+                    return "";
+                } else if constexpr (std::is_same_v<T, common_peg_literal_parser>) {
+                    return gbnf_format_literal(p.literal);
+                } else if constexpr (std::is_same_v<T, common_peg_sequence_parser>) {
+                    std::string s;
+                    for (const auto & child : p.children) {
+                        if (!s.empty()) {
+                            s += " ";
+                        }
+                        auto         child_gbnf   = to_gbnf(child);
+                        const auto & child_parser = parsers_.at(child);
+                        if (std::holds_alternative<common_peg_choice_parser>(child_parser) ||
+                            std::holds_alternative<common_peg_sequence_parser>(child_parser) ||
+                            std::holds_alternative<common_peg_tag_parser>(child_parser) ||
+                            std::holds_alternative<common_peg_atomic_parser>(child_parser)) {
+                            s += "(" + child_gbnf + ")";
+                        } else {
+                            s += child_gbnf;
+                        }
                     }
-                    auto child_gbnf = to_gbnf(child);
-                    const auto & child_parser = parsers_.at(child);
+                    return s;
+                } else if constexpr (std::is_same_v<T, common_peg_choice_parser>) {
+                    std::string s;
+                    for (const auto & child : p.children) {
+                        if (!s.empty()) {
+                            s += " | ";
+                        }
+                        auto         child_gbnf   = to_gbnf(child);
+                        const auto & child_parser = parsers_.at(child);
+                        if (std::holds_alternative<common_peg_choice_parser>(child_parser)) {
+                            s += "(" + child_gbnf + ")";
+                        } else {
+                            s += child_gbnf;
+                        }
+                    }
+                    return s;
+                } else if constexpr (std::is_same_v<T, common_peg_repetition_parser>) {
+                    auto         child_gbnf   = to_gbnf(p.child);
+                    const auto & child_parser = parsers_.at(p.child);
                     if (std::holds_alternative<common_peg_choice_parser>(child_parser) ||
-                        std::holds_alternative<common_peg_sequence_parser>(child_parser)) {
-                        s += "(" + child_gbnf + ")";
-                    } else {
-                        s += child_gbnf;
+                        std::holds_alternative<common_peg_sequence_parser>(child_parser) ||
+                        std::holds_alternative<common_peg_tag_parser>(child_parser) ||
+                        std::holds_alternative<common_peg_atomic_parser>(child_parser)) {
+                        child_gbnf = "(" + child_gbnf + ")";
                     }
-                }
-                return s;
-            } else if constexpr (std::is_same_v<T, common_peg_choice_parser>) {
-                std::string s;
-                for (const auto & child : p.children) {
-                    if (!s.empty()) {
-                        s += " | ";
+                    if (p.min_count == 0 && p.max_count == 1) {
+                        return child_gbnf + "?";
                     }
-                    auto child_gbnf = to_gbnf(child);
-                    const auto & child_parser = parsers_.at(child);
-                    if (std::holds_alternative<common_peg_choice_parser>(child_parser)) {
-                        s += "(" + child_gbnf + ")";
-                    } else {
-                        s += child_gbnf;
+                    if (p.min_count == 0 && p.max_count == -1) {
+                        return child_gbnf + "*";
                     }
-                }
-                return s;
-            } else if constexpr (std::is_same_v<T, common_peg_repetition_parser>) {
-                auto child_gbnf = to_gbnf(p.child);
-                const auto & child_parser = parsers_.at(p.child);
-                if (std::holds_alternative<common_peg_choice_parser>(child_parser) ||
-                    std::holds_alternative<common_peg_sequence_parser>(child_parser)) {
-                    child_gbnf = "(" + child_gbnf + ")";
-                }
-                if (p.min_count == 0 && p.max_count == 1) {
-                    return child_gbnf + "?";
-                }
-                if (p.min_count == 0 && p.max_count == -1) {
-                    return child_gbnf + "*";
-                }
-                if (p.min_count == 1 && p.max_count == -1) {
-                    return child_gbnf + "+";
-                }
-                if (p.max_count == -1) {
-                    return child_gbnf + "{" + std::to_string(p.min_count) + ",}";
-                }
-                if (p.min_count == p.max_count) {
-                    if (p.min_count == 1) {
-                        return child_gbnf;
+                    if (p.min_count == 1 && p.max_count == -1) {
+                        return child_gbnf + "+";
                     }
-                    return child_gbnf + "{" + std::to_string(p.min_count) + "}";
-                }
-                return child_gbnf + "{" + std::to_string(p.min_count) + "," + std::to_string(p.max_count) + "}";
-            } else if constexpr (std::is_same_v<T, common_peg_and_parser> || std::is_same_v<T, common_peg_not_parser>) {
-                return "";  // Lookahead not supported in GBNF
-            } else if constexpr (std::is_same_v<T, common_peg_any_parser>) {
-                return ".";
-            } else if constexpr (std::is_same_v<T, common_peg_space_parser>) {
-                return "space";
-            } else if constexpr (std::is_same_v<T, common_peg_chars_parser>) {
-                std::string result = p.pattern;
-                if (p.min_count == 0 && p.max_count == 1) {
-                    return result + "?";
-                }
-                if (p.min_count == 0 && p.max_count == -1) {
-                    return result + "*";
-                }
-                if (p.min_count == 1 && p.max_count == -1) {
-                    return result + "+";
-                }
-                if (p.max_count == -1) {
-                    return result + "{" + std::to_string(p.min_count) + ",}";
-                }
-                if (p.min_count == p.max_count) {
-                    if (p.min_count == 1) {
-                        return result;
+                    if (p.max_count == -1) {
+                        return child_gbnf + "{" + std::to_string(p.min_count) + ",}";
                     }
-                    return result + "{" + std::to_string(p.min_count) + "}";
-                }
-                return result + "{" + std::to_string(p.min_count) + "," + std::to_string(p.max_count) + "}";
-            } else if constexpr (std::is_same_v<T, common_peg_json_string_parser>) {
-                return R"(( [^"\\] | "\\" ( ["\\/ bfnrt] | "u" [0-9a-fA-F]{4} ) )*)";
-            } else if constexpr (std::is_same_v<T, common_peg_until_parser>) {
-                if (p.delimiters.empty()) {
-                    return ".*";
-                }
-                return gbnf_excluding_pattern(p.delimiters);
-            } else if constexpr (std::is_same_v<T, common_peg_schema_parser>) {
-                if (p.schema) {
-                    if (p.raw && p.schema->contains("type") && p.schema->at("type").is_string() && p.schema->at("type") == "string") {
-                        // TODO: Implement more comprehensive grammar generation for raw strings.
-                        // For now, use the grammar emitted from the underlying parser.
-                        return to_gbnf(p.child);
+                    if (p.min_count == p.max_count) {
+                        if (p.min_count == 1) {
+                            return child_gbnf;
+                        }
+                        return child_gbnf + "{" + std::to_string(p.min_count) + "}";
                     }
-                    return builder.add_schema(p.name, *p.schema);
+                    return child_gbnf + "{" + std::to_string(p.min_count) + "," + std::to_string(p.max_count) + "}";
+                } else if constexpr (std::is_same_v<T, common_peg_and_parser> ||
+                                     std::is_same_v<T, common_peg_not_parser>) {
+                    return "";  // Lookahead not supported in GBNF
+                } else if constexpr (std::is_same_v<T, common_peg_any_parser>) {
+                    return ".";
+                } else if constexpr (std::is_same_v<T, common_peg_space_parser>) {
+                    return "space";
+                } else if constexpr (std::is_same_v<T, common_peg_chars_parser>) {
+                    std::string result = p.pattern;
+                    if (p.min_count == 0 && p.max_count == 1) {
+                        return result + "?";
+                    }
+                    if (p.min_count == 0 && p.max_count == -1) {
+                        return result + "*";
+                    }
+                    if (p.min_count == 1 && p.max_count == -1) {
+                        return result + "+";
+                    }
+                    if (p.max_count == -1) {
+                        return result + "{" + std::to_string(p.min_count) + ",}";
+                    }
+                    if (p.min_count == p.max_count) {
+                        if (p.min_count == 1) {
+                            return result;
+                        }
+                        return result + "{" + std::to_string(p.min_count) + "}";
+                    }
+                    return result + "{" + std::to_string(p.min_count) + "," + std::to_string(p.max_count) + "}";
+                } else if constexpr (std::is_same_v<T, common_peg_json_string_parser>) {
+                    return R"(( [^"\\] | "\\" ( ["\\/ bfnrt] | "u" [0-9a-fA-F]{4} ) )*)";
+                } else if constexpr (std::is_same_v<T, common_peg_until_parser>) {
+                    if (p.delimiters.empty()) {
+                        return ".*";
+                    }
+                    return gbnf_excluding_pattern(p.delimiters);
+                } else if constexpr (std::is_same_v<T, common_peg_schema_parser>) {
+                    if (p.schema) {
+                        if (p.raw && p.schema->contains("type") && p.schema->at("type").is_string() &&
+                            p.schema->at("type") == "string") {
+                            // TODO: Implement more comprehensive grammar generation for raw strings.
+                            // For now, use the grammar emitted from the underlying parser.
+                            return to_gbnf(p.child);
+                        }
+                        return builder.add_schema(p.name, *p.schema);
+                    }
+                    return to_gbnf(p.child);
+                } else if constexpr (std::is_same_v<T, common_peg_rule_parser>) {
+                    return p.name;
+                } else if constexpr (std::is_same_v<T, common_peg_ref_parser>) {
+                    // Refs should not exist after flattening, but kept just in case
+                    return p.name;
+                } else if constexpr (std::is_same_v<T, common_peg_tag_parser>) {
+                    return to_gbnf(p.child);
+                } else if constexpr (std::is_same_v<T, common_peg_atomic_parser>) {
+                    return to_gbnf(p.child);
+                } else {
+                    static_assert(is_always_false_v<T>);
                 }
-                return to_gbnf(p.child);
-            } else if constexpr (std::is_same_v<T, common_peg_rule_parser>) {
-                return p.name;
-            } else if constexpr (std::is_same_v<T, common_peg_ref_parser>) {
-                // Refs should not exist after flattening, but kept just in case
-                return p.name;
-            } else if constexpr (std::is_same_v<T, common_peg_tag_parser>) {
-                return to_gbnf(p.child);
-            } else if constexpr (std::is_same_v<T, common_peg_atomic_parser>) {
-                return to_gbnf(p.child);
-            } else {
-                static_assert(is_always_false_v<T>);
-            }
-        }, parser);
+            },
+            parser);
     };
 
     // Collect reachable rules
@@ -1432,80 +1667,121 @@ void common_peg_arena::build_grammar(const common_grammar_builder & builder, boo
 static nlohmann::json serialize_parser_variant(const common_peg_parser_variant & variant) {
     using json = nlohmann::json;
 
-    return std::visit([](const auto & p) -> json {
-        using T = std::decay_t<decltype(p)>;
+    return std::visit(
+        [](const auto & p) -> json {
+            using T = std::decay_t<decltype(p)>;
 
-        if constexpr (std::is_same_v<T, common_peg_epsilon_parser>) {
-            return json{{"type", "epsilon"}};
-        } else if constexpr (std::is_same_v<T, common_peg_start_parser>) {
-            return json{{"type", "start"}};
-        } else if constexpr (std::is_same_v<T, common_peg_end_parser>) {
-            return json{{"type", "end"}};
-        } else if constexpr (std::is_same_v<T, common_peg_literal_parser>) {
-            return json{{"type", "literal"}, {"literal", p.literal}};
-        } else if constexpr (std::is_same_v<T, common_peg_sequence_parser>) {
-            return json{{"type", "sequence"}, {"children", p.children}};
-        } else if constexpr (std::is_same_v<T, common_peg_choice_parser>) {
-            return json{{"type", "choice"}, {"children", p.children}};
-        } else if constexpr (std::is_same_v<T, common_peg_repetition_parser>) {
-            return json{
-                {"type", "repetition"},
-                {"child", p.child},
-                {"min_count", p.min_count},
-                {"max_count", p.max_count}
-            };
-        } else if constexpr (std::is_same_v<T, common_peg_and_parser>) {
-            return json{{"type", "and"}, {"child", p.child}};
-        } else if constexpr (std::is_same_v<T, common_peg_not_parser>) {
-            return json{{"type", "not"}, {"child", p.child}};
-        } else if constexpr (std::is_same_v<T, common_peg_any_parser>) {
-            return json{{"type", "any"}};
-        } else if constexpr (std::is_same_v<T, common_peg_space_parser>) {
-            return json{{"type", "space"}};
-        } else if constexpr (std::is_same_v<T, common_peg_chars_parser>) {
-            json ranges = json::array();
-            for (const auto & range : p.ranges) {
-                ranges.push_back({{"start", range.start}, {"end", range.end}});
+            if constexpr (std::is_same_v<T, common_peg_epsilon_parser>) {
+                return json{
+                    { "type", "epsilon" }
+                };
+            } else if constexpr (std::is_same_v<T, common_peg_start_parser>) {
+                return json{
+                    { "type", "start" }
+                };
+            } else if constexpr (std::is_same_v<T, common_peg_end_parser>) {
+                return json{
+                    { "type", "end" }
+                };
+            } else if constexpr (std::is_same_v<T, common_peg_literal_parser>) {
+                return json{
+                    { "type",    "literal" },
+                    { "literal", p.literal }
+                };
+            } else if constexpr (std::is_same_v<T, common_peg_sequence_parser>) {
+                return json{
+                    { "type",     "sequence" },
+                    { "children", p.children }
+                };
+            } else if constexpr (std::is_same_v<T, common_peg_choice_parser>) {
+                return json{
+                    { "type",     "choice"   },
+                    { "children", p.children }
+                };
+            } else if constexpr (std::is_same_v<T, common_peg_repetition_parser>) {
+                return json{
+                    { "type",      "repetition" },
+                    { "child",     p.child      },
+                    { "min_count", p.min_count  },
+                    { "max_count", p.max_count  }
+                };
+            } else if constexpr (std::is_same_v<T, common_peg_and_parser>) {
+                return json{
+                    { "type",  "and"   },
+                    { "child", p.child }
+                };
+            } else if constexpr (std::is_same_v<T, common_peg_not_parser>) {
+                return json{
+                    { "type",  "not"   },
+                    { "child", p.child }
+                };
+            } else if constexpr (std::is_same_v<T, common_peg_any_parser>) {
+                return json{
+                    { "type", "any" }
+                };
+            } else if constexpr (std::is_same_v<T, common_peg_space_parser>) {
+                return json{
+                    { "type", "space" }
+                };
+            } else if constexpr (std::is_same_v<T, common_peg_chars_parser>) {
+                json ranges = json::array();
+                for (const auto & range : p.ranges) {
+                    ranges.push_back({
+                        { "start", range.start },
+                        { "end",   range.end   }
+                    });
+                }
+                return json{
+                    { "type",      "chars"     },
+                    { "pattern",   p.pattern   },
+                    { "ranges",    ranges      },
+                    { "negated",   p.negated   },
+                    { "min_count", p.min_count },
+                    { "max_count", p.max_count }
+                };
+            } else if constexpr (std::is_same_v<T, common_peg_json_string_parser>) {
+                return json{
+                    { "type", "json_string" }
+                };
+            } else if constexpr (std::is_same_v<T, common_peg_until_parser>) {
+                return json{
+                    { "type",       "until"      },
+                    { "delimiters", p.delimiters }
+                };
+            } else if constexpr (std::is_same_v<T, common_peg_schema_parser>) {
+                return json{
+                    { "type",   "schema"                       },
+                    { "child",  p.child                        },
+                    { "name",   p.name                         },
+                    { "schema", p.schema ? *p.schema : nullptr },
+                    { "raw",    p.raw                          }
+                };
+            } else if constexpr (std::is_same_v<T, common_peg_rule_parser>) {
+                return json{
+                    { "type",    "rule"    },
+                    { "name",    p.name    },
+                    { "child",   p.child   },
+                    { "trigger", p.trigger }
+                };
+            } else if constexpr (std::is_same_v<T, common_peg_ref_parser>) {
+                return json{
+                    { "type", "ref"  },
+                    { "name", p.name }
+                };
+            } else if constexpr (std::is_same_v<T, common_peg_atomic_parser>) {
+                return json{
+                    { "type",  "atomic" },
+                    { "child", p.child  }
+                };
+            } else if constexpr (std::is_same_v<T, common_peg_tag_parser>) {
+                return json{
+                    { "type",  "tag"   },
+                    { "child", p.child },
+                    { "tag",   p.tag   }
+                };
             }
-            return json{
-                {"type", "chars"},
-                {"pattern", p.pattern},
-                {"ranges", ranges},
-                {"negated", p.negated},
-                {"min_count", p.min_count},
-                {"max_count", p.max_count}
-            };
-        } else if constexpr (std::is_same_v<T, common_peg_json_string_parser>) {
-            return json{{"type", "json_string"}};
-        } else if constexpr (std::is_same_v<T, common_peg_until_parser>) {
-            return json{{"type", "until"}, {"delimiters", p.delimiters}};
-        } else if constexpr (std::is_same_v<T, common_peg_schema_parser>) {
-            return json{
-                {"type", "schema"},
-                {"child", p.child},
-                {"name", p.name},
-                {"schema", p.schema ? *p.schema : nullptr},
-                {"raw", p.raw}
-            };
-        } else if constexpr (std::is_same_v<T, common_peg_rule_parser>) {
-            return json{
-                {"type", "rule"},
-                {"name", p.name},
-                {"child", p.child},
-                {"trigger", p.trigger}
-            };
-        } else if constexpr (std::is_same_v<T, common_peg_ref_parser>) {
-            return json{{"type", "ref"}, {"name", p.name}};
-        } else if constexpr (std::is_same_v<T, common_peg_atomic_parser>) {
-            return json{{"type", "atomic"}, {"child", p.child}};
-        } else if constexpr (std::is_same_v<T, common_peg_tag_parser>) {
-            return json{
-                {"type", "tag"},
-                {"child", p.child},
-                {"tag", p.tag}
-            };
-        }
-    }, variant);
+        },
+        variant);
 }
 
 nlohmann::json common_peg_arena::to_json() const {
@@ -1514,9 +1790,9 @@ nlohmann::json common_peg_arena::to_json() const {
         parsers.push_back(serialize_parser_variant(parser));
     }
     return nlohmann::json{
-        {"parsers", parsers},
-        {"rules", rules_},
-        {"root", root_}
+        { "parsers", parsers },
+        { "rules",   rules_  },
+        { "root",    root_   }
     };
 }
 
@@ -1540,41 +1816,38 @@ static common_peg_parser_variant deserialize_parser_variant(const nlohmann::json
         if (!j.contains("literal") || !j["literal"].is_string()) {
             throw std::runtime_error("literal parser missing or invalid 'literal' field");
         }
-        return common_peg_literal_parser{j["literal"]};
+        return common_peg_literal_parser{ j["literal"] };
     }
     if (type == "sequence") {
         if (!j.contains("children") || !j["children"].is_array()) {
             throw std::runtime_error("sequence parser missing or invalid 'children' field");
         }
-        return common_peg_sequence_parser{j["children"].get<std::vector<common_peg_parser_id>>()};
+        return common_peg_sequence_parser{ j["children"].get<std::vector<common_peg_parser_id>>() };
     }
     if (type == "choice") {
         if (!j.contains("children") || !j["children"].is_array()) {
             throw std::runtime_error("choice parser missing or invalid 'children' field");
         }
-        return common_peg_choice_parser{j["children"].get<std::vector<common_peg_parser_id>>()};
+        return common_peg_choice_parser{ j["children"].get<std::vector<common_peg_parser_id>>() };
     }
     if (type == "repetition") {
         if (!j.contains("child") || !j.contains("min_count") || !j.contains("max_count")) {
             throw std::runtime_error("repetition parser missing required fields");
         }
-        return common_peg_repetition_parser{
-            j["child"].get<common_peg_parser_id>(),
-            j["min_count"].get<int>(),
-            j["max_count"].get<int>()
-        };
+        return common_peg_repetition_parser{ j["child"].get<common_peg_parser_id>(), j["min_count"].get<int>(),
+                                             j["max_count"].get<int>() };
     }
     if (type == "and") {
         if (!j.contains("child")) {
             throw std::runtime_error("and parser missing 'child' field");
         }
-        return common_peg_and_parser{j["child"].get<common_peg_parser_id>()};
+        return common_peg_and_parser{ j["child"].get<common_peg_parser_id>() };
     }
     if (type == "not") {
         if (!j.contains("child")) {
             throw std::runtime_error("not parser missing 'child' field");
         }
-        return common_peg_not_parser{j["child"].get<common_peg_parser_id>()};
+        return common_peg_not_parser{ j["child"].get<common_peg_parser_id>() };
     }
     if (type == "any") {
         return common_peg_any_parser{};
@@ -1583,23 +1856,20 @@ static common_peg_parser_variant deserialize_parser_variant(const nlohmann::json
         return common_peg_space_parser{};
     }
     if (type == "chars") {
-        if (!j.contains("pattern") || !j.contains("ranges") || !j.contains("negated") ||
-            !j.contains("min_count") || !j.contains("max_count")) {
+        if (!j.contains("pattern") || !j.contains("ranges") || !j.contains("negated") || !j.contains("min_count") ||
+            !j.contains("max_count")) {
             throw std::runtime_error("chars parser missing required fields");
         }
         common_peg_chars_parser parser;
-        parser.pattern = j["pattern"];
-        parser.negated = j["negated"];
+        parser.pattern   = j["pattern"];
+        parser.negated   = j["negated"];
         parser.min_count = j["min_count"];
         parser.max_count = j["max_count"];
         for (const auto & range_json : j["ranges"]) {
             if (!range_json.contains("start") || !range_json.contains("end")) {
                 throw std::runtime_error("char_range missing 'start' or 'end' field");
             }
-            parser.ranges.push_back({
-                range_json["start"].get<uint32_t>(),
-                range_json["end"].get<uint32_t>()
-            });
+            parser.ranges.push_back({ range_json["start"].get<uint32_t>(), range_json["end"].get<uint32_t>() });
         }
         return parser;
     }
@@ -1610,7 +1880,7 @@ static common_peg_parser_variant deserialize_parser_variant(const nlohmann::json
         if (!j.contains("delimiters") || !j["delimiters"].is_array()) {
             throw std::runtime_error("until parser missing or invalid 'delimiters' field");
         }
-        return common_peg_until_parser{j["delimiters"].get<std::vector<std::string>>()};
+        return common_peg_until_parser{ j["delimiters"].get<std::vector<std::string>>() };
     }
     if (type == "schema") {
         if (!j.contains("child") || !j.contains("name") || !j.contains("schema") || !j.contains("raw")) {
@@ -1618,7 +1888,7 @@ static common_peg_parser_variant deserialize_parser_variant(const nlohmann::json
         }
         common_peg_schema_parser parser;
         parser.child = j["child"].get<common_peg_parser_id>();
-        parser.name = j["name"];
+        parser.name  = j["name"];
         if (!j["schema"].is_null()) {
             parser.schema = std::make_shared<nlohmann::ordered_json>(j["schema"]);
         }
@@ -1629,17 +1899,14 @@ static common_peg_parser_variant deserialize_parser_variant(const nlohmann::json
         if (!j.contains("name") || !j.contains("child") || !j.contains("trigger")) {
             throw std::runtime_error("rule parser missing required fields");
         }
-        return common_peg_rule_parser{
-            j["name"].get<std::string>(),
-            j["child"].get<common_peg_parser_id>(),
-            j["trigger"].get<bool>()
-        };
+        return common_peg_rule_parser{ j["name"].get<std::string>(), j["child"].get<common_peg_parser_id>(),
+                                       j["trigger"].get<bool>() };
     }
     if (type == "ref") {
         if (!j.contains("name") || !j["name"].is_string()) {
             throw std::runtime_error("ref parser missing or invalid 'name' field");
         }
-        return common_peg_ref_parser{j["name"]};
+        return common_peg_ref_parser{ j["name"] };
     }
     if (type == "atomic") {
         if (!j.contains("child")) {
diff --git a/common/peg-parser.h b/common/peg-parser.h
index 1cd640365f..5d08cf6d47 100644
--- a/common/peg-parser.h
+++ b/common/peg-parser.h
@@ -4,6 +4,7 @@
 
 #include <memory>
 #include <unordered_map>
+#include <unordered_set>
 #include <string>
 #include <string_view>
 #include <functional>
@@ -111,6 +112,8 @@ class common_peg_ast_arena {
 
     void visit(common_peg_ast_id id, const common_peg_ast_visitor & visitor) const;
     void visit(const common_peg_parse_result & result, const common_peg_ast_visitor & visitor) const;
+
+    std::string dump();
 };
 
 struct common_peg_parse_result {
@@ -139,6 +142,7 @@ struct common_peg_parse_result {
 struct common_peg_parse_context {
     std::string input;
     bool is_partial;
+    bool debug = false;  // Enable debug output for parser tracing
     common_peg_ast_arena ast;
 
     int parse_depth;
@@ -299,6 +303,8 @@ class common_peg_arena {
     friend class common_peg_parser_builder;
 
   private:
+    std::string dump_impl(common_peg_parser_id id, std::unordered_set<common_peg_parser_id> & visited) const;
+
     common_peg_parser_id add_parser(common_peg_parser_variant parser);
     void add_rule(const std::string & name, common_peg_parser_id id);
 
diff --git a/docs/autoparser.md b/docs/autoparser.md
new file mode 100644
index 0000000000..3c77c4d304
--- /dev/null
+++ b/docs/autoparser.md
@@ -0,0 +1,513 @@
+# Unified Auto-Parser Architecture
+
+The auto-parser automatically analyzes chat templates to determine how to parse model outputs, including content, reasoning, and tool calls.
+
+## Overview
+
+The unified auto-parser uses a two-phase incremental analysis approach:
+
+1. **Phase 1: Content & Reasoning Analysis** - Analyzes how the template handles basic content and reasoning, without considering tools
+2. **Phase 2: Tool Call Analysis** - Analyzes tool calling patterns, layered on top of Phase 1
+
+## Data Structures
+
+### content_structure (Phase 1 Result)
+
+Describes how the template handles content and reasoning:
+
+```cpp
+struct content_structure {
+    enum reasoning_mode_type {
+        REASONING_NONE,         // No reasoning markers detected
+        REASONING_OPTIONAL,     // <think>...</think> may appear before content
+        REASONING_FORCED_OPEN,  // Template ends with open reasoning tag OR starts implicitly (empty start, present end)
+    };
+
+    reasoning_mode_type reasoning_mode = REASONING_NONE;
+    std::string         reasoning_start;  // e.g., "<think>", "<|START_THINKING|>"
+    std::string         reasoning_end;    // e.g., "</think>", "<|END_THINKING|>"
+
+    // Content wrapping mode
+    enum content_mode_type {
+        CONTENT_PLAIN,                   // No content markers
+        CONTENT_ALWAYS_WRAPPED,          // <response>...</response> always present
+        CONTENT_WRAPPED_WITH_REASONING,  // Content wrapped only when reasoning present
+    };
+
+    content_mode_type content_mode = CONTENT_PLAIN;
+    std::string       content_start;  // e.g., "<response>", "<|START_RESPONSE|>"
+    std::string       content_end;    // e.g., "</response>", "<|END_RESPONSE|>"
+};
+```
+
+### tool_call_structure (Phase 2 Result)
+
+Describes how the template formats tool calls:
+
+```cpp
+struct tool_call_structure {
+    bool supports_tools = false;
+
+    // Container markers (what wraps all tool calls)
+    std::string tool_section_start;  // e.g., "<tool_call>", "[TOOL_CALLS]", "<TOOLCALL>", ""
+    std::string tool_section_end;    // e.g., "</tool_call>", "]", "</TOOLCALL>", ""
+
+    // Function format (how individual functions are structured)
+    enum function_format {
+        FUNC_JSON_OBJECT,       // {"name": "X", "arguments": {...}}
+        FUNC_TAG_WITH_NAME,     // <function=X>{...}</function>
+        FUNC_TAG_NAME_ONLY,     // <X>...</X> where X is function name (rare)
+        FUNC_PREFIXED_INDEXED,  // <|tool_call_begin|>functions.X:0<|tool_call_argument_begin|>{...}<|tool_call_end|>
+        FUNC_NAME_AS_KEY,       // [{"function_name": {...arguments...}}] (Apertus-style)
+        FUNC_BRACKET_TAG,       // [TOOL_CALLS]X[CALL_ID]id[ARGS]{...} (Mistral Small 3.2 style)
+        FUNC_RECIPIENT_BASED,   // >>>recipient\n{content} where recipient is "all" (content) or function name (tools)
+        FUNC_MARKDOWN_CODE_BLOCK,  // Action:\n```json\n[{"tool_name": "X", ...}]\n``` (Cohere Command-R Plus)
+    };
+    function_format function_format = FUNC_JSON_OBJECT;
+
+    // For FUNC_JSON_OBJECT format - field names (may vary between templates)
+    std::string name_field = "name";       // Could be "tool_name", "function"
+    std::string args_field = "arguments";  // Could be "parameters", "params", "input"
+    std::string id_field;                  // Optional: "id", "tool_call_id", ""
+
+    // For FUNC_TAG_WITH_NAME format
+    std::string function_prefix;  // e.g., "<function="
+    std::string function_suffix;  // e.g., ">"
+    std::string function_close;   // e.g., "</function>"
+
+    // For FUNC_PREFIXED_INDEXED format (e.g., Kimi-K2)
+    std::string per_call_start;      // e.g., "<|tool_call_begin|>"
+    std::string function_namespace;  // e.g., "functions." (prefix before function name)
+    std::string args_marker;         // e.g., "<|tool_call_argument_begin|>"
+    std::string per_call_end;        // e.g., "<|tool_call_end|>"
+
+    // For FUNC_BRACKET_TAG format (e.g., Mistral Small 3.2)
+    std::string id_marker;  // e.g., "[CALL_ID]" - marker before tool call ID
+
+    // For FUNC_MARKDOWN_CODE_BLOCK format (Cohere Command-R Plus)
+    std::string code_block_marker;    // e.g., "Action:" - text marker before code block
+    std::string code_block_language;  // e.g., "json" - language identifier in code fence
+
+    // Argument format (how arguments are structured within a function)
+    enum argument_format {
+        ARGS_JSON,            // Standard JSON object: {"key": "value", ...}
+        ARGS_TAGGED,          // XML-style: <param=key>value</param>
+        ARGS_KEY_VALUE_TAGS,  // <arg_key>key</arg_key><arg_value>value</arg_value> (GLM-4.6)
+    };
+    argument_format argument_format = ARGS_JSON;
+
+    // For ARGS_TAGGED format
+    std::string arg_prefix;     // e.g., "<param=", "<parameter="
+    std::string arg_suffix;     // e.g., ">"
+    std::string arg_close;      // e.g., "</param>", "</parameter>"
+    std::string arg_separator;  // e.g., "", "\n"
+
+    // Flag: template renders null content as "None" string, requires empty string instead
+    bool requires_nonnull_content = false;
+};
+```
+
+## Analysis Flow
+
+```console
+Template
+    |
+    v
+Phase 1: analyze_content_structure()
+    |-- detect_reasoning_markers() - compare outputs with reasoning_content vs without
+    |-- detect_content_markers() - render with content and detect wrapping
+    |-- detect_reasoning_mode() - check if prompt ends with open tag
+    |
+    v
+content_structure
+    |
+    v
+Phase 2: analyze_tool_structure()
+    |-- Check minja.supports_tool_calls
+    |-- Differential analysis for tool patterns
+    |-- Classify function format (JSON vs tagged)
+    |-- Classify argument format (JSON vs tagged)
+    |
+    v
+tool_call_structure
+    |
+    v
+generate_parser(content_structure, tool_call_structure)
+    |-- build_reasoning_block(content_structure)
+    |-- build_content_block(content_structure)
+    |-- build_tool_section(tool_call_structure, tools)
+    |-- Compose into final parser
+    |
+    v
+common_chat_params (parser, grammar, triggers, preserved_tokens)
+```
+
+## Entry Point
+
+The mechanism starts in `common/chat.cpp`, in `common_chat_templates_apply_jinja`:
+
+```cpp
+// 1. Analyze the template (two-phase)
+template_analysis_result analysis = template_analyzer::analyze_template(tmpl);
+
+// 2. Generate the parser and grammar
+auto auto_params = universal_peg_generator::generate_parser(analysis, tmpl, params);
+
+// 3. Use if it provides more than basic content handling
+if (auto_params.format != COMMON_CHAT_FORMAT_CONTENT_ONLY ||
+    auto_params.thinking_forced_open ||
+    !auto_params.parser.empty()) {
+    return auto_params;
+}
+```
+
+## Builder Methods
+
+The unified builder (`common_chat_peg_unified_builder`) provides high-level methods:
+
+- `build_reasoning_block(cs, reasoning_format, thinking_forced_open)` - Build reasoning parser
+- `build_content_block(cs, reasoning_format)` - Build content parser
+- `build_tool_section(ts, tools, parallel_tool_calls, force_tool_calls)` - Build tool section
+- `build_function(ts, name, schema)` - Build single function parser
+- `build_arguments(ts, schema)` - Build arguments parser
+
+## Key Templates Supported
+
+- **Granite** - `<think></think>` + `<response></response>` with tool calls
+- **Nemotron** - JSON tools with `<TOOLCALL>` wrapper
+- **Qwen/Hermes** - XML-style `<function=X><param=key>` format
+- **Command-R7B** - `<|START_THINKING|>`/`<|START_RESPONSE|>` + `<|START_ACTION|>` tools
+- **DeepSeek R1** - Forced thinking + complex tools
+- **Mistral Nemo** - `[TOOL_CALLS]` wrapper
+- **MiniMax** - `<minimax:tool_call>` wrapper with XML tools
+- **GLM-4.6** - `<minimax:tool_call>` + `<tool_call>name\n<arg_key>...<arg_value>...` format
+- **Kimi-K2** - `FUNC_PREFIXED_INDEXED` format with namespace and indices
+- **Mistral Small 3.2** - `FUNC_BRACKET_TAG` format with `[TOOL_CALLS]` markers
+- **Functionary v3.2** - `FUNC_RECIPIENT_BASED` format with `>>>` routing
+
+## Files
+
+| File | Purpose |
+|------|---------|
+| `common/chat-auto-parser.h` | Data structures and API declarations |
+| `common/chat-auto-parser-analyzer.cpp` | Phase 1 and Phase 2 analysis implementation |
+| `common/chat-auto-parser-generator.cpp` | PEG parser generator |
+| `common/chat-auto-parser-helpers.h/cpp` | Shared helper functions |
+| `common/chat-peg-parser.h/cpp` | Unified builder and mapper classes |
+| `common/chat.cpp` | Main entry point and wire-up |
+
+## Algorithm Details
+
+### Phase 1: Content & Reasoning Analysis
+
+#### Reasoning Detection (4 Methods)
+
+**Method 1: Differential Reasoning Content Analysis**
+
+- Render template with `reasoning_content` field present vs absent
+- Compare outputs to find markers between `THOUGHT_MARKER` and `CONTENT_MARKER`
+- If only closing tag found, derive opening tag using patterns:
+  - XML: `</tag>` → `<tag>`
+  - Special tokens: `<|END_X|>` → `<|START_X|>`, `<|/X|>` → `<|X|>`
+- Handles various tag formats including XML and special token formats
+
+**Method 2: Enable-Thinking Toggle Analysis**
+
+- Toggle `enable_thinking` context variable between true/false
+- Detects differences in generated prompts
+- Handles two scenarios:
+  - **Normal case**: enable_thinking=true adds reasoning markers
+  - **Reverse case**: enable_thinking=false adds empty thinking block (GLM-4.6 style)
+- Uses string difference analysis to extract markers
+- Validates extracted tags against blacklist of role markers
+
+**Method 3: Prompt Ending Analysis**
+
+- Checks if prompt ends with unclosed reasoning tag
+- Looks for trailing tags in prompt with `enable_thinking=true`
+- Differentiates between open tags (`<think>`) and close tags (`</think>`)
+- Handles blacklisted tags (role markers, system tokens)
+- Validates reasoning-like patterns (contains "think", "reason", "thought")
+
+**Method 4: Adjacent Tag Pair Detection**
+
+- Looks for patterns like `<minimax:tool_call></think>`, `<|START_THINKING|><|END_THINKING|>`, `[think][/think]`
+- Searches for predefined tag patterns in prompt
+- Validates tags are adjacent with only whitespace between
+- Supports both simple and complex token formats
+
+#### Content Detection Algorithm
+
+1. **Dual-Mode Rendering**: Render template with content marker in both thinking-enabled and thinking-disabled modes
+2. **Pattern Matching**: Search for known content wrapper patterns:
+   - `<|START_RESPONSE|>` / `<|END_RESPONSE|>`
+   - `<response>` / `</response>`
+   - `<output>` / `</output>`
+   - `<answer>` / `</answer>`
+   - `<|CHATBOT_TOKEN|>` / `<|END_OF_TURN_TOKEN|>`
+3. **Mode Classification**:
+   - `CONTENT_ALWAYS_WRAPPED`: Found in both thinking modes
+   - `CONTENT_WRAPPED_WITH_REASONING`: Found only with thinking enabled
+   - `CONTENT_PLAIN`: No wrapping detected
+
+#### Reasoning Mode Detection
+
+- **REASONING_FORCED_OPEN**:
+  - **Explicit**: Prompt ends with reasoning start marker (e.g., `<think>`).
+  - **Implicit**: reasoning end marker is present but start marker is empty (e.g., `[BEGIN FINAL RESPONSE]`).
+- **REASONING_OPTIONAL**: Markers present but not forced.
+- **REASONING_NONE**: No markers detected.
+
+### Phase 2: Tool Call Structure Analysis
+
+#### Differential Analysis Algorithm
+
+**Test Payload Strategy**:
+
+1. **Base**: User + Assistant with content only (no tools)
+2. **Tool 1**: User + Assistant with tool_calls (empty args)
+3. **Tool 2**: User + Assistant with tool_calls (with args)
+4. **Tool 3**: User + Assistant with multiple tool calls
+
+**Pattern Extraction Process**:
+
+1. Compute string differences between base and tool outputs
+2. Use `test_function_name` as reliable search anchor (using `rfind` for last occurrence)
+3. Extract structural elements:
+   - `tool_call_opener`: Common prefix before function name
+   - `tool_call_closer`: Common suffix after function calls
+   - `function_opener`: Tag immediately before function name
+   - `function_closer`: Tag after function content
+   - `parameter_key_prefix/suffix`: Argument wrapping patterns
+
+#### Format Classification Logic
+
+**FORMAT_JSON_NATIVE**:
+
+- Detected by `{"name":` pattern in `tool_call_opener`
+- Or XML markers with JSON structure
+
+**FORMAT_XML_CONSTRUCTED**:
+
+- `function_opener` starts with `<`
+- No substantial parameter markers
+
+**FORMAT_RECIPIENT_BASED**:
+
+- `tool_call_start_marker == function_opener`
+- No parameter markers
+- Opener doesn't start with structural chars
+
+**FORMAT_BRACKET_TAG**:
+
+- `function_name_suffix` contains bracket tags like `[CALL_ID]...[ARGS]`
+- `tool_call_start_marker` matches `[TOOL_CALLS]` pattern
+
+**FORMAT_PREFIXED_INDEXED**:
+
+- `function_opener` ends with `.` (namespace separator)
+- `function_name_suffix` starts with `:` followed by digit
+- Example: `functions.name:0<|tool_call_argument_begin|>`
+
+#### Specialized Format Handling
+
+**FUNC_PREFIXED_INDEXED (Kimi-K2)**:
+
+- Splits `function_opener` at last `>` to get `per_call_start` + `function_namespace`
+- Extracts `args_marker` from `function_name_suffix`
+- Derives `per_call_end` by matching structural patterns in `tool_call_closer`
+
+**FUNC_TAG_WITH_NAME (Functionary/Nemotron)**:
+
+- Detects nested vs non-nested formats
+- Uses overlap detection between `tool_section_start` and `function_prefix`
+- Handles double-wrapping prevention
+
+**ARGS_KEY_VALUE_TAGS (GLM-4.6)**:
+
+- Detects `<arg_key>key</arg_key><arg_value>value</arg_value>` pattern
+- Cleans up suffix to extract just the key closer
+
+**FUNC_RECIPIENT_BASED (Functionary v3.2)**:
+
+- Detects `>>>` recipient delimiter format
+- Routes to "all" for content, function name for tools
+- Uses same delimiter for both content and tool routing
+
+**FUNC_BRACKET_TAG (Mistral Small 3.2/Devstral)**:
+
+- Detects `[TOOL_CALLS]function_name[ARGS]{...}` pattern
+- Optional `[CALL_ID]id` marker for tool call identification
+- No section wrapper - each call starts independently
+
+### Generator Algorithms
+
+#### Unified Parser Building
+
+**Composition Strategy**:
+
+```cpp
+// Standard format
+sequence({ reasoning, space(), content, space(), tools, space(), content, end() })
+
+// With section markers
+sequence({ reasoning, space(), content_until(section_start), space(), tools, space(), content, end() })
+
+// Forced thinking handling
+optional(reasoning) when thinking_forced_open && tools present
+```
+
+**Trigger Word Detection**:
+
+- Uses `tool_section_start` as primary trigger
+- Falls back to `function_prefix` or `per_call_start`
+- Raw JSON uses regex pattern trigger
+
+**Lazy Grammar Optimization**:
+
+- Enabled by default for performance
+- Disabled when thinking forced open
+- Disabled when no clear trigger word exists
+
+## Testing & Debugging
+
+### Comprehensive Test Coverage
+
+The test suite covers:
+
+**Reasoning Models**:
+
+- Qwen-QwQ-32B (forced-open thinking)
+- DeepSeek R1 variants (reasoning only)
+- IBM Granite (reasoning + tools)
+- ByteDance Seed-OSS (custom reasoning tags)
+- Ministral-3-14B-Reasoning
+- llama-cpp-deepseek-r1
+
+**Tool Call Formats**:
+
+- JSON: Llama 3.x, Mistral Nemo, Hermes, MiMo-VL
+- XML: Nemotron, Qwen3-Coder, MiniMax
+- Tagged: GLM-4.6 (key-value tags)
+- Bracket-tag: Mistral Small 3.2, Devstral
+- Prefixed-indexed: Kimi-K2 variants
+- Name-as-key: Apertus-8B
+- Recipient-based: Functionary v3.2
+
+**Edge Cases**:
+
+- Streaming/partial parsing
+- Empty content with tools
+- Parallel tool calls
+- Forced thinking mode
+- Multi-byte Unicode markers
+- Null content handling
+- Multi-line code in tool arguments
+- Custom reasoning tags (ByteDance Seed-OSS)
+
+### Debug Tools
+
+**Template Debugger**: `tests/debug-template-parser.cpp`
+
+- Usage: `./bin/debug-template-parser path/to/template.jinja`
+- Shows detected format, markers, generated parser, and GBNF grammar
+
+**Debug Logging**: Enable with `LLAMA_LOG_VERBOSITY=2`
+
+- Shows detailed analysis steps
+- Displays pattern extraction results
+- Lists generated parser structure
+
+**PEG Test Builder**: Fluent API for creating test cases
+
+```cpp
+auto tst = peg_tester("template.jinja");
+tst.test("input")
+   .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+   .tools({tool})
+   .expect(expected_message)
+   .run();
+```
+
+## Adding Support for New Templates
+
+To support a new template format:
+
+1. **If it follows standard patterns** - The auto-parser should detect it automatically
+2. **If it has unique markers** - Add the markers to the detection patterns in:
+   - `detect_reasoning_markers()` for reasoning tags
+   - `detect_content_markers()` for content wrappers
+   - `extract_patterns_from_differences()` for tool call patterns
+3. **If it needs special handling** - Add a dedicated handler in `chat.cpp` before the auto-parser block
+
+## Edge Cases and Quirks
+
+1. **Forced Thinking**: If `enable_thinking` is true but the model has already started a thought block (e.g., ended the prompt with `<think>`), the parser enters "forced thinking" mode where it immediately expects reasoning content.
+2. **Ambiguous Content**: Templates that mix content and tool calls without clear delimiters can be tricky. The analyzer tries to find "common" start/end patterns across multiple examples to be robust.
+3. **Double Wrapping**: Some templates (e.g., Functionary) use the same string for both the tool section start and the function prefix (e.g., `<function=`). The analyzer detects this overlap and prevents double-wrapping in the generated parser.
+4. **Null Content Rendering**: Some templates render `null` content as Python "None" string. The analyzer detects this and patches content to empty string.
+5. **Multi-byte Unicode Markers**: Some templates use special Unicode characters in markers that require careful handling in GBNF generation.
+
+## State of the Autoparser (Jan 2026)
+
+As of January 2026, the unified auto-parser successfully handles major template families including DeepSeek V3/R1, Llama 3.x (native JSON), GLM-4/4.6, and standard XML/JSON formats. It also supports Functionary v3.1/v3.2, Mistral variants, and specialized formats like Kimi-K2's prefixed-indexed structure.
+
+### Tested Templates
+
+The following templates have active tests in `tests/test-chat.cpp`:
+
+| Template | Format | Notes |
+|----------|--------|-------|
+| DeepSeek V3.1 | `FUNC_JSON_OBJECT` | Forced thinking mode |
+| DeepSeek R1 Distill (Llama/Qwen) | Reasoning only | Forced-open thinking |
+| llama-cpp-deepseek-r1 | Reasoning only | Forced-open thinking |
+| GLM-4.6 | `ARGS_KEY_VALUE_TAGS` | `<tool_call>name\n<arg_key>...<arg_value>...` format |
+| Kimi-K2 / Kimi-K2-Instruct / Kimi-K2-Thinking | `FUNC_PREFIXED_INDEXED` | `functions.name:0` with special markers |
+| Apertus-8B-Instruct | `FUNC_NAME_AS_KEY` | `{"function_name": {...}}` format |
+| MiniMax-M2 | `FUNC_TAG_WITH_NAME` | XML invoke with parameter tags |
+| NVIDIA-Nemotron-Nano-v2 | `FUNC_JSON_OBJECT` | `<TOOLCALL>` wrapper (nested) |
+| Mistral-Nemo-Instruct-2407 | `FUNC_JSON_OBJECT` | `[TOOL_CALLS]` wrapper with id field |
+| Functionary v3.1 | `FUNC_TAG_WITH_NAME` | `<function=X>` non-nested format |
+| Functionary v3.2 | `FUNC_RECIPIENT_BASED` | `>>>` recipient delimiter format |
+| MiMo-VL / Hermes 3 / Qwen 2.5 | `FUNC_JSON_OBJECT` | `<tool_call>` wrapper |
+| Apriel 1.5 | `FUNC_JSON_OBJECT` | `<tool_calls>` wrapper with JSON array |
+| Apriel 1.6 Thinker | Reasoning only | Implicit reasoning start |
+| Cohere Command-R7B | `FUNC_JSON_OBJECT` | `START_RESPONSE/ACTION/THINKING` markers |
+| Mistral Small 3.2 | `FUNC_BRACKET_TAG` | `[TOOL_CALLS]func[ARGS]{...}` with ID |
+| Devstral | `FUNC_BRACKET_TAG` | `[TOOL_CALLS]func[ARGS]{...}` without ID |
+| Ministral-3-14B-Reasoning | Custom reasoning | `[THINK]...[/THINK]` tags |
+| IBM Granite | `FUNC_JSON_OBJECT` | `<think></think>` + `<response></response>` |
+| ByteDance Seed-OSS | `FUNC_TAG_WITH_NAME` | Custom `<seed:think>` and `<seed:tool_call>` tags |
+| Qwen3-Coder | `FUNC_TAG_WITH_NAME` | XML-style tool format |
+| Cohere Command-R Plus | `FUNC_MARKDOWN_CODE_BLOCK` | `Action:\n\`\`\`json\n[...]\n\`\`\`` format |
+
+### Currently Unsupported Templates
+
+| Template Family | Model / Variant | Issue Description |
+|-----------------|-----------------|-------------------|
+| **OpenAI** | `GPT-OSS` | Complex channel markers need new format |
+
+### Templates Without Tool Support
+
+Some templates genuinely don't support tool calls (this is not a detection bug):
+
+- **Phi 3.5 Mini** - The official template has no tool handling. Use Phi-4-mini-instruct for function calling, or community fine-tuned versions.
+- **Google Gemma 2 2B** - Pure instruction-following model without tool capabilities.
+
+### TODO / Roadmap
+
+- [ ] **Fix OpenAI GPT-OSS**: Add `FUNC_CHANNEL_BASED` format for channel marker structure.
+- [x] **~~Fix Cohere Command-R Plus~~**: Added `FUNC_MARKDOWN_CODE_BLOCK` format for `Action:\n\`\`\`json` structure.
+
+### Recent Additions (Dec 2025 - Jan 2026)
+
+- **FUNC_RECIPIENT_BASED**: Support for Functionary v3.2's `>>>` recipient delimiter format
+- **FUNC_BRACKET_TAG**: Support for Mistral Small 3.2 and Devstral's `[TOOL_CALLS]...` format
+- **Enhanced Content Detection**: Better handling of custom reasoning tags and content wrappers
+- **Improved Streaming Support**: Better handling of partial parsing for all supported formats
+- **Custom Tag Support**: Support for non-standard reasoning tags like `<seed:think>` (ByteDance)
+- **Multi-line Tool Arguments**: Better parsing of complex tool arguments with code blocks
+- **FUNC_MARKDOWN_CODE_BLOCK**: Support for Cohere Command-R Plus markdown code block format
+- **Implicit Reasoning Support**: Support for templates where reasoning starts implicitly without a start marker.
+
+The auto-parser now successfully handles 25+ different template formats across reasoning-only, tool-calling, and hybrid models, with comprehensive test coverage ensuring robust parsing across streaming and non-streaming scenarios.
diff --git a/docs/development/parsing.md b/docs/development/parsing.md
index dbb989bf08..e627ea6502 100644
--- a/docs/development/parsing.md
+++ b/docs/development/parsing.md
@@ -22,7 +22,7 @@ Below is a contrived example demonstrating how to use the PEG parser to parse
 output from a model that emits arguments as JSON.
 
 ```cpp
-auto parser = build_chat_peg_native_parser([&](common_chat_peg_native_builder & p) {
+auto parser = build_chat_peg_unified_parser([&](common_chat_peg_unified_builder & p) {
     // Build a choice of all available tools
     auto tool_choice = p.choice();
     for (const auto & tool : tools) {
@@ -212,7 +212,7 @@ mapper.from_ast(ctx.ast, result);
 
 ### Native
 
-The `common_chat_peg_native_builder` builds a `native` parser suitable for
+The `common_chat_peg_unified_builder` builds a `native` parser suitable for
 models that emit tool arguments as a direct JSON object.
 
 - **`reasoning(p)`** - Tag node for `reasoning_content`
@@ -225,7 +225,7 @@ models that emit tool arguments as a direct JSON object.
 - **`tool_args(p)`** - Tag the tool arguments
 
 ```cpp
-build_chat_peg_native_parser([&](common_chat_peg_native_parser & p) {
+build_chat_peg_unified_parser([&](common_chat_peg_unified_builder & p) {
     auto get_weather_tool = p.tool(p.sequence({
         p.tool_open(p.literal("{")),
         p.json_member("name", "\"" + p.tool_name(p.literal("get_weather")) + "\""),
@@ -246,7 +246,7 @@ build_chat_peg_native_parser([&](common_chat_peg_native_parser & p) {
 
 ### Constructed
 
-The `common_chat_peg_constructed_builder` builds a `constructed` parser
+The `common_chat_peg_unified_builder` builds a `constructed` parser
 suitable for models that emit tool arguments as separate entities, such as XML
 tags.
 
@@ -264,7 +264,7 @@ tags.
 - **`tool_arg_json_value(p)`** - Tag JSON value for the argument
 
 ```cpp
-build_chat_peg_constructed_parser([&](common_chat_peg_constructed_builder & p) {
+build_chat_peg_unified_parser([&](common_chat_peg_unified_builder & p) {
     auto location_arg = p.tool_arg(
         p.tool_arg_open("<parameter name=\"" + p.tool_arg_name(p.literal("location")) + "\">"),
         p.tool_arg_string_value(p.until("</parameter>")),
diff --git a/models/templates/Apriel-1.6-15b-Thinker-fixed.jinja b/models/templates/Apriel-1.6-15b-Thinker-fixed.jinja
new file mode 100755
index 0000000000..9df29255b7
--- /dev/null
+++ b/models/templates/Apriel-1.6-15b-Thinker-fixed.jinja
@@ -0,0 +1,173 @@
+{# ---------------------------------------------------------------------- #}
+{# ƛƬ Default setup and flags                                             #}
+{# ---------------------------------------------------------------------- #}
+{# FIX: Use "is defined" check BEFORE accessing the variable              #}
+{%- set messages = messages if (messages is defined and messages) else [] -%}
+{%- set tools = tools if (tools is defined and tools) else [] -%}
+{%- set add_generation_prompt = add_generation_prompt if (add_generation_prompt is defined) else false -%}
+{%- set available_tool_string = '' -%}
+{%- set add_tool_id = true -%}
+{%- set add_thoughts = true -%}            {# whether to include <thinking> reasoning blocks #}
+{%- set add_generation_prompt = true -%}      {# whether to emit reasoning starter before assistant response #}
+{# Optional token placeholders (safe defaults) #}
+{%- set bos_token = bos_token if (bos_token is defined) else '' -%}
+{%- set eos_token = eos_token if (eos_token is defined) else '' -%}
+{# ---------------------------------------------------------------------- #}
+{# Core reasoning prompt and assistant reasoning prefix                 #}
+{# ---------------------------------------------------------------------- #}
+{%- set reasoning_prompt -%}
+    You are a thoughtful, systematic AI assistant from ServiceNow Language Models (SLAM) lab.
+    Analyze each question carefully, present your reasoning step-by-step, then provide the final
+    response after the marker [BEGIN FINAL RESPONSE].
+{%- endset -%}
+{%- set reasoning_asst_turn_start = 'Here are my reasoning steps:\n' -%}
+{# ---------------------------------------------------------------------- #}
+{# Tool list and tool call output format                                  #}
+{# ---------------------------------------------------------------------- #}
+{%- if tools|length > 0 -%}
+    {%- set available_tool_string -%}
+        You are provided with function signatures within <available_tools></available_tools> XML tags.
+        You may call one or more functions to assist with the user query.
+        Don't make assumptions about the arguments. You should infer the argument values from previous
+        user responses and the system message.
+        Here are the available tools: 
+        <available_tools>
+        {% for tool in tools %}{{ tool|string }}{% endfor %}
+        
+        </available_tools>.
+
+        Return all function calls as a list of JSON objects within <tool_calls></tool_calls> XML tags.
+        Each JSON object should contain a function name and arguments as follows:
+        <tool_calls>[
+            {"name": <function-name-1>, "arguments": <args-dict-1>},
+            {"name": <function-name-2>, "arguments": <args-dict-2>},
+            ...
+        ]</tool_calls>
+    {%- endset -%}
+{%- endif -%}
+{# ---------------------------------------------------------------------- #}
+{# Start system block if first message is not system                      #}
+{# ---------------------------------------------------------------------- #}
+{%- if messages|length > 0 and messages[0]['role'] != 'system' -%}
+    {%- if tools|length > 0 -%}
+        {{ bos_token + '<|begin_system|>\n' + reasoning_prompt + '\n' + available_tool_string + '\n' }}
+    {%- else -%}
+        {{ bos_token + '<|begin_system|>\n' + reasoning_prompt + '\n' }}
+    {%- endif -%}
+{%- endif -%}
+{# ---------------------------------------------------------------------- #}
+{# Iterate through messages                                             #}
+{# ---------------------------------------------------------------------- #}
+{%- for message in messages -%}
+
+    {# ---------------- USER MESSAGE ---------------- #}
+    {%- if message['role'] == 'user' -%}
+        {{ '<|begin_user|>\n' }}
+        {%- if message['content'] is not string -%}
+            {%- for chunk in message['content'] -%}
+                {%- if chunk['type'] == 'text' -%}
+                    {{ chunk['text'] }}
+                {%- elif chunk['type'] in ['image', 'image_url'] -%}
+                    {{ '[IMG]' }}
+                {%- else -%}
+                    {{ raise_exception('Unrecognized content type!') }}
+                {%- endif -%}
+            {%- endfor -%}
+        {%- else -%}
+            {{ message['content'] }}
+        {%- endif -%}
+
+    {# ---------------- SYSTEM MESSAGE ---------------- #}
+    {%- elif message['role'] == 'system' -%}
+        {%- set sys_content = message.get('content', '') -%}
+        {%- if sys_content and sys_content|length > 0 -%}
+            {%- if sys_content is string -%}
+                {%- set system_message = sys_content -%}
+            {%- else -%}
+                {%- set system_message = sys_content[0]['text'] -%}
+            {%- endif -%}
+        {%- else -%}
+            {%- set system_message = '' -%}
+        {%- endif -%}
+
+        {%- if tools|length > 0 -%}
+            {{ bos_token + '<|begin_system|>\n' + reasoning_prompt + '\n' + system_message + '\n' + available_tool_string + '\n' }}
+        {%- else -%}
+            {{ bos_token + '<|begin_system|>\n' + reasoning_prompt + '\n' + system_message + '\n' }}
+        {%- endif -%}
+
+    {# ---------------- ASSISTANT MESSAGE ---------------- #}
+    {%- elif message['role'] == 'assistant' -%}
+        {%- if loop.last -%}
+            {%- set add_tool_id = false -%}
+        {%- endif -%}
+
+        {{ '\n<|begin_assistant|>\n' }}
+
+        {%- if add_thoughts and message.get('reasoning_content') and loop.last -%}
+            {{ message['reasoning_content'] + '\n[BEGIN FINAL RESPONSE]\n' }}
+        {%- endif -%}
+
+        {%- set asst_content = message.get('content', '') -%}
+        {%- if asst_content and asst_content|length > 0 -%}
+            {%- if asst_content is not string -%}
+                {%- set asst_text = asst_content[0]['text'] -%}
+            {%- else -%}
+                {%- set asst_text = asst_content -%}
+            {%- endif -%}
+            {# For historical turns (not the last), strip reasoning and keep only final response #}
+            {%- if not loop.last and '[BEGIN FINAL RESPONSE]' in asst_text -%}
+                {{- asst_text.split('[BEGIN FINAL RESPONSE]')[-1] | trim -}}
+            {%- else -%}
+                {{- asst_text -}}
+            {%- endif -%}
+        {%- elif message.get('chosen') and message['chosen']|length > 0 -%}
+            {{ message['chosen'][0] }}
+        {%- endif -%}
+
+        {# Tool call output #}
+        {%- set tool_calls = message.get('tool_calls', []) -%}
+        {%- if tool_calls and tool_calls|length > 0 -%}
+            {{ '\n<tool_calls>[' }}
+            {%- for tool_call in tool_calls -%}
+                {{ '{"name": "' + tool_call['function']['name'] + '", "arguments": ' + tool_call['function']['arguments']|string }}
+                {%- if add_tool_id == true and 'id' in tool_call -%}
+                    {{ ', "id": "' + tool_call['id'] + '"' }}
+                {%- endif -%}
+                {{ '}' }}
+                {%- if not loop.last -%}{{ ', ' }}{%- endif -%}
+            {%- endfor -%}
+            {{ ']</tool_calls>' }}
+        {%- endif -%}
+
+        {%- set training_prompt = training_prompt if (training_prompt is defined) else false -%}
+        {%- if not loop.last or training_prompt -%}
+            {{ '\n<|end|>\n' }}
+        {%- endif -%}
+
+    {# ---------------- TOOL RESULT MESSAGE ---------------- #}
+    {%- elif message['role'] == 'tool' -%}
+        {%- set tool_content = message.get('content', '') -%}
+        {%- if tool_content is string -%}
+            {%- set tool_message = tool_content -%}
+        {%- else -%}
+            {%- set tool_message = tool_content[0]['text'] if tool_content else '' -%}
+        {%- endif -%}
+        {{ '<|begin_tool_result|>\n' + tool_message|string + '\n' }}
+
+    {# ---------------- CONTENT MESSAGE ---------------- #}
+    {%- elif message['role'] == 'content' -%}
+        {%- set msg_content = message.get('content', '') -%}
+        {%- if msg_content is not string -%}
+            {{ '<|begin_content|>\n' + msg_content[0]['text'] + '\n' }}
+        {%- else -%}
+            {{ '<|begin_content|>\n' + msg_content + '\n' }}
+        {%- endif -%}
+    {%- endif -%}
+
+    {# ---------------- REASONING PROMPT BEFORE NEXT ASSISTANT ---------------- #}
+    {%- if loop.last and add_generation_prompt and message['role'] != 'assistant' -%}
+        {{ '\n<|begin_assistant|>\n' + reasoning_asst_turn_start }}
+    {%- endif -%} 
+
+{%- endfor -%}
diff --git a/models/templates/Bielik-11B-v3.0-Instruct.jinja b/models/templates/Bielik-11B-v3.0-Instruct.jinja
new file mode 100644
index 0000000000..40ef50076e
--- /dev/null
+++ b/models/templates/Bielik-11B-v3.0-Instruct.jinja
@@ -0,0 +1,77 @@
+{{ bos_token }}
+{%- if messages[0]['role'] == 'system' %}
+    {%- set system_message = messages[0]['content'] %}
+    {%- set loop_start_index = 1 %}
+{%- else %}
+    {%- set system_message = "" %}
+    {%- set loop_start_index = 0 %}
+{%- endif %}
+
+{%- if system_message or tools %}
+    {{- '<|im_start|>system\n' }}
+    {%- if system_message %}
+        {{- system_message }}
+    {%- endif %}
+    {%- if tools %}
+        {{- '\n\nMasz dostęp do następujących narzędzi. Definicje narzędzi znajdują się poniżej wewnątrz znaczników <|function_list|>:\n<|function_list|>\n' }}
+        {{- '[' }}
+        {%- for tool in tools %}
+            {{- tool | tojson }}
+            {%- if not loop.last %}
+                {{- ',\n' }}
+            {%- endif %}
+        {%- endfor %}
+        {{- ']\n<|function_list|>\n\nAby wywołać narzędzie, użyj formatu <tool_call>{"name": "nazwa_narzędzia", "arguments": {"argument": "wartość"}}</tool_call>. Wyniki działania narzędzi zostaną przekazane z markerem <|function_output|>.\n' }}
+    {%- endif %}
+    {%- if enable_thinking %}
+        {{- '\n\nZanim odpowiesz na pytanie, najpierw przemyśl swoje kroki i umieść swoje myśli wewnątrz tagów <think>...</think>. Musisz najpierw pomyśleć, zanim udzielisz odpowiedzi. ' -}}
+        {{- 'WAŻNE: Powinieneś myśleć w tym samym języku, co pytanie użytkownika. Jeśli pytanie jest zadane po polsku, powinieneś również myśleć po polsku. Jeśli pytanie jest po angielsku, myślisz również po angielsku itd. ' }}
+        {{- '** PAMIĘTAJ! ** Pytanie po polsku -> myślenie po polsku -> odpowiedź po polsku!' -}}
+        {{- '*** BARDZO WAŻNE!!! *** Jesteś Bielikiem, polskim modelem językowym. Twoją główną cechą jest umiejętność pisania po polsku. Jeśli użytkownik zadaje Ci pytania po polsku, ZAWSZE odpowiadaj po polsku. ' -}}
+        {{- 'Nawet, jeśli korzystasz z narzędzia, którego większość instrukcji jest po angielsku, powinieneś przede wszystkim odpowiadać po polsku, jeśli użytkownik zadaje pytanie w tym języku. ' -}}
+    {%- endif %}
+    {{- '<|im_end|>\n' }}
+{%- endif %}
+
+{%- for message in messages[loop_start_index:] %}
+    {%- if message['role'] == 'user' %}
+        {{- '<|im_start|>user\n' + message['content'] + '<|im_end|>\n' }}
+    {%- elif message['role'] == 'assistant' %}
+        {{- '<|im_start|>assistant\n' }}
+        {%- set content = message.content | default('') %}
+        {%- set reasoning_content = message.reasoning_content | default('') %}
+        {%- if not reasoning_content and '<think>' in content and '</think>' in content %}
+            {%- set reasoning_parts = content.split('</think>') %}
+            {%- set reasoning_content = reasoning_parts[0].split('<think>')[-1] %}
+            {%- set content = reasoning_parts[1:] | join('</think>') %}
+        {%- endif %}
+        {%- if reasoning_content %}
+            {{- '<think>\n' + reasoning_content.strip() + '\n</think>\n' }}
+        {%- endif %}
+        {{- content.lstrip() }}
+        {%- if message.tool_calls %}
+            {%- for tool_call in message.tool_calls %}
+                {%- if tool_call.function %}
+                    {%- set tool_call = tool_call.function %}
+                {%- endif %}
+                {{- '\n<tool_call>\n{"name": "' + tool_call.name + '", "arguments": ' + (tool_call.arguments if tool_call.arguments is string else tool_call.arguments | tojson) + '}\n</tool_call>' }}
+            {%- endfor %}
+        {%- endif %}
+        {{- '<|im_end|>\n' }}
+    {%- elif message['role'] == 'tool' %}
+        {%- if loop.index0 == 0 or messages[loop.index0 - 1]['role'] != 'tool' %}
+            {{- '<|im_start|>user\n' }}
+        {%- endif %}
+        {{- '<|function_output|>' + message['content'] }}
+        {%- if loop.last or messages[loop.index0 + 1]['role'] != 'tool' %}
+            {{- '<|im_end|>\n' }}
+        {%- endif %}
+    {%- endif %}
+{%- endfor %}
+
+{%- if add_generation_prompt %}
+    {{- '<|im_start|>assistant\n' }}
+    {%- if enable_thinking %}
+        {{- '<think>\n' }}
+    {%- endif %}
+{%- endif %}
\ No newline at end of file
diff --git a/models/templates/GLM-4.7-Flash.jinja b/models/templates/GLM-4.7-Flash.jinja
new file mode 100644
index 0000000000..2ab98ef068
--- /dev/null
+++ b/models/templates/GLM-4.7-Flash.jinja
@@ -0,0 +1,86 @@
+[gMASK]<sop>
+{%- if tools -%}
+<|system|>
+# Tools
+
+You may call one or more functions to assist with the user query.
+
+You are provided with function signatures within <tools></tools> XML tags:
+<tools>
+{% for tool in tools %}
+{{ tool | tojson(ensure_ascii=False) }}
+{% endfor %}
+</tools>
+
+For each function call, output the function name and arguments within the following XML format:
+<tool_call>{function-name}<arg_key>{arg-key-1}</arg_key><arg_value>{arg-value-1}</arg_value><arg_key>{arg-key-2}</arg_key><arg_value>{arg-value-2}</arg_value>...</tool_call>{%- endif -%}
+{%- macro visible_text(content) -%}
+    {%- if content is string -%}
+        {{- content }}
+    {%- elif content is iterable and content is not mapping -%}
+        {%- for item in content -%}
+            {%- if item is mapping and item.type == 'text' -%}
+                {{- item.text }}
+            {%- elif item is string -%}
+                {{- item }}
+            {%- endif -%}
+        {%- endfor -%}
+    {%- else -%}
+        {{- content }}
+    {%- endif -%}
+{%- endmacro -%}
+{%- set ns = namespace(last_user_index=-1) %}
+{%- for m in messages %}
+    {%- if m.role == 'user' %}
+        {% set ns.last_user_index = loop.index0 -%}
+    {%- endif %}
+{%- endfor %}
+{% for m in messages %}
+{%- if m.role == 'user' -%}<|user|>{{ visible_text(m.content) }}
+{%- elif m.role == 'assistant' -%}
+<|assistant|>
+{%- set reasoning_content = '' %}
+{%- set content = visible_text(m.content) %}
+{%- if m.reasoning_content is string %}
+    {%- set reasoning_content = m.reasoning_content %}
+{%- else %}
+    {%- if '</think>' in content %}
+        {%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
+        {%- set content = content.split('</think>')[-1].lstrip('\n') %}
+    {%- endif %}
+{%- endif %}
+{%- if ((clear_thinking is defined and not clear_thinking) or loop.index0 > ns.last_user_index) and reasoning_content -%}
+{{ '<think>' + reasoning_content.strip() +  '</think>'}}
+{%- else -%}
+{{ '</think>' }}
+{%- endif -%}
+{%- if content.strip() -%}
+{{ content.strip() }}
+{%- endif -%}
+{% if m.tool_calls %}
+{% for tc in m.tool_calls %}
+{%- if tc.function %}
+    {%- set tc = tc.function %}
+{%- endif %}
+{{- '<tool_call>' + tc.name -}}
+{% set _args = tc.arguments %}{% for k, v in _args.items() %}<arg_key>{{ k }}</arg_key><arg_value>{{ v | tojson(ensure_ascii=False) if v is not string else v }}</arg_value>{% endfor %}</tool_call>{% endfor %}
+{% endif %}
+{%- elif m.role == 'tool' -%}
+{%- if m.content is string -%}
+{%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
+    {{- '<|observation|>' }}
+{%- endif %}
+{{- '<tool_response>' }}
+{{- m.content }}
+{{- '</tool_response>' }}
+{%- else -%}
+<|observation|>{% for tr in m.content %}
+<tool_response>{{ tr.output if tr.output is defined else tr }}</tool_response>{% endfor -%}
+{% endif -%}
+{%- elif m.role == 'system' -%}
+<|system|>{{ visible_text(m.content) }}
+{%- endif -%}
+{%- endfor -%}
+{%- if add_generation_prompt -%}
+    <|assistant|>{{- '</think>' if (enable_thinking is defined and not enable_thinking) else '<think>' -}}
+{%- endif -%}
\ No newline at end of file
diff --git a/models/templates/LFM2-8B-A1B.jinja b/models/templates/LFM2-8B-A1B.jinja
new file mode 100644
index 0000000000..3738b3d145
--- /dev/null
+++ b/models/templates/LFM2-8B-A1B.jinja
@@ -0,0 +1,47 @@
+{{- bos_token -}}
+{%- set system_prompt = "" -%}
+{%- set ns = namespace(system_prompt="") -%}
+{%- if messages[0]["role"] == "system" -%}
+	{%- set ns.system_prompt = messages[0]["content"] -%}
+	{%- set messages = messages[1:] -%}
+{%- endif -%}
+{%- if tools -%}
+	{%- set ns.system_prompt = ns.system_prompt + ("\n" if ns.system_prompt else "") + "You can use the following tools: <|tool_list_start|>[" -%}
+	{%- for tool in tools -%}
+		{%- if tool is not string -%}
+			{%- set tool = tool | tojson -%}
+		{%- endif -%}
+		{%- set ns.system_prompt = ns.system_prompt + tool -%}
+		{%- if not loop.last -%}
+			{%- set ns.system_prompt = ns.system_prompt + ", " -%}
+		{%- endif -%}
+	{%- endfor -%}
+	{%- set ns.system_prompt = ns.system_prompt + "]<|tool_list_end|>" -%}
+	{{- '**IMPORTANT**: The syntax for calling the tools is: <|tool_call_start|>JSON tool call goes here<|tool_call_end|>. Please only call tools in the specified manner.' -}}
+{%- endif -%}
+{%- if ns.system_prompt -%}
+	{{- "<|im_start|>system\n" + ns.system_prompt + "<|im_end|>\n" -}}
+{%- endif -%}
+{%- for message in messages -%}
+	{{- "<|im_start|>" + message["role"] + "\n" -}}
+	{%- set content = message["content"] -%}
+	{%- if content is not string -%}
+		{%- set content = content | tojson -%}
+	{%- endif -%}
+	{%- if message["role"] == "tool" -%}
+		{%- set content = "<|tool_response_start|>" + content + "<|tool_response_end|>" -%}
+	{%- elif message["role"] == "assistant" -%}
+		{%- if message.tool_calls %}
+			{%- for tool_call in message.tool_calls %}
+				{%- if tool_call.function %}
+					{%- set tool_call = tool_call.function %}
+				{%- endif %}
+				{{- '\n<|tool_call_start|>\n{"name": "' + tool_call.name + '", "arguments": ' + (tool_call.arguments if tool_call.arguments is string else tool_call.arguments | tojson) + '}\n<|tool_call_end|>\n' }}
+			{%- endfor %}
+		{%- endif %}
+	{%- endif -%}
+	{{- content + "<|im_end|>\n" -}}
+{%- endfor -%}
+{%- if add_generation_prompt -%}
+	{{- "<|im_start|>assistant\n" -}}
+{%- endif -%}
diff --git a/models/templates/Qwen3-Coder.jinja b/models/templates/Qwen3-Coder.jinja
index 49b0e8d0ee..cde8c0e43d 100644
--- a/models/templates/Qwen3-Coder.jinja
+++ b/models/templates/Qwen3-Coder.jinja
@@ -29,7 +29,7 @@
     {%- endif %}
 {%- endif %}
 {%- if tools is iterable and tools | length > 0 %}
-    {{- "\n\n# Tools\n\nYou have access to the following functions:\n\n" }}
+    {{- "\n\n# Tools\n\nYou have access to the following tools:\n\n" }}
     {{- "<tools>" }}
     {%- for tool in tools %}
         {%- if tool.function is defined %}
@@ -63,7 +63,7 @@
         {{- '\n</function>' }}
     {%- endfor %}
     {{- "\n</tools>" }}
-    {{- '\n\nIf you choose to call a function ONLY reply in the following format with NO suffix:\n\n<tool_call>\n<function=example_function_name>\n<parameter=example_parameter_1>\nvalue_1\n</parameter>\n<parameter=example_parameter_2>\nThis is the value for the second parameter\nthat can span\nmultiple lines\n</parameter>\n</function>\n</tool_call>\n\n<IMPORTANT>\nReminder:\n- Function calls MUST follow the specified format: an inner <function=...></function> block must be nested within <tool_call></tool_call> XML tags\n- Required parameters MUST be specified\n- You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after\n- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls\n</IMPORTANT>' }}
+    {{- '\n\nIf you choose to call a tool ONLY reply in the following format with NO suffix:\n\n<tool_call>\n<function=example_function_name>\n<parameter=example_parameter_1>\nvalue_1\n</parameter>\n<parameter=example_parameter_2>\nvalue_2\n</parameter>\n</function>\n</tool_call>\n\n<IMPORTANT>\nReminder:\n- Function calls MUST follow the specified format: the tool calling block MUST begin with an opening <tool_call> tag and end with a closing </tool_call> tag.\n- Required parameters MUST be specified\n- You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after\n- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls\n</IMPORTANT>' }}
 {%- endif %}
 {%- if system_message is defined %}
     {{- '<|im_end|>\n' }}
diff --git a/models/templates/deepseek-ai-DeepSeek-R1-Distill-Llama-8B.jinja b/models/templates/deepseek-ai-DeepSeek-R1-Distill-Llama-8B.jinja
index c2066bd739..299f7a7ff1 100644
--- a/models/templates/deepseek-ai-DeepSeek-R1-Distill-Llama-8B.jinja
+++ b/models/templates/deepseek-ai-DeepSeek-R1-Distill-Llama-8B.jinja
@@ -1 +1,44 @@
-{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<｜User｜>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<｜Assistant｜><｜tool▁calls▁begin｜><｜tool▁call▁begin｜>' + tool['type'] + '<｜tool▁sep｜>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<｜tool▁call▁end｜>'}}{%- set ns.is_first = true -%}{%- else %}{{'\n' + '<｜tool▁call▁begin｜>' + tool['type'] + '<｜tool▁sep｜>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<｜tool▁call▁end｜>'}}{{'<｜tool▁calls▁end｜><｜end▁of▁sentence｜>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<｜tool▁outputs▁end｜>' + message['content'] + '<｜end▁of▁sentence｜>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<｜Assistant｜>' + content + '<｜end▁of▁sentence｜>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<｜tool▁outputs▁begin｜><｜tool▁output▁begin｜>' + message['content'] + '<｜tool▁output▁end｜>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\n<｜tool▁output▁begin｜>' + message['content'] + '<｜tool▁output▁end｜>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<｜tool▁outputs▁end｜>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<｜Assistant｜><think>\n'}}{% endif %}
\ No newline at end of file
+{% if not add_generation_prompt is defined -%}
+  {%- set add_generation_prompt = false -%}
+{%- endif -%}
+{%- set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') -%}
+{%- for message in messages -%}
+  {%- if message['role'] == 'system' -%}
+    {%- set ns.system_prompt = message['content'] -%}
+  {%- endif -%}
+{%- endfor -%}{{bos_token}}{{ns.system_prompt}}
+{%- for message in messages -%}
+  {%- if message['role'] == 'user' -%}
+    {%- set ns.is_tool = false -%}{{'<｜User｜>' + message['content']}}
+  {%- endif -%}
+  {%- if message['role'] == 'assistant' and message['content'] is none -%}
+    {%- set ns.is_tool = false -%}
+    {%- for tool in message['tool_calls']-%}
+      {%- if not ns.is_first -%}{{'<｜Assistant｜><｜tool▁calls▁begin｜><｜tool▁call▁begin｜>' + tool['type'] + '<｜tool▁sep｜>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<｜tool▁call▁end｜>'}}
+        {%- set ns.is_first = true -%}
+        {%- else -%}{{'\n' + '<｜tool▁call▁begin｜>' + tool['type'] + '<｜tool▁sep｜>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<｜tool▁call▁end｜>'}}{{'<｜tool▁calls▁end｜><｜end▁of▁sentence｜>'}}
+      {%- endif -%}
+    {%- endfor -%}
+  {%- endif -%}
+  {%- if message['role'] == 'assistant' and message['content'] is not none -%}
+    {%- if ns.is_tool -%}{{'<｜tool▁outputs▁end｜>' + message['content'] + '<｜end▁of▁sentence｜>'}}
+      {%- set ns.is_tool = false -%}
+      {%- else -%}
+      {%- set content = message['content'] -%}
+      {%- if '</think>' in content -%}
+        {%- set content = content.split('</think>')[-1] -%}
+      {%- endif -%}{{'<｜Assistant｜>' + content + '<｜end▁of▁sentence｜>'}}
+    {%- endif -%}
+  {%- endif -%}
+  {%- if message['role'] == 'tool' -%}
+    {%- set ns.is_tool = true -%}
+    {%- if ns.is_output_first -%}{{'<｜tool▁outputs▁begin｜><｜tool▁output▁begin｜>' + message['content'] + '<｜tool▁output▁end｜>'}}
+      {%- set ns.is_output_first = false -%}
+      {%- else -%}{{'\n<｜tool▁output▁begin｜>' + message['content'] + '<｜tool▁output▁end｜>'}}
+    {%- endif -%}
+  {%- endif -%}
+{%- endfor -%}
+{%- if ns.is_tool -%}{{'<｜tool▁outputs▁end｜>'}}
+{%- endif -%}
+{%- if add_generation_prompt and not ns.is_tool -%}{{'<｜Assistant｜><think>\n'}}
+{%- endif %}
\ No newline at end of file
diff --git a/models/templates/deepseek-ai-DeepSeek-R1-Distill-Qwen-32B.jinja b/models/templates/deepseek-ai-DeepSeek-R1-Distill-Qwen-32B.jinja
index c2066bd739..fff2b755e2 100644
--- a/models/templates/deepseek-ai-DeepSeek-R1-Distill-Qwen-32B.jinja
+++ b/models/templates/deepseek-ai-DeepSeek-R1-Distill-Qwen-32B.jinja
@@ -1 +1,47 @@
-{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<｜User｜>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<｜Assistant｜><｜tool▁calls▁begin｜><｜tool▁call▁begin｜>' + tool['type'] + '<｜tool▁sep｜>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<｜tool▁call▁end｜>'}}{%- set ns.is_first = true -%}{%- else %}{{'\n' + '<｜tool▁call▁begin｜>' + tool['type'] + '<｜tool▁sep｜>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<｜tool▁call▁end｜>'}}{{'<｜tool▁calls▁end｜><｜end▁of▁sentence｜>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<｜tool▁outputs▁end｜>' + message['content'] + '<｜end▁of▁sentence｜>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<｜Assistant｜>' + content + '<｜end▁of▁sentence｜>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<｜tool▁outputs▁begin｜><｜tool▁output▁begin｜>' + message['content'] + '<｜tool▁output▁end｜>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\n<｜tool▁output▁begin｜>' + message['content'] + '<｜tool▁output▁end｜>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<｜tool▁outputs▁end｜>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<｜Assistant｜><think>\n'}}{% endif %}
\ No newline at end of file
+{% if not add_generation_prompt is defined -%}
+  {%- set add_generation_prompt = false -%}
+{%- endif -%}
+{%- set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') -%}
+{%- for message in messages -%}
+  {%- if message['role'] == 'system' -%}
+    {%- set ns.system_prompt = message['content'] -%}
+  {%- endif -%}
+{%- endfor -%}{{bos_token}}{{ns.system_prompt}}
+{%- for message in messages -%}
+  {%- if message['role'] == 'user' -%}
+    {%- set ns.is_tool = false -%}{{'<｜User｜>' + message['content']}}
+  {%- endif -%}
+  {%- if message['role'] == 'assistant' and message['content'] is none -%}
+    {%- set ns.is_tool = false -%}
+    {%- for tool in message['tool_calls']-%}
+      {%- if not ns.is_first -%}
+        {{'<｜Assistant｜><｜tool▁calls▁begin｜><｜tool▁call▁begin｜>' + tool['type'] + '<｜tool▁sep｜>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<｜tool▁call▁end｜>'}}
+        {%- set ns.is_first = true -%}
+      {%- else -%}
+        {{'\n' + '<｜tool▁call▁begin｜>' + tool['type'] + '<｜tool▁sep｜>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<｜tool▁call▁end｜>'}}
+      {%- endif -%}
+    {%- endfor -%}
+    {{'<｜tool▁calls▁end｜><｜end▁of▁sentence｜>'}}
+  {%- endif -%}
+  {%- if message['role'] == 'assistant' and message['content'] is not none -%}
+    {%- if ns.is_tool -%}{{'<｜tool▁outputs▁end｜>' + message['content'] + '<｜end▁of▁sentence｜>'}}
+      {%- set ns.is_tool = false -%}
+    {%- else -%}
+      {%- set content = message['content'] -%}
+      {%- if '</think>' in content -%}
+        {%- set content = content.split('</think>')[-1] -%}
+      {%- endif -%}{{'<｜Assistant｜>' + content + '<｜end▁of▁sentence｜>'}}
+    {%- endif -%}
+  {%- endif -%}
+  {%- if message['role'] == 'tool' -%}
+    {%- set ns.is_tool = true -%}
+    {%- if ns.is_output_first -%}{{'<｜tool▁outputs▁begin｜><｜tool▁output▁begin｜>' + message['content'] + '<｜tool▁output▁end｜>'}}
+      {%- set ns.is_output_first = false -%}
+      {%- else -%}{{'\n<｜tool▁output▁begin｜>' + message['content'] + '<｜tool▁output▁end｜>'}}
+    {%- endif -%}
+  {%- endif -%}
+{%- endfor -%}
+{%- if ns.is_tool -%}{{'<｜tool▁outputs▁end｜>'}}
+{%- endif -%}
+{%- if add_generation_prompt and not ns.is_tool -%}{{'<｜Assistant｜><think>\n'}}
+{%- endif %}
\ No newline at end of file
diff --git a/models/templates/deepseek-ai-DeepSeek-V3.1.jinja b/models/templates/deepseek-ai-DeepSeek-V3.1.jinja
index e5656196a3..6ef7fb123c 100644
--- a/models/templates/deepseek-ai-DeepSeek-V3.1.jinja
+++ b/models/templates/deepseek-ai-DeepSeek-V3.1.jinja
@@ -1,3 +1,71 @@
-{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% if not thinking is defined %}{% set thinking = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, system_prompt='', is_first_sp=true, is_last_user=false) %}{%- for message in messages %}{%- if message['role'] == 'system' %}{%- if ns.is_first_sp %}{% set ns.system_prompt = ns.system_prompt + message['content'] %}{% set ns.is_first_sp = false %}{%- else %}{% set ns.system_prompt = ns.system_prompt + '
+{% if not add_generation_prompt is defined -%}
+  {%- set add_generation_prompt = false -%}
+{%- endif -%}
+{%- if not thinking is defined -%}
+  {%- if enable_thinking is defined -%}
+    {%- set thinking = enable_thinking -%}
+    {%- else -%}
+    {%- set thinking = false -%}
+  {%- endif -%}
+{%- endif -%}
+{%- set ns = namespace(is_first=false, is_tool=false, system_prompt='', is_first_sp=true, is_last_user=false) -%}
+{%- for message in messages -%}
+  {%- if message['role'] == 'system' -%}
+    {%- if ns.is_first_sp -%}
+      {%- set ns.system_prompt = ns.system_prompt + message['content'] -%}
+      {%- set ns.is_first_sp = false -%}
+      {%- else -%}
+      {%- set ns.system_prompt = ns.system_prompt + '
 
-' + message['content'] %}{%- endif %}{%- endif %}{%- endfor %}{{ bos_token }}{{ ns.system_prompt }}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{%- set ns.is_first = false -%}{%- set ns.is_last_user = true -%}{{'<｜User｜>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['tool_calls'] is defined and message['tool_calls'] is not none %}{%- if ns.is_last_user %}{{'<｜Assistant｜></think>'}}{%- endif %}{%- set ns.is_last_user = false -%}{%- set ns.is_first = false %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls'] %}{%- if not ns.is_first %}{%- if message['content'] is none %}{{'<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>'+ tool['function']['name'] + '<｜tool▁sep｜>' + tool['function']['arguments'] + '<｜tool▁call▁end｜>'}}{%- else %}{{message['content'] + '<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>' + tool['function']['name'] + '<｜tool▁sep｜>' + tool['function']['arguments'] + '<｜tool▁call▁end｜>'}}{%- endif %}{%- set ns.is_first = true -%}{%- else %}{{'<｜tool▁call▁begin｜>'+ tool['function']['name'] + '<｜tool▁sep｜>' + tool['function']['arguments'] + '<｜tool▁call▁end｜>'}}{%- endif %}{%- endfor %}{{'<｜tool▁calls▁end｜><｜end▁of▁sentence｜>'}}{%- endif %}{%- if message['role'] == 'assistant' and (message['tool_calls'] is not defined or message['tool_calls'] is none) %}{%- if ns.is_last_user %}{{'<｜Assistant｜>'}}{%- if message['prefix'] is defined and message['prefix'] and thinking %}{{'<think>'}}  {%- else %}{{'</think>'}}{%- endif %}{%- endif %}{%- set ns.is_last_user = false -%}{%- if ns.is_tool %}{{message['content'] + '<｜end▁of▁sentence｜>'}}{%- set ns.is_tool = false -%}{%- else %}{%- set content = message['content'] -%}{%- if '</think>' in content %}{%- set content = content.split('</think>', 1)[1] -%}{%- endif %}{{content + '<｜end▁of▁sentence｜>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_last_user = false -%}{%- set ns.is_tool = true -%}{{'<｜tool▁output▁begin｜>' + message['content'] + '<｜tool▁output▁end｜>'}}{%- endif %}{%- endfor -%}{%- if add_generation_prompt and ns.is_last_user and not ns.is_tool %}{{'<｜Assistant｜>'}}{%- if not thinking %}{{'</think>'}}{%- else %}{{'<think>'}}{%- endif %}{% endif %}
\ No newline at end of file
+' + message['content'] -%}
+    {%- endif -%}
+  {%- endif -%}
+{%- endfor -%}{{ bos_token }}{{ ns.system_prompt }}
+{%- for message in messages -%}
+  {%- if message['role'] == 'user' -%}
+    {%- set ns.is_tool = false -%}
+    {%- set ns.is_first = false -%}
+    {%- set ns.is_last_user = true -%}{{'<｜User｜>' + message['content']}}
+  {%- endif -%}
+  {%- if message['role'] == 'assistant' and message['tool_calls'] is defined and message['tool_calls'] is not none -%}
+    {%- if ns.is_last_user -%}{{'<｜Assistant｜></think>'}}
+    {%- endif -%}
+    {%- set ns.is_last_user = false -%}
+    {%- set ns.is_first = false -%}
+    {%- set ns.is_tool = false -%}
+    {%- for tool in message['tool_calls'] -%}
+      {%- if not ns.is_first -%}
+        {%- if message['content'] is none -%}{{'<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>'+ tool['function']['name'] + '<｜tool▁sep｜>' + tool['function']['arguments'] + '<｜tool▁call▁end｜>'}}
+          {%- else -%}{{message['content'] + '<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>' + tool['function']['name'] + '<｜tool▁sep｜>' + tool['function']['arguments'] + '<｜tool▁call▁end｜>'}}
+        {%- endif -%}
+        {%- set ns.is_first = true -%}
+        {%- else -%}{{'<｜tool▁call▁begin｜>'+ tool['function']['name'] + '<｜tool▁sep｜>' + tool['function']['arguments'] + '<｜tool▁call▁end｜>'}}
+      {%- endif -%}
+    {%- endfor -%}{{'<｜tool▁calls▁end｜><｜end▁of▁sentence｜>'}}
+  {%- endif -%}
+  {%- if message['role'] == 'assistant' and (message['tool_calls'] is not defined or message['tool_calls'] is none) -%}
+    {%- if ns.is_last_user -%}{{'<｜Assistant｜>'}}
+      {%- if message['prefix'] is defined and message['prefix'] and thinking -%}{{'<think>'}}
+        {%- else -%}{{'</think>'}}
+      {%- endif -%}
+    {%- endif -%}
+    {%- set ns.is_last_user = false -%}
+    {%- if ns.is_tool -%}{{message['content'] + '<｜end▁of▁sentence｜>'}}
+      {%- set ns.is_tool = false -%}
+      {%- else -%}
+      {%- set content = message['content'] -%}
+      {%- if '</think>' in content -%}
+        {%- set content = content.split('</think>', 1)[1] -%}
+      {%- endif -%}{{content + '<｜end▁of▁sentence｜>'}}
+    {%- endif -%}
+  {%- endif -%}
+  {%- if message['role'] == 'tool' -%}
+    {%- set ns.is_last_user = false -%}
+    {%- set ns.is_tool = true -%}{{'<｜tool▁output▁begin｜>' + message['content'] + '<｜tool▁output▁end｜>'}}
+  {%- endif -%}
+{%- endfor -%}
+{%- if add_generation_prompt and ns.is_last_user and not ns.is_tool -%}{{'<｜Assistant｜>'}}
+  {%- if not thinking -%}{{'</think>'}}
+    {%- else -%}{{'<think>'}}
+  {%- endif -%}
+{%- endif %}
\ No newline at end of file
diff --git a/models/templates/moonshotai-Kimi-K2.jinja b/models/templates/moonshotai-Kimi-K2.jinja
index ecb49a2108..e286d8a7b5 100644
--- a/models/templates/moonshotai-Kimi-K2.jinja
+++ b/models/templates/moonshotai-Kimi-K2.jinja
@@ -1,43 +1,43 @@
-{%- if tools -%}
-  <|im_system|>tool_declare<|im_middle|>{{ tools | tojson }}<|im_end|>
-{%- endif -%}
-{%- for message in messages -%}
-  {%- if loop.first and messages[0]['role'] != 'system' -%}
-    <|im_system|>system<|im_middle|>You are a helpful assistant<|im_end|>
-  {%- endif -%}
-  {%- if message['role'] == 'system' -%}
-    <|im_system|>system<|im_middle|>
-  {%- elif message['role'] == 'user' -%}
-    <|im_user|>user<|im_middle|>
-  {%- elif message['role'] == 'assistant' -%}
-    <|im_assistant|>assistant<|im_middle|>
-  {%- elif message['role'] == 'tool' -%}
-    <|im_system|>tool<|im_middle|>
-  {%- endif -%}
-  {%- if message['role'] == 'assistant' and message.get('tool_calls') -%}
-    {%- if message['content'] -%}{{ message['content'] }}{%- endif -%}
-    <|tool_calls_section_begin|>
-    {%- for tool_call in message['tool_calls'] -%}
-      {%- set func_name = tool_call['function']['name'] -%}
-      {%- set formatted_id = 'functions.' + func_name + ':' + loop.index0|string -%}
-      <|tool_call_begin|>{{ formatted_id }}<|tool_call_argument_begin|>{{ tool_call['function']['arguments'] | tojson}}<|tool_call_end|>
-    {%- endfor -%}
-    <|tool_calls_section_end|>
-  {%- elif message['role'] == 'tool' -%}
-    ## Return of {{ message.tool_call_id }}\n{{ message['content'] }}
-  {%- elif message['content'] is string -%}
-    {{ message['content'] }}
-  {%- elif message['content'] is not none -%}
-    {% for content in message['content'] -%}
-      {% if content['type'] == 'image' or 'image' in content or 'image_url' in content -%}
-        <|media_start|>image<|media_content|><|media_pad|><|media_end|>
-      {% else -%}
-        {{ content['text'] }}
-      {%- endif -%}
-    {%- endfor -%}
-  {%- endif -%}
-  <|im_end|>
-{%- endfor -%}
-{%- if add_generation_prompt -%}
-  <|im_assistant|>assistant<|im_middle|>
-{%- endif -%}
+{%- if tools -%}
+  <|im_system|>tool_declare<|im_middle|>{{ tools | tojson }}<|im_end|>
+{%- endif -%}
+{%- for message in messages -%}
+  {%- if loop.first and messages[0]['role'] != 'system' -%}
+    <|im_system|>system<|im_middle|>You are a helpful assistant<|im_end|>
+  {%- endif -%}
+  {%- if message['role'] == 'system' -%}
+    <|im_system|>system<|im_middle|>
+  {%- elif message['role'] == 'user' -%}
+    <|im_user|>user<|im_middle|>
+  {%- elif message['role'] == 'assistant' -%}
+    <|im_assistant|>assistant<|im_middle|>
+  {%- elif message['role'] == 'tool' -%}
+    <|im_system|>tool<|im_middle|>
+  {%- endif -%}
+  {%- if message['role'] == 'assistant' and message.get('tool_calls') -%}
+    {%- if message['content'] -%}{{ message['content'] }}{%- endif -%}
+    <|tool_calls_section_begin|>
+    {%- for tool_call in message['tool_calls'] -%}
+      {%- set func_name = tool_call['function']['name'] -%}
+      {%- set formatted_id = 'functions.' + func_name + ':' + loop.index0|string -%}
+      <|tool_call_begin|>{{ formatted_id }}<|tool_call_argument_begin|>{{ tool_call['function']['arguments'] | tojson}}<|tool_call_end|>
+    {%- endfor -%}
+    <|tool_calls_section_end|>
+  {%- elif message['role'] == 'tool' -%}
+    ## Return of {{ message.tool_call_id }}\n{{ message['content'] }}
+  {%- elif message['content'] is string -%}
+    {{ message['content'] }}
+  {%- elif message['content'] is not none -%}
+    {% for content in message['content'] -%}
+      {% if content['type'] == 'image' or 'image' in content or 'image_url' in content -%}
+        <|media_start|>image<|media_content|><|media_pad|><|media_end|>
+      {% else -%}
+        {{ content['text'] }}
+      {%- endif -%}
+    {%- endfor -%}
+  {%- endif -%}
+  <|im_end|>
+{%- endfor -%}
+{%- if add_generation_prompt -%}
+  <|im_assistant|>assistant<|im_middle|>
+{%- endif -%}
diff --git a/models/templates/unsloth-Apriel-1.5.jinja b/models/templates/unsloth-Apriel-1.5.jinja
index 29e582fbf6..8e59d2f1d4 100644
--- a/models/templates/unsloth-Apriel-1.5.jinja
+++ b/models/templates/unsloth-Apriel-1.5.jinja
@@ -86,19 +86,19 @@ Prior to generating the function calls, you should generate the reasoning for wh
             {%- set add_tool_id = false -%}
         {%- endif -%}
         {{- '<|assistant|>\n' -}}
-        {%- if message['content'] is not none and message['content']|length > 0 -%}
+        {%- if message['content'] is defined and message['content'] is not none and message['content']|length > 0 -%}
             {%- if message['content'] is not string and message['content'][0]['text'] is not none %}
                 {{- message['content'][0]['text'] }}
             {%- else %}
                 {{- message['content'] -}}
             {%- endif -%}
-        {%- elif message['chosen'] is not none and message['chosen']|length > 0 -%}
+        {%- elif message['chosen'] is defined and message['chosen'] is not none and message['chosen']|length > 0 -%}
             {{- message['chosen'][0] -}}
         {%- endif -%}
         {%- if add_thoughts and 'thought' in message and message['thought'] is not none -%}
             {{- '<thinking>' + message['thought'] + '</thinking>' -}}
         {%- endif -%}
-        {%- if message['tool_calls'] is not none and message['tool_calls']|length > 0 -%}
+        {%- if message['tool_calls'] is defined and message['tool_calls'] is not none and message['tool_calls']|length > 0 -%}
             {{- '\n<tool_calls>[' -}}
             {%- for tool_call in message["tool_calls"] -%}
                 {{- '{"name": "' + tool_call['function']['name'] + '", "arguments": ' + tool_call['function']['arguments']|string -}}
diff --git a/scripts/server-bench.py b/scripts/server-bench.py
index dbbb0939ff..2ef7258712 100755
--- a/scripts/server-bench.py
+++ b/scripts/server-bench.py
@@ -230,7 +230,7 @@ def benchmark(
 
     logger.info("")
     logger.info(f"Benchmark duration:                {token_t_last:.2f} s")
-    logger.info(f"Request throughput:                {n_prompts / token_t_last:.2f} requests/s = {n_prompts / (token_t_last/60):.2f} requests/min")
+    logger.info(f"Request throughput:                {n_prompts / token_t_last:.2f} requests/s = {n_prompts / (token_t_last / 60):.2f} requests/min")
     logger.info(f"Total prompt length:               {np.sum(prompt_n)} tokens")
     logger.info(f"Average prompt length:             {np.mean(prompt_n):.2f} tokens")
     logger.info(f"Average prompt latency:            {1e3 * np.mean(prompt_t):.2f} ms")
diff --git a/scripts/server-test-model.py b/scripts/server-test-model.py
new file mode 100644
index 0000000000..9049d80279
--- /dev/null
+++ b/scripts/server-test-model.py
@@ -0,0 +1,202 @@
+import argparse
+import json
+import requests
+import logging
+import sys
+
+handler = logging.StreamHandler(sys.stdout)
+handler.terminator = ""   # ← no newline
+logging.basicConfig(level=logging.INFO, format='%(message)s', handlers=[handler])
+logger = logging.getLogger("server-test-model")
+
+
+def run_query(url, messages, tools=None, stream=False, tool_choice=None):
+    payload = {
+        "messages": messages,
+        "stream": stream,
+        "max_tokens": 5000,
+    }
+    if tools:
+        payload["tools"] = tools
+    if tool_choice:
+        payload["tool_choice"] = tool_choice
+
+    try:
+        response = requests.post(url, json=payload, stream=stream)
+        response.raise_for_status()
+    except requests.exceptions.RequestException as e:
+        if e.response is not None:
+            logger.info(f"Response error: {e} for {e.response.content}\n")
+        else:
+            logger.info(f"Error connecting to server: {e}\n")
+        return None
+
+    full_content = ""
+    reasoning_content = ""
+    tool_calls = []
+
+    if stream:
+        logger.info(f"--- Streaming response (Tools: {bool(tools)}) ---\n")
+        for line in response.iter_lines():
+            if line:
+                decoded_line = line.decode("utf-8")
+                if decoded_line.startswith("data: "):
+                    data_str = decoded_line[6:]
+                    if data_str == "[DONE]":
+                        break
+                    try:
+                        data = json.loads(data_str)
+                        if "choices" in data and len(data["choices"]) > 0:
+                            delta = data["choices"][0].get("delta", {})
+
+                            # Content
+                            content_chunk = delta.get("content", "")
+                            if content_chunk:
+                                full_content += content_chunk
+                                logger.info(content_chunk)
+
+                            # Reasoning
+                            reasoning_chunk = delta.get("reasoning_content", "")
+                            if reasoning_chunk:
+                                reasoning_content += reasoning_chunk
+                                logger.info(f"\x1B[3m{reasoning_chunk}\x1B[0m")
+
+                            # Tool calls
+                            if "tool_calls" in delta:
+                                for tc in delta["tool_calls"]:
+                                    index = tc.get("index")
+                                    if index is not None:
+                                        while len(tool_calls) <= index:
+                                            # Using "function" as type default but could be flexible
+                                            tool_calls.append(
+                                                {
+                                                    "id": "",
+                                                    "type": "function",
+                                                    "function": {
+                                                        "name": "",
+                                                        "arguments": "",
+                                                    },
+                                                }
+                                            )
+
+                                        if "id" in tc:
+                                            tool_calls[index]["id"] += tc["id"]
+                                        if "function" in tc:
+                                            if "name" in tc["function"]:
+                                                tool_calls[index]["function"][
+                                                    "name"
+                                                ] += tc["function"]["name"]
+                                            if "arguments" in tc["function"]:
+                                                tool_calls[index]["function"][
+                                                    "arguments"
+                                                ] += tc["function"]["arguments"]
+
+                    except json.JSONDecodeError:
+                        logger.info(f"Failed to decode JSON: {data_str}\n")
+        logger.info("\n--- End of Stream ---\n")
+    else:
+        logger.info(f"--- Non-streaming response (Tools: {bool(tools)}) ---\n")
+        data = response.json()
+        if "choices" in data and len(data["choices"]) > 0:
+            message = data["choices"][0].get("message", {})
+            full_content = message.get("content", "")
+            reasoning_content = message.get("reasoning_content", "")
+            tool_calls = message.get("tool_calls", [])
+            logger.info(full_content)
+        logger.info("--- End of Response ---\n")
+
+    return {
+        "content": full_content,
+        "reasoning_content": reasoning_content,
+        "tool_calls": tool_calls,
+    }
+
+
+def test_chat(url, stream):
+    logger.info(f"\n=== Testing Chat (Stream={stream}) ===\n")
+    messages = [{"role": "user", "content": "What is the capital of France?"}]
+    result = run_query(url, messages, stream=stream)
+
+    if result:
+        if result["content"]:
+            logger.info("PASS: Output received.\n")
+        else:
+            logger.info("WARN: No content received (valid if strict tool call, but unexpected here).\n")
+
+        if result.get("reasoning_content"):
+            logger.info(f"INFO: Reasoning content detected ({len(result['reasoning_content'])} chars).\n")
+        else:
+            logger.info("INFO: No reasoning content detected (Standard model behavior).\n")
+    else:
+        logger.info("FAIL: No result.\n")
+
+
+def test_tool_call(url, stream):
+    logger.info(f"\n=== Testing Tool Call (Stream={stream}) ===\n")
+    messages = [
+        {
+            "role": "user",
+            "content": "What is the weather in London? Please use the get_weather tool.",
+        }
+    ]
+    tools = [
+        {
+            "type": "function",
+            "function": {
+                "name": "get_weather",
+                "description": "Get the current weather in a given location",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "location": {
+                            "type": "string",
+                            "description": "The city and state, e.g. San Francisco, CA",
+                        },
+                        "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
+                    },
+                    "required": ["location"],
+                },
+            },
+        }
+    ]
+
+    result = run_query(url, messages, tools=tools, tool_choice="auto", stream=stream)
+
+    if result:
+        tcs = result.get("tool_calls")
+        if tcs and len(tcs) > 0:
+            logger.info("PASS: Tool calls detected.")
+            for tc in tcs:
+                func = tc.get("function", {})
+                logger.info(f"  Tool: {func.get('name')}, Args: {func.get('arguments')}\n")
+        else:
+            logger.info(f"FAIL: No tool calls. Content: {result['content']}\n")
+
+        if result.get("reasoning_content"):
+            logger.info(
+                f"INFO: Reasoning content detected during tool call ({len(result['reasoning_content'])} chars).\n"
+            )
+    else:
+        logger.info("FAIL: Query failed.\n")
+
+
+def main():
+    parser = argparse.ArgumentParser(description="Test llama-server functionality.")
+    parser.add_argument("--host", default="localhost", help="Server host")
+    parser.add_argument("--port", default=8080, type=int, help="Server port")
+    args = parser.parse_args()
+
+    base_url = f"http://{args.host}:{args.port}/v1/chat/completions"
+    logger.info(f"Testing server at {base_url}\n")
+
+    # Non-streaming tests
+    test_chat(base_url, stream=False)
+    test_tool_call(base_url, stream=False)
+
+    # Streaming tests
+    test_chat(base_url, stream=True)
+    test_tool_call(base_url, stream=True)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/snapdragon/qdc/tests/test_bench.py b/scripts/snapdragon/qdc/tests/test_bench.py
index 651ab5b717..bd19e5d26c 100644
--- a/scripts/snapdragon/qdc/tests/test_bench.py
+++ b/scripts/snapdragon/qdc/tests/test_bench.py
@@ -14,7 +14,7 @@ cli_pref=f'cd {pkg_path} && LD_LIBRARY_PATH={lib_path} ADSP_LIBRARY_PATH={lib_pa
 def run_cmd(cmd):
     p = subprocess.run(cmd, text = True, stdout = subprocess.PIPE, stderr = subprocess.STDOUT)
     sys.stdout.write(p.stdout)
-    assert(p.returncode == 0)
+    assert (p.returncode == 0)
 
 
 @pytest.mark.dependency()
diff --git a/src/models/models.h b/src/models/models.h
index 3a44f7f140..54f73c64d3 100644
--- a/src/models/models.h
+++ b/src/models/models.h
@@ -1,10 +1,11 @@
 #pragma once
 
-#include "../llama-model.h"
 #include "../llama-graph.h"
+#include "../llama-model.h"
 
 // TODO: remove in follow-up PR - move to .cpp files
 #include "../llama-memory-recurrent.h"
+
 #include <cmath>
 
 struct llm_graph_context_mamba : public llm_graph_context {
@@ -12,9 +13,16 @@ struct llm_graph_context_mamba : public llm_graph_context {
 
     virtual ~llm_graph_context_mamba() = default;
 
-    ggml_tensor * build_mamba_layer(llm_graph_input_rs * inp, ggml_tensor * cur, const llama_model & model, const llama_ubatch & ubatch, int il);
-    ggml_tensor * build_mamba2_layer(llm_graph_input_rs * inp, ggml_tensor * cur, const llama_model & model, const llama_ubatch & ubatch, int il) const;
-
+    ggml_tensor * build_mamba_layer(llm_graph_input_rs * inp,
+                                    ggml_tensor *        cur,
+                                    const llama_model &  model,
+                                    const llama_ubatch & ubatch,
+                                    int                  il);
+    ggml_tensor * build_mamba2_layer(llm_graph_input_rs * inp,
+                                     ggml_tensor *        cur,
+                                     const llama_model &  model,
+                                     const llama_ubatch & ubatch,
+                                     int                  il) const;
 };
 
 // Base class for RWKV-related models
@@ -158,8 +166,7 @@ struct llm_build_ernie4_5_moe : public llm_graph_context {
     llm_build_ernie4_5_moe(const llama_model & model, const llm_graph_params & params);
 };
 
-template <bool iswa>
-struct llm_build_exaone4 : public llm_graph_context {
+template <bool iswa> struct llm_build_exaone4 : public llm_graph_context {
     llm_build_exaone4(const llama_model & model, const llm_graph_params & params);
 };
 
@@ -183,8 +190,7 @@ struct llm_build_gemma2_iswa : public llm_graph_context {
     llm_build_gemma2_iswa(const llama_model & model, const llm_graph_params & params);
 };
 
-template <bool iswa>
-struct llm_build_gemma3 : public llm_graph_context {
+template <bool iswa> struct llm_build_gemma3 : public llm_graph_context {
     llm_build_gemma3(const llama_model & model, const llm_graph_params & params);
 };
 
@@ -195,8 +201,8 @@ struct llm_build_gemma3n_iswa : public llm_graph_context {
     const int64_t n_embd_altup;
     const int64_t n_altup;
     const int     i_altup_act;
-    const int     n_layer_sparsity = 10; // number of layers using activation sparsity
-    const float   f_sparsity_std_mul = 1.6448533535003662f; // std_multiplier = normal_dist.icdf(0.95)
+    const int     n_layer_sparsity   = 10;                   // number of layers using activation sparsity
+    const float   f_sparsity_std_mul = 1.6448533535003662f;  // std_multiplier = normal_dist.icdf(0.95)
 
     llm_build_gemma3n_iswa(const llama_model & model, const llm_graph_params & params);
     ggml_tensor * calc_magnitude(ggml_tensor * x);
@@ -237,27 +243,26 @@ struct llm_build_gptneox : public llm_graph_context {
 struct llm_build_granite : public llm_graph_context {
     llm_build_granite(const llama_model & model, const llm_graph_params & params);
 
-private:
-    ggml_tensor * build_attention_layer(
-              ggml_tensor             * cur,
-              ggml_tensor             * inp_pos,
-              llm_graph_input_attn_kv * inp_attn,
-        const llama_model             & model,
-        const int64_t                 n_embd_head,
-        const int                     il);
+  private:
+    ggml_tensor * build_attention_layer(ggml_tensor *             cur,
+                                        ggml_tensor *             inp_pos,
+                                        llm_graph_input_attn_kv * inp_attn,
+                                        const llama_model &       model,
+                                        const int64_t             n_embd_head,
+                                        const int                 il);
 
-    ggml_tensor * build_layer_ffn(
-              ggml_tensor       * cur,
-              ggml_tensor       * inpSA,
-        const llama_model       & model,
-        const int                 il);
+    ggml_tensor * build_layer_ffn(ggml_tensor * cur, ggml_tensor * inpSA, const llama_model & model, const int il);
 };
 
 struct llm_build_granite_hybrid : public llm_graph_context_mamba {
     llm_build_granite_hybrid(const llama_model & model, const llm_graph_params & params);
     ggml_tensor * build_layer_ffn(ggml_tensor * cur, ggml_tensor * inpSA, const llama_model & model, const int il);
-    ggml_tensor * build_attention_layer(ggml_tensor * cur, ggml_tensor * inp_pos, llm_graph_input_attn_kv * inp_attn,
-        const llama_model & model,const int64_t n_embd_head, const int il);
+    ggml_tensor * build_attention_layer(ggml_tensor *             cur,
+                                        ggml_tensor *             inp_pos,
+                                        llm_graph_input_attn_kv * inp_attn,
+                                        const llama_model &       model,
+                                        const int64_t             n_embd_head,
+                                        const int                 il);
 };
 
 struct llm_build_grok : public llm_graph_context {
@@ -294,9 +299,11 @@ struct llm_build_lfm2 : public llm_graph_context {
     llm_build_lfm2(const llama_model & model, const llm_graph_params & params);
     ggml_tensor * build_moe_feed_forward(ggml_tensor * cur, int il) const;
     ggml_tensor * build_dense_feed_forward(ggml_tensor * cur, int il) const;
-    ggml_tensor * build_attn_block(ggml_tensor * cur, ggml_tensor * inp_pos, llm_graph_input_attn_kv * inp_attn, int il) const;
+    ggml_tensor * build_attn_block(ggml_tensor *             cur,
+                                   ggml_tensor *             inp_pos,
+                                   llm_graph_input_attn_kv * inp_attn,
+                                   int                       il) const;
     ggml_tensor * build_shortconv_block(ggml_tensor * cur, llm_graph_input_rs * inp_recr, int il);
-
 };
 
 struct llm_build_llada : public llm_graph_context {
@@ -355,16 +362,18 @@ struct llm_build_nemotron : public llm_graph_context {
 struct llm_build_nemotron_h : public llm_graph_context_mamba {
     llm_build_nemotron_h(const llama_model & model, const llm_graph_params & params);
     ggml_tensor * build_ffn_layer(ggml_tensor * cur, const llama_model & model, const int il);
-    ggml_tensor * build_attention_layer(ggml_tensor * cur, llm_graph_input_attn_kv * inp_attn,
-        const llama_model & model, const int64_t n_embd_head, const int il);
+    ggml_tensor * build_attention_layer(ggml_tensor *             cur,
+                                        llm_graph_input_attn_kv * inp_attn,
+                                        const llama_model &       model,
+                                        const int64_t             n_embd_head,
+                                        const int                 il);
 };
 
 struct llm_build_neo_bert : public llm_graph_context {
     llm_build_neo_bert(const llama_model & model, const llm_graph_params & params);
 };
 
-template <bool iswa>
-struct llm_build_olmo2 : public llm_graph_context {
+template <bool iswa> struct llm_build_olmo2 : public llm_graph_context {
     llm_build_olmo2(const llama_model & model, const llm_graph_params & params);
 };
 
@@ -396,17 +405,23 @@ struct llm_build_phi2 : public llm_graph_context {
     llm_build_phi2(const llama_model & model, const llm_graph_params & params);
 };
 
-template<bool iswa>
-struct llm_build_phi3 : public llm_graph_context {
+template <bool iswa> struct llm_build_phi3 : public llm_graph_context {
     llm_build_phi3(const llama_model & model, const llm_graph_params & params);
 };
 
 struct llm_build_plamo2 : public llm_graph_context_mamba {
     llm_build_plamo2(const llama_model & model, const llm_graph_params & params);
-    private:
-        ggml_tensor * build_plamo2_mamba_layer(llm_graph_input_rs * inp, ggml_tensor * cur, const llama_model & model, const llama_ubatch & ubatch, int il);
-        ggml_tensor * build_plamo2_attn_layer(llm_graph_input_attn_kv * inp, ggml_tensor * inp_pos, ggml_tensor * cur,
-                                                const llama_model & model, int il);
+  private:
+    ggml_tensor * build_plamo2_mamba_layer(llm_graph_input_rs * inp,
+                                           ggml_tensor *        cur,
+                                           const llama_model &  model,
+                                           const llama_ubatch & ubatch,
+                                           int                  il);
+    ggml_tensor * build_plamo2_attn_layer(llm_graph_input_attn_kv * inp,
+                                          ggml_tensor *             inp_pos,
+                                          ggml_tensor *             cur,
+                                          const llama_model &       model,
+                                          int                       il);
 };
 
 struct llm_build_plamo : public llm_graph_context {
@@ -449,26 +464,23 @@ struct llm_build_qwen3vl : public llm_graph_context {
 struct llm_build_qwen3vlmoe : public llm_graph_context {
     llm_build_qwen3vlmoe(const llama_model & model, const llm_graph_params & params);
 };
+
 struct llm_build_qwen3next : public llm_graph_context_mamba {
     llm_build_qwen3next(const llama_model & model, const llm_graph_params & params);
-private:
-    ggml_tensor * build_layer_attn(
-    llm_graph_input_attn_kv * inp_attn,
-                ggml_tensor * cur,
-                ggml_tensor * inp_pos,
-                        int   il);
+  private:
+    ggml_tensor * build_layer_attn(llm_graph_input_attn_kv * inp_attn,
+                                   ggml_tensor *             cur,
+                                   ggml_tensor *             inp_pos,
+                                   int                       il);
 
-    ggml_tensor * build_layer_attn_linear(
-         llm_graph_input_rs * inp,
-                ggml_tensor * cur,
-                ggml_tensor * causal_mask,
-                ggml_tensor * identity,
-                ggml_tensor * diag_mask,
-                        int   il);
+    ggml_tensor * build_layer_attn_linear(llm_graph_input_rs * inp,
+                                          ggml_tensor *        cur,
+                                          ggml_tensor *        causal_mask,
+                                          ggml_tensor *        identity,
+                                          ggml_tensor *        diag_mask,
+                                          int                  il);
 
-    ggml_tensor * build_layer_ffn(
-                ggml_tensor * cur,
-                        int   il);
+    ggml_tensor * build_layer_ffn(ggml_tensor * cur, int il);
 
     // returns pair of output and new state
     std::pair<ggml_tensor *, ggml_tensor *> build_delta_net_chunking(
@@ -535,8 +547,7 @@ struct llm_build_seed_oss : public llm_graph_context {
     llm_build_seed_oss(const llama_model & model, const llm_graph_params & params);
 };
 
-template <bool iswa>
-struct llm_build_smallthinker : public llm_graph_context {
+template <bool iswa> struct llm_build_smallthinker : public llm_graph_context {
     llm_build_smallthinker(const llama_model & model, const llm_graph_params & params);
 };
 
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index c9436c5995..5607055990 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -183,9 +183,7 @@ if (NOT WIN32 OR NOT BUILD_SHARED_LIBS)
     # llama_build_and_test(test-double-float.cpp) # SLOW
 endif()
 
-llama_build_and_test(test-chat-parser.cpp)
 llama_build_and_test(test-chat-peg-parser.cpp peg-parser/simple-tokenize.cpp)
-llama_build_and_test(test-chat-template.cpp)
 llama_build_and_test(test-jinja.cpp)
 llama_test(test-jinja NAME test-jinja-py ARGS -py LABEL python)
 llama_build_and_test(test-json-partial.cpp)
@@ -260,3 +258,5 @@ target_link_libraries(${TEST_TARGET} PRIVATE llama)
 
 llama_build_and_test(test-alloc.cpp)
 target_include_directories(test-alloc PRIVATE ${PROJECT_SOURCE_DIR}/ggml/src)
+
+
diff --git a/tests/test-backend-ops.cpp b/tests/test-backend-ops.cpp
index fbe23037cc..1252b9e588 100644
--- a/tests/test-backend-ops.cpp
+++ b/tests/test-backend-ops.cpp
@@ -7742,6 +7742,8 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
         test_cases.emplace_back(new test_mul_mat(type_a, GGML_TYPE_F32, 1, 64, 256, {1,  1}, {1, 1}));
     }
 
+    test_cases.emplace_back(new test_mul_mat(GGML_TYPE_Q8_0, GGML_TYPE_F32, 6, 4096, 5120, {1, 1}, {1, 1}));
+
 #if 0
     // test the mat-mat path for Metal
     for (int k = 1; k < 512; ++k) {
diff --git a/tests/test-chat-parser.cpp b/tests/test-chat-parser.cpp
deleted file mode 100644
index 6f44a2b421..0000000000
--- a/tests/test-chat-parser.cpp
+++ /dev/null
@@ -1,617 +0,0 @@
-//  Tests chat handling, including grammar generation and parsing for tool calling, for various templates.
-//
-//  Also acts as a CLI to generate a Markdown summary of the formats of Jinja templates,
-//  e.g. given Minja (http://github.com/google/minja) checked out in parent dir:
-//
-//    cmake -B build && cmake --build build --parallel && ./build/bin/test-chat ../minja/build/tests/*.jinja 2>/dev/null
-//
-#include <exception>
-#include <iostream>
-#include <string>
-
-#include "chat-parser.h"
-#include "common.h"
-#include "log.h"
-#include "regex-partial.h"
-
-template <class T>
-static void assert_equals(const std::string_view label, const T & expected, const T & actual) {
-    if (expected != actual) {
-        std::cerr << label << std::endl;
-        std::cerr << "Expected: " << expected << std::endl;
-        std::cerr << "Actual: " << actual << std::endl;
-        std::cerr << std::flush;
-        throw std::runtime_error("Test failed");
-    }
-}
-
-template <class T>
-static void assert_equals(const T & expected, const T & actual) {
-    assert_equals("", expected, actual);
-}
-static void assert_equals(const char * expected, const std::string & actual) {
-  return assert_equals<std::string>(expected, actual);
-}
-
-static void assert_throws(const std::function<void()> & fn, const std::string & expected_exception_pattern = "") {
-    try {
-        fn();
-    } catch (const std::exception & e) {
-      if (expected_exception_pattern.empty()) {
-          return;
-        }
-        std::regex expected_exception_regex(expected_exception_pattern);
-        std::string actual_message = e.what();
-        if (std::regex_search(actual_message, expected_exception_regex)) {
-            return;
-        }
-        throw std::runtime_error("Exception doesn't match expected pattern: " + actual_message + " (pattern: " + expected_exception_pattern + ")");
-        throw std::runtime_error("Exception of unexpected type: " + std::string(e.what()));
-    }
-    throw std::runtime_error("Exception was expected but not thrown");
-}
-
-static void test_reasoning() {
-  //common_log_set_verbosity_thold(LOG_DEFAULT_DEBUG);
-  {
-    common_chat_parser_params params;
-    params.format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
-    params.reasoning_format = COMMON_REASONING_FORMAT_NONE;
-    params.reasoning_in_content = false;
-    params.thinking_forced_open = false;
-    common_chat_msg_parser builder("<tnk>Cogito</tnk>Ergo sum", /* is_partial= */ false, params);
-    assert_equals(false, builder.try_parse_reasoning("<tnk>", "</tnk>"));
-    assert_equals("<tnk>Cogito</tnk>Ergo sum", builder.consume_rest());
-  }
-  {
-    common_chat_parser_params params;
-    params.format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
-    params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
-    params.reasoning_in_content = false;
-    params.thinking_forced_open = false;
-    common_chat_msg_parser builder("<tnk>Cogito</tnk>Ergo sum", /* is_partial= */ false, params);
-    assert_equals(true, builder.try_parse_reasoning("<tnk>", "</tnk>"));
-    assert_equals(std::string("Cogito"), builder.result().reasoning_content);
-    assert_equals("Ergo sum", builder.consume_rest());
-  }
-  {
-    common_chat_parser_params params;
-    params.format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
-    params.reasoning_format = COMMON_REASONING_FORMAT_NONE;
-    params.reasoning_in_content = false;
-    params.thinking_forced_open = false;
-    common_chat_msg_parser builder("Cogito</tnk>Ergo sum", /* is_partial= */ false, params);
-    assert_equals(false, builder.try_parse_reasoning("<tnk>", "</tnk>"));
-    assert_equals("Cogito</tnk>Ergo sum", builder.consume_rest());
-  }
-  {
-    common_chat_parser_params params;
-    params.format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
-    params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
-    params.reasoning_in_content = false;
-    params.thinking_forced_open = true;
-    common_chat_msg_parser builder("Cogito</tnk>Ergo sum", /* is_partial= */ false, params);
-    assert_equals(true, builder.try_parse_reasoning("<tnk>", "</tnk>"));
-    assert_equals(std::string("Cogito"), builder.result().reasoning_content);
-    assert_equals("Ergo sum", builder.consume_rest());
-  }
-  {
-    common_chat_parser_params params;
-    params.format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
-    params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
-    params.reasoning_in_content = true;
-    params.thinking_forced_open = true;
-    common_chat_msg_parser builder("Cogito</tnk>Ergo sum", /* is_partial= */ false, params);
-    assert_equals(true, builder.try_parse_reasoning("<tnk>", "</tnk>"));
-    assert_equals("<think>Cogito</think>", builder.result().content);
-    assert_equals("Ergo sum", builder.consume_rest());
-  }
-  {
-    const std::string variant("content_only_inline_think");
-    common_chat_parser_params params;
-    params.format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
-    params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
-    params.reasoning_in_content = false;
-    params.thinking_forced_open = false;
-    params.parse_tool_calls = false;
-    const std::string input = "<think>Pense</think>Bonjour";
-    auto msg = common_chat_parse(input, false, params);
-    assert_equals(variant, std::string("Pense"), msg.reasoning_content);
-    assert_equals(variant, std::string("Bonjour"), msg.content);
-  }
-  {
-    const std::string variant("llama_3_inline_think");
-    common_chat_parser_params params;
-    params.format = COMMON_CHAT_FORMAT_LLAMA_3_X;
-    params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
-    params.reasoning_in_content = false;
-    params.thinking_forced_open = false;
-    params.parse_tool_calls = false;
-    const std::string input = "<think>Plan</think>Réponse";
-    auto msg = common_chat_parse(input, false, params);
-    assert_equals(variant, std::string("Plan"), msg.reasoning_content);
-    assert_equals(variant, std::string("Réponse"), msg.content);
-  }
-  // Test DeepSeek V3.1 parsing - reasoning content followed by "</think>" and then regular content
-  {
-    common_chat_parser_params params;
-    params.format = COMMON_CHAT_FORMAT_DEEPSEEK_V3_1;
-    params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
-    params.reasoning_in_content = false;
-    params.thinking_forced_open = true;
-    params.parse_tool_calls = true;
-    const std::string variant("deepseek_v3_1_reasoning_format_deepseek");
-    common_chat_msg_parser builder("REASONING</think>ok", /* is_partial= */ false, params);
-    assert_equals(variant, true, builder.try_parse_reasoning("<think>", "</think>"));
-    assert_equals(variant, std::string("REASONING"), builder.result().reasoning_content);
-    assert_equals(variant, std::string("ok"), builder.consume_rest());
-  }
-  // Test DeepSeek V3.1 parsing - reasoning_format none - reasoning content followed by "</think>" and then regular content
-  {
-    common_chat_parser_params params;
-    params.format = COMMON_CHAT_FORMAT_DEEPSEEK_V3_1;
-    params.reasoning_format = COMMON_REASONING_FORMAT_NONE;
-    params.reasoning_in_content = false;
-    params.thinking_forced_open = true;
-    params.parse_tool_calls = true;
-    const std::string variant("deepseek_v3_1_reasoning_format_none");
-    const std::string input = "REASONING</think>ok";
-    auto msg = common_chat_parse(input, false, params);
-    assert_equals(variant, std::string("REASONING</think>ok"), msg.content);
-    assert_equals(variant, std::string(""), msg.reasoning_content);
-  }
-}
-
-static void test_regex() {
-  auto test_throws = [](const std::string & input, const std::string & regex, const std::string & expected_exception_pattern = "") {
-    common_chat_msg_parser builder(input, /* is_partial= */ false, {});
-    assert_throws([&]() { builder.consume_regex(common_regex(regex)); }, expected_exception_pattern);
-  };
-
-  test_throws("Hello, world!", "abc", "^abc$");
-  test_throws("Hello, world!", "e", "^e$");
-
-  {
-    common_chat_msg_parser builder("Hello, world!", /* is_partial= */ false, {});
-    builder.consume_regex(common_regex("Hello"));
-    assert_equals(", world!", builder.consume_rest());
-  }
-
-  {
-    // When in non partial mode, we can say whether the regex was consumed or not.
-    common_chat_msg_parser builder("Hello,", /* is_partial= */ false, {});
-    assert_equals(false, builder.try_consume_regex(common_regex("Hello, world!")).has_value());
-  }
-  {
-    common_chat_msg_parser builder("Hello,", /* is_partial= */ false, {});
-    auto res = builder.try_consume_regex(common_regex("H(el)l(?:o, world!)?"));
-    assert_equals(true, res.has_value());
-    // Verify captures
-    assert_equals<size_t>(2, res->groups.size());
-    assert_equals("Hell", builder.str(res->groups[0]));
-    assert_equals("el", builder.str(res->groups[1]));
-    // Verify position is after the match
-    assert_equals<size_t>(4, builder.pos());
-    assert_equals("o,", builder.consume_rest());
-  }
-  {
-    // But in partial mode, we have a partial final match / can't decide, so we throw a partial exception.
-    common_chat_msg_parser builder("Hello,", /* is_partial= */ true, {});
-    assert_throws([&]() {
-      builder.try_consume_regex(common_regex("Hello, world!"));
-    }, "^Hello, world!$");
-  }
-
-  // Now regardless of the mode, we can tell these aren't a match.
-  for (const auto is_partial : {false, true}) {
-    common_chat_msg_parser builder("Hello,", is_partial, {});
-    assert_equals(false, builder.try_consume_regex(common_regex("a(b|c)(d|e)f")).has_value());
-  }
-  for (const auto is_partial : {false, true}) {
-    common_chat_msg_parser builder("Hello,", is_partial, {});
-    assert_equals(false, builder.try_consume_literal("Oh"));
-  }
-}
-
-const std::vector<std::string> barely_healable_jsons = {
-  "{",
-  "{\"",
-  "{\"\\",
-  "{\"n",
-  "{\"name\"",
-  "{\"name\":",
-  "{\"name\":\"",
-  "{\"name\":\"\\",
-  "{\"name\":\"python",
-  "{\"name\":\"python\\",
-  "{\",",
-  "{\":",
-  "{\"[",
-  "{\"]",
-  "{\"{",
-  "{\"}",
-  "{\"1",
-  "{\"name\":\",",
-  "{\"name\":\":",
-  "{\"name\":\"[",
-  "{\"name\":\"]",
-  "{\"name\":\"{",
-  "{\"name\":\"}",
-  "{\"name\":\"1",
-};
-
-static void test(const std::string & input, bool is_partial, const std::vector<std::vector<std::string>> & args_paths, const std::vector<std::vector<std::string>> & content_paths, const std::string & expected) {
-  common_chat_msg_parser builder(input, is_partial, {});
-  auto js = builder.try_consume_json_with_dumped_args(args_paths, content_paths);
-  assert_equals(true, js.has_value());
-  assert_equals(is_partial, js->is_partial);
-  assert_equals(expected, args_paths.size() == 1 && args_paths[0].empty() ? js->value.get<std::string>() : js->value.dump());
-}
-
-static void test_deepseek_v3_1_tool_calls() {
-    //common_log_set_verbosity_thold(LOG_DEFAULT_DEBUG);
-    // variant: happy path for when it works as the model card says it should
-    const std::string variant("simple");
-    common_chat_parser_params params;
-    params.format = COMMON_CHAT_FORMAT_DEEPSEEK_V3_1;
-    params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
-    params.reasoning_in_content = false;
-    params.thinking_forced_open = false;
-    params.parse_tool_calls = true;
-    const std::string input = "<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>get_time<｜tool▁sep｜>{\"city\": \"Tokyo\"}<｜tool▁call▁end｜><｜tool▁calls▁end｜>";
-    auto msg = common_chat_parse(input, false, params);
-    assert_equals<std::size_t>(variant, 1, msg.tool_calls.size());
-    assert_equals(variant, std::string("get_time"), msg.tool_calls[0].name);
-    // JSON arguments are dumped without spaces
-    assert_equals(variant, std::string("{\"city\":\"Tokyo\"}"), msg.tool_calls[0].arguments);
-    assert_equals(variant, std::string(""), msg.content);
-    assert_equals(variant, std::string(""), msg.reasoning_content);
-
-    // variant: simple + thinking open
-    {
-        common_chat_parser_params params;
-        params.format = COMMON_CHAT_FORMAT_DEEPSEEK_V3_1;
-        params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
-        params.reasoning_in_content = false;
-        params.thinking_forced_open = true;
-        params.parse_tool_calls = true;
-        const std::string variant("simple_thinking");
-        const std::string in = "REASONING</think><｜tool▁calls▁begin｜><｜tool▁call▁begin｜>get_time<｜tool▁sep｜>{\"city\": \"Tokyo\"}<｜tool▁call▁end｜><｜tool▁calls▁end｜>";
-        auto m = common_chat_parse(in, false, params);
-        assert_equals<std::size_t>(variant, 1, m.tool_calls.size());
-        assert_equals(variant, std::string("get_time"), m.tool_calls[0].name);
-        assert_equals(variant, std::string("{\"city\":\"Tokyo\"}"), m.tool_calls[0].arguments);
-        assert_equals(variant, std::string(""), m.content);
-        assert_equals(variant, std::string("REASONING"), m.reasoning_content);
-    }
-    // variant: simple + multiple tool calls
-    {
-        common_chat_parser_params params;
-        params.format = COMMON_CHAT_FORMAT_DEEPSEEK_V3_1;
-        params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
-        params.reasoning_in_content = false;
-        params.thinking_forced_open = false;
-        params.parse_tool_calls = true;
-        const std::string variant("simple_multiple_tool_calls");
-        const std::string in = "CONTENT<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>get_time<｜tool▁sep｜>{\"city\": \"Paris\"}<｜tool▁call▁end｜><｜tool▁call▁begin｜>get_weather<｜tool▁sep｜>{\"city\": \"Paris\"}<｜tool▁call▁end｜><｜tool▁calls▁end｜>";
-        auto m = common_chat_parse(in, false, params);
-        assert_equals<std::size_t>(variant, 2, m.tool_calls.size());
-        assert_equals(variant, std::string("get_time"), m.tool_calls[0].name);
-        assert_equals(variant, std::string("{\"city\":\"Paris\"}"), m.tool_calls[0].arguments);
-        assert_equals(variant, std::string("get_weather"), m.tool_calls[1].name);
-        assert_equals(variant, std::string("{\"city\":\"Paris\"}"), m.tool_calls[1].arguments);
-        assert_equals(variant, std::string("CONTENT"), m.content);
-        assert_equals(variant, std::string(""), m.reasoning_content);
-    }
-
-
-    // variant: thinking forced open + tool call in reasoning content
-    {
-        common_chat_parser_params params;
-        params.format = COMMON_CHAT_FORMAT_DEEPSEEK_V3_1;
-        params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
-        params.reasoning_in_content = false;
-        params.thinking_forced_open = true;
-        params.parse_tool_calls = true;
-        const std::string variant("thinking_forced_open_tool_call_in_reasoning");
-        const std::string in = "REASONING<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>get_time2<｜tool▁sep｜>{\"city\": \"Tokyo2\"}<｜tool▁call▁end｜><｜tool▁calls▁end｜>REASONING</think><｜tool▁calls▁begin｜><｜tool▁call▁begin｜>get_time<｜tool▁sep｜>{\"city\": \"Tokyo\"}<｜tool▁call▁end｜><｜tool▁calls▁end｜>";
-        auto m = common_chat_parse(in, false, params);
-        assert_equals<std::size_t>(variant, 1, m.tool_calls.size());
-        assert_equals(variant, std::string("get_time"), m.tool_calls[0].name);
-        assert_equals(variant, std::string("{\"city\":\"Tokyo\"}"), m.tool_calls[0].arguments);
-        assert_equals(variant, std::string(""), m.content);
-        assert_equals(variant, std::string("REASONING<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>get_time2<｜tool▁sep｜>{\"city\": \"Tokyo2\"}<｜tool▁call▁end｜><｜tool▁calls▁end｜>REASONING"), m.reasoning_content);
-    }
-
-    // variant: thinking forced open + tool call in reasoning content + no closing think + not partial
-    //          This is a bit of a fine tuning issue on the model's part IMO. It really should not be attempting
-    //          to make tool calls in reasoning content according to the model card, but it does sometimes, so
-    //          add the reasoning content as regular content and parse the tool calls.
-    {
-        common_chat_parser_params params;
-        params.format = COMMON_CHAT_FORMAT_DEEPSEEK_V3_1;
-        params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
-        params.reasoning_in_content = false;
-        params.thinking_forced_open = true;
-        params.parse_tool_calls = true;
-        const std::string variant("thinking_forced_open_tool_call_in_reasoning_no_closing_think_not_partial");
-        const std::string in = "REASONING<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>get_time<｜tool▁sep｜>{\"city\": \"Tokyo\"}<｜tool▁call▁end｜><｜tool▁calls▁end｜>";
-        auto m = common_chat_parse(in, false, params);
-        assert_equals(variant, std::string("REASONING"), m.content);
-        assert_equals(variant, std::string(""), m.reasoning_content);
-        assert_equals<std::size_t>(variant, 1, m.tool_calls.size());
-        assert_equals(variant, std::string("get_time"), m.tool_calls[0].name);
-        assert_equals(variant, std::string("{\"city\":\"Tokyo\"}"), m.tool_calls[0].arguments);
-    }
-
-    // variant: thinking forced open + tool call in reasoning content + no closing think + partial
-    {
-        common_chat_parser_params params;
-        params.format = COMMON_CHAT_FORMAT_DEEPSEEK_V3_1;
-        params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
-        params.reasoning_in_content = false;
-        params.thinking_forced_open = true;
-        params.parse_tool_calls = true;
-        const std::string variant("thinking_forced_open_tool_call_in_reasoning_no_closing_think_partial");
-        const std::string in = "REASONING<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>get_time<｜tool▁sep｜>{\"city\": \"Tokyo\"}<｜tool▁call▁end｜><｜tool▁calls▁end｜>";
-        auto m = common_chat_parse(in, /* is_partial= */ true, params);
-        assert_equals(variant, std::string("REASONING<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>get_time<｜tool▁sep｜>{\"city\": \"Tokyo\"}<｜tool▁call▁end｜><｜tool▁calls▁end｜>"), m.reasoning_content);
-        assert_equals(variant, std::string(""), m.content);
-        assert_equals<std::size_t>(variant, 0, m.tool_calls.size());
-    }
-
-    // variant: thinking not forced open + reasoning + regular content + no tool calls
-    {
-        common_chat_parser_params params;
-        params.format = COMMON_CHAT_FORMAT_DEEPSEEK_V3_1;
-        params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
-        params.reasoning_in_content = false;
-        params.thinking_forced_open = true;
-        params.parse_tool_calls = true;
-        const std::string variant("thinking_forced_open_reasoning_regular_content_no_tool_calls");
-        const std::string in = "REASONING</think>CONTENT";
-        auto m = common_chat_parse(in, false, params);
-        assert_equals<std::size_t>(variant, 0, m.tool_calls.size());
-        assert_equals(variant, std::string("CONTENT"), m.content);
-        assert_equals(variant, std::string("REASONING"), m.reasoning_content);
-    }
-    // variant: thinking not forced open + missing reasoning + no tool calls
-    {
-        common_chat_parser_params params;
-        params.format = COMMON_CHAT_FORMAT_DEEPSEEK_V3_1;
-        params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
-        params.reasoning_in_content = false;
-        params.thinking_forced_open = false;
-        params.parse_tool_calls = true;
-        const std::string variant("thinking_not_forced_open_missing_reasoning_no_tool_calls");
-        const std::string in = "CONTENT";
-        auto m = common_chat_parse(in, false, params);
-        assert_equals<std::size_t>(variant, 0, m.tool_calls.size());
-        assert_equals(variant, std::string("CONTENT"), m.content);
-        assert_equals(variant, std::string(""), m.reasoning_content);
-    }
-}
-
-static void test_with_args(const std::string & input, const std::string & expected, bool parse_as_partial = true, bool is_partial = true) {
-  common_chat_msg_parser builder(input, parse_as_partial, {});
-  auto js = builder.try_consume_json_with_dumped_args({{"args"}}, {});
-  assert_equals(true, js.has_value());
-  assert_equals(is_partial, js->is_partial);
-  assert_equals(expected, js->value.dump());
-}
-
-static void test_json_with_dumped_args_no_args() {
-  // Normal JSON, nothing to heal, nothing to dump
-  test("{\"name\": \"python\"}", false, {}, {}, "{\"name\":\"python\"}");
-  // Full json is args
-  test("{\"name\": \"python\"}", false, {{}}, {}, "{\"name\":\"python\"}");
-
-  // If the arguments are further down, don't heal partial content.
-  for (const auto & src : barely_healable_jsons) {
-    test(src, true, {{"arguments"}}, {}, "{}");
-  }
-  // But heal content that isn't partial.
-  test("{\"name\": \"python\"", true, {{"arguments"}}, {}, "{\"name\":\"python\"}");
-}
-
-static void test_json_with_dumped_args() {
-
-  // Partial content.
-  test("{\"content\": \"t", true, {}, {{"content"}}, "{\"content\":\"t\"}");
-  test("{\"content\": \"", true, {}, {{"content"}}, "{\"content\":\"\"}");
-  test("{\"content\": ", true, {}, {{"content"}}, "{}");
-
-  // If the entire JSON is the arguments, healing it them dumping it produces the same output as the input (just reformatted).
-  test("{\"name\": \"python", true, {{}}, {}, "{\"name\":\"python");
-  for (const auto & src : barely_healable_jsons) {
-    test(src, true, {{}}, {}, src);
-  }
-
-  // Full JSON w/ args
-  for (auto parse_as_partial : {true, false}) {
-    test_with_args(
-      R"({"name": "python", "args": {"arg1": 1}})",
-      R"({"name":"python","args":"{\"arg1\":1}"})",
-      parse_as_partial,
-      /* is_partial= */ false
-    );
-  }
-
-  // Partial JSON w/ partial args
-  test_with_args(
-    R"({"foo": "bar", "args": {")",
-    R"({"foo":"bar","args":"{\""})"
-  );
-  // Partial args broken in object key
-  test_with_args(
-    R"({"foo": "bar", "args": {"ar)",
-    R"({"foo":"bar","args":"{\"ar"})"
-  );
-  // Partial args broken after object key
-  test_with_args(
-    R"({"foo": "bar", "args": {"arg1")",
-    R"({"foo":"bar","args":"{\"arg1\""})"
-  );
-  // Partial args broken before object value
-  test_with_args(
-    R"({"foo": "bar", "args": {"arg1":)",
-    R"({"foo":"bar","args":"{\"arg1\":"})"
-  );
-  // Partial args broken before object value (space)
-  test_with_args(
-    R"({"foo": "bar", "args": {"arg1": )",
-    R"({"foo":"bar","args":"{\"arg1\":"})"
-  );
-  // Partial args broken in object value that may not be complete (int)
-  test_with_args(
-    R"({"foo": "bar", "args": {"arg1": 1)",
-    R"({"foo":"bar","args":"{\"arg1\":"})"
-  );
-  // Partial args broken in object value that is complete (int)
-  test_with_args(
-    R"({"foo": "bar", "args": {"arg1": 1 )",
-    R"({"foo":"bar","args":"{\"arg1\":1"})"
-  );
-  // Partial args broken in object value that is incomplete (string)
-  test_with_args(
-    R"({"foo": "bar", "args": {"arg1": ")",
-    R"({"foo":"bar","args":"{\"arg1\":\""})"
-  );
-  // Partial args broken in object value that is complete (string)
-  test_with_args(
-    R"({"foo": "bar", "args": {"arg1": "1")",
-    R"({"foo":"bar","args":"{\"arg1\":\"1\""})"
-  );
-  // Partial args broken on array opening
-  test_with_args(
-    R"({"foo": "bar", "args": [)",
-    R"({"foo":"bar","args":"["})"
-  );
-  // Partial args broken on array value that is incomplete (int)
-  test_with_args(
-    R"({"foo": "bar", "args": [1)",
-    R"({"foo":"bar","args":"["})"
-  );
-  // Partial args broken on array value that is complete (int)
-  test_with_args(
-    R"({"foo": "bar", "args": [1 )",
-    R"({"foo":"bar","args":"[1"})"
-  );
-  // Partial args broken on array value that is complete (string)
-  test_with_args(
-    R"({"foo": "bar", "args": ["1")",
-    R"({"foo":"bar","args":"[\"1\""})"
-  );
-  // Partial args broken after array value
-  test_with_args(
-    R"({"foo": "bar", "args": [1,)",
-    R"({"foo":"bar","args":"[1,"})"
-  );
-  // Partial args broken on nested array
-  test_with_args(
-    R"({"foo": "bar", "args": {"arg1": [)",
-    R"({"foo":"bar","args":"{\"arg1\":["})"
-  );
-
-  // Unicode tests
-  test_with_args(
-    R"({"foo": "bar", "args": {"arg1": "\u)",
-    R"({"foo":"bar","args":"{\"arg1\":\"\\u"})"
-  );
-  test_with_args(
-    R"({"foo": "bar", "args": {"arg1": "\u0)",
-    R"({"foo":"bar","args":"{\"arg1\":\"\\u0"})"
-  );
-  test_with_args(
-    R"({"foo": "bar", "args": {"arg1": "\u00)",
-    R"({"foo":"bar","args":"{\"arg1\":\"\\u00"})"
-  );
-  test_with_args(
-    R"({"foo": "bar", "args": {"arg1": "\u000)",
-    R"({"foo":"bar","args":"{\"arg1\":\"\\u000"})"
-  );
-  test_with_args(
-    R"({"foo": "bar", "args": {"arg1": "\u0000)",
-    R"({"foo":"bar","args":"{\"arg1\":\"\\u0000"})"
-  );
-  test_with_args(
-    R"({"foo": "bar", "args": {"arg1": "\ud8)",
-    R"({"foo":"bar","args":"{\"arg1\":\"\\ud8"})"
-  );
-  test_with_args(
-    R"({"foo": "bar", "args": {"arg1": "\ud80)",
-    R"({"foo":"bar","args":"{\"arg1\":\"\\ud80"})"
-  );
-  test_with_args(
-    R"({"foo": "bar", "args": {"arg1": "\ud800)",
-    R"({"foo":"bar","args":"{\"arg1\":\"\\ud800"})"
-  );
-  test_with_args(
-    R"({"foo": "bar", "args": {"arg1": "\ud800\)",
-    R"({"foo":"bar","args":"{\"arg1\":\"\\ud800\\"})"
-  );
-  test_with_args(
-    R"({"foo": "bar", "args": {"arg1": "\ud800\u)",
-    R"({"foo":"bar","args":"{\"arg1\":\"\\ud800\\u"})"
-  );
-  test_with_args(
-    R"({"foo": "bar", "args": {"arg1": "\ud800\ud)",
-    R"({"foo":"bar","args":"{\"arg1\":\"\\ud800\\ud"})"
-  );
-  test_with_args(
-    R"({"foo": "bar", "args": {"arg1": "\ud800\udc)",
-    R"({"foo":"bar","args":"{\"arg1\":\"\\ud800\\udc"})"
-  );
-  test_with_args(
-    R"({"foo": "bar", "args": {"arg1": "\ud800\udc0)",
-    R"({"foo":"bar","args":"{\"arg1\":\"\\ud800\\udc0"})"
-  );
-  test_with_args(
-    R"({"foo": "bar", "args": {"arg1": "\ud800\udc00)",
-    R"({"foo":"bar","args":"{\"arg1\":\"\\ud800\\udc00"})"
-  );
-}
-
-static void test_positions() {
-  {
-    common_chat_msg_parser builder("Hello, world!", /* is_partial= */ false, {});
-    assert_equals<size_t>(0, builder.pos());
-    assert_throws([&]() { builder.move_to(100); });
-    assert_equals<size_t>(0, builder.pos());
-    assert_throws([&]() { builder.move_back(1); });
-    assert_equals<size_t>(0, builder.pos());
-
-    builder.move_to(8);
-    assert_equals<size_t>(8, builder.pos());
-    builder.move_back(1);
-    assert_equals<size_t>(7, builder.pos());
-    assert_equals("world!", builder.consume_rest());
-
-    builder.move_to(0);
-    assert_equals<size_t>(0, builder.pos());
-
-    assert_throws([&]() { builder.finish(); });
-    assert_equals<size_t>(0, builder.pos());
-
-    builder.move_to(builder.input().size());
-    builder.finish();
-  }
-  {
-    common_chat_msg_parser builder("Hello, world!", /* is_partial= */ true, {});
-
-    builder.move_to(builder.input().size());
-    assert_equals<size_t>(builder.input().size(), builder.pos());
-    builder.finish();
-  }
-}
-
-int main() {
-    test_positions();
-    test_json_with_dumped_args_no_args();
-    test_json_with_dumped_args();
-    test_reasoning();
-    test_regex();
-    test_deepseek_v3_1_tool_calls();
-    std::cout << "All tests passed!\n";
-    return 0;
-}
diff --git a/tests/test-chat-peg-parser.cpp b/tests/test-chat-peg-parser.cpp
index f767c73c27..ae82966699 100644
--- a/tests/test-chat-peg-parser.cpp
+++ b/tests/test-chat-peg-parser.cpp
@@ -1,8 +1,3 @@
-#include <string>
-#include <iostream>
-#include <numeric>
-
-#include "chat-parser.h"
 #include "chat-peg-parser.h"
 #include "chat.h"
 #include "common.h"
@@ -10,6 +5,11 @@
 #include "peg-parser.h"
 #include "testing.h"
 #include "peg-parser/simple-tokenize.h"
+
+#include <iostream>
+#include <numeric>
+#include <string>
+
 #include "nlohmann/json.hpp"
 
 using json = nlohmann::ordered_json;
@@ -17,9 +17,11 @@ using json = nlohmann::ordered_json;
 static json create_tools();
 static void test_example_native(testing & t);
 static void test_example_qwen3_coder(testing & t);
+static void test_example_qwen3_non_coder(testing & t);
 static void test_command7_parser_compare(testing & t);
+static void test_prefix_tool_names(testing & t);
 
-int main(int argc, char *argv[]) {
+int main(int argc, char * argv[]) {
     testing t(std::cout);
     if (argc >= 2) {
         t.set_filter(argv[1]);
@@ -32,7 +34,9 @@ int main(int argc, char *argv[]) {
 
     t.test("native", test_example_native);
     t.test("qwen3 coder", test_example_qwen3_coder);
+    t.test("qwen3 non-coder", test_example_qwen3_non_coder);
     t.test("comparison", test_command7_parser_compare);
+    t.test("prefix tool names", test_prefix_tool_names);
 
     return t.summary();
 }
@@ -41,87 +45,75 @@ static json create_tools() {
     json tools = json::array();
 
     json tool_weather = {
-        {"type", "function"},
-        {"function", {
-            {"name", "get_current_weather"},
-            {"description", "Get the current weather in a given location"},
-            {"parameters", {
-                {"type", "object"},
-                {"properties", {
-                    {"location", {
-                        {"type", "string"},
-                        {"description", "The city and state, e.g. San Francisco, CA"}
-                    }},
-                    {"unit", {
-                        {"type", "string"},
-                        {"enum", {"celsius", "fahrenheit"}},
-                        {"description", "The temperature unit to use. Infer this from the users location."}
-                    }}
-                }},
-                {"required", {"location", "unit"}},
-            }},
-        }}
+        { "type",     "function" },
+        { "function",
+         {
+              { "name", "get_current_weather" },
+              { "description", "Get the current weather in a given location" },
+              { "parameters",
+                {
+                    { "type", "object" },
+                    { "properties",
+                      { { "location",
+                          { { "type", "string" }, { "description", "The city and state, e.g. San Francisco, CA" } } },
+                        { "unit",
+                          { { "type", "string" },
+                            { "enum", { "celsius", "fahrenheit" } },
+                            { "description",
+                              "The temperature unit to use. Infer this from the users location." } } } } },
+                    { "required", { "location", "unit" } },
+                } },
+          }                      }
     };
     tools.push_back(tool_weather);
 
     json tool_forecast = {
-        {"type", "function"},
-        {"function", {
-            {"name", "get_forecast"},
-            {"description", "Get the weather forecast for a given location"},
-            {"parameters", {
-                {"type", "object"},
-                {"properties", {
-                    {"location", {
-                        {"type", "string"},
-                        {"description", "The city and state, e.g. San Francisco, CA"}
-                    }},
-                    {"unit", {
-                        {"type", "string"},
-                        {"enum", {"celsius", "fahrenheit"}},
-                        {"description", "The temperature unit to use. Infer this from the users location."}
-                    }},
-                    {"days", {
-                        {"type", "integer"},
-                        {"description", "Number of days to forecast (1-10)"},
-                        {"minimum", 1},
-                        {"maximum", 10}
-                    }}
-                }},
-                {"required", {"location", "unit"}},
-            }},
-        }}
+        { "type",     "function" },
+        { "function",
+         {
+              { "name", "get_forecast" },
+              { "description", "Get the weather forecast for a given location" },
+              { "parameters",
+                {
+                    { "type", "object" },
+                    { "properties",
+                      { { "location",
+                          { { "type", "string" }, { "description", "The city and state, e.g. San Francisco, CA" } } },
+                        { "unit",
+                          { { "type", "string" },
+                            { "enum", { "celsius", "fahrenheit" } },
+                            { "description", "The temperature unit to use. Infer this from the users location." } } },
+                        { "days",
+                          { { "type", "integer" },
+                            { "description", "Number of days to forecast (1-10)" },
+                            { "minimum", 1 },
+                            { "maximum", 10 } } } } },
+                    { "required", { "location", "unit" } },
+                } },
+          }                      }
     };
     tools.push_back(tool_forecast);
 
     json tool_search = {
-        {"type", "function"},
-        {"function", {
-            {"name", "search_knowledge_base"},
-            {"description", "Search the internal technical documentation knowledge base."},
-            {"parameters", {
-                {"type", "object"},
-                {"properties", {
-                    {"query", {
-                        {"type", "string"},
-                        {"description", "The search query string."}
-                    }},
-                    {"max_results", {
-                        {"type", "integer"},
-                        {"description", "The maximum number of results to return."},
-                        {"default", 5}
-                    }},
-                    {"category", {
-                        {"type", "string"},
-                        {"enum", {"api", "troubleshooting", "billing", "general"}},
-                        {"description", "Filter search by specific category."}
-                    }}
-                }},
-                {"required", {"query", "category"}},
-                {"additionalProperties", false}
-            }},
-            {"strict", true}
-        }}
+        { "type",     "function" },
+        { "function",
+         { { "name", "search_knowledge_base" },
+            { "description", "Search the internal technical documentation knowledge base." },
+            { "parameters",
+              { { "type", "object" },
+                { "properties",
+                  { { "query", { { "type", "string" }, { "description", "The search query string." } } },
+                    { "max_results",
+                      { { "type", "integer" },
+                        { "description", "The maximum number of results to return." },
+                        { "default", 5 } } },
+                    { "category",
+                      { { "type", "string" },
+                        { "enum", { "api", "troubleshooting", "billing", "general" } },
+                        { "description", "Filter search by specific category." } } } } },
+                { "required", { "query", "category" } },
+                { "additionalProperties", false } } },
+            { "strict", true } } }
     };
     tools.push_back(tool_search);
 
@@ -131,39 +123,39 @@ static json create_tools() {
 struct tool_argument {
     std::string name;
     std::string type;
-    bool is_required;
-    json schema;
+    bool        is_required;
+    json        schema;
 };
 
 struct tool_definition {
-    std::string name;
+    std::string                name;
     std::vector<tool_argument> arguments;
-    json schema;
+    json                       schema;
 };
 
 // Test fictitious model output that emits arguments as JSON.
 static void test_example_native(testing & t) {
     struct test_case {
         // Parameters
-        std::string name;
-        json tools;
+        std::string             name;
+        json                    tools;
         common_chat_tool_choice tool_choice;
         common_reasoning_format reasoning_format;
-        json json_schema;
-        bool parallel_tool_calls;
-        bool thinking_forced_open;
-        std::string input;
+        json                    json_schema;
+        bool                    parallel_tool_calls;
+        bool                    thinking_forced_open;
+        std::string             input;
 
         // Expect
-        std::string expect_reasoning;
-        std::string expect_content;
+        std::string                        expect_reasoning;
+        std::string                        expect_content;
         std::vector<common_chat_tool_call> expect_tool_calls;
     };
 
     auto build_parser = [](const test_case & tc) {
-        return build_chat_peg_native_parser([&](common_chat_peg_native_builder & p) {
+        return build_chat_peg_unified_parser([&](common_chat_peg_unified_builder & p) {
             auto reasoning_in_content = (tc.reasoning_format == COMMON_REASONING_FORMAT_NONE);
-            auto reasoning = p.eps();
+            auto reasoning            = p.eps();
             if (tc.thinking_forced_open) {
                 // If thinking is forced open, expect a closing tag
                 reasoning = p.reasoning(p.until("</think>")) + "</think>" + p.space();
@@ -174,231 +166,188 @@ static void test_example_native(testing & t) {
 
             // tool calling parser
             if (tc.tools.is_array() && !tc.tools.empty()) {
-                auto tools = p.choice();
-                for (const auto & tool : tc.tools) {
-                    const auto & function = tool.at("function");
-                    std::string name = function.at("name");
-                    const auto & schema = function.at("parameters");
+                auto tool_call =
+                    p.standard_json_tools("<tool_call>[", "]</tool_call>", tc.tools, tc.parallel_tool_calls,
+                                          tc.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED);
 
-                    auto tool_name = p.json_member("name", "\"" + p.tool_name(p.literal(name)) + "\"");
-                    auto tool_args = p.json_member("arguments", p.tool_args(p.schema(p.json(), "tool-" + name + "-schema", schema)));
-
-                    tools |= p.rule("tool-" + name, p.tool_open(p.literal("{")) << tool_name << "," << tool_args << "}");
-                };
-
-                auto parallel_calls = p.eps();
-                if (tc.parallel_tool_calls) {
-                    parallel_calls = p.zero_or_more("," << tools);
-                }
-
-                auto tool_call = p.trigger_rule("tool-call",
-                    p.sequence({
-                        p.literal("<tool_call>["),
-                        tools,
-                        parallel_calls,
-                        p.literal("]</tool_call>")
-                    })
-                );
-
-                return p.sequence({
-                    (reasoning_in_content ? p.eps() : reasoning),
-                    p.content(p.until("<tool_call>")),
-                    p.optional(p.space() + tool_call),
-                    p.space(),
-                    p.end()
-                });
+                return p.sequence({ (reasoning_in_content ? p.eps() : reasoning), p.content(p.until("<tool_call>")),
+                                    p.optional(p.space() + tool_call), p.space(), p.end() });
             }
 
             // response_format parser
             if (tc.json_schema.is_object() && !tc.json_schema.empty()) {
-                return p.sequence({
-                    (reasoning_in_content ? p.eps() : reasoning),
-                    p.content(p.schema(p.json(), "response-output", tc.json_schema)),
-                    p.space(),
-                    p.end()
-                });
+                return p.sequence({ (reasoning_in_content ? p.eps() : reasoning),
+                                    p.content(p.schema(p.json(), "response-output", tc.json_schema)), p.space(),
+                                    p.end() });
             }
 
             // Content-only parser
-            return p.sequence({
-                (reasoning_in_content ? p.eps() : reasoning),
-                p.content(p.rest()),
-                p.end()
-            });
+            return p.sequence({ (reasoning_in_content ? p.eps() : reasoning), p.content(p.rest()), p.end() });
         });
     };
 
     std::vector<test_case> test_cases = std::vector<test_case>{
         {
-            /* .name =                 */ "content with thinking_forced_open = false",
-            /* .tools =                */ {},
-            /* .tool_choice =          */ COMMON_CHAT_TOOL_CHOICE_NONE,
-            /* .reasoning_format =     */ COMMON_REASONING_FORMAT_AUTO,
-            /* .json_schema =          */ {},
-            /* .parallel_tool_calls =  */ false,
-            /* .thinking_forced_open = */ false,
-            /* .input =                */ (
-                "<think>The user said hello, I must say hello back</think>\nHello"
-            ),
-            /* .expect_reasoning =     */ "The user said hello, I must say hello back",
-            /* .expect_content =       */ "Hello",
-            /* .expect_tool_calls =    */ {},
-        },
+         /* .name =                 */ "content with thinking_forced_open = false",
+         /* .tools =                */ {},
+         /* .tool_choice =          */ COMMON_CHAT_TOOL_CHOICE_NONE,
+         /* .reasoning_format =     */ COMMON_REASONING_FORMAT_AUTO,
+         /* .json_schema =          */ {},
+         /* .parallel_tool_calls =  */ false,
+         /* .thinking_forced_open = */ false,
+         /* .input =                */ ("<think>The user said hello, I must say hello back</think>\nHello"),
+         /* .expect_reasoning =     */ "The user said hello, I must say hello back",
+         /* .expect_content =       */ "Hello",
+         /* .expect_tool_calls =    */ {},
+         },
         {
-            /* .name =                 */ "content with thinking_forced_open = false and no reasoning",
-            /* .tools =                */ {},
-            /* .tool_choice =          */ COMMON_CHAT_TOOL_CHOICE_NONE,
-            /* .reasoning_format =     */ COMMON_REASONING_FORMAT_AUTO,
-            /* .json_schema =          */ {},
-            /* .parallel_tool_calls =  */ false,
-            /* .thinking_forced_open = */ false,
-            /* .input =                */ (
-                "Hello"
-            ),
-            /* .expect_reasoning =     */ "",
-            /* .expect_content =       */ "Hello",
-            /* .expect_tool_calls =    */ {},
-        },
+         /* .name =                 */ "content with thinking_forced_open = false and no reasoning",
+         /* .tools =                */ {},
+         /* .tool_choice =          */ COMMON_CHAT_TOOL_CHOICE_NONE,
+         /* .reasoning_format =     */ COMMON_REASONING_FORMAT_AUTO,
+         /* .json_schema =          */ {},
+         /* .parallel_tool_calls =  */ false,
+         /* .thinking_forced_open = */ false,
+         /* .input =                */ ("Hello"),
+         /* .expect_reasoning =     */ "",
+         /* .expect_content =       */ "Hello",
+         /* .expect_tool_calls =    */ {},
+         },
         {
-            /* .name =                 */ "content with thinking_forced_open = false and reasoning_format = none",
-            /* .tools =                */ {},
-            /* .tool_choice =          */ COMMON_CHAT_TOOL_CHOICE_NONE,
-            /* .reasoning_format =     */ COMMON_REASONING_FORMAT_NONE,
-            /* .json_schema =          */ {},
-            /* .parallel_tool_calls =  */ false,
-            /* .thinking_forced_open = */ true,
-            /* .input =                */ (
-                "<think>The user said hello, I must say hello back</think>\nHello"
-            ),
-            /* .expect_reasoning =     */ "",
-            /* .expect_content =       */ "<think>The user said hello, I must say hello back</think>\nHello",
-            /* .expect_tool_calls =    */ {},
-        },
+         /* .name =                 */ "content with thinking_forced_open = false and reasoning_format = none",
+         /* .tools =                */ {},
+         /* .tool_choice =          */ COMMON_CHAT_TOOL_CHOICE_NONE,
+         /* .reasoning_format =     */ COMMON_REASONING_FORMAT_NONE,
+         /* .json_schema =          */ {},
+         /* .parallel_tool_calls =  */ false,
+         /* .thinking_forced_open = */ true,
+         /* .input =                */ ("<think>The user said hello, I must say hello back</think>\nHello"),
+         /* .expect_reasoning =     */ "",
+         /* .expect_content =       */ "<think>The user said hello, I must say hello back</think>\nHello",
+         /* .expect_tool_calls =    */ {},
+         },
         {
-            /* .name =                 */ "content with thinking_forced_open = true",
-            /* .tools =                */ {},
-            /* .tool_choice =          */ COMMON_CHAT_TOOL_CHOICE_NONE,
-            /* .reasoning_format =     */ COMMON_REASONING_FORMAT_AUTO,
-            /* .json_schema =          */ {},
-            /* .parallel_tool_calls =  */ false,
-            /* .thinking_forced_open = */ true,
-            /* .input =                */ (
-                "The user said hello, I must say hello back</think>\nHello"
-            ),
-            /* .expect_reasoning =     */ "The user said hello, I must say hello back",
-            /* .expect_content =       */ "Hello",
-            /* .expect_tool_calls =    */ {},
-        },
+         /* .name =                 */ "content with thinking_forced_open = true",
+         /* .tools =                */ {},
+         /* .tool_choice =          */ COMMON_CHAT_TOOL_CHOICE_NONE,
+         /* .reasoning_format =     */ COMMON_REASONING_FORMAT_AUTO,
+         /* .json_schema =          */ {},
+         /* .parallel_tool_calls =  */ false,
+         /* .thinking_forced_open = */ true,
+         /* .input =                */ ("The user said hello, I must say hello back</think>\nHello"),
+         /* .expect_reasoning =     */ "The user said hello, I must say hello back",
+         /* .expect_content =       */ "Hello",
+         /* .expect_tool_calls =    */ {},
+         },
         {
-            /* .name =                 */ "content with thinking_forced_open = true and reasoning_format = none",
-            /* .tools =                */ {},
-            /* .tool_choice =          */ COMMON_CHAT_TOOL_CHOICE_NONE,
-            /* .reasoning_format =     */ COMMON_REASONING_FORMAT_NONE,
-            /* .json_schema =          */ {},
-            /* .parallel_tool_calls =  */ false,
-            /* .thinking_forced_open = */ true,
-            /* .input =                */ (
-                "The user said hello, I must say hello back</think>\nHello"
-            ),
-            /* .expect_reasoning =     */ "",
-            /* .expect_content =       */ "The user said hello, I must say hello back</think>\nHello",
-            /* .expect_tool_calls =    */ {},
-        },
+         /* .name =                 */ "content with thinking_forced_open = true and reasoning_format = none",
+         /* .tools =                */ {},
+         /* .tool_choice =          */ COMMON_CHAT_TOOL_CHOICE_NONE,
+         /* .reasoning_format =     */ COMMON_REASONING_FORMAT_NONE,
+         /* .json_schema =          */ {},
+         /* .parallel_tool_calls =  */ false,
+         /* .thinking_forced_open = */ true,
+         /* .input =                */ ("The user said hello, I must say hello back</think>\nHello"),
+         /* .expect_reasoning =     */ "",
+         /* .expect_content =       */ "The user said hello, I must say hello back</think>\nHello",
+         /* .expect_tool_calls =    */ {},
+         },
         {
-            /* .name =                 */ "tools with tool_choice = auto and no parallel_tool_calls",
-            /* .tools =                */ create_tools(),
-            /* .tool_choice =          */ COMMON_CHAT_TOOL_CHOICE_AUTO,
-            /* .reasoning_format =     */ COMMON_REASONING_FORMAT_AUTO,
-            /* .json_schema =          */ {},
-            /* .parallel_tool_calls =  */ false,
-            /* .thinking_forced_open = */ true,
-            /* .input =                */ (
-                "I must get the weather in New York</think>\n"
-                "<tool_call>["
-                R"({"name": "get_current_weather", "arguments": {"location": "New York City, NY", "unit": "fahrenheit"}})"
-                "]</tool_call>"
-            ),
-            /* .expect_reasoning =     */ "I must get the weather in New York",
-            /* .expect_content =       */ "",
-            /* .expect_tool_calls =    */ {{
+         /* .name =                 */ "tools with tool_choice = auto and no parallel_tool_calls",
+         /* .tools =                */ create_tools(),
+         /* .tool_choice =          */ COMMON_CHAT_TOOL_CHOICE_AUTO,
+         /* .reasoning_format =     */ COMMON_REASONING_FORMAT_AUTO,
+         /* .json_schema =          */ {},
+         /* .parallel_tool_calls =  */ false,
+         /* .thinking_forced_open = */ true,
+         /* .input =                */
+            ("I must get the weather in New York</think>\n"
+             "<tool_call>["
+             R"({"name": "get_current_weather", "arguments": {"location": "New York City, NY", "unit": "fahrenheit"}})"
+             "]</tool_call>"),
+         /* .expect_reasoning =     */ "I must get the weather in New York",
+         /* .expect_content =       */ "",
+         /* .expect_tool_calls =    */
+            { {
                 /* .name =      */ "get_current_weather",
                 /* .arguments = */ R"({"location": "New York City, NY", "unit": "fahrenheit"})",
                 /* .id =        */ "",
-            }},
-        },
+            } },
+         },
         {
-            /* .name =                 */ "tools with tool_choice = auto and parallel_tool_calls",
-            /* .tools =                */ create_tools(),
-            /* .tool_choice =          */ COMMON_CHAT_TOOL_CHOICE_AUTO,
-            /* .reasoning_format =     */ COMMON_REASONING_FORMAT_AUTO,
-            /* .json_schema =          */ {},
-            /* .parallel_tool_calls =  */ true,
-            /* .thinking_forced_open = */ true,
-            /* .input =                */ (
-                "I must get the weather in New York and San Francisco and a 3 day forecast of each.</think>\nLet me search that for you."
-                "<tool_call>["
-                R"({"name": "get_current_weather", "arguments": {"location": "New York City, NY", "unit": "fahrenheit"}})"
-                ", "
-                R"({"name": "get_current_weather", "arguments": {"location": "San Francisco, CA", "unit": "fahrenheit"}})"
-                ", "
-                R"({"name": "get_forecast", "arguments": {"location": "New York City, NY", "unit": "fahrenheit", "days": 3}})"
-                ", "
-                R"({"name": "get_forecast", "arguments": {"location": "San Francisco, CA", "unit": "fahrenheit", "days": 3}})"
-                "]</tool_call>"
-            ),
-            /* .expect_reasoning =     */ "I must get the weather in New York and San Francisco and a 3 day forecast of each.",
-            /* .expect_content =       */ "Let me search that for you.",
-            /* .expect_tool_calls =    */ {{
-                /* .name =      */ "get_current_weather",
-                /* .arguments = */ R"({"location": "New York City, NY", "unit": "fahrenheit"})",
-                /* .id =        */ "",
-            }, {
-                /* .name =      */ "get_current_weather",
-                /* .arguments = */ R"({"location": "San Francisco, CA", "unit": "fahrenheit"})",
-                /* .id =        */ "",
-            }, {
-                /* .name =      */ "get_forecast",
-                /* .arguments = */ R"({"location": "New York City, NY", "unit": "fahrenheit", "days": 3})",
-                /* .id =        */ "",
-            }, {
-                /* .name =      */ "get_forecast",
-                /* .arguments = */ R"({"location": "San Francisco, CA", "unit": "fahrenheit", "days": 3})",
-                /* .id =        */ "",
-            }},
-        },
+         /* .name =                 */ "tools with tool_choice = auto and parallel_tool_calls",
+         /* .tools =                */ create_tools(),
+         /* .tool_choice =          */ COMMON_CHAT_TOOL_CHOICE_AUTO,
+         /* .reasoning_format =     */ COMMON_REASONING_FORMAT_AUTO,
+         /* .json_schema =          */ {},
+         /* .parallel_tool_calls =  */ true,
+         /* .thinking_forced_open = */ true,
+         /* .input =                */
+            ("I must get the weather in New York and San Francisco and a 3 day forecast of each.</think>\nLet me "
+             "search that for you."
+             "<tool_call>["
+             R"({"name": "get_current_weather", "arguments": {"location": "New York City, NY", "unit": "fahrenheit"}})"
+             ", "
+             R"({"name": "get_current_weather", "arguments": {"location": "San Francisco, CA", "unit": "fahrenheit"}})"
+             ", "
+             R"({"name": "get_forecast", "arguments": {"location": "New York City, NY", "unit": "fahrenheit", "days": 3}})"
+             ", "
+             R"({"name": "get_forecast", "arguments": {"location": "San Francisco, CA", "unit": "fahrenheit", "days": 3}})"
+             "]</tool_call>"),
+         /* .expect_reasoning =     */
+            "I must get the weather in New York and San Francisco and a 3 day forecast of each.",                                                                     /* .expect_content =       */ "Let me search that for you.",
+         /* .expect_tool_calls =    */
+            { {
+                  /* .name =      */ "get_current_weather",
+                  /* .arguments = */ R"({"location": "New York City, NY", "unit": "fahrenheit"})",
+                  /* .id =        */ "",
+              },
+              {
+                  /* .name =      */ "get_current_weather",
+                  /* .arguments = */ R"({"location": "San Francisco, CA", "unit": "fahrenheit"})",
+                  /* .id =        */ "",
+              },
+              {
+                  /* .name =      */ "get_forecast",
+                  /* .arguments = */ R"({"location": "New York City, NY", "unit": "fahrenheit", "days": 3})",
+                  /* .id =        */ "",
+              },
+              {
+                  /* .name =      */ "get_forecast",
+                  /* .arguments = */ R"({"location": "San Francisco, CA", "unit": "fahrenheit", "days": 3})",
+                  /* .id =        */ "",
+              } },
+         },
         {
-            /* .name =                 */ "response_format with thinking_forced_open = true",
-            /* .tools =                */ {},
-            /* .tool_choice =          */ COMMON_CHAT_TOOL_CHOICE_NONE,
-            /* .reasoning_format =     */ COMMON_REASONING_FORMAT_AUTO,
-            /* .json_schema =          */ {
-                {"type", "object"},
-                {"properties", {
-                    {"invoice_number", {{"type", "string"}}},
-                    {"amount", {{"type", "number"}}},
-                    {"due_date", {{"type", "string"}}}
-                }},
-                {"required", {"invoice_number", "amount", "due_date"}}
-            },
-            /* .parallel_tool_calls =  */ false,
-            /* .thinking_forced_open = */ true,
-            /* .input =                */ (
-                "I must produce the invoice in the requested format</think>\n"
-                R"({"invoice_number": "INV-2025-001", "amount": 1250.50, "due_date": "2025-12-31"})"
-            ),
-            /* .expect_reasoning =     */ "I must produce the invoice in the requested format",
-            /* .expect_content =       */ R"({"invoice_number": "INV-2025-001", "amount": 1250.50, "due_date": "2025-12-31"})",
-            /* .expect_tool_calls =    */ {},
-        },
+         /* .name =                 */ "response_format with thinking_forced_open = true",
+         /* .tools =                */ {},
+         /* .tool_choice =          */ COMMON_CHAT_TOOL_CHOICE_NONE,
+         /* .reasoning_format =     */ COMMON_REASONING_FORMAT_AUTO,
+         /* .json_schema =          */
+            { { "type", "object" },
+              { "properties",
+                { { "invoice_number", { { "type", "string" } } },
+                  { "amount", { { "type", "number" } } },
+                  { "due_date", { { "type", "string" } } } } },
+              { "required", { "invoice_number", "amount", "due_date" } } },
+         /* .parallel_tool_calls =  */ false,
+         /* .thinking_forced_open = */ true,
+         /* .input =                */
+            ("I must produce the invoice in the requested format</think>\n"
+             R"({"invoice_number": "INV-2025-001", "amount": 1250.50, "due_date": "2025-12-31"})"),
+         /* .expect_reasoning =     */ "I must produce the invoice in the requested format",
+         /* .expect_content =       */
+            R"({"invoice_number": "INV-2025-001", "amount": 1250.50, "due_date": "2025-12-31"})", /* .expect_tool_calls =    */ {},
+         },
     };
 
     for (const auto & tc : test_cases) {
         t.test(tc.name, [&](testing & t) {
-            auto parser = build_parser(tc);
-            auto lazy = !tc.tools.empty() && tc.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
+            auto parser  = build_parser(tc);
+            auto lazy    = !tc.tools.empty() && tc.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
             auto grammar = build_grammar([&](const common_grammar_builder & builder) {
-                for (auto const & def : tc.tools) {
-                    auto function = def.at("function");
+                for (const auto & def : tc.tools) {
+                    auto function   = def.at("function");
                     auto parameters = function.at("parameters");
                     builder.resolve_refs(parameters);
                 };
@@ -406,17 +355,17 @@ static void test_example_native(testing & t) {
             });
 
             t.log("Grammar:");
-            for (auto const & line : string_split(grammar, "\n")) {
+            for (const auto & line : string_split(grammar, "\n")) {
                 t.log(line);
             }
 
             common_peg_parse_context ctx(tc.input, false);
-            auto result = parser.parse(ctx);
+            auto                     result = parser.parse(ctx);
 
             t.assert_true("success", result.success());
 
             common_chat_msg msg;
-            auto mapper = common_chat_peg_native_mapper(msg);
+            auto            mapper = common_chat_peg_unified_mapper(msg);
             mapper.from_ast(ctx.ast, result);
 
             t.assert_equal("content equal", tc.expect_content, msg.content);
@@ -431,16 +380,16 @@ static void test_example_native(testing & t) {
 }
 
 static void test_example_qwen3_coder(testing & t) {
-    auto tools = create_tools();
-    auto parser = build_chat_peg_constructed_parser([&](common_chat_peg_constructed_builder & p) {
+    auto tools  = create_tools();
+    auto parser = build_chat_peg_unified_parser([&](common_chat_peg_unified_builder & p) {
         auto content = p.rule("content", p.content(p.until("<tool_call>")));
 
         std::vector<common_peg_parser> tool_parsers;
-        for (auto const & def : tools) {
-            auto function = def.at("function");
-            std::string name = function.at("name");
-            auto parameters = function.at("parameters");
-            auto properties = parameters.at("properties");
+        for (const auto & def : tools) {
+            auto        function   = def.at("function");
+            std::string name       = function.at("name");
+            auto        parameters = function.at("parameters");
+            auto        properties = parameters.at("properties");
 
             std::set<std::string> required_properties;
             if (function.contains("required")) {
@@ -450,59 +399,36 @@ static void test_example_qwen3_coder(testing & t) {
             std::vector<common_peg_parser> arg_parsers;
             for (const auto & [param_name, param_schema] : properties.items()) {
                 bool is_required = required_properties.find(param_name) != required_properties.end();
-                auto type = param_schema.value("type", "object");
+                auto type        = param_schema.value("type", "object");
 
-                auto arg = p.tool_arg(p.sequence({
-                    p.tool_arg_open("<parameter=" + p.tool_arg_name(p.literal(param_name)) + ">"),
-                    (type == "string" ?
-                        p.tool_arg_string_value(
-                            p.schema(
-                                p.until_one_of({
-                                    "</parameter>\n<parameter=",
-                                    "</parameter>\n</function>"
-                                }),
-                                "tool-" + name + "-arg-" + param_name + "-schema",
-                                param_schema,
-                                true
-                            )
-                        ) : p.tool_arg_json_value(
-                            p.schema(
-                                p.json(),
-                                "tool-" + name + "-arg-" + param_name + "-schema",
-                                param_schema
-                            )
-                        )
-                    ),
-                    p.tool_arg_close(
-                        "</parameter>\n" +
-                        p.peek(p.literal("<parameter=") | p.literal("</function>"))
-                    )
-                }));
+                auto arg = p.tool_arg(
+                    p.sequence({ p.tool_arg_open("<parameter=" + p.tool_arg_name(p.literal(param_name)) + ">"),
+                                 (type == "string" ?
+                                      p.tool_arg_string_value(p.schema(
+                                          p.until_one_of({ "</parameter>\n<parameter=", "</parameter>\n</function>" }),
+                                          "tool-" + name + "-arg-" + param_name + "-schema", param_schema, true)) :
+                                      p.tool_arg_json_value(p.schema(
+                                          p.json(), "tool-" + name + "-arg-" + param_name + "-schema", param_schema))),
+                                 p.tool_arg_close("</parameter>\n" +
+                                                  p.peek(p.literal("<parameter=") | p.literal("</function>"))) }));
 
-                arg_parsers.push_back(is_required ?
-                    p.rule("tool-" + name + "-arg-" + param_name, arg) :
-                    p.optional(p.rule("tool-" + name + "-arg-" + param_name, arg)));
+                arg_parsers.push_back(is_required ? p.rule("tool-" + name + "-arg-" + param_name, arg) :
+                                                    p.optional(p.rule("tool-" + name + "-arg-" + param_name, arg)));
             }
 
-            tool_parsers.push_back(p.rule("tool-" + name,
-                p.tool_open("<function=" + p.tool_name(p.literal(name)) + ">")
-                << p.sequence(arg_parsers)
-                << p.tool_close(p.literal("</function>"))
-            ));
+            tool_parsers.push_back(p.rule("tool-" + name, p.tool_open("<function=" + p.tool_name(p.literal(name)) + ">")
+                                                              << p.sequence(arg_parsers)
+                                                              << p.tool_close(p.literal("</function>"))));
         };
 
-        auto tool_call = p.trigger_rule("tool-call",
-            "<tool_call>"
-            << p.choice(tool_parsers)
-            << "</tool_call>"
-        );
+        auto tool_call = p.trigger_rule("tool-call", "<tool_call>" << p.choice(tool_parsers) << "</tool_call>");
 
         return content + p.zero_or_more(p.space() + tool_call) + p.end();
     });
 
     auto grammar = build_grammar([&](const common_grammar_builder & builder) {
-        for (auto const & def : tools) {
-            auto function = def.at("function");
+        for (const auto & def : tools) {
+            auto function   = def.at("function");
             auto parameters = function.at("parameters");
             builder.resolve_refs(parameters);
         };
@@ -510,11 +436,11 @@ static void test_example_qwen3_coder(testing & t) {
     });
 
     t.log("Grammar:");
-    for (auto const & line : string_split(grammar, "\n")) {
+    for (const auto & line : string_split(grammar, "\n")) {
         t.log(line);
     }
 
-    t.test("incremental parsing", [&](testing &t) {
+    t.test("incremental parsing", [&](testing & t) {
         std::string input =
             "Let me search the knowledge base for cat pictures."
             "<tool_call>\n"
@@ -538,7 +464,7 @@ static void test_example_qwen3_coder(testing & t) {
             }
 
             common_chat_msg msg;
-            auto mapper = common_chat_peg_constructed_mapper(msg);
+            auto            mapper = common_chat_peg_unified_mapper(msg);
             mapper.from_ast(ctx.ast, result);
 
             //t.log("Input: " + input);
@@ -554,7 +480,105 @@ static void test_example_qwen3_coder(testing & t) {
             try {
                 // This shouldn't emit any runtime errors
                 auto diffs = common_chat_msg_diff::compute_diffs(prev, msg);
-            } catch(const std::exception & e) {
+            } catch (const std::exception & e) {
+                t.log(in.substr(0, result.end) + "[failed->]" + in.substr(result.end));
+                t.assert_true(std::string("failed with ") + e.what(), false);
+            }
+
+            prev = msg;
+        }
+    });
+}
+
+static void test_example_qwen3_non_coder(testing & t) {
+    auto tools  = create_tools();
+    auto parser = build_chat_peg_unified_parser([&](common_chat_peg_unified_builder & p) {
+        // tool calling parser using standard JSON format
+        auto tool_call = p.standard_json_tools("<tool_call>", "</tool_call>", tools, true, false);
+
+        return p.sequence({ p.content(p.until("<tool_call>")), p.optional(p.space() + tool_call), p.end() });
+    });
+
+    auto grammar = build_grammar([&](const common_grammar_builder & builder) {
+        for (const auto & def : tools) {
+            auto function   = def.at("function");
+            auto parameters = function.at("parameters");
+            builder.resolve_refs(parameters);
+        };
+        parser.build_grammar(builder);
+    });
+
+    t.log("Grammar:");
+    for (const auto & line : string_split(grammar, "\n")) {
+        t.log(line);
+    }
+
+    t.test("tool call parsing", [&](testing & t) {
+        std::string input =
+            "I need to get the weather.\n"
+            "<tool_call>"
+            "{\"name\": \"get_current_weather\", \"arguments\": {\"location\": \"New York City, NY\", \"unit\": "
+            "\"fahrenheit\"}}"
+            "</tool_call>";
+
+        common_peg_parse_context ctx(input, false);
+        auto                     result = parser.parse(ctx);
+
+        t.assert_true("success", result.success());
+
+        common_chat_msg msg;
+        auto            mapper = common_chat_peg_unified_mapper(msg);
+        mapper.from_ast(ctx.ast, result);
+
+        t.assert_equal("content", "I need to get the weather.", msg.content);
+        t.assert_equal("reasoning", "", msg.reasoning_content);
+        t.assert_equal("tool calls count", 1u, msg.tool_calls.size());
+        if (!msg.tool_calls.empty()) {
+            t.assert_equal("tool name", "get_current_weather", msg.tool_calls[0].name);
+            t.assert_equal("tool args", "{\"location\": \"New York City, NY\", \"unit\": \"fahrenheit\"}",
+                           msg.tool_calls[0].arguments);
+        }
+    });
+
+    t.test("incremental parsing", [&](testing & t) {
+        std::string input =
+            "I need to get the weather.\n"
+            "<tool_call>"
+            "{\"name\": \"get_current_weather\", \"arguments\": {\"location\": \"New York City, NY\", \"unit\": "
+            "\"fahrenheit\"}}"
+            "</tool_call>";
+
+        std::vector<std::string> tokens = simple_tokenize(input);
+
+        common_chat_msg prev;
+        for (auto it = tokens.begin(); it != tokens.end(); it++) {
+            std::string in = std::accumulate(tokens.begin(), it + 1, std::string());
+
+            common_peg_parse_context ctx(in, it + 1 < tokens.end());
+
+            auto result = parser.parse(ctx);
+            if (!t.assert_equal("not fail", false, result.fail())) {
+                t.log(in.substr(0, result.end) + "[failed->]" + in.substr(result.end));
+            }
+
+            common_chat_msg msg;
+            auto            mapper = common_chat_peg_unified_mapper(msg);
+            mapper.from_ast(ctx.ast, result);
+
+            //t.log("Input: " + input);
+            t.log("===========================================");
+            t.log("Iteration " + std::to_string(in.size()));
+            t.log("Reasoning: " + msg.reasoning_content);
+            t.log("Content  : " + msg.content);
+            for (const auto & tc : msg.tool_calls) {
+                t.log("Tool name: " + tc.name);
+                t.log("Tool args: " + tc.arguments);
+            }
+
+            try {
+                // This shouldn't emit any runtime errors
+                auto diffs = common_chat_msg_diff::compute_diffs(prev, msg);
+            } catch (const std::exception & e) {
                 t.log(in.substr(0, result.end) + "[failed->]" + in.substr(result.end));
                 t.assert_true(std::string("failed with ") + e.what(), false);
             }
@@ -565,38 +589,37 @@ static void test_example_qwen3_coder(testing & t) {
 }
 
 void test_command7_parser_compare(testing & t) {
-    auto parser = build_chat_peg_native_parser([](common_chat_peg_native_builder & p) {
-        auto thinking = p.reasoning_block(
-            "<|START_THINKING|>" << p.reasoning(p.until("<|END_THINKING|>")) << "<|END_THINKING|>");
+    auto parser = build_chat_peg_unified_parser([](common_chat_peg_unified_builder & p) {
+        auto thinking =
+            p.reasoning_block("<|START_THINKING|>" << p.reasoning(p.until("<|END_THINKING|>")) << "<|END_THINKING|>");
 
         auto response = "<|START_RESPONSE|>" << p.content(p.until("<|END_RESPONSE|>")) << "<|END_RESPONSE|>";
 
         auto tool_call_id = p.atomic("\"tool_call_id\"" << (":" << ("\"" + p.tool_id(p.json_string_content()) + "\"")));
-        auto tool_call_name = p.atomic("\"tool_name\"" << (":" << ("\"" + p.tool_name(p.json_string_content()) + "\"")));
+        auto tool_call_name =
+            p.atomic("\"tool_name\"" << (":" << ("\"" + p.tool_name(p.json_string_content()) + "\"")));
         auto tool_call_args = "\"parameters\"" << (":" << p.tool_args(p.json()));
 
         auto tool_call_fields = p.rule("tool-call-fields", tool_call_id | tool_call_name | tool_call_args);
-        auto tool_call = p.rule("tool-call", p.tool(
-            p.tool_open(p.literal("{"))
-            << tool_call_fields
-            << p.zero_or_more( p.literal(",") << tool_call_fields)
-            << p.tool_close(p.literal("}"))
-        ));
+        auto tool_call =
+            p.rule("tool-call", p.tool(p.tool_open(p.literal("{"))
+                                       << tool_call_fields << p.zero_or_more(p.literal(",") << tool_call_fields)
+                                       << p.tool_close(p.literal("}"))));
 
-        auto tool_calls = p.rule("tool-calls",
-            "<|START_ACTION|>"
-            << ("[" << tool_call << p.zero_or_more(p.literal(",") << tool_call) << "]")
-            << "<|END_ACTION|>");
+        auto tool_calls = p.rule(
+            "tool-calls", "<|START_ACTION|>" << ("[" << tool_call << p.zero_or_more(p.literal(",") << tool_call) << "]")
+                                             << "<|END_ACTION|>");
 
         return p.optional(thinking) << (tool_calls | response) + p.end();
     });
 
-    auto test_current = [&](const common_peg_arena & p, const std::string & input, bool is_partial, bool print_results) {
+    auto test_current = [&](const common_peg_arena & p, const std::string & input, bool is_partial,
+                            bool print_results) {
         common_peg_parse_context ctx(input, is_partial);
-        auto result = p.parse(ctx);
+        auto                     result = p.parse(ctx);
 
         common_chat_msg msg;
-        auto mapper = common_chat_peg_native_mapper(msg);
+        auto            mapper = common_chat_peg_unified_mapper(msg);
         mapper.from_ast(ctx.ast, result);
 
         if (print_results) {
@@ -614,79 +637,19 @@ void test_command7_parser_compare(testing & t) {
         }
     };
 
-    auto test_legacy = [&](const std::string & input, bool need_more_input, bool print_results) {
-        // Original common_chat_combinator_parser taken from chat.cpp
-        common_chat_parser_params params;
-        params.format = COMMON_CHAT_FORMAT_GENERIC;
-        params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
-        params.reasoning_in_content = false;
-        params.thinking_forced_open = false;
-        common_chat_msg_parser builder(
-            input,
-            /* .is_partial = */ need_more_input,
-            params
-        );
+    std::string reasoning =
+        "To plan an effective trip to Japan that includes both historical sites and modern attractions within a "
+        "budget of $4000 for a two-week stay, we need to:\n\n"
+        "1. Identify key historical sites and modern attractions in Japan.\n"
+        "2. Find affordable accommodation options that provide a balance between comfort and cost.\n"
+        "3. Determine the best modes of transportation for getting around Japan.\n"
+        "4. Create a day-by-day itinerary that ensures the user gets to see a variety of attractions without "
+        "overspending.\n"
+        "5. Provide a detailed cost breakdown that includes accommodation, transportation, meals, and entry fees "
+        "to attractions.";
 
-        builder.try_parse_reasoning("<|START_THINKING|>", "<|END_THINKING|>");
-
-        static const common_regex start_action_regex("<\\|START_ACTION\\|>");
-        static const common_regex end_action_regex("<\\|END_ACTION\\|>");
-        static const common_regex start_response_regex("<\\|START_RESPONSE\\|>");
-        static const common_regex end_response_regex("<\\|END_RESPONSE\\|>");
-
-        if (auto res = builder.try_find_regex(start_action_regex)) {
-            // If we didn't extract thoughts, prelude includes them.
-            auto tool_calls = builder.consume_json_with_dumped_args({ { "parameters" } });
-            for (const auto & tool_call : tool_calls.value) {
-                std::string name      = tool_call.contains("tool_name") ? tool_call.at("tool_name") : "";
-                std::string id        = tool_call.contains("tool_call_id") ? tool_call.at("tool_call_id") : "";
-                std::string arguments = tool_call.contains("parameters") ? tool_call.at("parameters") : "";
-                if (!builder.add_tool_call(name, id, arguments) || tool_calls.is_partial) {
-                    throw common_chat_msg_partial_exception("incomplete tool call");
-                }
-            }
-            if (tool_calls.is_partial) {
-                throw common_chat_msg_partial_exception("incomplete tool call");
-            }
-            builder.consume_regex(end_action_regex);
-        } else if (auto res = builder.try_find_regex(start_response_regex)) {
-            if (!builder.try_find_regex(end_response_regex)) {
-                builder.add_content(builder.consume_rest());
-                throw common_chat_msg_partial_exception(end_response_regex.str());
-            }
-        } else {
-            builder.add_content(builder.consume_rest());
-        }
-
-        if (print_results) {
-            std::cout << "== Parsed (legacy) ==\n";
-            std::cout << "=== Reasoning ===\n";
-            std::cout << builder.result().reasoning_content << "\n";
-            std::cout << "\n\n=== Content ===\n";
-            std::cout << builder.result().content << "\n";
-            std::cout << "\n\n=== Tool Calls ===\n";
-            for (const auto & tc : builder.result().tool_calls) {
-                std::cout << "id: " << tc.id << "\n";
-                std::cout << "name: " << tc.name << "\n";
-                std::cout << "args: " << tc.arguments << "\n";
-            }
-        }
-    };
-
-    std::string reasoning = "To plan an effective trip to Japan that includes both historical sites and modern attractions within a "
-            "budget of $4000 for a two-week stay, we need to:\n\n"
-            "1. Identify key historical sites and modern attractions in Japan.\n"
-            "2. Find affordable accommodation options that provide a balance between comfort and cost.\n"
-            "3. Determine the best modes of transportation for getting around Japan.\n"
-            "4. Create a day-by-day itinerary that ensures the user gets to see a variety of attractions without "
-            "overspending.\n"
-            "5. Provide a detailed cost breakdown that includes accommodation, transportation, meals, and entry fees "
-            "to attractions.";
-
-    std::vector<std::tuple<std::string, std::string, nlohmann::json>> tool_calls = {{
-        "call_0",
-        "plan_trip",
-        nlohmann::json::parse(R"({
+    std::vector<std::tuple<std::string, std::string, nlohmann::json>> tool_calls = {
+        { "call_0", "plan_trip", nlohmann::json::parse(R"({
             "destination": "Japan",
             "duration": 14,
             "budget": 4000,
@@ -694,8 +657,8 @@ void test_command7_parser_compare(testing & t) {
             "accommodation_preferences": "affordable",
             "transportation_preferences": "efficient",
             "meal_preferences": "local cuisine"
-        })")
-    }};
+        })") }
+    };
 
     std::vector<std::string> tokens;
 
@@ -712,10 +675,10 @@ void test_command7_parser_compare(testing & t) {
 
         auto json = nlohmann::json::array();
         for (const auto & tc : tool_calls) {
-            auto tc_json = nlohmann::json::object();
+            auto tc_json            = nlohmann::json::object();
             tc_json["tool_call_id"] = std::get<0>(tc);
-            tc_json["tool_name"] = std::get<1>(tc);
-            tc_json["parameters"] = std::get<2>(tc);
+            tc_json["tool_name"]    = std::get<1>(tc);
+            tc_json["parameters"]   = std::get<2>(tc);
             json.push_back(tc_json);
         }
 
@@ -727,42 +690,191 @@ void test_command7_parser_compare(testing & t) {
 
     std::string input = std::accumulate(tokens.begin(), tokens.end(), std::string());
 
-    // Run tests
-    t.test("legacy_parse", [&](testing & /* t */) {
-        test_legacy(input, false, false);
+    t.test("current_parse", [&](testing & /* t */) { test_current(parser, input, false, false); });
+    t.bench("current_parse_benchmark complete", [&]() { test_current(parser, input, false, false); }, 100);
+    t.bench(
+        "current_parse_benchmark incremental",
+        [&]() {
+            std::string in;
+            for (auto i = 0u; i < tokens.size(); i++) {
+                in += tokens[i];
+                test_current(parser, in, i + 1 < tokens.size(), false);
+            }
+        },
+        20);
+}
+
+// Test that tool names that are proper prefixes of other tool names don't cause
+// premature matching during incremental parsing.
+// For example, "special_function" should not match when parsing "special_function_with_opt".
+static void test_prefix_tool_names(testing & t) {
+    // Create tools where one name is a proper prefix of another
+    json tools = json::array();
+
+    json tool_short = {
+        { "type", "function" },
+        { "function",
+          {
+              { "name", "special_function" },
+              { "description", "A special function" },
+              { "parameters",
+                {
+                    { "type", "object" },
+                    { "properties",
+                      {
+                          { "arg1", { { "type", "integer" } } },
+                      } },
+                    { "required", { "arg1" } },
+                } },
+          } }
+    };
+    tools.push_back(tool_short);
+
+    json tool_long = {
+        { "type", "function" },
+        { "function",
+          {
+              { "name", "special_function_with_opt" },
+              { "description", "A special function with optional params" },
+              { "parameters",
+                {
+                    { "type", "object" },
+                    { "properties",
+                      {
+                          { "arg1", { { "type", "integer" } } },
+                          { "arg2", { { "type", "integer" } } },
+                      } },
+                    { "required", { "arg1" } },
+                } },
+          } }
+    };
+    tools.push_back(tool_long);
+
+    // Use standard_constructed_tools which had the prefix matching bug
+    std::map<std::string, std::string> markers = {
+        { "tool_call_start_marker", "<tool_call>" },
+        { "tool_call_end_marker", "</tool_call>" },
+        { "function_opener", "<function=" },
+        { "function_closer", "</function>" },
+        { "function_name_suffix", ">" },
+        { "parameter_key_prefix", "<param=" },
+        { "parameter_key_suffix", ">" },
+        { "parameter_closer", "</param>" },
+    };
+
+    auto parser = build_chat_peg_unified_parser([&](common_chat_peg_unified_builder & p) {
+        auto content   = p.rule("content", p.content(p.until("<tool_call>")));
+        auto tool_call = p.standard_constructed_tools(markers, tools, false, false);
+        return content + p.zero_or_more(p.space() + tool_call) + p.end();
     });
 
-    t.test("current_parse", [&](testing & /* t */) {
-        test_current(parser, input, false, false);
+    // Test parsing the long tool name - this should NOT trigger the short tool name
+    t.test("parse long tool name", [&](testing & t) {
+        std::string input =
+            "Let me call the function."
+            "<tool_call>"
+            "<function=special_function_with_opt>"
+            "<param=arg1>42</param>"
+            "</function>"
+            "</tool_call>";
+
+        common_peg_parse_context ctx(input, false);
+        auto                     result = parser.parse(ctx);
+
+        t.assert_true("success", result.success());
+
+        common_chat_msg msg;
+        auto            mapper = common_chat_peg_unified_mapper(msg);
+        mapper.from_ast(ctx.ast, result);
+
+        t.assert_equal("content", "Let me call the function.", msg.content);
+        t.assert_equal("tool calls count", 1u, msg.tool_calls.size());
+        if (!msg.tool_calls.empty()) {
+            t.assert_equal("tool name", "special_function_with_opt", msg.tool_calls[0].name);
+        }
     });
 
-    // Run benchmarks
-    t.bench("legacy_parse_benchmark complete", [&]() {
-        test_legacy(input, false, false);
-    });
+    // Test incremental parsing - the key test case
+    // This ensures that when incrementally parsing "special_function_with_opt",
+    // we don't prematurely emit "special_function" as a tool call
+    t.test("incremental parse long tool name", [&](testing & t) {
+        std::string input =
+            "Let me call the function."
+            "<tool_call>"
+            "<function=special_function_with_opt>"
+            "<param=arg1>42</param>"
+            "</function>"
+            "</tool_call>";
 
-    t.bench("legacy_parse_benchmark incremental", [&]() {
-        std::string in;
-        for (auto i = 0u; i < tokens.size(); i++) {
-            in += tokens[i];
+        std::vector<std::string> tokens = simple_tokenize(input);
+
+        common_chat_msg prev;
+        for (auto it = tokens.begin(); it != tokens.end(); it++) {
+            std::string in = std::accumulate(tokens.begin(), it + 1, std::string());
+
+            common_peg_parse_context ctx(in, it + 1 < tokens.end());
+            auto                     result = parser.parse(ctx);
+
+            if (!t.assert_equal("not fail", false, result.fail())) {
+                t.log(in.substr(0, result.end) + "[failed->]" + in.substr(result.end));
+                return;
+            }
+
+            common_chat_msg msg;
+            auto            mapper = common_chat_peg_unified_mapper(msg);
+            mapper.from_ast(ctx.ast, result);
+
+            // The critical check: during incremental parsing, we should never
+            // see "special_function" as the tool name when parsing "special_function_with_opt"
+            for (const auto & tc : msg.tool_calls) {
+                if (!t.assert_equal("tool name should not be short prefix", false,
+                                    tc.name == "special_function")) {
+                    t.log("Premature tool name match at input: " + in);
+                    return;
+                }
+            }
 
             try {
-                test_legacy(in, i + 1 < tokens.size(), false);
-            } catch (common_chat_msg_partial_exception & /* e */) {
-                // Do nothing, this is expected
+                auto diffs = common_chat_msg_diff::compute_diffs(prev, msg);
+            } catch (const std::exception & e) {
+                t.log(in.substr(0, result.end) + "[failed->]" + in.substr(result.end));
+                t.assert_true(std::string("diff failed with ") + e.what(), false);
+                return;
             }
-        }
-    }, 20);
 
-    t.bench("current_parse_benchmark complete", [&]() {
-        test_current(parser, input, false, false);
-    }, 100);
-
-    t.bench("current_parse_benchmark incremental", [&]() {
-        std::string in;
-        for (auto i = 0u; i < tokens.size(); i++) {
-            in += tokens[i];
-            test_current(parser, in, i + 1 < tokens.size(), false);
+            prev = msg;
         }
-    }, 20);
+
+        // Final check: the complete parse should have the correct tool name
+        t.assert_equal("final tool calls count", 1u, prev.tool_calls.size());
+        if (!prev.tool_calls.empty()) {
+            t.assert_equal("final tool name", "special_function_with_opt", prev.tool_calls[0].name);
+        }
+    });
+
+    // Test parsing the short tool name still works
+    t.test("parse short tool name", [&](testing & t) {
+        std::string input =
+            "Let me call the function."
+            "<tool_call>"
+            "<function=special_function>"
+            "<param=arg1>42</param>"
+            "</function>"
+            "</tool_call>";
+
+        common_peg_parse_context ctx(input, false);
+        auto                     result = parser.parse(ctx);
+
+        t.assert_true("success", result.success());
+
+        common_chat_msg msg;
+        auto            mapper = common_chat_peg_unified_mapper(msg);
+        mapper.from_ast(ctx.ast, result);
+
+        t.assert_equal("content", "Let me call the function.", msg.content);
+        t.assert_equal("tool calls count", 1u, msg.tool_calls.size());
+        if (!msg.tool_calls.empty()) {
+            t.assert_equal("tool name", "special_function", msg.tool_calls[0].name);
+        }
+    });
 }
diff --git a/tests/test-chat-template.cpp b/tests/test-chat-template.cpp
deleted file mode 100644
index 27b537a036..0000000000
--- a/tests/test-chat-template.cpp
+++ /dev/null
@@ -1,680 +0,0 @@
-#include <string>
-#include <vector>
-#include <sstream>
-#include <regex>
-#include <iostream>
-#include <fstream>
-#include <filesystem>
-
-#include <nlohmann/json.hpp>
-
-#undef NDEBUG
-#include <cassert>
-
-#include "llama.h"
-#include "common.h"
-#include "chat.h"
-#include "jinja/runtime.h"
-#include "jinja/parser.h"
-#include "jinja/lexer.h"
-#include "jinja/caps.h"
-
-using json = nlohmann::ordered_json;
-
-int main_automated_tests(void);
-
-void run_multiple(std::string dir_path, bool stop_on_first_failure, json input, bool use_common = false);
-void run_single(std::string contents, json input, bool use_common = false, const std::string & output_path = "");
-
-
-
-std::string HELP = R"(
-Usage: test-chat-template [OPTIONS] PATH_TO_TEMPLATE
-Options:
-  -h, --help               Show this help message and exit.
-  --json <path>            Path to the JSON input file.
-  --stop-on-first-fail     Stop testing on the first failure (default: false).
-  --no-common              Use direct Jinja engine instead of common chat templates (default: use common).
-  --output <path>          Path to output results (only for single template runs).
-If PATH_TO_TEMPLATE is a file, runs that single template.
-If PATH_TO_TEMPLATE is a directory, runs all .jinja files in that directory.
-If PATH_TO_TEMPLATE is omitted, runs automated tests (default CI mode).
-)";
-
-std::string DEFAULT_JSON = R"({
-    "messages": [
-        {
-            "role": "user",
-            "content": "Hello, how are you?"
-        },
-        {
-            "role": "assistant",
-            "content": "I am fine, thank you!"
-        }
-    ],
-    "bos_token": "<s>",
-    "eos_token": "</s>",
-    "add_generation_prompt": true
-})";
-
-int main(int argc, char ** argv) {
-    std::vector<std::string> args(argv, argv + argc);
-
-    std::string tmpl_path;
-    std::string json_path;
-    std::string output_path;
-    bool stop_on_first_fail = false;
-    bool use_common = true;
-
-    for (size_t i = 1; i < args.size(); i++) {
-        if (args[i] == "--help" || args[i] == "-h") {
-            std::cout << HELP << "\n";
-            return 0;
-        } else if (args[i] == "--json" && i + 1 < args.size()) {
-            json_path = args[i + 1];
-            i++;
-        } else if (args[i] == "--stop-on-first-fail") {
-            stop_on_first_fail = true;
-        } else if (args[i] == "--output" && i + 1 < args.size()) {
-            output_path = args[i + 1];
-            i++;
-        } else if (args[i] == "--no-common") {
-            use_common = true;
-        } else if (tmpl_path.empty()) {
-            tmpl_path = args[i];
-        } else {
-            std::cerr << "Unknown argument: " << args[i] << "\n";
-            std::cout << HELP << "\n";
-            return 1;
-        }
-    }
-
-    if (tmpl_path.empty()) {
-        return main_automated_tests();
-    }
-
-    json input_json;
-    if (!json_path.empty()) {
-        std::ifstream json_file(json_path);
-        if (!json_file) {
-            std::cerr << "Error: Could not open JSON file: " << json_path << "\n";
-            return 1;
-        }
-        std::string content = std::string(
-            std::istreambuf_iterator<char>(json_file),
-            std::istreambuf_iterator<char>());
-        input_json = json::parse(content);
-    } else {
-        input_json = json::parse(DEFAULT_JSON);
-    }
-
-    std::filesystem::path p(tmpl_path);
-    if (std::filesystem::is_directory(p)) {
-        run_multiple(tmpl_path, stop_on_first_fail, input_json, use_common);
-    } else if (std::filesystem::is_regular_file(p)) {
-        std::ifstream infile(tmpl_path);
-        std::string contents = std::string(
-            std::istreambuf_iterator<char>(infile),
-            std::istreambuf_iterator<char>());
-        run_single(contents, input_json, use_common, output_path);
-    } else {
-        std::cerr << "Error: PATH_TO_TEMPLATE is not a valid file or directory: " << tmpl_path << "\n";
-        return 1;
-    }
-
-    return 0;
-}
-
-void run_multiple(std::string dir_path, bool stop_on_first_fail, json input, bool use_common) {
-    std::vector<std::string> failed_tests;
-
-    // list all files in models/templates/ and run each
-    size_t test_count = 0;
-
-    for (const auto & entry : std::filesystem::directory_iterator(dir_path)) {
-        // only process .jinja files
-        if (entry.path().extension() == ".jinja" && entry.is_regular_file()) {
-            test_count++;
-            std::cout << "\n\n=== RUNNING TEMPLATE FILE: " << entry.path().string() << " ===\n";
-            std::ifstream infile(entry.path());
-            std::string contents((std::istreambuf_iterator<char>(infile)), std::istreambuf_iterator<char>());
-            try {
-                run_single(contents, input, use_common);
-            } catch (const std::exception & e) {
-                std::cout << "Exception: " << e.what() << "\n";
-                std::cout << "=== ERROR WITH TEMPLATE FILE: " << entry.path().string() << " ===\n";
-                failed_tests.push_back(entry.path().string());
-                if (stop_on_first_fail) {
-                    break;
-                }
-            }
-        }
-    }
-
-    std::cout << "\n\n=== TEST SUMMARY ===\n";
-    std::cout << "Total tests run: " << test_count << "\n";
-    std::cout << "Total failed tests: " << failed_tests.size() << "\n";
-    for (const auto & test : failed_tests) {
-        std::cout << "FAILED TEST: " << test << "\n";
-    }
-}
-
-
-static std::string normalize_newlines(const std::string & s) {
-#ifdef _WIN32
-  static const std::regex nl_regex("\r\n");
-  return std::regex_replace(s, nl_regex, "\n");
-#else
-  return s;
-#endif
-}
-
-
-static std::string format_using_common(
-            const std::string & template_str,
-            const std::string & bos_token,
-            const std::string & eos_token,
-            std::vector<common_chat_msg> & messages,
-            std::vector<common_chat_tool> tools = {}) {
-    auto tmpls = common_chat_templates_init(/* model= */ nullptr, template_str, bos_token, eos_token);
-    common_chat_templates_inputs inputs;
-    inputs.use_jinja = true;
-    inputs.messages = messages;
-    inputs.tools = tools;
-    inputs.add_generation_prompt = true;
-    auto output = common_chat_templates_apply(tmpls.get(), inputs).prompt;
-    output = normalize_newlines(output);
-    return output;
-}
-
-
-// skip libcommon, use direct jinja engine
-static jinja::value_string format_using_direct_engine(
-            const std::string & template_str,
-            json & input) {
-    // lexing
-    jinja::lexer lexer;
-    auto lexer_res = lexer.tokenize(template_str);
-
-    // compile to AST
-    jinja::program ast = jinja::parse_from_tokens(lexer_res);
-
-    // check caps for workarounds
-    jinja::caps_get(ast);
-
-    std::cout << "\n=== RUN ===\n";
-    jinja::context ctx(template_str);
-
-    jinja::global_from_json(ctx, input, true);
-
-    jinja::runtime runtime(ctx);
-    const jinja::value results = runtime.execute(ast);
-    auto parts = runtime.gather_string_parts(results);
-
-    std::cout << "\n=== RESULTS ===\n";
-    for (const auto & part : parts->as_string().parts) {
-        std::cout << (part.is_input ? "DATA" : "TMPL") << ": " << part.val << "\n";
-    }
-
-    return parts;
-}
-
-
-void run_single(std::string contents, json input, bool use_common, const std::string & output_path) {
-    jinja::enable_debug(true);
-
-    jinja::value_string output_parts;
-
-    if (use_common) {
-        std::string bos_token = "<s>";
-        std::string eos_token = "</s>";
-        if (input.contains("bos_token")) {
-            bos_token = input["bos_token"].get<std::string>();
-        }
-        if (input.contains("eos_token")) {
-            eos_token = input["eos_token"].get<std::string>();
-        }
-        nlohmann::ordered_json msgs_json = input["messages"];
-        nlohmann::ordered_json tools_json = input["tools"];
-        auto messages = common_chat_msgs_parse_oaicompat(msgs_json);
-        auto tools = common_chat_tools_parse_oaicompat(tools_json);
-        auto output = format_using_common(contents, bos_token, eos_token, messages, tools);
-        std::cout << "\n=== OUTPUT ===\n";
-        std::cout << output << "\n";
-        output_parts = jinja::mk_val<jinja::value_string>(output);
-
-    } else {
-        output_parts = format_using_direct_engine(contents, input);
-        std::cout << "\n=== OUTPUT ===\n";
-        std::cout << output_parts->as_string().str() << "\n";
-    }
-
-    if (!output_path.empty()) {
-        std::ofstream outfile(output_path);
-        if (!outfile) {
-            throw std::runtime_error("Could not open output file: " + output_path);
-        }
-        outfile << output_parts->as_string().str();
-        outfile.close();
-        std::cout << "\n=== OUTPUT WRITTEN TO " << output_path << " ===\n";
-    }
-}
-
-
-
-
-
-//
-// Automated tests for chat templates
-//
-
-#define U8C(x) (const char*)(u8##x)
-
-static common_chat_msg simple_msg(const std::string & role, const std::string & content) {
-    common_chat_msg msg;
-    msg.role = role;
-    msg.content = content;
-    return msg;
-}
-
-int main_automated_tests(void) {
-    // jinja::enable_debug(true);
-
-    std::vector<llama_chat_message> conversation {
-        {"system", "You are a helpful assistant"},
-        {"user", "Hello"},
-        {"assistant", "Hi there"},
-        {"user", "Who are you"},
-        {"assistant", "   I am an assistant   "},
-        {"user", "Another question"},
-    };
-
-    // std::string wrong = /* .template_str= */ u8"[gMASK]<sop>{% for item in messages %}{% if item['tools'] is defined %}<|system|>\n你是一个名为 ChatGLM 的人工智能助手。你是基于智谱AI训练的语言模型 GLM-4 模型开发的，你的任务是针对用户的问题和要求提供适当的答复和支持。\n\n# 可用工具{% set tools = item['tools'] %}{% for tool in tools %}{% if tool['type'] == 'function' %}\n\n## {{ tool['function']['name'] }}\n\n{{ tool['function'] | tojson(indent=4) }}\n......{% endif %}{% endfor %}{% endif %}{% if item['content'] %}<|{{ item['role'] }}|>{{ item['metadata'] }}\n{{ item['content'] }}{% endif %}{% endfor %}{% if add_generation_prompt %}<|assistant|>{% endif %}";
-    struct TestCase {
-        std::string name;
-        std::string template_str;
-        std::string expected_output;
-        std::string expected_output_jinja;
-        std::string bos_token = "";
-        std::string eos_token = "";
-        bool supported_with_jinja = true;
-    };
-    std::vector<TestCase> test_cases {
-        {
-            /* .name= */ "teknium/OpenHermes-2.5-Mistral-7B",
-            /* .template_str= */ "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\\n' + message['content'] + '<|im_end|>' + '\\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\\n' }}{% endif %}",
-            /* .expected_output= */ "<|im_start|>system\nYou are a helpful assistant<|im_end|>\n<|im_start|>user\nHello<|im_end|>\n<|im_start|>assistant\nHi there<|im_end|>\n<|im_start|>user\nWho are you<|im_end|>\n<|im_start|>assistant\n   I am an assistant   <|im_end|>\n<|im_start|>user\nAnother question<|im_end|>\n<|im_start|>assistant\n",
-            /* .expected_output_jinja= */ "",
-            /* .bos_token= */ "",
-            /* .eos_token= */ "",
-        },
-        {
-            /* .name= */ "mistralai/Mistral-7B-Instruct-v0.2 (NOTE: Old pre-v1 without a system prompt)",
-            /* .template_str= */ "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token}}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}",
-            /* .expected_output= */ "[INST] You are a helpful assistant\nHello [/INST]Hi there</s>[INST] Who are you [/INST]   I am an assistant   </s>[INST] Another question [/INST]",
-            /* .expected_output_jinja= */ "",
-            /* .bos_token= */ "",
-            /* .eos_token= */ "</s>",
-        },
-        {
-            /* .name= */ "TheBloke/FusionNet_34Bx2_MoE-AWQ",
-            /* .template_str= */ "{%- for idx in range(0, messages|length) -%}\n{%- if messages[idx]['role'] == 'user' -%}\n{%- if idx > 1 -%}\n{{- bos_token + '[INST] ' + messages[idx]['content'] + ' [/INST]' -}}\n{%- else -%}\n{{- messages[idx]['content'] + ' [/INST]' -}}\n{%- endif -%}\n{% elif messages[idx]['role'] == 'system' %}\n{{- '[INST] <<SYS>>\\n' + messages[idx]['content'] + '\\n<</SYS>>\\n\\n' -}}\n{%- elif messages[idx]['role'] == 'assistant' -%}\n{{- ' '  + messages[idx]['content'] + ' ' + eos_token -}}\n{% endif %}\n{% endfor %}",
-            /* .expected_output= */       "[INST] <<SYS>>\nYou are a helpful assistant\n<</SYS>>\n\nHello [/INST]Hi there</s><s>[INST] Who are you [/INST]   I am an assistant   </s><s>[INST] Another question [/INST]",
-            /* .expected_output_jinja= */ "[INST] <<SYS>>\nYou are a helpful assistant\n<</SYS>>\n\nHello [/INST] Hi there </s><s>[INST] Who are you [/INST]    I am an assistant    </s><s>[INST] Another question [/INST]",
-            /* .bos_token= */ "<s>",
-            /* .eos_token= */ "</s>",
-        },
-        {
-            /* .name= */ "bofenghuang/vigogne-2-70b-chat",
-            /* .template_str= */ "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% elif true == true and not '<<SYS>>' in messages[0]['content'] %}{% set loop_messages = messages %}{% set system_message = 'Vous êtes Vigogne, un assistant IA créé par Zaion Lab. Vous suivez extrêmement bien les instructions. Aidez autant que vous le pouvez.' %}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if loop.index0 == 0 and system_message != false %}{% set content = '<<SYS>>\\n' + system_message + '\\n<</SYS>>\\n\\n' + message['content'] %}{% else %}{% set content = message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + content.strip() + ' [/INST]' }}{% elif message['role'] == 'system' %}{{ '<<SYS>>\\n' + content.strip() + '\\n<</SYS>>\\n\\n' }}{% elif message['role'] == 'assistant' %}{{ ' '  + content.strip() + ' ' + eos_token }}{% endif %}{% endfor %}",
-            /* .expected_output= */       "[INST] <<SYS>>\nYou are a helpful assistant\n<</SYS>>\n\nHello [/INST]Hi there</s>[INST] Who are you [/INST]I am an assistant</s>[INST] Another question [/INST]",
-            /* .expected_output_jinja= */ "[INST] <<SYS>>\nYou are a helpful assistant\n<</SYS>>\n\nHello [/INST] Hi there </s>[INST] Who are you [/INST] I am an assistant </s>[INST] Another question [/INST]",
-            /* .bos_token= */ "",
-            /* .eos_token= */ "</s>",
-        },
-        {
-            /* .name= */ "mlabonne/AlphaMonarch-7B",
-            /* .template_str= */ "{% for message in messages %}{{bos_token + message['role'] + '\\n' + message['content'] + eos_token + '\\n'}}{% endfor %}{% if add_generation_prompt %}{{ bos_token + 'assistant\\n' }}{% endif %}",
-            /* .expected_output= */ "system\nYou are a helpful assistant</s>\n<s>user\nHello</s>\n<s>assistant\nHi there</s>\n<s>user\nWho are you</s>\n<s>assistant\n   I am an assistant   </s>\n<s>user\nAnother question</s>\n<s>assistant\n",
-            /* .expected_output_jinja= */ "<s>system\nYou are a helpful assistant</s>\n<s>user\nHello</s>\n<s>assistant\nHi there</s>\n<s>user\nWho are you</s>\n<s>assistant\n   I am an assistant   </s>\n<s>user\nAnother question</s>\n<s>assistant\n",
-            /* .bos_token= */ "<s>",
-            /* .eos_token= */ "</s>",
-        },
-        {
-            /* .name= */ "google/gemma-7b-it",
-            /* .template_str= */ "{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '<start_of_turn>' + role + '\\n' + message['content'] | trim + '<end_of_turn>\\n' }}{% endfor %}{% if add_generation_prompt %}{{'<start_of_turn>model\\n'}}{% endif %}",
-            /* .expected_output= */       "<start_of_turn>user\nYou are a helpful assistant\n\nHello<end_of_turn>\n<start_of_turn>model\nHi there<end_of_turn>\n<start_of_turn>user\nWho are you<end_of_turn>\n<start_of_turn>model\nI am an assistant<end_of_turn>\n<start_of_turn>user\nAnother question<end_of_turn>\n<start_of_turn>model\n",
-            /* .expected_output_jinja= */ "<start_of_turn>user\nYou are a helpful assistant\nHello<end_of_turn>\n<start_of_turn>model\nHi there<end_of_turn>\n<start_of_turn>user\nWho are you<end_of_turn>\n<start_of_turn>model\nI am an assistant<end_of_turn>\n<start_of_turn>user\nAnother question<end_of_turn>\n<start_of_turn>model\n",
-        },
-        {
-            /* .name= */ "OrionStarAI/Orion-14B-Chat",
-            /* .template_str= */ "{% for message in messages %}{% if loop.first %}{{ bos_token }}{% endif %}{% if message['role'] == 'user' %}{{ 'Human: ' + message['content'] + '\\n\\nAssistant: ' + eos_token }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token }}{% endif %}{% endfor %}",
-            /* .expected_output= */       "Human: You are a helpful assistant\n\nHello\n\nAssistant: </s>Hi there</s>Human: Who are you\n\nAssistant: </s>   I am an assistant   </s>Human: Another question\n\nAssistant: </s>",
-            /* .expected_output_jinja= */ "Human: You are a helpful assistant\nHello\n\nAssistant: </s>Hi there</s>Human: Who are you\n\nAssistant: </s>   I am an assistant   </s>Human: Another question\n\nAssistant: </s>",
-            /* .bos_token= */ "",
-            /* .eos_token= */ "</s>",
-        },
-        {
-            /* .name= */ "openchat/openchat-3.5-0106",
-            // The included chat_template differs from the author's suggestions here: https://huggingface.co/openchat/openchat_3.5/discussions/5#65448109b4a3f3a2f486fd9d
-            // So we match against the included template but implement the suggested version.
-            /* .template_str= */ "{{ bos_token }}{% for message in messages %}{{ 'GPT4 Correct ' + message['role'].title() + ': ' + message['content'] + '<|end_of_turn|>'}}{% endfor %}{% if add_generation_prompt %}{{ 'GPT4 Correct Assistant:' }}{% endif %}",
-            /* .expected_output= */                            "You are a helpful assistant<|end_of_turn|>GPT4 Correct User: Hello<|end_of_turn|>GPT4 Correct Assistant: Hi there<|end_of_turn|>GPT4 Correct User: Who are you<|end_of_turn|>GPT4 Correct Assistant:    I am an assistant   <|end_of_turn|>GPT4 Correct User: Another question<|end_of_turn|>GPT4 Correct Assistant:",
-            /* .expected_output_jinja= */ "GPT4 Correct System: You are a helpful assistant<|end_of_turn|>GPT4 Correct User: Hello<|end_of_turn|>GPT4 Correct Assistant: Hi there<|end_of_turn|>GPT4 Correct User: Who are you<|end_of_turn|>GPT4 Correct Assistant:    I am an assistant   <|end_of_turn|>GPT4 Correct User: Another question<|end_of_turn|>GPT4 Correct Assistant:",
-        },
-        {
-            /* .name= */ "deepseek-ai/deepseek-coder-33b-instruct",
-            /* .template_str= */ "{% if not add_generation_prompt is defined %}\n{% set add_generation_prompt = false %}\n{% endif %}\n{%- set ns = namespace(found=false) -%}\n{%- for message in messages -%}\n    {%- if message['role'] == 'system' -%}\n        {%- set ns.found = true -%}\n    {%- endif -%}\n{%- endfor -%}\n{{bos_token}}{%- if not ns.found -%}\n{{'You are an AI programming assistant, utilizing the Deepseek Coder model, developed by Deepseek Company, and you only answer questions related to computer science. For politically sensitive questions, security and privacy issues, and other non-computer science questions, you will refuse to answer\\n'}}\n{%- endif %}\n{%- for message in messages %}\n    {%- if message['role'] == 'system' %}\n{{ message['content'] }}\n    {%- else %}\n        {%- if message['role'] == 'user' %}\n{{'### Instruction:\\n' + message['content'] + '\\n'}}\n        {%- else %}\n{{'### Response:\\n' + message['content'] + '\\n<|EOT|>\\n'}}\n        {%- endif %}\n    {%- endif %}\n{%- endfor %}\n{% if add_generation_prompt %}\n{{'### Response:'}}\n{% endif %}",
-            /* .expected_output= */ "You are a helpful assistant### Instruction:\nHello\n### Response:\nHi there\n<|EOT|>\n### Instruction:\nWho are you\n### Response:\n   I am an assistant   \n<|EOT|>\n### Instruction:\nAnother question\n### Response:\n",
-            /* .expected_output_jinja= */ "",
-        },
-        {
-            /* .name= */ "eachadea/vicuna-13b-1.1",
-            // No template included in tokenizer_config.json, so this template likely needs to be manually set.
-            /* .template_str= */ "{%- for message in messages %}{%- if message['role'] == 'system' -%}{{- '' + message['content'] + '\n\n' -}}{%- else -%}{%- if message['role'] == 'user' -%}{{-'USER: ' + message['content'] + '\n'-}}{%- else -%}{{-'ASSISTANT: ' + message['content'] + '</s>\n' -}}{%- endif -%}{%- endif -%}{%- endfor -%}{%- if add_generation_prompt -%}{{-'ASSISTANT:'-}}{%- endif -%}",
-            /* .expected_output= */ "You are a helpful assistant\n\nUSER: Hello\nASSISTANT: Hi there</s>\nUSER: Who are you\nASSISTANT:    I am an assistant   </s>\nUSER: Another question\nASSISTANT:",
-            /* .expected_output_jinja= */ "",
-            /* .bos_token= */ "",
-            /* .eos_token= */ "",
-        },
-        {
-            /* .name= */ "Orca-Vicuna",
-            // No template included in tokenizer_config.json, so this template likely needs to be manually set.
-            /* .template_str= */ "{%- for message in messages %}{%- if message['role'] == 'system' -%}{{-'SYSTEM: ' + message['content'] + '\n' -}}{%- else -%}{%- if message['role'] == 'user' -%}{{-'USER: ' + message['content'] + '\n'-}}{%- else -%}{{-'ASSISTANT: ' + message['content'] + '</s>\n' -}}{%- endif -%}{%- endif -%}{%- endfor -%}{%- if add_generation_prompt -%}{{-'ASSISTANT:'-}}{%- endif -%}",
-            /* .expected_output= */ "SYSTEM: You are a helpful assistant\nUSER: Hello\nASSISTANT: Hi there</s>\nUSER: Who are you\nASSISTANT:    I am an assistant   </s>\nUSER: Another question\nASSISTANT:",
-            /* .expected_output_jinja= */ "",
-            /* .bos_token= */ "",
-            /* .eos_token= */ "",
-        },
-        {
-            /* .name= */ "CohereForAI/c4ai-command-r-plus",
-            /* .template_str= */ "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% elif false == true %}{% set loop_messages = messages %}{% set system_message = 'You are Command-R, a brilliant, sophisticated, AI-assistant trained to assist human users by providing thorough responses. You are trained by Cohere.' %}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% if system_message != false %}{{ '<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>' + system_message + '<|END_OF_TURN_TOKEN|>' }}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ '<|START_OF_TURN_TOKEN|><|USER_TOKEN|>' + content.strip() + '<|END_OF_TURN_TOKEN|>' }}{% elif message['role'] == 'assistant' %}{{ '<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>'  + content.strip() + '<|END_OF_TURN_TOKEN|>' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>' }}{% endif %}",
-            /* .expected_output= */ "<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>You are a helpful assistant<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Hello<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>Hi there<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Who are you<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>I am an assistant<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Another question<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>",
-            /* .expected_output_jinja= */ "",
-        },
-        {
-            /* .name= */ "Llama-3",
-            /* .template_str= */ "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}",
-            /* .expected_output= */ "<|start_header_id|>system<|end_header_id|>\n\nYou are a helpful assistant<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nHello<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\nHi there<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nWho are you<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\nI am an assistant<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nAnother question<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n",
-            /* .expected_output_jinja= */ "",
-        },
-        {
-            /* .name= */ "Phi-3-mini",
-            /* .template_str= */ "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') %}{{'<|user|>' + '\n' + message['content'] + '<|end|>' + '\n' + '<|assistant|>' + '\n'}}{% elif (message['role'] == 'assistant') %}{{message['content'] + '<|end|>' + '\n'}}{% endif %}{% endfor %}",
-            /* .expected_output= */     "<|system|>\nYou are a helpful assistant<|end|>\n<|user|>\nHello<|end|>\n<|assistant|>\nHi there<|end|>\n<|user|>\nWho are you<|end|>\n<|assistant|>\n   I am an assistant   <|end|>\n<|user|>\nAnother question<|end|>\n<|assistant|>\n",
-            /* .expected_output_jinja= */ "<|user|>\nYou are a helpful assistant\nHello<|end|>\n<|assistant|>\nHi there<|end|>\n<|user|>\nWho are you<|end|>\n<|assistant|>\n   I am an assistant   <|end|>\n<|user|>\nAnother question<|end|>\n<|assistant|>\n",
-        },
-        {
-            /* .name= */ "Phi-3-small",
-            /* .template_str= */ "{{ bos_token }}{% for message in messages %}{{'<|' + message['role'] + '|>' + '\n' + message['content'] + '<|end|>\n' }}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>\n' }}{% else %}{{ eos_token }}{% endif %}",
-            /* .expected_output= */ "<|system|>\nYou are a helpful assistant<|end|>\n<|user|>\nHello<|end|>\n<|assistant|>\nHi there<|end|>\n<|user|>\nWho are you<|end|>\n<|assistant|>\n   I am an assistant   <|end|>\n<|user|>\nAnother question<|end|>\n<|assistant|>\n",
-            /* .expected_output_jinja= */ "",
-        },
-        {
-            /* .name= */ "Phi-3-medium",
-            /* .template_str= */ "{% for message in messages %}{% if (message['role'] == 'user') %}{{'<|user|>' + '\n' + message['content'] + '<|end|>' + '\n' + '<|assistant|>' + '\n'}}{% elif (message['role'] == 'assistant') %}{{message['content'] + '<|end|>' + '\n'}}{% endif %}{% endfor %}",
-            /* .expected_output= */     "<|system|>\nYou are a helpful assistant<|end|>\n<|user|>\nHello<|end|>\n<|assistant|>\nHi there<|end|>\n<|user|>\nWho are you<|end|>\n<|assistant|>\n   I am an assistant   <|end|>\n<|user|>\nAnother question<|end|>\n<|assistant|>\n",
-            /* .expected_output_jinja= */ "<|user|>\nYou are a helpful assistant\nHello<|end|>\n<|assistant|>\nHi there<|end|>\n<|user|>\nWho are you<|end|>\n<|assistant|>\n   I am an assistant   <|end|>\n<|user|>\nAnother question<|end|>\n<|assistant|>\n",
-        },
-        {
-            /* .name= */ "Phi-3-vision",
-            /* .template_str= */ "{% for message in messages %}{{'<|' + message['role'] + '|>' + '\n' + message['content'] + '<|end|>\n' }}{% endfor %}{% if add_generation_prompt and messages[-1]['role'] != 'assistant' %}{{- '<|assistant|>\n' -}}{% endif %}",
-            /* .expected_output= */ "<|system|>\nYou are a helpful assistant<|end|>\n<|user|>\nHello<|end|>\n<|assistant|>\nHi there<|end|>\n<|user|>\nWho are you<|end|>\n<|assistant|>\n   I am an assistant   <|end|>\n<|user|>\nAnother question<|end|>\n<|assistant|>\n",
-            /* .expected_output_jinja= */ "",
-            /* .bos_token= */ "",
-            /* .eos_token= */ "",
-        },
-        {
-            /* .name= */ "ChatGLM3",
-            /* .template_str= */ "{% for message in messages %}{% if loop.first %}[gMASK]sop<|{{ message['role'] }}|>\n {{ message['content'] }}{% else %}<|{{ message['role'] }}|>\n {{ message['content'] }}{% endif %}{% endfor %}{% if add_generation_prompt %}<|assistant|>{% endif %}",
-            /* .expected_output= */       "[gMASK]sop<|system|>\n You are a helpful assistant<|user|>\n Hello<|assistant|>\n Hi there<|user|>\n Who are you<|assistant|>\n    I am an assistant   <|user|>\n Another question<|assistant|>",
-            /* .expected_output_jinja= */ "[gMASK]sop<|system|>\n You are a helpful assistant<|user|>\n Hello<|assistant|>\n Hi there<|user|>\n Who are you<|assistant|>\n    I am an assistant   <|user|>\n Another question<|assistant|>",
-        },
-        {
-            /* .name= */ "ChatGLM4",
-            /* .template_str= */ U8C("[gMASK]<sop>{% for item in messages %}{% if item['tools'] is defined %}<|system|>\n你是一个名为 ChatGLM 的人工智能助手。你是基于智谱AI训练的语言模型 GLM-4 模型开发的，你的任务是针对用户的问题和要求提供适当的答复和支持。\n\n# 可用工具{% set tools = item['tools'] %}{% for tool in tools %}{% if tool['type'] == 'function' %}\n\n## {{ tool['function']['name'] }}\n\n{{ tool['function'] | tojson(indent=4) }}\n......{% endif %}{% endfor %}{% endif %}{% if item['content'] %}<|{{ item['role'] }}|>{{ item['metadata'] }}\n{{ item['content'] }}{% endif %}{% endfor %}{% if add_generation_prompt %}<|assistant|>\n{% endif %}"),
-            /* .expected_output= */ "[gMASK]<sop><|system|>\nYou are a helpful assistant<|user|>\nHello<|assistant|>\nHi there<|user|>\nWho are you<|assistant|>\n   I am an assistant   <|user|>\nAnother question<|assistant|>\n",
-            /* .expected_output_jinja= */ "",
-            /* .bos_token= */ "",
-            /* .eos_token= */ "",
-        },
-        {
-            /* .name= */ "GLMEdge",
-            /* .template_str= */ "{% for item in messages %}{% if item['role'] == 'system' %}<|system|>\n{{ item['content'] }}{% elif item['role'] == 'user' %}<|user|>\n{{ item['content'] }}{% elif item['role'] == 'assistant' %}<|assistant|>\n{{ item['content'] }}{% endif %}{% endfor %}<|assistant|>",
-            /* .expected_output= */ "<|system|>\nYou are a helpful assistant<|user|>\nHello<|assistant|>\nHi there<|user|>\nWho are you<|assistant|>\n   I am an assistant   <|user|>\nAnother question<|assistant|>",
-            /* .expected_output_jinja= */ "<|system|>\nYou are a helpful assistant<|user|>\nHello<|assistant|>\nHi there<|user|>\nWho are you<|assistant|>\n   I am an assistant   <|user|>\nAnother question<|assistant|>",
-            /* .bos_token= */ "",
-            /* .eos_token= */ "",
-        },
-        {
-            /* .name= */ "MiniCPM-3B-OpenHermes-2.5-v2-GGUF",
-            /* .template_str= */ U8C("{% for message in messages %}{% if message['role'] == 'user' %}{{'<用户>' + message['content'].strip() + '<AI>'}}{% else %}{{message['content'].strip()}}{% endif %}{% endfor %}"),
-            /* .expected_output= */ U8C("You are a helpful assistant<用户>Hello<AI>Hi there<用户>Who are you<AI>I am an assistant<用户>Another question<AI>"),
-            /* .expected_output_jinja= */ "",
-            /* .bos_token= */ "",
-            /* .eos_token= */ "",
-        },
-        {
-            /* .name= */ "DeepSeek-V2",
-            /* .template_str= */ "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{{ bos_token }}{% for message in messages %}{% if message['role'] == 'user' %}{{ 'User: ' + message['content'] + '\n\n' }}{% elif message['role'] == 'assistant' %}{{ 'Assistant: ' + message['content'] + eos_token }}{% elif message['role'] == 'system' %}{{ message['content'] + '\n\n' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ 'Assistant:' }}{% endif %}",
-            /* .expected_output= */ U8C("You are a helpful assistant\n\nUser: Hello\n\nAssistant: Hi there<｜end▁of▁sentence｜>User: Who are you\n\nAssistant:    I am an assistant   <｜end▁of▁sentence｜>User: Another question\n\nAssistant:"),
-            /* .expected_output_jinja= */ "",
-            /* .bos_token= */ "",
-            /* .eos_token= */ "<｜end▁of▁sentence｜>",
-        },
-        {
-            /* .name= */ "ibm-granite/granite-3.0-8b-instruct",
-            /* .template_str= */ "{%- if tools %}\n    {{- '<|start_of_role|>available_tools<|end_of_role|>\n' }}\n    {%- for tool in tools %}\n    {{- tool | tojson(indent=4) }}\n    {%- if not loop.last %}\n        {{- '\n\n' }}\n    {%- endif %}\n    {%- endfor %}\n    {{- '<|end_of_text|>\n' }}\n{%- endif %}\n{%- for message in messages %}\n    {%- if message['role'] == 'system' %}\n    {{- '<|start_of_role|>system<|end_of_role|>' + message['content'] + '<|end_of_text|>\n' }}\n    {%- elif message['role'] == 'user' %}\n    {{- '<|start_of_role|>user<|end_of_role|>' + message['content'] + '<|end_of_text|>\n' }}\n    {%- elif message['role'] == 'assistant' %}\n    {{- '<|start_of_role|>assistant<|end_of_role|>'  + message['content'] + '<|end_of_text|>\n' }}\n    {%- elif message['role'] == 'assistant_tool_call' %}\n    {{- '<|start_of_role|>assistant<|end_of_role|><|tool_call|>' + message['content'] + '<|end_of_text|>\n' }}\n    {%- elif message['role'] == 'tool_response' %}\n    {{- '<|start_of_role|>tool_response<|end_of_role|>' + message['content'] + '<|end_of_text|>\n' }}\n    {%- endif %}\n    {%- if loop.last and add_generation_prompt %}\n    {{- '<|start_of_role|>assistant<|end_of_role|>' }}\n    {%- endif %}\n{%- endfor %}",
-            /* .expected_output= */       "<|start_of_role|>system<|end_of_role|>You are a helpful assistant<|end_of_text|>\n<|start_of_role|>user<|end_of_role|>Hello<|end_of_text|>\n<|start_of_role|>assistant<|end_of_role|>Hi there<|end_of_text|>\n<|start_of_role|>user<|end_of_role|>Who are you<|end_of_text|>\n<|start_of_role|>assistant<|end_of_role|>   I am an assistant   <|end_of_text|>\n<|start_of_role|>user<|end_of_role|>Another question<|end_of_text|>\n<|start_of_role|>assistant<|end_of_role|>",
-            /* .expected_output_jinja= */ "<|start_of_role|>system<|end_of_role|>You are a helpful assistant<|end_of_text|>\n<|start_of_role|>user<|end_of_role|>Hello<|end_of_text|>\n<|start_of_role|>assistant<|end_of_role|>Hi there<|end_of_text|>\n<|start_of_role|>user<|end_of_role|>Who are you<|end_of_text|>\n<|start_of_role|>assistant<|end_of_role|>   I am an assistant   <|end_of_text|>\n<|start_of_role|>user<|end_of_role|>Another question<|end_of_text|>\n<|start_of_role|>assistant<|end_of_role|>",
-        },
-        {
-            /* .name= */ "mistralai/Mistral-7B-Instruct-v0.2 (mistralai 'v1' template with a system prompt)",
-            /* .template_str= */ "{%- if messages[0]['role'] == 'system' %}\n    {%- set system_message = messages[0]['content'] %}\n    {%- set loop_messages = messages[1:] %}\n{%- else %}\n    {%- set loop_messages = messages %}\n{%- endif %}\n\n{{- bos_token }}\n{%- for message in loop_messages %}\n    {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}\n        {{- raise_exception('After the optional system message, conversation roles must alternate user/assistant/user/assistant/...') }}\n    {%- endif %}\n    {%- if message['role'] == 'user' %}\n        {%- if loop.first and system_message is defined %}\n            {{- ' [INST] ' + system_message + '\\n\\n' + message['content'] + ' [/INST]' }}\n        {%- else %}\n            {{- ' [INST] ' + message['content'] + ' [/INST]' }}\n        {%- endif %}\n    {%- elif message['role'] == 'assistant' %}\n        {{- ' ' + message['content'] + eos_token}}\n    {%- else %}\n        {{- raise_exception('Only user and assistant roles are supported, with the exception of an initial optional system message!') }}\n    {%- endif %}\n{%- endfor %}\n",
-            /* .expected_output= */ " [INST] You are a helpful assistant\n\nHello [/INST] Hi there</s> [INST] Who are you [/INST]    I am an assistant   </s> [INST] Another question [/INST]",
-            /* .expected_output_jinja= */ " [INST] You are a helpful assistant\n\nHello [/INST] Hi there</s> [INST] Who are you [/INST]    I am an assistant   </s> [INST] Another question [/INST]",
-            /* .bos_token= */ "",
-            /* .eos_token= */ "</s>",
-        },
-        {
-            /* .name= */ "Mistral-Large-Instruct-2407 (mistralai 'v3' template; modified to have system prompt at start)",
-            /* .template_str= */ "{%- if messages[0][\"role\"] == \"system\" %}\n    {%- set system_message = messages[0][\"content\"] %}\n    {%- set loop_messages = messages[1:] %}\n{%- else %}\n    {%- set loop_messages = messages %}\n{%- endif %}\n{%- if not tools is defined %}\n    {%- set tools = none %}\n{%- endif %}\n{%- set user_messages = loop_messages | selectattr(\"role\", \"equalto\", \"user\") | list %}\n\n{#- This block checks for alternating user/assistant messages, skipping tool calling messages #}\n{%- set ns = namespace() %}\n{%- set ns.index = 0 %}\n{%- for message in loop_messages %}\n    {%- if not (message.role == \"tool\" or message.role == \"tool_results\" or (message.tool_calls is defined and message.tool_calls is not none)) %}\n        {%- if (message[\"role\"] == \"user\") != (ns.index % 2 == 0) %}\n            {{- raise_exception(\"After the optional system message, conversation roles must alternate user/assistant/user/assistant/...\") }}\n        {%- endif %}\n        {%- set ns.index = ns.index + 1 %}\n    {%- endif %}\n{%- endfor %}\n\n{{- bos_token }}\n{%- for message in loop_messages %}\n    {%- if message[\"role\"] == \"user\" %}\n        {%- if tools is not none and (message == user_messages[-1]) %}\n            {{- \"[AVAILABLE_TOOLS] [\" }}\n            {%- for tool in tools %}\n                {%- set tool = tool.function %}\n                {{- '{\"type\": \"function\", \"function\": {' }}\n                {%- for key, val in tool.items() if key != \"return\" %}\n                    {%- if val is string %}\n                        {{- '\"' + key + '\": \"' + val + '\"' }}\n                    {%- else %}\n                        {{- '\"' + key + '\": ' + val|tojson }}\n                    {%- endif %}\n                    {%- if not loop.last %}\n                        {{- \", \" }}\n                    {%- endif %}\n                {%- endfor %}\n                {{- \"}}\" }}\n                {%- if not loop.last %}\n                    {{- \", \" }}\n                {%- else %}\n                    {{- \"]\" }}\n                {%- endif %}\n            {%- endfor %}\n            {{- \"[/AVAILABLE_TOOLS]\" }}\n            {%- endif %}\n        {%- if loop.last and system_message is defined %}\n            {{- \"[INST] \" + system_message + \"\\n\\n\" + message[\"content\"] + \"[/INST]\" }}\n        {%- else %}\n            {{- \"[INST] \" + message[\"content\"] + \"[/INST]\" }}\n        {%- endif %}\n    {%- elif message.tool_calls is defined and message.tool_calls is not none %}\n        {{- \"[TOOL_CALLS] [\" }}\n        {%- for tool_call in message.tool_calls %}\n            {%- set out = tool_call.function|tojson %}\n            {{- out[:-1] }}\n            {%- if not tool_call.id is defined or tool_call.id|length != 9 %}\n                {{- raise_exception(\"Tool call IDs should be alphanumeric strings with length 9!\") }}\n            {%- endif %}\n            {{- ', \"id\": \"' + tool_call.id + '\"}' }}\n            {%- if not loop.last %}\n                {{- \", \" }}\n            {%- else %}\n                {{- \"]\" + eos_token }}\n            {%- endif %}\n        {%- endfor %}\n    {%- elif message[\"role\"] == \"assistant\" %}\n        {{- \" \" + message[\"content\"]|trim + eos_token}}\n    {%- elif message[\"role\"] == \"tool_results\" or message[\"role\"] == \"tool\" %}\n        {%- if message.content is defined and message.content.content is defined %}\n            {%- set content = message.content.content %}\n        {%- else %}\n            {%- set content = message.content %}\n        {%- endif %}\n        {{- '[TOOL_RESULTS] {\"content\": ' + content|string + \", \" }}\n        {%- if not message.tool_call_id is defined or message.tool_call_id|length != 9 %}\n            {{- raise_exception(\"Tool call IDs should be alphanumeric strings with length 9!\") }}\n        {%- endif %}\n        {{- '\"call_id\": \"' + message.tool_call_id + '\"}[/TOOL_RESULTS]' }}\n    {%- else %}\n        {{- raise_exception(\"Only user and assistant roles are supported, with the exception of an initial optional system message!\") }}\n    {%- endif %}\n{%- endfor %}\n",
-            /* .expected_output= */       "[INST] You are a helpful assistant\n\nHello[/INST] Hi there</s>[INST] Who are you[/INST] I am an assistant</s>[INST] Another question[/INST]",
-            /* .expected_output_jinja= */ "[INST] Hello[/INST] Hi there</s>[INST] Who are you[/INST] I am an assistant</s>[INST] You are a helpful assistant\n\nAnother question[/INST]",
-            /* .bos_token= */ "",
-            /* .eos_token= */ "</s>",
-        },
-        {
-            /* .name= */ "Mistral-Nemo-Instruct-2407 (mistralai 'v3-tekken' template; modified to have system prompt at start)",
-            /* .template_str= */ "{%- if messages[0][\"role\"] == \"system\" %}\n    {%- set system_message = messages[0][\"content\"] %}\n    {%- set loop_messages = messages[1:] %}\n{%- else %}\n    {%- set loop_messages = messages %}\n{%- endif %}\n{%- if not tools is defined %}\n    {%- set tools = none %}\n{%- endif %}\n{%- set user_messages = loop_messages | selectattr(\"role\", \"equalto\", \"user\") | list %}\n\n{#- This block checks for alternating user/assistant messages, skipping tool calling messages #}\n{%- set ns = namespace() %}\n{%- set ns.index = 0 %}\n{%- for message in loop_messages %}\n    {%- if not (message.role == \"tool\" or message.role == \"tool_results\" or (message.tool_calls is defined and message.tool_calls is not none)) %}\n        {%- if (message[\"role\"] == \"user\") != (ns.index % 2 == 0) %}\n            {{- raise_exception(\"After the optional system message, conversation roles must alternate user/assistant/user/assistant/...\") }}\n        {%- endif %}\n        {%- set ns.index = ns.index + 1 %}\n    {%- endif %}\n{%- endfor %}\n\n{{- bos_token }}\n{%- for message in loop_messages %}\n    {%- if message[\"role\"] == \"user\" %}\n        {%- if tools is not none and (message == user_messages[-1]) %}\n            {{- \"[AVAILABLE_TOOLS][\" }}\n            {%- for tool in tools %}\n                {%- set tool = tool.function %}\n                {{- '{\"type\": \"function\", \"function\": {' }}\n                {%- for key, val in tool.items() if key != \"return\" %}\n                    {%- if val is string %}\n                        {{- '\"' + key + '\": \"' + val + '\"' }}\n                    {%- else %}\n                        {{- '\"' + key + '\": ' + val|tojson }}\n                    {%- endif %}\n                    {%- if not loop.last %}\n                        {{- \", \" }}\n                    {%- endif %}\n                {%- endfor %}\n                {{- \"}}\" }}\n                {%- if not loop.last %}\n                    {{- \", \" }}\n                {%- else %}\n                    {{- \"]\" }}\n                {%- endif %}\n            {%- endfor %}\n            {{- \"[/AVAILABLE_TOOLS]\" }}\n            {%- endif %}\n        {%- if loop.last and system_message is defined %}\n            {{- \"[INST]\" + system_message + \"\\n\\n\" + message[\"content\"] + \"[/INST]\" }}\n        {%- else %}\n            {{- \"[INST]\" + message[\"content\"] + \"[/INST]\" }}\n        {%- endif %}\n    {%- elif (message.tool_calls is defined and message.tool_calls is not none) %}\n        {{- \"[TOOL_CALLS][\" }}\n        {%- for tool_call in message.tool_calls %}\n            {%- set out = tool_call.function|tojson %}\n            {{- out[:-1] }}\n            {%- if not tool_call.id is defined or tool_call.id|length != 9 %}\n                {{- raise_exception(\"Tool call IDs should be alphanumeric strings with length 9!\") }}\n            {%- endif %}\n            {{- ', \"id\": \"' + tool_call.id + '\"}' }}\n            {%- if not loop.last %}\n                {{- \", \" }}\n            {%- else %}\n                {{- \"]\" + eos_token }}\n            {%- endif %}\n        {%- endfor %}\n    {%- elif message[\"role\"] == \"assistant\" %}\n        {{- message[\"content\"] + eos_token}}\n    {%- elif message[\"role\"] == \"tool_results\" or message[\"role\"] == \"tool\" %}\n        {%- if message.content is defined and message.content.content is defined %}\n            {%- set content = message.content.content %}\n        {%- else %}\n            {%- set content = message.content %}\n        {%- endif %}\n        {{- '[TOOL_RESULTS]{\"content\": ' + content|string + \", \" }}\n        {%- if not message.tool_call_id is defined or message.tool_call_id|length != 9 %}\n            {{- raise_exception(\"Tool call IDs should be alphanumeric strings with length 9!\") }}\n        {%- endif %}\n        {{- '\"call_id\": \"' + message.tool_call_id + '\"}[/TOOL_RESULTS]' }}\n    {%- else %}\n        {{- raise_exception(\"Only user and assistant roles are supported, with the exception of an initial optional system message!\") }}\n    {%- endif %}\n{%- endfor %}\n",
-            /* .expected_output= */       "[INST]You are a helpful assistant\n\nHello[/INST]Hi there</s>[INST]Who are you[/INST]   I am an assistant   </s>[INST]Another question[/INST]",
-            /* .expected_output_jinja= */ "[INST]Hello[/INST]Hi there</s>[INST]Who are you[/INST]   I am an assistant   </s>[INST]You are a helpful assistant\n\nAnother question[/INST]",
-            /* .bos_token= */ "",
-            /* .eos_token= */ "</s>",
-        },
-        {
-            /* .name= */ "mistralai/Mistral-Large-Instruct-2411 (mistralai 'v7' template)",
-            /* .template_str= */ "{{ bos_token }}{% for message in messages %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + '[/INST]' }}{% elif message['role'] == 'system' %}{{ '[SYSTEM_PROMPT] ' + message['content'] + '[/SYSTEM_PROMPT]' }}{% elif message['role'] == 'assistant' %}{{ ' ' + message['content'] + eos_token }}{% else %}{{ raise_exception('Only user, system and assistant roles are supported!') }}{% endif %}{% endfor %}",
-            /* .expected_output= */ "[SYSTEM_PROMPT] You are a helpful assistant[/SYSTEM_PROMPT][INST] Hello[/INST] Hi there</s>[INST] Who are you[/INST]    I am an assistant   </s>[INST] Another question[/INST]",
-            /* .expected_output_jinja= */ "",
-            /* .bos_token= */ "",
-            /* .eos_token= */ "</s>",
-        },
-        {
-            /* .name= */ "ai-sage/GigaChat-20B-A3B-instruct",
-            /* .template_str= */ "{% if messages[0]['role'] == 'system' -%}\n    {%- set loop_messages = messages[1:] -%}\n    {%- set system_message = bos_token + messages[0]['content'] + additional_special_tokens[1] -%}\n{%- else -%}\n    {%- set loop_messages = messages -%}\n    {%- set system_message = bos_token + '' -%}\n{%- endif -%}\n{%- for message in loop_messages %}\n    {% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}\n        {{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}\n    {% endif %}\n    \n    {%- if loop.index0 == 0 -%}\n        {{ system_message -}}\n    {%- endif -%}\n    {%- if message['role'] == 'user' -%}\n        {{ message['role'] + additional_special_tokens[0] + message['content'] + additional_special_tokens[1] -}}\n        {{ 'available functions' + additional_special_tokens[0] + additional_special_tokens[2] + additional_special_tokens[3]  + additional_special_tokens[1] -}}\n    {%- endif -%}\n    {%- if message['role'] == 'assistant' -%}\n        {{ message['role'] + additional_special_tokens[0] + message['content'] + additional_special_tokens[1] -}}\n    {%- endif -%}\n    {%- if loop.last and add_generation_prompt -%}\n        {{ 'assistant' + additional_special_tokens[0] -}}\n    {%- endif -%}\n{%- endfor %}",
-            /* .expected_output= */ "<s>You are a helpful assistant<|message_sep|>user<|role_sep|>Hello<|message_sep|>available functions<|role_sep|>[]<|message_sep|>assistant<|role_sep|>Hi there<|message_sep|>user<|role_sep|>Who are you<|message_sep|>available functions<|role_sep|>[]<|message_sep|>assistant<|role_sep|>   I am an assistant   <|message_sep|>user<|role_sep|>Another question<|message_sep|>available functions<|role_sep|>[]<|message_sep|>assistant<|role_sep|>",
-            /* .expected_output_jinja= */ "",
-            /* .bos_token= */ "",
-            /* .eos_token= */ "",
-            /* .supported_with_jinja= */ false, // Requires additional_special_tokens as extra context
-        },
-        {
-            /* .name= */ "Infinigence/Megrez-3B-Instruct",
-            /* .template_str= */ U8C("{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|role_start|>system<|role_end|>你是Megrez-3B-Instruct，将针对用户的问题给出详细的、积极的回答。<|turn_end|>' }}{% endif %}{{ '<|role_start|>' + message['role'] + '<|role_end|>' + message['content'] + '<|turn_end|>' }}{% endfor %}{% if add_generation_prompt %}{{ '<|role_start|>assistant<|role_end|>' }}{% endif %}"),
-            /* .expected_output= */ "<|role_start|>system<|role_end|>You are a helpful assistant<|turn_end|><|role_start|>user<|role_end|>Hello<|turn_end|><|role_start|>assistant<|role_end|>Hi there<|turn_end|><|role_start|>user<|role_end|>Who are you<|turn_end|><|role_start|>assistant<|role_end|>   I am an assistant   <|turn_end|><|role_start|>user<|role_end|>Another question<|turn_end|><|role_start|>assistant<|role_end|>",
-            /* .expected_output_jinja= */ "",
-            /* .bos_token= */ "",
-            /* .eos_token= */ "",
-        },
-        {
-            /* .name= */ "phi-4",
-            /* .template_str= */ "{% for message in messages %}{% if (message['role'] == 'system') %}{{'<|im_start|>system<|im_sep|>' + message['content'] + '<|im_end|>'}}{% elif (message['role'] == 'user') %}{{'<|im_start|>user<|im_sep|>' + message['content'] + '<|im_end|><|im_start|>assistant<|im_sep|>'}}{% elif (message['role'] == 'assistant') %}{{message['content'] + '<|im_end|>'}}{% endif %}{% endfor %}",
-            /* .expected_output= */ "<|im_start|>system<|im_sep|>You are a helpful assistant<|im_end|><|im_start|>user<|im_sep|>Hello<|im_end|><|im_start|>assistant<|im_sep|>Hi there<|im_end|><|im_start|>user<|im_sep|>Who are you<|im_end|><|im_start|>assistant<|im_sep|>   I am an assistant   <|im_end|><|im_start|>user<|im_sep|>Another question<|im_end|><|im_start|>assistant<|im_sep|>",
-            /* .expected_output_jinja= */ "",
-            /* .bos_token= */ "",
-            /* .eos_token= */ "",
-        },
-        {
-            /* .name= */ "yandex/YandexGPT-5-Lite-8B-instruct",
-            /* .template_str= */ "<s>{%- set names = {'assistant': ' Ассистент:', 'user': ' Пользователь:'} %}\n{%- set tools_prefix = 'Тебе доступны следующие функции:' %}\n{%- macro __render_tool(tool) %}\n    {%- set name = tool.function.name %}\n    {%- set description = tool.function.description|default('') %}\n    {%- set parameters = tool.function.parameters|tojson %}\n    {{- '\\n' }}function {{ '{' }}'name':'{{ name }}',\n    {%- if tool.function.description %}'description':'{{ description }}',{% endif %}\n'parameters':{{ parameters }}\n    {{- '}' }}\n{%- endmacro %}\n{%- macro __render_tools(tools) %}\n    {{- tools_prefix }}\n    {%- for tool in tools %}\n        {{- __render_tool(tool) }}\n    {%- endfor %}\n    {{- '\\n\\n' }}\n{%- endmacro %}\n{%- macro __render_tool_message(message) %}\n    {{- '\\n\\nРезультат вызова' }} {{ message.name }}: {{ message.content }} {{ '\\n\\n' }}\n{%- endmacro %}\n{%- if tools -%}\n    {{- __render_tools(tools) }}\n{%- endif -%}\n{%- macro __render_user_message(message) %}\n{{ names.user }} {{ message.content + '\\n\\n' }}\n{%- endmacro %}\n{%- macro __render_assistant_message(message) %}\n    {{- names.assistant }}\n    {%- set call = message['function_call'] %}\n    {%- if call %}\n        {{- '\\n[TOOL_CALL_START]' }}{{ call.name }}{{ '\\n' }}{{ call.arguments|tojson }}\n    {%- else %}\n        {{- ' ' + message.content + '\\n\\n' }}\n    {%- endif %}\n{%- endmacro %}\n{%- if not add_generation_prompt is defined %}\n{%- set add_generation_prompt = false %}\n{%- endif %}\n{%- for message in messages %}\n    {%- if message['role'] == 'user' %}\n        {{- __render_user_message(message) }}\n    {%- endif %}\n    {%- if message.role == 'assistant' and not loop.last %}\n        {{- __render_assistant_message(message) }}\n    {%- endif %}\n    {%- if message.role == 'tool' %}\n        {{- __render_tool_message(message) }}\n    {%- endif %}\n    {%- if loop.last %}\n        {{- ' Ассистент:[SEP]' }}\n    {%- endif %}\n{%- endfor %}\n",
-            /* .expected_output= */ " Пользователь: Hello\n\n Ассистент: Hi there\n\n Пользователь: Who are you\n\n Ассистент:    I am an assistant   \n\n Пользователь: Another question\n\n Ассистент:[SEP]",
-            /* .expected_output_jinja= */ "<s> Пользователь: You are a helpful assistant\nHello\n\n Ассистент: Hi there\n\n Пользователь: Who are you\n\n Ассистент:    I am an assistant   \n\n Пользователь: Another question\n\n Ассистент:[SEP]",
-            /* .bos_token= */ "<s>",
-            /* .eos_token= */ "",
-        },
-        {
-            /* .name= */ "inclusionAI/Ling-lite",
-            /* .template_str */ "{% for message in messages %}{% set role = message['role'] | lower %}{% if role == 'user' %}{% set role = 'HUMAN' %}{% endif %}{% set role = role | upper %}{{ '<role>' + role + '</role>' + message['content'] }}{% endfor %}{% if add_generation_prompt %}{{ '<role>ASSISTANT</role>' }}{% endif %}",
-            /* .expected_output= */ "<role>SYSTEM</role>You are a helpful assistant<role>HUMAN</role>Hello<role>ASSISTANT</role>Hi there<role>HUMAN</role>Who are you<role>ASSISTANT</role>   I am an assistant   <role>HUMAN</role>Another question<role>ASSISTANT</role>",
-            /* .expected_output_jinja= */ "",
-            /* .bos_token= */ "",
-            /* .eos_token= */ "",
-        },
-        {
-            /* .name= */ "ByteDance-Seed/Seed-OSS-36B-Instruct",
-            /* .template_str */ "{# <seed:bos> #}{%- for message in messages %}{%- if message.role in [\"user\", \"system\"] %}{{ bos_token + message.role + \"\\n\" + message.content + eos_token }}{%- elif message.role == \"assistant\" %}{{ bos_token + message.role }}{%- if message.content is defined and message.content is string and message.content|trim|length > 0 %}{{ \"\\n\" + message.content|trim + eos_token }}{%- endif %}{%- else %}{{ bos_token + message.role + \"\\n\" + message.content + eos_token }}{%- endif %}{%- endfor %}{%- if add_generation_prompt %}{{ bos_token + \"assistant\\n\" }}{%- endif %}",
-            /* .expected_output= */ "<seed:bos>system\nYou are a helpful assistant<seed:eos><seed:bos>user\nHello<seed:eos><seed:bos>assistant\nHi there<seed:eos><seed:bos>user\nWho are you<seed:eos><seed:bos>assistant\nI am an assistant<seed:eos><seed:bos>user\nAnother question<seed:eos><seed:bos>assistant\n",
-            /* .expected_output_jinja= */ "<seed:bos>system\nYou are a helpful assistant<seed:eos><seed:bos>user\nHello<seed:eos><seed:bos>assistant\nHi there<seed:eos><seed:bos>user\nWho are you<seed:eos><seed:bos>assistant\nI am an assistant<seed:eos><seed:bos>user\nAnother question<seed:eos><seed:bos>assistant\n",
-            /* .bos_token= */ "<seed:bos>",
-            /* .eos_token= */ "<seed:eos>",
-        }
-    };
-    std::vector<char> formatted_chat(1024);
-    int32_t res;
-
-    // list all supported templates
-    std::vector<const char *> supported_tmpl;
-    res = llama_chat_builtin_templates(nullptr, 0);
-    assert(res > 0);
-    supported_tmpl.resize(res);
-    res = llama_chat_builtin_templates(supported_tmpl.data(), supported_tmpl.size());
-    std::cout << "Built-in chat templates:\n";
-    for (auto tmpl : supported_tmpl) {
-        std::cout << "  " << tmpl << "\n";
-    }
-
-    // test invalid chat template
-    res = llama_chat_apply_template("INVALID TEMPLATE", conversation.data(), conversation.size(), true, formatted_chat.data(), formatted_chat.size());
-    assert(res < 0);
-    const auto add_generation_prompt = true;
-
-    for (const auto & test_case : test_cases) {
-        std::cout << "\n\n=== " << test_case.name << " ===\n\n";
-        formatted_chat.resize(1024);
-        res = llama_chat_apply_template(
-            test_case.template_str.c_str(),
-            conversation.data(),
-            conversation.size(),
-            add_generation_prompt,
-            formatted_chat.data(),
-            formatted_chat.size()
-        );
-        formatted_chat.resize(res);
-        std::string output(formatted_chat.data(), formatted_chat.size());
-        if (output != test_case.expected_output) {
-            std::cout << "Expected:\n" << test_case.expected_output << "\n";
-            std::cout << "-------------------------\n";
-            std::cout << "Actual:\n" << output << "\n";
-            std::cout.flush();
-            assert(output == test_case.expected_output);
-        }
-    }
-
-    std::vector<common_chat_msg> messages;
-    for (const auto & msg : conversation) {
-        messages.push_back(simple_msg(msg.role, msg.content));
-    }
-    for (const auto & test_case : test_cases) {
-        if (!test_case.supported_with_jinja) {
-            continue;
-        }
-        std::cout << "\n\n=== " << test_case.name << " (jinja) ===\n\n";
-        try {
-            auto output = format_using_common(
-                                test_case.template_str,
-                                test_case.bos_token,
-                                test_case.eos_token,
-                                messages);
-            auto expected_output = normalize_newlines(test_case.expected_output_jinja.empty() ? test_case.expected_output : test_case.expected_output_jinja);
-            if (output != expected_output) {
-                std::cout << "Template:```\n" << test_case.template_str << "\n```";
-                std::cout << "-------------------------\n";
-                std::cout << "Expected:```\n" << expected_output << "\n```";
-                std::cout << "-------------------------\n";
-                std::cout << "Actual:```\n" << output << "\n```";
-                std::cout.flush();
-                assert(output == expected_output);
-            }
-        } catch (const std::exception & e) {
-            std::cerr << "ERROR: " << e.what() << "\n";
-            assert(false);
-        }
-    }
-
-    // TODO: llama_chat_format_single will be deprecated, remove these tests later
-
-    // test llama_chat_format_single for system message
-    std::cout << "\n\n=== llama_chat_format_single (system message) ===\n\n";
-    std::vector<common_chat_msg> chat2;
-    auto sys_msg = simple_msg("system", "You are a helpful assistant");
-
-    auto fmt_sys = [&](std::string tmpl_str) {
-        auto tmpls = common_chat_templates_init(/* model= */ nullptr, tmpl_str);
-        auto output = common_chat_format_single(tmpls.get(), chat2, sys_msg, false, /* use_jinja= */ false);
-        std::cout << "fmt_sys(" << tmpl_str << ") : " << output << "\n";
-        std::cout << "-------------------------\n";
-        return output;
-    };
-    assert(fmt_sys("chatml") == "<|im_start|>system\nYou are a helpful assistant<|im_end|>\n");
-    assert(fmt_sys("mistral-v1") == " [INST] You are a helpful assistant\n\n");
-    assert(fmt_sys("mistral-v3") == "[INST] You are a helpful assistant\n\n");
-    assert(fmt_sys("mistral-v3-tekken") == "[INST]You are a helpful assistant\n\n");
-    assert(fmt_sys("mistral-v7") == "[SYSTEM_PROMPT] You are a helpful assistant[/SYSTEM_PROMPT]");
-    assert(fmt_sys("llama2") == "[INST] You are a helpful assistant\n");
-    assert(fmt_sys("llama2-sys") == "[INST] <<SYS>>\nYou are a helpful assistant\n<</SYS>>\n\n");
-    assert(fmt_sys("mistral") == "[INST] You are a helpful assistant\n"); // for old pre-v1 templates
-    assert(fmt_sys("gemma")  == ""); // for gemma, system message is merged with user message
-    assert(fmt_sys("llama3") == "<|start_header_id|>system<|end_header_id|>\n\nYou are a helpful assistant<|eot_id|>");
-    assert(fmt_sys("gigachat") == "<s>You are a helpful assistant<|message_sep|>");
-
-
-    // test llama_chat_format_single for user message
-    std::cout << "\n\n=== llama_chat_format_single (user message) ===\n\n";
-    chat2.push_back(simple_msg("system", "You are a helpful assistant"));
-    chat2.push_back(simple_msg("user", "Hello"));
-    chat2.push_back(simple_msg("assistant", "I am assistant"));
-    auto new_msg = simple_msg("user", "How are you");
-
-    auto fmt_single = [&](const std::string & tmpl_str) {
-        auto tmpls = common_chat_templates_init(/* model= */ nullptr, tmpl_str.c_str());
-        auto output = common_chat_format_single(tmpls.get(), chat2, new_msg, true, /* use_jinja= */ false);
-        std::cout << "fmt_single(" << tmpl_str << ") : " << output << "\n";
-        std::cout << "-------------------------\n";
-        return output;
-    };
-    assert(fmt_single("chatml") == "\n<|im_start|>user\nHow are you<|im_end|>\n<|im_start|>assistant\n");
-    assert(fmt_single("mistral-v1") == " [INST] How are you [/INST]");
-    assert(fmt_single("mistral-v3") == "[INST] How are you[/INST]");
-    assert(fmt_single("mistral-v3-tekken") == "[INST]How are you[/INST]");
-    assert(fmt_single("mistral-v7") == "[INST] How are you[/INST]");
-    assert(fmt_single("llama2") == "[INST] How are you [/INST]");
-    assert(fmt_single("mistral") == "[INST] How are you [/INST]"); // for old pre-v1 templates
-    assert(fmt_single("gemma")  == "\n<start_of_turn>user\nHow are you<end_of_turn>\n<start_of_turn>model\n");
-    assert(fmt_single("llama3") == "<|start_header_id|>user<|end_header_id|>\n\nHow are you<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n");
-    // assert(fmt_single("gigachat") == "user<|role_sep|>How are you<|message_sep|>available functions<|role_sep|>[]<|message_sep|>assistant<|role_sep|>");
-
-    std::cout << "\nOK: All tests passed successfully.\n";
-
-    return 0;
-}
diff --git a/tests/test-chat.cpp b/tests/test-chat.cpp
index 4378a8db71..ad2953f6da 100644
--- a/tests/test-chat.cpp
+++ b/tests/test-chat.cpp
@@ -5,18 +5,20 @@
 //
 //    cmake -B build && cmake --build build --parallel && ./build/bin/test-chat ../minja/build/tests/*.jinja 2>/dev/null
 //
+#include "../src/llama-grammar.h"
+#include "../src/unicode.h"
+#include "chat-auto-parser.h"
 #include "chat.h"
-
+#include "common.h"
+#include "ggml.h"
 #include "log.h"
 
-#include "../src/unicode.h"
-#include "../src/llama-grammar.h"
-
-#include <nlohmann/json.hpp>
-
+#include <algorithm>
 #include <fstream>
-#include <iostream>
 #include <functional>
+#include <iostream>
+#include <nlohmann/json.hpp>
+#include <stdexcept>
 #include <string>
 
 using json = nlohmann::ordered_json;
@@ -33,6 +35,7 @@ static std::ostream & operator<<(std::ostream & os, const common_chat_msg_diff &
     os << "}";
     return os;
 }
+
 // operator<< for vector<common_chat_msg_diff>:
 static std::ostream & operator<<(std::ostream & os, const std::vector<common_chat_msg_diff> & diffs) {
     os << "[\n";
@@ -42,6 +45,7 @@ static std::ostream & operator<<(std::ostream & os, const std::vector<common_cha
     os << "]";
     return os;
 }
+
 static std::ostream & operator<<(std::ostream & os, const common_chat_msg & msg) {
     os << "{ role: " << msg.role << "; ";
     os << "content: " << msg.content << "; ";
@@ -53,7 +57,8 @@ static std::ostream & operator<<(std::ostream & os, const common_chat_msg & msg)
     os << "reasoning_content: " << msg.reasoning_content << "; ";
     os << "tool_calls: [\n";
     for (const auto & tool_call : msg.tool_calls) {
-        os << "  { name: " << tool_call.name << "; arguments: " << tool_call.arguments << "; id: " << tool_call.id << " },\n";
+        os << "  { name: " << tool_call.name << "; arguments: " << tool_call.arguments << "; id: " << tool_call.id
+           << " },\n";
     }
     os << "]";
     os << "}";
@@ -70,29 +75,29 @@ static common_chat_msg normalize(const common_chat_msg & msg) {
         try {
             tool_call.arguments = json::parse(tool_call.arguments).dump();
         } catch (const std::exception &) {
-            // Do nothing
         }
     }
     return normalized;
 }
 
-
-template <>
-bool equals(const common_chat_msg & expected, const common_chat_msg & actual) {
+template <> bool equals(const common_chat_msg & expected, const common_chat_msg & actual) {
     return normalize(expected) == normalize(actual);
 }
 
 template <class T> static void assert_equals(const T & expected, const T & actual) {
     if (!equals(expected, actual)) {
-        std::cerr << "Expected:```\n" << expected << "\n```" << std::endl;
-        std::cerr << "Actual:```\n" << actual << "\n```" << std::endl;
-        std::cerr << std::flush;
+        std::ostringstream oss_expected;
+        oss_expected << expected;
+        std::ostringstream oss_actual;
+        oss_actual << actual;
+        LOG_ERR("Expected: %s\n", oss_expected.str().c_str());
+        LOG_ERR("Actual: %s\n", oss_actual.str().c_str());
+        common_log_flush(common_log_main());
         throw std::runtime_error("Test failed");
     }
 }
 
 static std::string read_file(const std::string & path) {
-    std::cerr << "# Reading: " << path << '\n' << std::flush;
     std::ifstream fs(path, std::ios_base::binary);
     if (!fs.is_open()) {
         fs = std::ifstream("../" + path, std::ios_base::binary);
@@ -146,11 +151,13 @@ static std::string renormalize_json(const std::string & json_str) {
         auto json_obj = json::parse(json_str);
         return json_obj.dump();
     } catch (const std::exception & e) {
-        std::cerr << "Failed to parse JSON: " << e.what() << '\n';
-        return json_str;
+        return "";  // ignore parial JSON contents for comparison purposes
     }
 }
-static void assert_msg_equals(const common_chat_msg & expected, const common_chat_msg & actual, bool ignore_whitespace_differences = false) {
+
+static void assert_msg_equals(const common_chat_msg & expected,
+                              const common_chat_msg & actual,
+                              bool                    ignore_whitespace_differences = false) {
     assert_equals(expected.role, actual.role);
     if (ignore_whitespace_differences) {
         assert_equals(string_strip(expected.content), string_strip(actual.content));
@@ -183,7 +190,7 @@ static void assert_msg_equals(const common_chat_msg & expected, const common_cha
     }
 }
 
-common_chat_tool special_function_tool {
+static common_chat_tool special_function_tool{
     /* .name = */ "special_function",
     /* .description = */ "I'm special",
     /* .parameters = */ R"({
@@ -197,7 +204,7 @@ common_chat_tool special_function_tool {
         "required": ["arg1"]
     })",
 };
-common_chat_tool special_function_tool_with_optional_param {
+static common_chat_tool special_function_tool_with_optional_param{
     /* .name = */ "special_function_with_opt",
     /* .description = */ "I'm special but have optional stuff",
     /* .parameters = */ R"({
@@ -215,7 +222,7 @@ common_chat_tool special_function_tool_with_optional_param {
         "required": ["arg1"]
     })",
 };
-common_chat_tool python_tool {
+static common_chat_tool python_tool{
     /* .name = */ "python",
     /* .description = */ "an ipython interpreter",
     /* .parameters = */ R"({
@@ -229,44 +236,229 @@ common_chat_tool python_tool {
         "required": ["code"]
     })",
 };
-common_chat_tool code_interpreter_tool {
-    /* .name = */ "code_interpreter",
-    /* .description = */ "an ipython interpreter",
+
+static common_chat_tool html_tool{
+    /* .name = */ "html",
+    /* .description = */ "an html validator",
     /* .parameters = */ R"({
         "type": "object",
         "properties": {
-            "code": {
+            "markup": {
                 "type": "string",
-                "description": "Python code to execute."
+                "description": "HTML markup to validate."
             }
         },
-        "required": ["code"]
+        "required": ["markup"]
     })",
 };
-std::vector<common_chat_tool> tools           { special_function_tool, special_function_tool_with_optional_param, python_tool };
-std::vector<common_chat_tool> llama_3_1_tools { special_function_tool, code_interpreter_tool };
+
+static common_chat_tool get_time_tool{
+    /* .name = */ "get_time",
+    /* .description = */ "Get the current time in a city",
+    /* .parameters = */ R"({
+        "type": "object",
+        "properties": {
+            "city": {
+                "type": "string",
+                "description": "City name"
+            }
+        },
+        "required": ["city"]
+    })",
+};
+
+static common_chat_tool get_weather_tool{
+    /* .name = */ "get_weather",
+    /* .description = */ "Get the current weather in a city",
+    /* .parameters = */ R"({
+        "type": "object",
+        "properties": {
+            "city": {
+                "type": "string",
+                "description": "City name"
+            }
+        },
+        "required": ["city"]
+    })",
+};
+
+static common_chat_tool todo_list{
+    /* .name = */ "todo_list",
+    /* .description = */ "Create or update the todo list",
+    /* .parameters = */ R"({
+        "type": "object",
+        "properties": {
+            "todos": {
+                "type": "array",
+                "description": "List of TODO list items"
+            }
+        },
+        "required": ["todos"]
+    })",
+};
+
+static common_chat_tool edit_tool{
+    /* .name = */ "edit",
+    /* .description = */ "Edit file",
+    /* .parameters = */ R"({
+        "type": "object",
+        "properties": {
+            "filename": {
+                "type": "string",
+                "description": "Path of file to edit"
+            },
+            "oldString": {
+                "type": "string",
+                "description": "String to replace"
+            },
+            "newString": {
+                "type": "string",
+                "description": "New (replacement) value"
+            }
+        },
+        "required": ["filename", "oldString", "newString"]
+    })",
+};
+
+static std::vector<common_chat_tool> tools{ special_function_tool, special_function_tool_with_optional_param,
+                                            python_tool, html_tool, todo_list };
+
+const common_chat_msg message_user{
+    "user",
+    "Hey there!",
+    /* .content_parts = */ {},
+    /* .tool_calls = */ {},
+    /* .reasoning_content = */ "",
+    /* .tool_name = */ "",
+    /* .tool_call_id = */ "",
+};
+
+const common_chat_msg message_user_parts{
+    "user",
+    /* .content = */ "",
+    /* .content_parts = */
+    {
+     { "text", "Hey" },
+     { "text", "there" },
+     },
+    /* .tool_calls = */
+    {                 },
+    /* .reasoning_content = */
+    "",
+    /* .tool_name = */ "",
+    /* .tool_call_id = */ "",
+};
+
+static common_chat_msg simple_assist_msg(const std::string & content,
+                                         const std::string & reasoning_content = "",
+                                         const std::string & tool_name         = "",
+                                         const std::string & arguments         = "",
+                                         const std::string & id                = "") {
+    common_chat_msg msg;
+    msg.role              = "assistant";
+    msg.content           = content;
+    msg.reasoning_content = reasoning_content;
+    if (!tool_name.empty() || !id.empty()) {
+        msg.tool_calls.push_back({ tool_name, arguments, id });
+    }
+    return msg;
+}
+
+static common_chat_msg message_with_tool_calls(const std::string & tool_name, const std::string & arguments) {
+    return simple_assist_msg("", "", tool_name, arguments);
+}
+
+static common_chat_msg message_with_tool_calls_and_reasoning(const std::string & tool_name,
+                                                             const std::string & arguments,
+                                                             const std::string & reasoning) {
+    return simple_assist_msg("", reasoning, tool_name, arguments);
+}
+
+static common_chat_msg message_with_reasoning_content_and_multiple_tool_calls(
+    const std::string &                                      reasoning,
+    const std::string &                                      content,
+    const std::vector<std::pair<std::string, std::string>> & tool_calls) {
+    common_chat_msg msg;
+    msg.role              = "assistant";
+    msg.content           = content;
+    msg.reasoning_content = reasoning;
+    for (const auto & [name, args] : tool_calls) {
+        msg.tool_calls.push_back({ name, args, "" });
+    }
+    return msg;
+}
+
+static common_chat_msg message_with_content_and_tool_call(const std::string & content,
+                                                          const std::string & tool_name,
+                                                          const std::string & arguments) {
+    return simple_assist_msg(content, "", tool_name, arguments);
+}
+
+static common_chat_msg message_with_reasoning_and_tool_call(const std::string & reasoning,
+                                                            const std::string & tool_name,
+                                                            const std::string & arguments) {
+    return simple_assist_msg("", reasoning, tool_name, arguments);
+}
+
+const common_chat_msg message_assist       = simple_assist_msg("Hello, world!\nWhat's up?");
+const common_chat_msg message_assist_empty = simple_assist_msg("");
+const common_chat_msg message_assist_thoughts_unparsed_deepseek =
+    simple_assist_msg("<think>I'm\nthinking</think>Hello, world!\nWhat's up?");
+const common_chat_msg message_assist_thoughts_unparsed_md =
+    simple_assist_msg("<think>I'm\nthinking</think>Hello, world!\nWhat's up?\n```json\n{}```");
+const common_chat_msg message_assist_thoughts_unparsed_md_partial =
+    simple_assist_msg("<think>I'm\nthinking</think>Hello, world!\nWhat's up?\n```json\n{}");
+
+const common_chat_msg message_assist_thoughts_unparsed_r7b =
+    simple_assist_msg("<|START_THINKING|>I'm\nthinking<|END_THINKING|>Hello, world!\nWhat's up?");
+const common_chat_msg message_assist_thoughts_unparsed_magistral =
+    simple_assist_msg("[THINK]raisonnement[/THINK]Réponse");
+const common_chat_msg message_assist_thoughts = simple_assist_msg("Hello, world!\nWhat's up?", "I'm\nthinking");
+const common_chat_msg message_assist_thoughts_unopened_unparsed =
+    simple_assist_msg("I'm\nthinking</think>Hello, world!\nWhat's up?");
+const common_chat_msg message_assist_thoughts_no_content = simple_assist_msg("", "I'm\nthinking");
+const common_chat_msg message_assist_call = simple_assist_msg("", "", "special_function", "{\"arg1\": 1}");
+const common_chat_msg message_assist_call_noopt =
+    simple_assist_msg("", "", "special_function_with_opt", "{\"arg1\": 1}");
+const common_chat_msg message_assist_call_withopt =
+    simple_assist_msg("", "", "special_function_with_opt", "{\"arg1\": 1, \"arg2\": 2}");
+const common_chat_msg message_assist_call_content =
+    simple_assist_msg("Hello, world!\nWhat's up?", "", "special_function", "{\"arg1\":1}");
+const common_chat_msg message_assist_call_empty_args  = simple_assist_msg("", "", "special_function");
+const common_chat_msg message_assist_call_cutoff_args = simple_assist_msg("", "", "special_function", "{\"arg");
+const common_chat_msg message_assist_call_thoughts =
+    simple_assist_msg("", "I'm\nthinking", "special_function", "{\"arg1\":1}");
+const common_chat_msg message_assist_call_thoughts_unparsed =
+    simple_assist_msg("<think>I'm\nthinking</think>\n\n", "", "special_function", "{\"arg1\": 1}");
+const common_chat_msg message_assist_call_thoughts_content =
+    simple_assist_msg("Hello, world!\nWhat's up?", "I'm\nthinking", "special_function", "{\"arg1\": 1}");
+const common_chat_msg message_assist_call_id =
+    simple_assist_msg("", "", "special_function", "{\"arg1\":1}", /* .id = */ "123456789");
+const common_chat_msg message_assist_call_idx =
+    simple_assist_msg("", "", "special_function", "{\"arg1\":1}", /* .id = */ "0");
+const common_chat_msg message_assist_thoughts_call_idx =
+    simple_assist_msg("", "I'm\nthinking", "special_function", "{\"arg1\": 1}", /* id = */ "0");
+const common_chat_msg message_assist_thoughts_partial_call =
+    simple_assist_msg("", "I'm\nthinking", "", "", /* id = */ "0");
+const common_chat_msg message_assist_call_python = simple_assist_msg("", "", "python", "{\"code\":\"print('hey')\"}");
+const common_chat_msg message_assist_call_python_lines =
+    simple_assist_msg("", "", "python", "{\"code\":\"# This is a program:\\nprint('hey')\"}");
+const common_chat_msg message_assist_call_python_lines_unclosed =
+    simple_assist_msg("", "", "python", "{\"code\":\"# This is a program:\\nprint('hey')");
+const common_chat_msg message_assist_json_content =
+    simple_assist_msg("{\n  \"response\": \"Hello, world!\\nWhat's up?\"\n}");
 
 struct delta_data {
     std::string        delta;
     common_chat_params params;
 };
 
-static common_chat_msg simple_assist_msg(const std::string & content, const std::string & reasoning_content = "", const std::string & tool_name = "", const std::string & arguments = "", const std::string & id = "") {
-    common_chat_msg msg;
-    msg.role = "assistant";
-    msg.content = content;
-    msg.reasoning_content = reasoning_content;
-    if (!tool_name.empty()) {
-        msg.tool_calls.push_back({ tool_name, arguments, id });
-    }
-    return msg;
-}
-
-static delta_data init_delta(const struct common_chat_templates * tmpls, const std::vector<std::string> & end_tokens,
-                             const common_chat_msg & user_message,
-                             const common_chat_msg & delta_message,
+static delta_data init_delta(const struct common_chat_templates *  tmpls,
+                             const std::vector<std::string> &      end_tokens,
+                             const common_chat_msg &               user_message,
+                             const common_chat_msg &               delta_message,
                              const std::vector<common_chat_tool> & tools,
-                             const common_chat_tool_choice & tool_choice) {
+                             const common_chat_tool_choice &       tool_choice) {
     common_chat_templates_inputs inputs;
     inputs.parallel_tool_calls = true;
     inputs.messages.push_back(user_message);
@@ -317,20 +509,27 @@ static delta_data init_delta(const struct common_chat_templates * tmpls, const s
   gets the diff, removes any end tokens and parses the result w/ the grammar, checking that
   the parsed message is the same as the test_message
 */
-static void test_templates(const struct common_chat_templates * tmpls, const std::vector<std::string> & end_tokens,
-                          const common_chat_msg & test_message,
-                          const std::vector<common_chat_tool> & tools = {},
-                          const std::string & expected_delta = "",
-                          bool expect_grammar_triggered = true,
-                          bool test_grammar_if_triggered = true,
-                          common_reasoning_format reasoning_format = COMMON_REASONING_FORMAT_NONE,
-                          bool ignore_whitespace_differences = false
-                        ) {
+static void test_templates(const struct common_chat_templates *  tmpls,
+                           const std::vector<std::string> &      end_tokens,
+                           const common_chat_msg &               test_message,
+                           const std::vector<common_chat_tool> & tools                     = {},
+                           const std::string &                   expected_delta            = "",
+                           bool                                  expect_grammar_triggered  = true,
+                           bool                                  test_grammar_if_triggered = true,
+                           common_reasoning_format               reasoning_format = COMMON_REASONING_FORMAT_NONE,
+                           bool                                  ignore_whitespace_differences = false) {
     common_chat_msg user_message;
-    user_message.role = "user";
+    user_message.role    = "user";
     user_message.content = "Hello, world!";
 
-    for (const auto & tool_choice : std::vector<common_chat_tool_choice> {COMMON_CHAT_TOOL_CHOICE_AUTO, COMMON_CHAT_TOOL_CHOICE_REQUIRED}) {
+    common_chat_templates_inputs inputs_tools;
+    inputs_tools.messages = { message_user };
+    inputs_tools.tools    = { special_function_tool };
+
+    common_chat_params params = common_chat_templates_apply(tmpls, inputs_tools);
+
+    for (const auto & tool_choice :
+         std::vector<common_chat_tool_choice>{ COMMON_CHAT_TOOL_CHOICE_AUTO, COMMON_CHAT_TOOL_CHOICE_REQUIRED }) {
         auto data = init_delta(tmpls, end_tokens, user_message, test_message, tools, tool_choice);
         if (!expected_delta.empty()) {
             if (ignore_whitespace_differences) {
@@ -343,8 +542,12 @@ static void test_templates(const struct common_chat_templates * tmpls, const std
         if (expect_grammar_triggered) {
             // TODO @ngxson : refactor common_chat_parse to avoid passing format/reasoning_format every time
             common_chat_parser_params params;
-            params.format = data.params.format;
+            params.format           = data.params.format;
             params.reasoning_format = reasoning_format;
+            if (!params.parser.empty()) {
+                syntax.parser = common_peg_arena();
+                syntax.parser.load(params.parser);
+            }
             const auto msg = common_chat_parse(data.delta, /* is_partial= */ false, params);
             assert_msg_equals(test_message, msg, ignore_whitespace_differences);
         }
@@ -358,43 +561,43 @@ static void test_templates(const struct common_chat_templates * tmpls, const std
                 throw std::runtime_error("Failed to build grammar");
             }
             auto earliest_trigger_pos = std::string::npos;
-            auto constrained = data.delta;
+            auto constrained          = data.delta;
             for (const auto & trigger : data.params.grammar_triggers) {
-                size_t pos = std::string::npos;
+                size_t      pos = std::string::npos;
                 std::smatch match;
                 switch (trigger.type) {
                     case COMMON_GRAMMAR_TRIGGER_TYPE_WORD:
-                    {
-                        const auto & word = trigger.value;
-                        pos = constrained.find(word);
-                        break;
-                    }
+                        {
+                            const auto & word = trigger.value;
+                            pos               = constrained.find(word);
+                            break;
+                        }
                     case COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN:
-                    {
-                        const auto & pattern = trigger.value;
-                        if (std::regex_search(constrained, match, std::regex(pattern))) {
-                            pos = match.position(1);
+                        {
+                            const auto & pattern = trigger.value;
+                            if (std::regex_search(constrained, match, std::regex(pattern))) {
+                                pos = match.position(1);
+                            }
+                            break;
                         }
-                        break;
-                    }
                     case COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL:
-                    {
-                        const auto & pattern = trigger.value;
-                        if (std::regex_match(constrained, match, std::regex(pattern))) {
-                            auto mpos = std::string::npos;
-                            for (size_t i = 1; i < match.size(); ++i) {
-                                if (match[i].length() > 0) {
-                                    mpos = match.position(i);
-                                    break;
+                        {
+                            const auto & pattern = trigger.value;
+                            if (std::regex_match(constrained, match, std::regex(pattern))) {
+                                auto mpos = std::string::npos;
+                                for (size_t i = 1; i < match.size(); ++i) {
+                                    if (match[i].length() > 0) {
+                                        mpos = match.position(i);
+                                        break;
+                                    }
                                 }
+                                if (mpos == std::string::npos) {
+                                    mpos = match.position(0);
+                                }
+                                pos = mpos;
                             }
-                            if (mpos == std::string::npos) {
-                                mpos = match.position(0);
-                            }
-                            pos = mpos;
+                            break;
                         }
-                        break;
-                    }
                     default:
                         throw std::runtime_error("Unknown trigger type");
                 }
@@ -407,7 +610,7 @@ static void test_templates(const struct common_chat_templates * tmpls, const std
             }
             auto grammar_triggered = false;
             if (earliest_trigger_pos != std::string::npos) {
-                constrained = constrained.substr(earliest_trigger_pos);
+                constrained       = constrained.substr(earliest_trigger_pos);
                 grammar_triggered = true;
             }
             if (data.params.grammar_lazy) {
@@ -416,8 +619,7 @@ static void test_templates(const struct common_chat_templates * tmpls, const std
 
             if (grammar_triggered && test_grammar_if_triggered && !match_string(constrained, grammar.get())) {
                 throw std::runtime_error("Failed to match delta against grammar:\n\n" + data.delta +
-                    "\n\nConstrained: " + constrained +
-                    "\n\nGrammar: " + data.params.grammar);
+                                         "\n\nConstrained: " + constrained + "\n\nGrammar: " + data.params.grammar);
             }
         }
     }
@@ -431,24 +633,31 @@ template <typename T>
 static void test_parser_with_streaming(const common_chat_msg & expected, const std::string & raw_message, T parse_msg) {
     constexpr auto utf8_truncate_safe_len = [](const std::string_view s) -> size_t {
         auto len = s.size();
-        if (len == 0) return 0;
+        if (len == 0) {
+            return 0;
+        }
         auto i = len;
         for (size_t back = 0; back < 4 && i > 0; ++back) {
             --i;
             unsigned char c = s[i];
             if ((c & 0x80) == 0) {
                 return len;
-            } else if ((c & 0xC0) == 0xC0) {
+            }
+            if ((c & 0xC0) == 0xC0) {
                 size_t expected_len = 0;
-                if ((c & 0xE0) == 0xC0) expected_len = 2;
-                else if ((c & 0xF0) == 0xE0) expected_len = 3;
-                else if ((c & 0xF8) == 0xF0) expected_len = 4;
-                else return i;
-                if (len - i >= expected_len) {
-                    return len;
+                if ((c & 0xE0) == 0xC0) {
+                    expected_len = 2;
+                } else if ((c & 0xF0) == 0xE0) {
+                    expected_len = 3;
+                } else if ((c & 0xF8) == 0xF0) {
+                    expected_len = 4;
                 } else {
                     return i;
                 }
+                if (len - i >= expected_len) {
+                    return len;
+                }
+                return i;
             }
         }
         return len - std::min(len, size_t(3));
@@ -457,14 +666,14 @@ static void test_parser_with_streaming(const common_chat_msg & expected, const s
         return s.substr(0, utf8_truncate_safe_len(s));
     };
 
-    auto merged = simple_assist_msg("");
+    auto merged   = simple_assist_msg("");
     auto last_msg = parse_msg("");
     for (size_t i = 1; i <= raw_message.size(); ++i) {
         auto curr_msg = parse_msg(std::string(utf8_truncate_safe_view(std::string_view(raw_message).substr(0, i))));
         if (curr_msg == simple_assist_msg("")) continue;
-        LOG_INF("Streaming msg: %s\n", common_chat_msgs_to_json_oaicompat({curr_msg}).dump().c_str());
+        LOG_INF("Streaming msg: %s\n", common_chat_msgs_to_json_oaicompat<json>({curr_msg}).dump().c_str());
         for (auto diff: common_chat_msg_diff::compute_diffs(last_msg, curr_msg)) {
-            LOG_INF("Streaming diff: %s\n", common_chat_msg_diff_to_json_oaicompat(diff).dump().c_str());
+            LOG_INF("Streaming diff: %s\n", common_chat_msg_diff_to_json_oaicompat<json>(diff).dump().c_str());
             if (!diff.reasoning_content_delta.empty()) {
                 merged.reasoning_content += diff.reasoning_content_delta;
             }
@@ -473,14 +682,14 @@ static void test_parser_with_streaming(const common_chat_msg & expected, const s
             }
             if (diff.tool_call_index != std::string::npos) {
                 if (!diff.tool_call_delta.name.empty()) {
-                    merged.tool_calls.push_back({diff.tool_call_delta.name, "", ""});
+                    merged.tool_calls.push_back({ diff.tool_call_delta.name, "", "" });
                 }
                 if (!diff.tool_call_delta.arguments.empty()) {
                     GGML_ASSERT(!merged.tool_calls.empty());
                     merged.tool_calls.back().arguments += diff.tool_call_delta.arguments;
                 }
             }
-            LOG_INF("Streaming merged: %s\n", common_chat_msgs_to_json_oaicompat({merged}).dump().c_str());
+            LOG_INF("Streaming merged: %s\n", common_chat_msgs_to_json_oaicompat<json>({merged}).dump().c_str());
         }
         assert_msg_equals(curr_msg, merged, true);
         last_msg = curr_msg;
@@ -489,99 +698,90 @@ static void test_parser_with_streaming(const common_chat_msg & expected, const s
     assert_msg_equals(expected, merged, true);
 }
 
-const common_chat_msg message_user {
-    "user",
-    "Hey there!",
-    /* .content_parts = */ {},
-    /* .tool_calls = */ {},
-    /* .reasoning_content = */ "",
-    /* .tool_name = */ "",
-    /* .tool_call_id = */ "",
-};
-
-const common_chat_msg message_user_parts {
-    "user",
-    /* .content = */ "",
-    /* .content_parts = */ {
-        { "text", "Hey" },
-        { "text", "there" },
-    },
-    /* .tool_calls = */ {},
-    /* .reasoning_content = */ "",
-    /* .tool_name = */ "",
-    /* .tool_call_id = */ "",
-};
-
-const common_chat_msg message_assist                              = simple_assist_msg("Hello, world!\nWhat's up?");
-const common_chat_msg message_assist_empty                        = simple_assist_msg("");
-const common_chat_msg message_assist_thoughts_unparsed_deepseek   = simple_assist_msg("<think>I'm\nthinking</think>Hello, world!\nWhat's up?");
-const common_chat_msg message_assist_thoughts_unparsed_md         = simple_assist_msg("<think>I'm\nthinking</think>Hello, world!\nWhat's up?\n```json\n{}```");
-const common_chat_msg message_assist_thoughts_unparsed_md_partial = simple_assist_msg("<think>I'm\nthinking</think>Hello, world!\nWhat's up?\n```json\n{}");
-
-const common_chat_msg message_assist_thoughts_unparsed_r7b       = simple_assist_msg("<|START_THINKING|>I'm\nthinking<|END_THINKING|>Hello, world!\nWhat's up?");
-const common_chat_msg message_assist_thoughts_unparsed_magistral = simple_assist_msg("[THINK]raisonnement[/THINK]Réponse");
-const common_chat_msg message_assist_thoughts                    = simple_assist_msg("Hello, world!\nWhat's up?", "I'm\nthinking");
-const common_chat_msg message_assist_thoughts_unopened_unparsed  = simple_assist_msg("I'm\nthinking</think>Hello, world!\nWhat's up?");
-const common_chat_msg message_assist_thoughts_no_content         = simple_assist_msg("", "I'm\nthinking");
-const common_chat_msg message_assist_call                        = simple_assist_msg("", "", "special_function", "{\"arg1\": 1}");
-const common_chat_msg message_assist_call_noopt                  = simple_assist_msg("", "", "special_function_with_opt", "{\"arg1\": 1}");
-const common_chat_msg message_assist_call_withopt                = simple_assist_msg("", "", "special_function_with_opt", "{\"arg1\": 1, \"arg2\": 2}");
-const common_chat_msg message_assist_call_content                = simple_assist_msg("Hello, world!\nWhat's up?", "", "special_function", "{\"arg1\":1}");
-const common_chat_msg message_assist_call_empty_args             = simple_assist_msg("", "", "special_function");
-const common_chat_msg message_assist_call_cutoff_args            = simple_assist_msg("", "", "special_function", "{\"arg");
-const common_chat_msg message_assist_call_thoughts               = simple_assist_msg("", "I'm\nthinking", "special_function", "{\"arg1\":1}");
-const common_chat_msg message_assist_call_thoughts_unparsed      = simple_assist_msg("<think>I'm\nthinking</think>\n\n", "", "special_function", "{\"arg1\": 1}");
-const common_chat_msg message_assist_call_thoughts_content       = simple_assist_msg("Hello, world!\nWhat's up?", "I'm\nthinking", "special_function", "{\"arg1\": 1}");
-const common_chat_msg message_assist_call_id                     = simple_assist_msg("", "", "special_function", "{\"arg1\":1}", /* .id = */ "123456789");
-const common_chat_msg message_assist_call_idx                    = simple_assist_msg("", "", "special_function", "{\"arg1\":1}", /* .id = */ "0");
-const common_chat_msg message_assist_thoughts_call_idx           = simple_assist_msg("", "I'm\nthinking", "special_function", "{\"arg1\": 1}", /* id = */ "0");
-const common_chat_msg message_assist_call_python                 = simple_assist_msg("", "", "python", "{\"code\":\"print('hey')\"}");
-const common_chat_msg message_assist_call_python_lines           = simple_assist_msg("", "", "python", "{\"code\":\"# This is a program:\\nprint('hey')\"}");
-const common_chat_msg message_assist_call_python_lines_unclosed  = simple_assist_msg("", "", "python", "{\"code\":\"# This is a program:\\nprint('hey')");
-const common_chat_msg message_assist_call_code_interpreter       = simple_assist_msg("", "", "code_interpreter", "{\"code\":\"print('hey')\"}");
-
 // Use for PEG parser implementations
 struct peg_test_case {
     common_chat_templates_inputs params;
-    std::string input;
-    common_chat_msg expect;
+    std::string                  input;
+    common_chat_msg              expect;
+    bool                         is_partial = false;
 };
 
 struct make_peg_parser {
     common_chat_params params_;
-    common_peg_arena arena_;
+    common_peg_arena   arena_;
+    bool               detailed_debug_;
 
-    make_peg_parser(common_chat_templates * tmpls, const common_chat_templates_inputs & inputs) {
-        params_ = common_chat_templates_apply(tmpls, inputs);
+    make_peg_parser(common_chat_templates *              tmpls,
+                    const common_chat_templates_inputs & inputs,
+                    bool                                 detailed_debug = false) {
+        detailed_debug_ = detailed_debug;
+        params_         = common_chat_templates_apply(tmpls, inputs);
         arena_.load(params_.parser);
     }
 
     common_chat_msg parse(const std::string & msg, bool is_partial) {
-        common_chat_parser_params parser_params;
+common_chat_parser_params parser_params;
         parser_params.format = params_.format;
         return common_chat_peg_parse(arena_, msg, is_partial, parser_params);
-    }
 };
 
-static void test_peg_parser(common_chat_templates * tmpls, const std::function<void(peg_test_case &)> & init) {
+static void test_peg_parser(common_chat_templates *                      tmpls,
+                            const std::function<void(peg_test_case &)> & init,
+                            bool                                         detailed_debug) {
+    // UTF-8-safe truncation helper (same as in test_parser_with_streaming)
+    constexpr auto utf8_truncate_safe_len = [](const std::string_view s) -> size_t {
+        auto len = s.size();
+        if (len == 0) {
+            return 0;
+        }
+        auto i = len;
+        for (size_t back = 0; back < 4 && i > 0; ++back) {
+            --i;
+            unsigned char c = s[i];
+            if ((c & 0x80) == 0) {
+                return len;
+            }
+            if ((c & 0xC0) == 0xC0) {
+                size_t expected_len = 0;
+                if ((c & 0xE0) == 0xC0) {
+                    expected_len = 2;
+                } else if ((c & 0xF0) == 0xE0) {
+                    expected_len = 3;
+                } else if ((c & 0xF8) == 0xF0) {
+                    expected_len = 4;
+                } else {
+                    return i;
+                }
+                if (len - i >= expected_len) {
+                    return len;
+                }
+                return i;
+            }
+        }
+        return len - std::min(len, size_t(3));
+    };
+
     peg_test_case tc;
     init(tc);
     if (tc.params.messages.empty()) {
-        tc.params.messages = {message_user};
+        tc.params.messages = { message_user };
     }
     if (tc.expect.role.empty()) {
         tc.expect.role = "assistant";
     }
 
-    auto parser = make_peg_parser(tmpls, tc.params);
+    auto parser = make_peg_parser(tmpls, tc.params, detailed_debug);
 
     common_chat_msg msg_accum;
     common_chat_msg msg_prev;
     msg_accum.role = msg_prev.role = "assistant";
 
     for (size_t i = 1; i <= tc.input.size(); ++i) {
-        auto is_partial = i < tc.input.size();
-        common_chat_msg msg_current = parser.parse(tc.input.substr(0, i), is_partial);
+        auto            is_partial  = i < tc.input.size() || tc.is_partial;
+        // Use UTF-8 safe truncation to avoid corrupting multi-byte characters
+        size_t          safe_len    = utf8_truncate_safe_len(std::string_view(tc.input).substr(0, i));
+        std::string     prefix      = tc.input.substr(0, safe_len);
+        common_chat_msg msg_current = parser.parse(prefix, is_partial);
 
         for (const auto & diff : common_chat_msg_diff::compute_diffs(msg_prev, msg_current)) {
             if (!diff.reasoning_content_delta.empty()) {
@@ -591,11 +791,21 @@ static void test_peg_parser(common_chat_templates * tmpls, const std::function<v
                 msg_accum.content += diff.content_delta;
             }
             if (diff.tool_call_index != std::string::npos) {
+                // During partial parsing, a new tool call may appear with empty name initially
+                // The name gets filled in as more input is parsed
+                while (msg_accum.tool_calls.size() <= diff.tool_call_index) {
+                    msg_accum.tool_calls.push_back({ "", "", "" });
+                }
+                // Always update name and id from diff (may change during incremental parsing), but only if the delta
+                // actually contains them
                 if (!diff.tool_call_delta.name.empty()) {
-                    msg_accum.tool_calls.push_back({diff.tool_call_delta.name, "", diff.tool_call_delta.id});
+                    msg_accum.tool_calls[diff.tool_call_index].name = diff.tool_call_delta.name;
+                }
+                if (!diff.tool_call_delta.id.empty()) {
+                    msg_accum.tool_calls[diff.tool_call_index].id = diff.tool_call_delta.id;
                 }
                 if (!diff.tool_call_delta.arguments.empty()) {
-                    msg_accum.tool_calls.back().arguments += diff.tool_call_delta.arguments;
+                    msg_accum.tool_calls[diff.tool_call_index].arguments += diff.tool_call_delta.arguments;
                 }
             }
         }
@@ -603,12 +813,121 @@ static void test_peg_parser(common_chat_templates * tmpls, const std::function<v
         msg_prev = msg_current;
     }
 
-    assert_msg_equals(tc.expect, parser.parse(tc.input, false), true);
+    if (!tc.is_partial) {
+        assert_msg_equals(tc.expect, parser.parse(tc.input, false), true);
+    }
     assert_msg_equals(tc.expect, msg_accum, true);
 }
 
+// Global template filter for --template flag
+static std::string g_template_filter;
+
+// Fluent builder for PEG parser tests
+class peg_test_builder;
+
+class peg_tester {
+    common_chat_templates_ptr tmpls_;
+    std::string               template_path_;
+    bool                      detailed_debug_;
+    friend class peg_test_builder;
+
+  public:
+    explicit peg_tester(const std::string & template_path, const bool detailed_debug = false) :
+        tmpls_(read_templates(template_path)),
+        template_path_(template_path),
+        detailed_debug_(detailed_debug) {}
+
+    const std::string & template_path() const { return template_path_; }
+
+    peg_test_builder test(const std::string & input);
+};
+
+class peg_test_builder {
+    peg_tester &  tester_;
+    peg_test_case tc_;
+
+  public:
+    peg_test_builder(peg_tester & tester, const std::string & input) : tester_(tester) { tc_.input = input; }
+
+    // Parameter setters
+    peg_test_builder & reasoning_format(common_reasoning_format fmt) {
+        tc_.params.reasoning_format = fmt;
+        return *this;
+    }
+
+    peg_test_builder & tools(std::vector<common_chat_tool> tools) {
+        tc_.params.tools = std::move(tools);
+        return *this;
+    }
+
+    peg_test_builder & enable_thinking(bool val) {
+        tc_.params.enable_thinking = val;
+        return *this;
+    }
+
+    peg_test_builder & parallel_tool_calls(bool val) {
+        tc_.params.parallel_tool_calls = val;
+        return *this;
+    }
+
+    peg_test_builder & json_schema(const std::string & schema) {
+        tc_.params.json_schema = schema;
+        return *this;
+    }
+
+    peg_test_builder & is_partial(bool val) {
+        tc_.is_partial = val;
+        return *this;
+    }
+
+    // Expect setters
+    peg_test_builder & expect(const common_chat_msg & msg) {
+        tc_.expect = msg;
+        return *this;
+    }
+
+    peg_test_builder & expect_content(const std::string & content) {
+        tc_.expect.content = content;
+        return *this;
+    }
+
+    peg_test_builder & expect_reasoning(const std::string & reasoning) {
+        tc_.expect.reasoning_content = reasoning;
+        return *this;
+    }
+
+    peg_test_builder & expect_tool_calls(std::vector<common_chat_tool_call> calls) {
+        tc_.expect.tool_calls = std::move(calls);
+        return *this;
+    }
+
+    // Execute the test
+    void run() {
+        // Check template filter
+        if (!g_template_filter.empty()) {
+            // Case-insensitive substring match
+            std::string template_path_lower = tester_.template_path();
+            std::string filter_lower        = g_template_filter;
+            std::transform(template_path_lower.begin(), template_path_lower.end(), template_path_lower.begin(),
+                           ::tolower);
+            std::transform(filter_lower.begin(), filter_lower.end(), filter_lower.begin(), ::tolower);
+            if (template_path_lower.find(filter_lower) == std::string::npos) {
+                // Skip this test
+                return;
+            }
+        }
+        LOG_DBG("\n================================\nRunning test for template: %s\n================================\n",
+                tester_.template_path().c_str());
+        test_peg_parser(tester_.tmpls_.get(), [this](peg_test_case & t) { t = tc_; }, tester_.detailed_debug_);
+    }
+};
+
+peg_test_builder peg_tester::test(const std::string & input) {
+    return peg_test_builder(*this, input);
+}
+
 static void test_msgs_oaicompat_json_conversion() {
-    printf("[%s]\n", __func__);
+    LOG_DBG("%s\n", __func__);
     std::vector<common_chat_msg> msgs{
         message_user,
         message_user_parts,
@@ -619,13 +938,12 @@ static void test_msgs_oaicompat_json_conversion() {
         message_assist_call_id,
         message_assist_call_idx,
         message_assist_call_python,
-        message_assist_call_code_interpreter,
     };
     for (const auto & msg : msgs) {
-        auto oai_json = common_chat_msgs_to_json_oaicompat({msg});
+        auto oai_json = common_chat_msgs_to_json_oaicompat<json>({msg});
         auto msgs2 = common_chat_msgs_parse_oaicompat(oai_json);
         assert_equals((size_t) 1, msgs2.size());
-        auto msg2 = msgs2[0];
+        const auto & msg2 = msgs2[0];
         assert_msg_equals(msg, msg2);
     }
     assert_equals(
@@ -646,7 +964,7 @@ static void test_msgs_oaicompat_json_conversion() {
             "  }\n"
             "]"
         ),
-        common_chat_msgs_to_json_oaicompat({message_user_parts}).dump(2));
+        common_chat_msgs_to_json_oaicompat<json>({message_user_parts}).dump(2));
 
     assert_equals(
         std::string(
@@ -666,7 +984,7 @@ static void test_msgs_oaicompat_json_conversion() {
             "  }\n"
             "]"
         ),
-        common_chat_msgs_to_json_oaicompat({message_assist_call_python}).dump(2));
+        common_chat_msgs_to_json_oaicompat<json>({message_assist_call_python}).dump(2));
 
     auto res = common_chat_msgs_parse_oaicompat(json::parse("[{\"role\": \"assistant\", \"tool_calls\": []}]"));
     assert_equals<size_t>(1, res.size());
@@ -685,15 +1003,14 @@ static void test_msgs_oaicompat_json_conversion() {
 }
 
 static void test_tools_oaicompat_json_conversion() {
-    printf("[%s]\n", __func__);
+    LOG_DBG("%s\n", __func__);
     std::vector<common_chat_tool> tools{
         special_function_tool,
         python_tool,
-        code_interpreter_tool,
     };
 
     for (const auto & tool : tools) {
-        auto oai_json = common_chat_tools_to_json_oaicompat({tool});
+        auto oai_json = common_chat_tools_to_json_oaicompat<json>({tool});
         auto tools2 = common_chat_tools_parse_oaicompat(oai_json);
         assert_equals((size_t) 1, tools2.size());
         auto tool2 = tools2[0];
@@ -726,7 +1043,7 @@ static void test_tools_oaicompat_json_conversion() {
             "  }\n"
             "]"
         ),
-        common_chat_tools_to_json_oaicompat({special_function_tool}).dump(2));
+        common_chat_tools_to_json_oaicompat<json>({special_function_tool}).dump(2));
 
     {
         auto tools_no_params = common_chat_tools_parse_oaicompat(json::parse(
@@ -1010,14 +1327,14 @@ static void test_template_output_parsers() {
         // Test parsing
         assert_msg_equals(
             simple_assist_msg("", "", "python", ""),
-            test_chat_parse(
+            common_chat_parse(
                 "```json\n"
                 "<function_call> { \"name\" : \"python\"",
                 /* is_partial= */ true,
                 {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
         assert_msg_equals(
             simple_assist_msg("Let's call something\n"),
-            test_chat_parse(
+            common_chat_parse(
                 "Let's call something\n"
                 "<tool_call>{\"name\"",
                 /* is_partial= */ true,
@@ -1027,7 +1344,7 @@ static void test_template_output_parsers() {
                 }));
         assert_msg_equals(
             simple_assist_msg("Let's call something\n"),
-            test_chat_parse(
+            common_chat_parse(
                 "Let's call something\n"
                 "<tool_call>{\"name",
                 /* is_partial= */ true,
@@ -1036,7 +1353,7 @@ static void test_template_output_parsers() {
                     /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
                 }));
         assert_msg_equals(message_assist_call_thoughts,
-            test_chat_parse(
+            common_chat_parse(
                 // QwQ-32B's template adds a trailing <think> if add_generation_prompt
                 "I'm\nthinking</think>\n"
                 "<tool_call>{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}</tool_call>",
@@ -1049,580 +1366,39 @@ static void test_template_output_parsers() {
                 }));
         assert_msg_equals(
             message_assist_call,
-            test_chat_parse(
-                "<tool_call>\n"
-                "{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
-                "</tool_call>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
-        assert_msg_equals(message_assist_call_content,
-            test_chat_parse(
-                "Hello, world!\nWhat's up?<tool_call>\n"
-                "{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
-                "</tool_call>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
-        assert_msg_equals(
-            message_assist_call,
-            test_chat_parse(
-                "<function=special_function>{\"arg1\": 1}</function>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
-        assert_msg_equals(
-            message_assist_call,
-            test_chat_parse(
-                "<function name=\"special_function\">\n"
-                "{\"arg1\": 1}\n"
-                "</function>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
-        assert_msg_equals(
-            message_assist_call,
-            test_chat_parse(
-                "<tool>\n"
-                "  {\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
-                "</tool>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
-        assert_msg_equals(
-            message_assist_call,
-            test_chat_parse(
-                "<tools>\n"
-                "  {\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
-                "</tools>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
-        assert_msg_equals(
-            message_assist_call,
-            test_chat_parse(
-                "<response>\n"
-                "  {\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
-                "</response>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
-        assert_msg_equals(
-            message_assist_call,
-            test_chat_parse(
-                "```xml\n"
-                "<response>\n"
-                "    {\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
-                "</response>\n"
-                "```",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
-        assert_msg_equals(
-            message_assist_call,
-            test_chat_parse(
-                "```xml\n"
-                "  {\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
-                "```",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
-        assert_msg_equals(
-            message_assist_call,
-            test_chat_parse(
-                "```\n"
-                "  {\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
-                "```",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
-        assert_msg_equals(
-            message_assist_call,
-            test_chat_parse(
-                "```\n"
-                "{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
-                "```",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
-        assert_msg_equals(
-            message_assist_call,
-            test_chat_parse(
-                "```json\n"
-                "  {\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
-                "```",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
-        assert_msg_equals(
-            message_assist_call,
-            test_chat_parse(
-                "```json\n"
-                "\n"
-                "                    <function_call> {\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}} \n"
-                "                    </function_call> \n"
-                "``` ",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
-        assert_msg_equals(
-            message_assist_call,
-            test_chat_parse(
-                "<json>\n"
-                "  {\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
-                "</json>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
-        assert_msg_equals(
-            message_assist_call,
-            test_chat_parse(
-                "<xml>\n"
-                "  {\n"
-                "    \"name\": \"special_function\", \"arguments\": {\"arg1\": 1}\n"
-                "  }\n"
-                "</xml>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
-        assert_msg_equals(
-            message_assist_call,
-            test_chat_parse(
-                "<JSON>\n"
-                "  {\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
-                "</JSON>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
-        assert_msg_equals(
-            message_assist_call,
-            test_chat_parse(
-                "{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
-        assert_msg_equals(
-            message_assist_call,
-            test_chat_parse(
-                "{\n  \"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
-
-        // Test multiple tool calls
-        common_chat_msg message_assist_multiple_calls;
-        message_assist_multiple_calls.role = "assistant";
-        message_assist_multiple_calls.content = "";
-        message_assist_multiple_calls.tool_calls.push_back({"special_function", "{\"arg1\": 1}", ""});
-        message_assist_multiple_calls.tool_calls.push_back({"python", "{\"code\":\"print('hello')\"}", ""});
-
-        assert_msg_equals(
-            message_assist_multiple_calls,
-            test_chat_parse(
-                "<tool_call>\n"
-                "{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
-                "</tool_call>\n"
-                "<tool_call>\n"
-                "{\"name\": \"python\", \"arguments\": {\"code\":\"print('hello')\"}}\n"
-                "</tool_call>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
-
-        assert_msg_equals(
-            message_assist_multiple_calls,
-            test_chat_parse(
-                "<function=special_function>{\"arg1\": 1}</function>\n"
-                "<function=python>{\"code\":\"print('hello')\"}</function>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
-
-        assert_msg_equals(
-            simple_assist_msg(
-                "This is not a tool call:",
-                "",
-                "special_function",
-                "{\"arg1\": 1}"),
-            test_chat_parse(
-                "This is not a tool call:\n"
-                "{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
-        assert_msg_equals(message_assist,
-            test_chat_parse(
-                "Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
-        assert_msg_equals(message_assist_thoughts_unparsed_deepseek,
-            test_chat_parse(
-                "<think>I'm\nthinking</think>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
-        // assert_msg_equals(message_assist_thoughts_unparsed_deepseek,
-        //     test_chat_parse(
-        //         "I'm\nthinking</think>Hello, world!\nWhat's up?",
-        //         COMMON_CHAT_FORMAT_HERMES_2_PRO));
-        assert_msg_equals(message_assist_thoughts,
-            test_chat_parse(
-                "<think>I'm\nthinking</think>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_HERMES_2_PRO,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                }));
-        assert_msg_equals(message_assist_thoughts,
-            test_chat_parse(
-                "<think>I'm\nthinking</think>Hello, world!\nWhat's up?",
-                /* is_partial= */ true,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_HERMES_2_PRO,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                }));
-        assert_msg_equals(message_assist_thoughts_unparsed_md,
-            test_chat_parse(
-                "<think>I'm\nthinking</think>Hello, world!\nWhat's up?\n```json\n{}```",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_HERMES_2_PRO,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                    /* .reasoning_in_content = */ true,
-                    /* .thinking_forced_open = */ false,
-                    /* .parse_tool_calls = */ false,
-                }));
-        assert_msg_equals(message_assist_thoughts_unparsed_md_partial,
-            test_chat_parse(
-                "<think>I'm\nthinking</think>Hello, world!\nWhat's up?\n```json\n{}```",
-                /* is_partial= */ true,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_HERMES_2_PRO,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                    /* .reasoning_in_content = */ true,
-                    /* .thinking_forced_open = */ false,
-                }));
-        assert_msg_equals(message_assist_thoughts_unopened_unparsed,
-            test_chat_parse(
-                "I'm\nthinking</think>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_HERMES_2_PRO,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                }));
-        assert_msg_equals(message_assist_thoughts,
-            test_chat_parse(
-                "I'm\nthinking</think>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_HERMES_2_PRO,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                    /* .reasoning_in_content = */ false,
-                    /* .thinking_forced_open = */ true,
-                }));
-
-        test_templates(tmpls.get(), end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
-        test_templates(tmpls.get(), end_tokens, message_assist_call, tools,
-                      "<tool_call>\n"
-                      "{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
-                      "</tool_call>");
-
-        // Test multiple tool calls with template
-        common_chat_msg message_assist_multiple_calls_template;
-        message_assist_multiple_calls_template.role = "assistant";
-        message_assist_multiple_calls_template.content = "";
-        message_assist_multiple_calls_template.tool_calls.push_back({"special_function", "{\"arg1\": 1}", ""});
-        message_assist_multiple_calls_template.tool_calls.push_back({"python", "{\"code\":\"print('test')\"}", ""});
-
-        test_templates(tmpls.get(), end_tokens, message_assist_multiple_calls_template, tools,
-                      "<tool_call>\n"
-                      "{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
-                      "</tool_call>\n"
-                      "<tool_call>\n"
-                      "{\"name\": \"python\", \"arguments\": {\"code\":\"print('test')\"}}\n"
-                      "</tool_call>");
-
-        test_templates(tmpls.get(), end_tokens, message_assist_call_python_lines, tools,
-                      "<tool_call>\n"
-                      "{\"name\": \"python\", \"arguments\": {\"code\":\"# This is a program:\\nprint('hey')\"}}\n"
-                      "</tool_call>");
-        assert_msg_equals(
-            simple_assist_msg("", /* reasoning_content= */ "<tool_call>nah uhg</tool_call>"),
-            test_chat_parse(
-                "<think><tool_call>nah uhg</tool_call>",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_HERMES_2_PRO,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                }));
+            common_chat_parse(
+               "<tool_call>\n"
+               "{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
+               "</tool_call>")
+            .enable_thinking(true)
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .tools({ special_function_tool })
+            .expect(simple_assist_msg("", "I should use a tool", "special_function", R"({"arg1": 1})"))
+            .run();
     }
+
     {
-        auto tmpls = read_templates("models/templates/meta-llama-Llama-3.1-8B-Instruct.jinja");
-        std::vector<std::string>   end_tokens{ "<|eom_id|>", "<|eot_id|>" };
+        // NousResearch-Hermes-2-Pro and Hermes-3 (tool calling models)
+        auto tst = peg_tester("models/templates/NousResearch-Hermes-2-Pro-Llama-3-8B-tool_use.jinja", detailed_debug);
 
-        assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
-        assert_equals(COMMON_CHAT_FORMAT_LLAMA_3_X, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
-        assert_equals(COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS,
-                      common_chat_templates_apply(tmpls.get(), inputs_tools_builtin).format);
-        assert_equals(COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS,
-                      common_chat_templates_apply(
-                          read_templates("models/templates/meta-llama-Llama-3.3-70B-Instruct.jinja").get(),
-                          inputs_tools_builtin)
-                          .format);
+        tst.test(
+               "<tool_call>\n"
+               "{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
+               "</tool_call>")
+            .tools({ special_function_tool })
+            .expect(message_assist_call)
+            .run();
 
-        assert_equals(
-            message_assist_call,
-            test_chat_parse(
-                "{\"name\": \"special_function\", \"parameters\": {\"arg1\": 1}}",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_LLAMA_3_X}));
+        tst.test(
+               "Hello, world!\nWhat's up?<tool_call>\n"
+               "{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
+               "</tool_call>")
+            .tools({ special_function_tool })
+            .expect(message_assist_call_content)
+            .run();
 
-        // test_templates(tmpls.get(), end_tokens, message_assist, tools, R"(?)", /* expect_grammar_triggered= */ false);
-        test_templates(tmpls.get(), end_tokens, message_assist_call_code_interpreter, llama_3_1_tools,
-                      "<|python_tag|>code_interpreter.call(code=\"print('hey')\")");
-        test_templates(tmpls.get(), end_tokens, message_assist_call_python, tools,
-                      "<|python_tag|>python.call(code=\"print('hey')\")");
-        test_templates(tmpls.get(), end_tokens, message_assist_call, tools,
-                      "{\"name\": \"special_function\", \"parameters\": {\"arg1\": 1}}");
-    }
-    {
-        auto tmpls = read_templates("models/templates/meta-llama-Llama-3.2-3B-Instruct.jinja");
-        std::vector<std::string>   end_tokens{ "<|eom_id|>", "<|eot_id|>" };
-
-        assert_equals(COMMON_CHAT_FORMAT_LLAMA_3_X, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
-        assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
-
-        test_templates(tmpls.get(), end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
-        test_templates(tmpls.get(), end_tokens, message_assist_call, tools,
-                      "{\"name\": \"special_function\", \"parameters\": {\"arg1\": 1}}");
-    }
-    {
-        auto tmpls = read_templates("models/templates/meetkai-functionary-medium-v3.1.jinja");
-        std::vector<std::string>   end_tokens{ "<|eom_id|>", "<|eot_id|>" };
-
-        assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY,
-                      common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
-        assert_equals(COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1,
-            common_chat_templates_apply(tmpls.get(), inputs_tools).format);
-        assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY,
-                        common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
-
-        for (auto is_partial : { false, true }) {
-            assert_equals(
-                message_assist_call,
-                test_chat_parse(
-                    "<function=special_function>{\"arg1\": 1}</function>",
-                    is_partial,
-                    {COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1}));
-        }
-
-        assert_equals(
-            message_assist_call,
-            test_chat_parse(
-                "<function=special_function>{\"arg1\": 1}<",
-                /* is_partial= */ true,
-                {COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1}));
-
-        test_templates(tmpls.get(), end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
-        test_templates(tmpls.get(), end_tokens, message_assist_call, tools,
-                      "<function=special_function>{\"arg1\": 1}</function>");
-    }
-    {
-        auto tmpls = read_templates("models/templates/meetkai-functionary-medium-v3.2.jinja");
-        std::vector<std::string>   end_tokens{ "<|eom_id|>", "<|eot_id|>" };
-
-        assert_equals(COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
-        assert_equals(COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
-
-        assert_msg_equals(
-            simple_assist_msg(
-                "Hello, world!\nnono\nWhat's up?",
-                "",
-                "special_function",
-                "{\"arg1\": 1}"),
-            test_chat_parse(
-                "all\n"
-                "Hello, world!\n"
-                "nono\n"
-                "What's up?>>>special_function\n"
-                "{\"arg1\": 1}\n",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2}));
-        assert_msg_equals(message_assist_call_python_lines,
-            test_chat_parse(
-                "python\n"
-                "# This is a program:\n"
-                "print('hey')",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2}));
-        assert_msg_equals(message_assist_call_python_lines_unclosed,
-            test_chat_parse(
-                "python\n"
-                "# This is a program:\n"
-                "print('hey')",
-                /* is_partial= */ true,
-                {COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2}));
-        assert_msg_equals(message_assist_call,
-            test_chat_parse(
-                "special_function\n"
-                "{\"arg1\": 1} \n                    ",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2}));
-        assert_msg_equals(message_assist,
-            test_chat_parse(
-                "all\n"
-                "Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2}));
-
-        test_templates(tmpls.get(), end_tokens, message_assist, {},
-                      "all\n"
-                      "Hello, world!\n"
-                      "What's up?",
-                      /* expect_grammar_triggered= */ false);
-        test_templates(tmpls.get(), end_tokens, message_assist_call, tools,
-                      "special_function\n"
-                      "{\"arg1\": 1}");
-    }
-    {
-        auto tmpls = read_templates("models/templates/fireworks-ai-llama-3-firefunction-v2.jinja");
-        std::vector<std::string>   end_tokens{ "<|eot_id|>" };
-
-        assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
-        assert_equals(COMMON_CHAT_FORMAT_FIREFUNCTION_V2, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
-
-        test_templates(tmpls.get(), end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
-        test_templates(tmpls.get(), end_tokens, message_assist_call, tools,
-                      " functools[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}]");
-    }
-    {
-        // Original DeepSeek R1 template. Leaves <｜tool▁calls▁begin｜> and others unclosed. Our logic fixes the prompt.
-        auto tmpls = read_templates("models/templates/deepseek-ai-DeepSeek-R1-Distill-Llama-8B.jinja");
-        std::vector<std::string>   end_tokens{ "<｜end▁of▁sentence｜>" };
-
-        for (const auto & inputs : { inputs_no_tools, inputs_tools }) {
-            auto params = common_chat_templates_apply(tmpls.get(), inputs);
-            assert_equals(COMMON_CHAT_FORMAT_DEEPSEEK_R1, params.format);
-            assert_equals(true, params.thinking_forced_open);
-        }
-
-        test_templates(tmpls.get(), end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
-        test_templates(tmpls.get(), end_tokens, message_assist_thoughts, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
-        assert_msg_equals(
-            simple_assist_msg("Hello, world!\nWhat's up?", "<think>I'm\nthinking"),
-            test_chat_parse(
-                "<think>I'm\nthinking</think>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {
-                    COMMON_CHAT_FORMAT_DEEPSEEK_R1,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                    /* .reasoning_in_content = */ false,
-                    /* .thinking_forced_open = */ true,
-                }));
-        assert_msg_equals(
-            simple_assist_msg("", "I need to remember the correct syntax. It starts with <｜tool▁calls▁begin｜> and ends with"),
-            test_chat_parse(
-                "I need to remember the correct syntax. It starts with <｜tool▁calls▁begin｜> and ends with",
-                /* is_partial= */ true,
-                {
-                    COMMON_CHAT_FORMAT_DEEPSEEK_R1,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                    /* .reasoning_in_content = */ false,
-                    /* .thinking_forced_open = */ true,
-                }));
-        assert_msg_equals(message_assist_thoughts,
-            test_chat_parse(
-                "<think>I'm\nthinking</think>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_R1,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                }));
-        assert_msg_equals(message_assist_thoughts_unopened_unparsed,
-            test_chat_parse(
-                "I'm\nthinking</think>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_R1,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                }));
-        assert_msg_equals(message_assist_thoughts,
-            test_chat_parse(
-                "I'm\nthinking</think>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_R1,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                    /* .reasoning_in_content = */ false,
-                    /* .thinking_forced_open = */ true,
-                }));
-        assert_msg_equals(message_assist_thoughts,
-            // Latest template update (ast of 20250209) adds a trailing <think>\n if add_generation_prompt is true.
-            test_chat_parse(
-                "I'm\nthinking</think>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_R1,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                    /* .reasoning_in_content = */ false,
-                    /* .thinking_forced_open = */ true,
-                }));
-        // test_templates(tmpls.get(), end_tokens, message_assist_call, tools,
-        //               "<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>function<｜tool▁sep｜>special_function\n"
-        //               "```json\n"
-        //               "{\"arg1\": 1}\n"
-        //               // Look what's not here: <｜tool▁calls▁end｜> (also missing the <｜end▁of▁sentence｜>, but that is removed lazily by the test's delta logic)
-        //               "```<｜tool▁call▁end｜>",
-        //               /* expect_grammar_triggered= */ true,
-        //               /* test_grammar_if_triggered= */ false);
-    }
-    {
-        // Replacement DeepSeek R1 template. Makes the Distill Qwen 7B/32B models happy to call tools and all.
-        auto tmpls = read_templates("models/templates/llama-cpp-deepseek-r1.jinja");
-        std::vector<std::string>   end_tokens{ "<｜end▁of▁sentence｜>" };
-
-        assert_equals(COMMON_CHAT_FORMAT_DEEPSEEK_R1,                   common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
-        assert_equals(COMMON_CHAT_FORMAT_DEEPSEEK_R1,                   common_chat_templates_apply(tmpls.get(), inputs_tools).format);
-
-        test_templates(tmpls.get(), end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
-        test_templates(tmpls.get(), end_tokens, message_assist_thoughts, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
-        assert_msg_equals(message_assist_thoughts_unparsed_deepseek,
-            test_chat_parse(
-                "<think>I'm\nthinking</think>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_DEEPSEEK_R1}));
-        assert_msg_equals(message_assist_thoughts,
-            test_chat_parse(
-                "<think>I'm\nthinking</think>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_R1,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                }));
-        assert_msg_equals(message_assist_thoughts,
-            test_chat_parse(
-                "I'm\nthinking</think>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_R1,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                    /* .reasoning_in_content = */ false,
-                    /* .thinking_forced_open = */ true,
-                }));
-
-        assert_msg_equals(message_assist_call_thoughts_unparsed,
-            test_chat_parse(
-                "<think>I'm\nthinking</think>\n\n"
-                "<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>function<｜tool▁sep｜>special_function\n"
-                "```json\n"
-                "{\"arg1\": 1}\n"
-                "```<｜tool▁call▁end｜><｜tool▁calls▁end｜>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_DEEPSEEK_R1}));
-        assert_msg_equals(message_assist_call,
-            test_chat_parse(
-                "<｜tool▁calls｜>function<｜tool▁sep｜>special_function\n"
-                "```json\n"
-                "{\"arg1\": 1}\n"
-                "```<｜tool▁call▁end｜><｜tool▁calls▁end｜>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_DEEPSEEK_R1}));
-
-        assert_msg_equals(message_assist_call_thoughts,
-            test_chat_parse(
-                "<think>I'm\nthinking</think>\n\n"
-                "<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>function<｜tool▁sep｜>special_function\n"
-                "```json\n"
-                "{\"arg1\": 1}\n"
-                "```<｜tool▁call▁end｜><｜tool▁calls▁end｜>",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_R1,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                }));
-        test_templates(tmpls.get(), end_tokens, message_assist_call, tools,
-                "<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>function<｜tool▁sep｜>special_function\n"
-                "```json\n"
-                "{\"arg1\": 1}\n"
-                "```<｜tool▁call▁end｜><｜tool▁calls▁end｜>");
+        // Note: Hermes template doesn't support thinking/reasoning natively
+        // Note: We only support one tool calling format per template, no alternate formats
     }
     {
         auto tmpls = read_templates("models/templates/ibm-granite-granite-3.3-2B-Instruct.jinja");
@@ -1634,20 +1410,20 @@ static void test_template_output_parsers() {
 
         // Test parsing regular content
         assert_msg_equals(message_assist,
-            test_chat_parse(
+            common_chat_parse(
                 "Hello, world!\nWhat's up?",
                 /* is_partial= */ false,
                 {COMMON_CHAT_FORMAT_GRANITE}));
         assert_msg_equals(
             message_assist,
-            test_chat_parse(
+            common_chat_parse(
                 "Hello, world!\nWhat's up?",
                 /* is_partial= */ true,
                 {COMMON_CHAT_FORMAT_GRANITE}));
 
         // Test parsing content with thinking
         assert_msg_equals(message_assist_thoughts,
-            test_chat_parse(
+            common_chat_parse(
                 "<think>I'm\nthinking</think>Hello, world!\nWhat's up?",
                 /* is_partial= */ false,
                 {
@@ -1655,12 +1431,12 @@ static void test_template_output_parsers() {
                     /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
                 }));
         assert_msg_equals(message_assist_thoughts_unparsed_deepseek,
-            test_chat_parse(
+            common_chat_parse(
                 "<think>I'm\nthinking</think>Hello, world!\nWhat's up?",
                 /* is_partial= */ false,
                 {COMMON_CHAT_FORMAT_GRANITE}));
         assert_msg_equals(message_assist_thoughts,
-            test_chat_parse(
+            common_chat_parse(
                 "<think>I'm\nthinking</think><response>Hello, world!\nWhat's up?",
                 /* is_partial= */ true,
                 {
@@ -1668,7 +1444,7 @@ static void test_template_output_parsers() {
                     /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
                 }));
         assert_msg_equals(message_assist_thoughts,
-            test_chat_parse(
+            common_chat_parse(
                 "<think>I'm\nthinking</think><response>Hello, world!\nWhat's up?</response>",
                 /* is_partial= */ false,
                 {
@@ -1676,12 +1452,12 @@ static void test_template_output_parsers() {
                     /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
                 }));
         assert_msg_equals(simple_assist_msg("<think>I'm\nthinking</think><response>Hello, world!\nWhat's up?</response>"),
-            test_chat_parse(
+            common_chat_parse(
                 "<think>I'm\nthinking</think><response>Hello, world!\nWhat's up?</response>",
                 /* is_partial= */ false,
                 {COMMON_CHAT_FORMAT_GRANITE}));
         assert_msg_equals(message_assist_empty,
-            test_chat_parse(
+            common_chat_parse(
                 "<think",
                 /* is_partial= */ true,
                 {
@@ -1689,12 +1465,12 @@ static void test_template_output_parsers() {
                     /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
                 }));
         assert_msg_equals(message_assist_empty,
-            test_chat_parse(
+            common_chat_parse(
                 "<think",
                 /* is_partial= */ true,
                 {COMMON_CHAT_FORMAT_GRANITE}));
         assert_msg_equals(message_assist_thoughts_no_content,
-            test_chat_parse(
+            common_chat_parse(
                 "<think>I'm\nthinking",
                 /* is_partial= */ true,
                 {
@@ -1703,329 +1479,57 @@ static void test_template_output_parsers() {
                 }));
         assert_msg_equals(
             message_assist_empty,
-            test_chat_parse(
+            common_chat_parse(
                 "<think>I'm\nthinking</think><response",
                 /* is_partial= */ true,
                 {COMMON_CHAT_FORMAT_GRANITE}));
 
-        // Test parsing tool calls
-        assert_msg_equals(message_assist_call,
-            test_chat_parse(
-                "<|tool_call|>[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}]",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_GRANITE}));
-        assert_msg_equals(
-            message_assist_call_empty_args,
-            test_chat_parse(
-                "<|tool_call|>[{\"name\": \"special_function\"",
-                /* is_partial= */ true,
-                {COMMON_CHAT_FORMAT_GRANITE}));
-        assert_msg_equals(
-            message_assist_call_cutoff_args,
-            test_chat_parse(
-                "<|tool_call|>[{\"name\": \"special_function\", \"arguments\": {\"arg",
-                /* is_partial= */ true,
-                {COMMON_CHAT_FORMAT_GRANITE}));
-        assert_msg_equals(
-            message_assist_call_cutoff_args,
-            test_chat_parse(
-                "<|tool_call|>[{\"name\": \"special_function\", \"arguments\": {\"arg",
-                /* is_partial= */ true,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_GRANITE,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                }));
+    // Note: Functionary and Firefunction have dedicated handlers, not tested with auto-parser
 
-        // Test parsing tool calls with thinking
-        assert_msg_equals(
-            message_assist_call_thoughts,
-            test_chat_parse(
-                "<think>I'm\nthinking</think><|tool_call|>[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}, {",
-                /* is_partial= */ true,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_GRANITE,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                }));
-
-        // Test template generation for regular content
-        test_templates(tmpls.get(), end_tokens, message_assist, tools,
-                      "Hello, world!\nWhat's up?",
-                      /* expect_grammar_triggered= */ false);
-    // TODO @ngxson : generic tool call should be removed in the future
-#if 0
-        // Test template generation for tool calls
-        test_templates(tmpls.get(), end_tokens, message_assist_call_id, tools,
-                      "{\n"
-                      "  \"tool_calls\": [\n"
-                      "    {\n"
-                      "      \"name\": \"special_function\",\n"
-                      "      \"arguments\": {\n"
-                      "        \"arg1\": 1\n"
-                      "      },\n"
-                      "      \"id\": \"123456789\"\n"
-                      "    }\n"
-                      "  ],\n"
-                      "  \"content\": \"\"\n"
-                      "}",
-                      /* expect_grammar_triggered= */ false
-        );
-#endif
-    }
     {
-        auto tmpls = read_templates("models/templates/openai-gpt-oss-120b.jinja");
-        std::vector<std::string> end_tokens{ "<|return|>", "<|call|>" };
+        // Test simple content-only template
+        auto tst = peg_tester("models/templates/google-gemma-2-2b-it.jinja", detailed_debug);
 
-        assert_equals(COMMON_CHAT_FORMAT_GPT_OSS, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
-        assert_equals(COMMON_CHAT_FORMAT_GPT_OSS, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
-
-        assert_msg_equals(simple_assist_msg("", "I'm\nthink"),
-            test_chat_parse(
-                "<|channel|>analysis<|message|>I'm\nthink",
-                /* is_partial= */ true,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
-                }));
-        assert_msg_equals(simple_assist_msg("", "I'm\nthinking"),
-            test_chat_parse(
-                "<|channel|>analysis<|message|>I'm\nthinking<|end|>",
-                /* is_partial= */ true,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
-                }));
-        assert_msg_equals(simple_assist_msg("Hello, world!\nWhat's up?", "I'm\nthinking"),
-            test_chat_parse(
-                "<|channel|>analysis<|message|>I'm\nthinking<|end|>"
-                "<|start|>assistant<|channel|>final<|message|>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
-                }));
-        assert_msg_equals(simple_assist_msg("", "I'm\nthinking", "special_function", "{\"arg1"),
-            test_chat_parse(
-                "<|channel|>analysis<|message|>I'm\nthinking<|end|>"
-                "<|start|>assistant<|channel|>commentary to=functions.special_function <|constrain|>json<|message|>{\"arg1",
-                /* is_partial= */ true,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
-                }));
-        assert_msg_equals(simple_assist_msg("", "I'm\nthinking", "special_function", "{\"arg1"),
-            test_chat_parse(
-                "<|channel|>analysis<|message|>I'm\nthinking<|end|>"
-                "<|start|>assistant<|channel|>commentary to=functions.special_function<|message|>{\"arg1",
-                /* is_partial= */ true,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
-                }));
-        assert_msg_equals(simple_assist_msg("", "I'm\nthinking", "special_function", "{\"arg1\": 1}"),
-            test_chat_parse(
-                "<|channel|>analysis<|message|>I'm\nthinking<|end|>"
-                "<|start|>assistant<|channel|>commentary to=functions.special_function <|constrain|>json<|message|>{\"arg1\": 1}",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
-                }));
-        assert_msg_equals(simple_assist_msg("", "I'm\nthinking", "special_function", "{\"arg1\": 1}"),
-            test_chat_parse(
-                "<|channel|>analysis<|message|>I'm\nthinking<|end|>"
-                "<|start|>assistant<|channel|>analysis to=functions.special_function <|constrain|>json<|message|>{\"arg1\": 1}",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
-                }));
-        assert_msg_equals(simple_assist_msg("Hello, world!\nWhat's up?", "I'm\nthinking"),
-            test_chat_parse(
-                "<|channel|>analysis<|message|>I'm\nthinking<|end|>"
-                "<|start|>assistant<|channel|>commentary<|message|>Hello, world!\nWhat's up?",
-                /* is_partial= */ true,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
-                }));
-        assert_msg_equals(simple_assist_msg("Hello, world!\nWhat's up?", "I'm\nthinking", "special_function", "{\"arg1\": 1}"),
-            test_chat_parse(
-                "<|channel|>analysis<|message|>I'm\nthinking<|end|>"
-                "<|start|>assistant<|channel|>commentary<|message|>Hello, world!\nWhat's up?<|end|>"
-                "<|start|>assistant<|channel|>commentary to=functions.special_function <|constrain|>json<|message|>{\"arg1\": 1}",
-                /* is_partial= */ true,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
-                }));
-
-        // Test parse_tool_calls == false
-        assert_msg_equals(
-            simple_assist_msg("Hello, world!\nWhat's up?", "I'm\nthinking"),
-            test_chat_parse(
-                "<|channel|>analysis<|message|>I'm\nthinking<|end|>"
-                "<|start|>assistant<|channel|>final<|message|>Hello, world!\nWhat's up?",
-                /* is_partial= */ true,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
-                    /* .reasoning_in_content = */ false,
-                    /* .thinking_forced_open = */ false,
-                    /* .parse_tool_calls = */ false,
-                }));
-        assert_msg_equals(
-            simple_assist_msg("", "I'm\nthinking"),
-            test_chat_parse(
-                "<|channel|>analysis<|message|>I'm\nthinking<|end|>"
-                "<|start|>assistant<|channel|>commentary to=functions.special_function<|message|>{\"arg1",
-                /* is_partial= */ true,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
-                    /* .reasoning_in_content = */ false,
-                    /* .thinking_forced_open = */ false,
-                    /* .parse_tool_calls = */ false,
-                }));
-        assert_msg_equals(
-            simple_assist_msg("", "I'm\nthinking"),
-            test_chat_parse(
-                "<|channel|>analysis<|message|>I'm\nthinking<|end|>"
-                "<|start|>assistant<|channel|>commentary to=functions.special_function <|constrain|>json<|message|>{\"arg1\": 1}",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
-                    /* .reasoning_in_content = */ false,
-                    /* .thinking_forced_open = */ false,
-                    /* .parse_tool_calls = */ false,
-                }));
-
-        // Test reasoning formats
-        assert_msg_equals(
-            simple_assist_msg(
-                "<|channel|>analysis<|message|>I'm\nthinking<|end|>Hello, world!\nWhat's up?"),
-            test_chat_parse(
-                "<|channel|>analysis<|message|>I'm\nthinking<|end|>"
-                "<|start|>assistant<|channel|>final<|message|>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_NONE,
-                }));
-
-        assert_msg_equals(
-            simple_assist_msg(
-                "<|channel|>analysis<|message|>I'm\nthinking<|end|>Hello, world!\nWhat's up?"),
-            test_chat_parse(
-                "<|channel|>analysis<|message|>I'm\nthinking<|end|>"
-                "<|start|>assistant<|channel|>final<|message|>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
-                    /* .reasoning_in_content = */ true,
-                }));
-
-        // Test tool calling in role header
-        assert_msg_equals(simple_assist_msg("", "", "special_function", "{\"arg1\": 1}"),
-            test_chat_parse(
-                " to=functions.special_function<|channel|>commentary <|constrain|>json<|message|>{\"arg1\": 1}",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
-                }));
-        assert_msg_equals(simple_assist_msg("", "", "special_function", "{\"arg1\": 1}"),
-            test_chat_parse(
-                " to=functions.special_function<|channel|>analysis <|constrain|>json<|message|>{\"arg1\": 1}",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
-                }));
-        assert_msg_equals(simple_assist_msg("", "I'm\nthinking", "special_function", "{\"arg1\": 1}"),
-            test_chat_parse(
-                "<|channel|>analysis<|message|>I'm\nthinking<|end|>"
-                "<|start|>assistant to=functions.special_function<|channel|>analysis <|constrain|>json<|message|>{\"arg1\": 1}",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
-                }));
+        tst.test("Hello, world!\nWhat's up?").expect(message_assist).run();
     }
+
     {
-        // Seed-OSS format tests
-        auto tmpls = read_templates("models/templates/ByteDance-Seed-OSS.jinja");
-        std::vector<std::string> end_tokens{ "<seed:eos>" };
+        // IBM Granite (reasoning and tool calling model)
+        auto tst = peg_tester("models/templates/ibm-granite-granite-3.3-2B-Instruct.jinja", detailed_debug);
 
-        assert_equals(COMMON_CHAT_FORMAT_SEED_OSS, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
-        assert_equals(COMMON_CHAT_FORMAT_SEED_OSS, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
+        tst.test("Hello, world!\nWhat's up?").expect(message_assist).run();
 
-        test_templates(tmpls.get(), end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
+        tst.test("<think>I'm\nthinking</think>Hello, world!\nWhat's up?")
+            .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+            .expect(message_assist_thoughts)
+            .run();
 
-        // Test simple reasoning content
-        assert_msg_equals(
-            simple_assist_msg("Hello, world!", "I'm thinking about the answer"),
-            test_chat_parse(
-                "<seed:think>I'm thinking about the answer</seed:think>Hello, world!",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_SEED_OSS,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                }));
+        tst.test("<think>I'm\nthinking</think><response>Hello, world!\nWhat's up?</response>")
+            .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+            .expect(message_assist_thoughts)
+            .run();
+    }
 
-        // Test budget reflection tags
-        common_chat_msg msg_budget_reflect;
-        msg_budget_reflect.role = "assistant";
-        msg_budget_reflect.content = "<seed:cot_budget_reflect>Token usage: 45/1000\nI should continue thinking to find the best solution.</seed:cot_budget_reflect>I need to calculate this step by step.";
-        msg_budget_reflect.reasoning_content = "Token usage: 45/1000\nI should continue thinking to find the best solution.";
-        assert_msg_equals(
-            msg_budget_reflect,
-            test_chat_parse(
-                "<seed:think>Token usage: 45/1000\nI should continue thinking to find the best solution.</seed:think>"
-                "<seed:cot_budget_reflect>Token usage: 45/1000\nI should continue thinking to find the best solution.</seed:cot_budget_reflect>"
-                "I need to calculate this step by step.",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_SEED_OSS,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                }));
+    {
+        // ByteDance-Seed-OSS (reasoning and tool calling model)
+        auto tst = peg_tester("models/templates/ByteDance-Seed-OSS.jinja", detailed_debug);
 
-        // Test tool calls with Seed-OSS format
-        common_chat_msg msg_tool_call;
-        msg_tool_call.role = "assistant";
-        msg_tool_call.tool_calls.push_back({"calculate_sum", "{\"numbers\": [1, 2, 3]}", ""});
-        assert_msg_equals(
-            msg_tool_call,
-            test_chat_parse(
-                "<seed:tool_call>\n"
-                "<function=calculate_sum>\n"
-                "<parameter=numbers>[1, 2, 3]</parameter>\n"
-                "</function>\n"
-                "</seed:tool_call>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_SEED_OSS}));
+        tst.test("Hello, world!\nWhat's up?").expect(message_assist).run();
 
-        // Test reasoning + tool call combination
-        common_chat_msg msg_reasoning_tool;
-        msg_reasoning_tool.role = "assistant";
-        msg_reasoning_tool.content = "";
-        msg_reasoning_tool.reasoning_content = "I need to calculate the sum of these numbers";
-        msg_reasoning_tool.tool_calls.push_back({"calculate_sum", "{\"numbers\": [1, 2, 3]}", ""});
-        assert_msg_equals(
-            msg_reasoning_tool,
-            test_chat_parse(
-                "<seed:think>I need to calculate the sum of these numbers</seed:think>"
-                "<seed:tool_call>\n"
-                "<function=calculate_sum>\n"
-                "<parameter=numbers>[1, 2, 3]</parameter>\n"
-                "</function>\n"
-                "</seed:tool_call>",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_SEED_OSS,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                }));
+        tst.test("<seed:think>I'm thinking about the answer</seed:think>Hello, world!")
+            .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+            .expect(simple_assist_msg("Hello, world!", "I'm thinking about the answer"))
+            .run();
+
+        tst.test(
+               "<seed:tool_call>\n"
+               "<function=special_function>\n"
+               "<parameter=arg1>1</parameter>\n"
+               "</function>\n"
+               "</seed:tool_call>")
+            .tools({ special_function_tool })
+            .expect(message_assist_call)
+            .run();
 
         // Test deltas: the number of tool calls in partial parses should never decrease
         std::string tool_msg = "<seed:tool_call>\n"
@@ -2035,7 +1539,7 @@ static void test_template_output_parsers() {
         std::size_t previousToolCalls = 0;
         for (std::size_t i = std::string("<seed:tool_call>").length(); i < tool_msg.length() - 1; i++) {
             auto partial = tool_msg.substr(0, i);
-            auto partial_res = test_chat_parse(partial, true, { COMMON_CHAT_FORMAT_SEED_OSS, COMMON_REASONING_FORMAT_DEEPSEEK });
+            auto partial_res = common_chat_parse(partial, true, { COMMON_CHAT_FORMAT_SEED_OSS, COMMON_REASONING_FORMAT_DEEPSEEK });
             if (partial_res.tool_calls.size() < previousToolCalls) {
                 throw std::runtime_error("Tool call size decreased on partial: " + partial + " from " + std::to_string(previousToolCalls) + " to " + std::to_string(partial_res.tool_calls.size()));
             }
@@ -2048,1889 +1552,738 @@ static void test_template_output_parsers() {
         msg_multi_param.tool_calls.push_back({"process_data", "{\"input\": \"test\", \"format\": \"json\"}", ""});
         assert_msg_equals(
             msg_multi_param,
-            test_chat_parse(
-                "<seed:tool_call>\n"
-                "<function=process_data>\n"
-                "<parameter=input>test</parameter>\n"
-                "<parameter=format>json</parameter>\n"
-                "</function>\n"
-                "</seed:tool_call>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_SEED_OSS}));
+            common_chat_parse(
+               "<seed:tool_call>\n"
+               "<function=special_function>\n"
+               "<parameter=arg1>1</parameter>\n"
+               "</function>\n"
+               "</seed:tool_call>")
+            .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+            .tools({ special_function_tool })
+            .expect(simple_assist_msg("", "I need to call a function", "special_function", R"({"arg1": 1})"))
+            .run();
 
-        // Test partial parsing for incomplete tool call - don't actually add the call until parsing parameters is done
-        assert_msg_equals(
-            simple_assist_msg("", "", "calculate_sum", "{\"numbers\":"),
-            test_chat_parse(
-                "<seed:tool_call>\n"
-                "<function=calculate_sum>\n"
-                "<parameter=numbers>[1,\n",
-                /* is_partial= */ true,
-                {COMMON_CHAT_FORMAT_SEED_OSS}));
+        tst.test(
+               "<seed:tool_call>\n"
+               "<function=special_function>\n"
+               "<parameter=arg1>1</parameter>\n"
+               "</function>\n"
+               "</seed:tool_call>\n"
+               "<seed:tool_call>\n"
+               "<function=special_function_with_opt>\n"
+               "<parameter=arg1>1</parameter>\n"
+               "<parameter=arg2>2</parameter>\n"
+               "</function>\n"
+               "</seed:tool_call>")
+            .parallel_tool_calls(true)
+            .tools({
+                special_function_tool, special_function_tool_with_optional_param
+        })
+            .expect_tool_calls({
+                { "special_function", R"({"arg1": 1})", {} },
+                { "special_function_with_opt", R"({"arg1": 1, "arg2": 2})", {} },
+            })
+            .run();
 
-        // Test incomplete reasoning tag
-        assert_msg_equals(
-            simple_assist_msg("", "I was thinking"),
-            test_chat_parse(
-                "<seed:think>I was thinking",
-                /* is_partial= */ true,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_SEED_OSS,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                }));
+        tst.test(
+               "<seed:tool_call>\n"
+               "<function=todo_list>\n"
+               "<parameter=todos>\n"
+               "[{\"item\": \"Check stuff\", \"selected\": false}, {\"item\": \"Prepare stuff\", \"selected\": true}]\n"
+               "</parameter>\n"
+               "</function>\n"
+               "</seed:tool_call>")
+            .tools({
+                todo_list
+        })
+            .expect_tool_calls({
+                { "todo_list", "{\"todos\": [{\"item\": \"Check stuff\", \"selected\": false}, {\"item\": \"Prepare stuff\", \"selected\": true}]}", {} },
+            })
+            .run();
 
-        // Test content without reasoning
-        assert_msg_equals(
-            simple_assist_msg("This is a simple response without reasoning."),
-            test_chat_parse(
-                "This is a simple response without reasoning.",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_SEED_OSS}));
-    }
-    {
-        auto tmpls = read_templates("models/templates/NVIDIA-Nemotron-Nano-v2.jinja");
-        std::vector<std::string> end_tokens{ "<SPECIAL_12>" };
+        // single-quote normalization
+        tst.test(
+               "<seed:tool_call>\n"
+               "<function=todo_list>\n"
+               "<parameter=todos>\n"
+               "[{'item': 'Check stuff', 'selected': false}, {'item': 'Prepare stuff', 'selected': true}]\n"
+               "</parameter>\n"
+               "</function>\n"
+               "</seed:tool_call>")
+            .tools({
+                todo_list
+        })
+            .expect_tool_calls({
+                { "todo_list", "{\"todos\": [{\"item\": \"Check stuff\", \"selected\": false}, {\"item\": \"Prepare stuff\", \"selected\": true}]}", {} },
+            })
+            .run();
 
-        assert_equals(COMMON_CHAT_FORMAT_NEMOTRON_V2, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
-        assert_equals(COMMON_CHAT_FORMAT_NEMOTRON_V2, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
-
-        // Test parsing regular content
-        assert_msg_equals(message_assist,
-            test_chat_parse(
-                "Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_NEMOTRON_V2}));
-
-        // Test parsing content with thinking
-        assert_msg_equals(message_assist_thoughts,
-            test_chat_parse(
-                "<think>I'm\nthinking</think>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_NEMOTRON_V2,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                }));
-
-        // Test parsing tool calls
-        assert_msg_equals(message_assist_call,
-            test_chat_parse(
-                "<TOOLCALL>[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}]</TOOLCALL>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_NEMOTRON_V2}));
-
-        // Test parsing tool calls with thinking
-        assert_msg_equals(message_assist_call_thoughts,
-            test_chat_parse(
-                "<think>I'm\nthinking</think><TOOLCALL>[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}]</TOOLCALL>",
-                /* is_partial= */ false,
-                {
-                    /*  .format = */ COMMON_CHAT_FORMAT_NEMOTRON_V2,
-                    /*  .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
-                }));
-
-        // Test tool calls with extra content
-        assert_msg_equals(message_assist_call_content,
-            test_chat_parse(
-                "<TOOLCALL>[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}]</TOOLCALL>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_NEMOTRON_V2}
-            ));
-
-        // Test tool calls with extra content AND thinking
-        assert_msg_equals(message_assist_call_thoughts_content,
-            test_chat_parse(
-                "<think>I'm\nthinking</think><TOOLCALL>[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}]</TOOLCALL>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {
-                    /*  .format = */ COMMON_CHAT_FORMAT_NEMOTRON_V2,
-                    /*  .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
-                }));
-
-        // Test template generation for regular content
-        test_templates(tmpls.get(), end_tokens, message_assist, tools,
-                      "Hello, world!\nWhat's up?\n",
-                      /* expect_grammar_triggered= */ false);
-
-        // Test template generation for tool calls
-        test_templates(tmpls.get(), end_tokens, message_assist_call, tools,
-                      "<TOOLCALL>[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}]</TOOLCALL>",
-                      /* expect_grammar_triggered= */ true
-        );
-    }
-    {
-        auto tmpls = read_templates("models/templates/deepseek-ai-DeepSeek-V3.1.jinja");
-        std::vector<std::string>   end_tokens{ "<｜end▁of▁sentence｜>" };
-
-        for (const auto & inputs : { inputs_no_tools, inputs_tools }) {
-            auto params = common_chat_templates_apply(tmpls.get(), inputs);
-            assert_equals(COMMON_CHAT_FORMAT_DEEPSEEK_V3_1, params.format);
-            assert_equals(true, params.thinking_forced_open);
-        }
-
-        test_templates(tmpls.get(), end_tokens, message_assist, tools, "</think>Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
-        test_templates(tmpls.get(), end_tokens, message_assist_thoughts, tools, "</think>Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
-        assert_msg_equals(
-            simple_assist_msg("Hello, world!\nWhat's up?", "I'm\nthinking"),
-            test_chat_parse(
-                "I'm\nthinking</think>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {
-                    COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                    /* .reasoning_in_content = */ false,
-                    /* .thinking_forced_open = */ true,
-                }));
-        // variant: thinking forced open, reasoning_format none
-        assert_msg_equals(
-            simple_assist_msg("REASONING</think>ok", ""),
-            test_chat_parse(
-                "REASONING</think>ok",
-                /* is_partial= */ false,
-                {
-                    COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_NONE,
-                    /* .reasoning_in_content = */ false,
-                    /* .thinking_forced_open = */ true,
-                    /* .parse_tool_calls = */ true,
-                }));
-        // variant: happy path for when it works as the model card says it should
-        assert_msg_equals(
-            simple_assist_msg("", "", "get_time", "{\"city\":\"Tokyo\"}"),
-            test_chat_parse(
-                "<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>get_time<｜tool▁sep｜>{\"city\": \"Tokyo\"}<｜tool▁call▁end｜><｜tool▁calls▁end｜>",
-                /* is_partial= */ false,
-                {
-                    COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                    /* .reasoning_in_content = */ false,
-                    /* .thinking_forced_open = */ false,
-                    /* .parse_tool_calls = */ true,
-                }));
-        // variant: simple + thinking open
-        assert_msg_equals(
-            simple_assist_msg("", "REASONING", "get_time", "{\"city\":\"Tokyo\"}"),
-            test_chat_parse(
-                "REASONING</think><｜tool▁calls▁begin｜><｜tool▁call▁begin｜>get_time<｜tool▁sep｜>{\"city\": \"Tokyo\"}<｜tool▁call▁end｜><｜tool▁calls▁end｜>",
-                /* is_partial= */ false,
-                {
-                    COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                    /* .reasoning_in_content = */ false,
-                    /* .thinking_forced_open = */ true,
-                    /* .parse_tool_calls = */ true,
-                }));
-        // variant: simple + multiple tool calls
-        common_chat_msg message_assist_multiple_calls;
-        message_assist_multiple_calls.role = "assistant";
-        message_assist_multiple_calls.content = "CONTENT";
-        message_assist_multiple_calls.tool_calls.push_back({"get_time", "{\"city\":\"Paris\"}", ""});
-        message_assist_multiple_calls.tool_calls.push_back({"get_weather", "{\"city\":\"Paris\"}", ""});
-        assert_msg_equals(
-            message_assist_multiple_calls,
-            test_chat_parse(
-                "CONTENT<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>get_time<｜tool▁sep｜>{\"city\": \"Paris\"}<｜tool▁call▁end｜><｜tool▁call▁begin｜>get_weather<｜tool▁sep｜>{\"city\": \"Paris\"}<｜tool▁call▁end｜><｜tool▁calls▁end｜>",
-                /* is_partial= */ false,
-                {
-                    COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                    /* .reasoning_in_content = */ false,
-                    /* .thinking_forced_open = */ false,
-                    /* .parse_tool_calls = */ true,
-                }));
-        // variant: thinking forced open + tool call in reasoning content
-        assert_msg_equals(
-            simple_assist_msg("", "REASONING<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>get_time2<｜tool▁sep｜>{\"city\": \"Tokyo2\"}<｜tool▁call▁end｜><｜tool▁calls▁end｜>REASONING", "get_time", "{\"city\":\"Tokyo\"}"),
-            test_chat_parse(
-                "REASONING<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>get_time2<｜tool▁sep｜>{\"city\": \"Tokyo2\"}<｜tool▁call▁end｜><｜tool▁calls▁end｜>REASONING</think><｜tool▁calls▁begin｜><｜tool▁call▁begin｜>get_time<｜tool▁sep｜>{\"city\": \"Tokyo\"}<｜tool▁call▁end｜><｜tool▁calls▁end｜>",
-                /* is_partial= */ false,
-                {
-                    COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                    /* .reasoning_in_content = */ false,
-                    /* .thinking_forced_open = */ true,
-                    /* .parse_tool_calls = */ true,
-                }));
-        // variant: thinking forced open + tool call in reasoning content + no closing think + not partial
-        //          This is a bit of a fine tuning issue on the model's part IMO. It really should not be attempting
-        //          to make tool calls in reasoning content according to the model card, but it does sometimes, so
-        //          add the reasoning content as regular content and parse the tool calls.
-        assert_msg_equals(
-            simple_assist_msg("REASONING", "", "get_time", "{\"city\":\"Tokyo\"}"),
-            test_chat_parse(
-                "REASONING<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>get_time<｜tool▁sep｜>{\"city\": \"Tokyo\"}<｜tool▁call▁end｜><｜tool▁calls▁end｜>",
-                /* is_partial= */ false,
-                {
-                    COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                    /* .reasoning_in_content = */ false,
-                    /* .thinking_forced_open = */ true,
-                    /* .parse_tool_calls = */ true,
-                }));
-        // variant: thinking forced open + tool call in reasoning content + no closing think + partial
-        assert_msg_equals(
-            simple_assist_msg("", "REASONING<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>get_time<｜tool▁sep｜>{\"city\": \"Tokyo\"}<｜tool▁call▁end｜><｜tool▁calls▁end｜>", "", ""),
-            test_chat_parse(
-                "REASONING<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>get_time<｜tool▁sep｜>{\"city\": \"Tokyo\"}<｜tool▁call▁end｜><｜tool▁calls▁end｜>",
-                /* is_partial= */ true,
-                {
-                    COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                    /* .reasoning_in_content = */ false,
-                    /* .thinking_forced_open = */ true,
-                    /* .parse_tool_calls = */ true,
-                }));
-        // variant: thinking not forced open + missing reasoning + no tool calls
-        assert_msg_equals(
-            simple_assist_msg("CONTENT", ""),
-            test_chat_parse(
-                "CONTENT",
-                /* is_partial= */ false,
-                {
-                    COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                    /* .reasoning_in_content = */ false,
-                    /* .thinking_forced_open = */ false,
-                    /* .parse_tool_calls = */ true,
-                }));
-    }
-    {
-        auto tmpls = read_templates("models/templates/Apertus-8B-Instruct.jinja");
-        std::vector<std::string> end_tokens{ "<|assistant_end|>" };
-
-        assert_equals(COMMON_CHAT_FORMAT_APERTUS, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
-        assert_equals(COMMON_CHAT_FORMAT_APERTUS, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
-
-        // Test parsing regular content
-        assert_msg_equals(message_assist,
-            test_chat_parse(
-                "Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_APERTUS}));
-
-        // Test parsing content with thinking
-        assert_msg_equals(message_assist_thoughts,
-            test_chat_parse(
-                "<|inner_prefix|>I'm\nthinking<|inner_suffix|>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_APERTUS,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                }));
-
-        // Test parsing tool calls
-        assert_msg_equals(message_assist_call,
-            test_chat_parse(
-                "<|tools_prefix|>[{\"special_function\": {\"arg1\": 1}}]<|tools_suffix|>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_APERTUS}));
-
-        // Test parsing tool calls with thinking
-        assert_msg_equals(message_assist_call_thoughts,
-            test_chat_parse(
-                "<|inner_prefix|>I'm\nthinking<|inner_suffix|><|tools_prefix|>[{\"special_function\": {\"arg1\": 1}}]<|tools_suffix|>",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_APERTUS,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
-                }));
-
-        // Test tool calls with extra content
-        assert_msg_equals(message_assist_call_content,
-            test_chat_parse(
-                "<|tools_prefix|>[{\"special_function\": {\"arg1\": 1}}]<|tools_suffix|>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_APERTUS}
-            ));
-
-        // Test tool calls with extra content AND thinking
-        assert_msg_equals(message_assist_call_thoughts_content,
-            test_chat_parse(
-                "<|inner_prefix|>I'm\nthinking<|inner_suffix|><|tools_prefix|>[{\"special_function\": {\"arg1\": 1}}]<|tools_suffix|>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_APERTUS,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
-                }));
-
-        // Test template generation for regular content
-        test_templates(tmpls.get(), end_tokens, message_assist, tools,
-                      "Hello, world!\nWhat's up?",
-                      /* expect_grammar_triggered= */ false);
-
-        // Test template generation for tool calls
-        test_templates(tmpls.get(), end_tokens, message_assist_call, tools,
-                      "<|tools_prefix|>[{\"special_function\": {\"arg1\": 1}}]<|tools_suffix|>",
-                      /* expect_grammar_triggered= */ true
-        );
-
-        // TODO @ngxson : not sure why this fails, but not very important for now
-        // assert_equals(true, common_chat_templates_support_enable_thinking(tmpls.get()));
-    }
-    {
-        // LFM2 format tests
-        auto tmpls = read_templates("models/templates/llama-cpp-lfm2.jinja");
-        std::vector<std::string> end_tokens{ "<|im_end|>" };
-
-        auto inputs_tools_forced_json_schema = std::invoke([&]() -> common_chat_templates_inputs {
-            common_chat_templates_inputs inputs;
-            inputs.messages = {
-                std::invoke([&]() -> common_chat_msg {
-                    common_chat_msg msg;
-                    msg.role = "system";
-                    msg.content = "force json schema.\n";
-                    return msg;
-                }),
-                message_user,
-            };
-            inputs.tools = {special_function_tool};
-            return inputs;
-        });
-
-        {
-            auto params = common_chat_templates_apply(tmpls.get(), inputs_no_tools);
-            assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY, params.format);
-            assert_equals(false, params.grammar_lazy);
-            assert_equals(std::string(R"(<|im_start|>user
-Hey there!<|im_end|>
-<|im_start|>assistant
-)"), params.prompt);
-        }
-
-        {
-            auto params = common_chat_templates_apply(tmpls.get(), inputs_tools);
-            assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY, params.format);
-            assert_equals(false, params.grammar_lazy);
-            assert_equals(std::string(R"(<|im_start|>system
-List of tools: <|tool_list_start|>[{"type": "function", "function": {"name": "special_function", "description": "I'm special", "parameters": {"type": "object", "properties": {"arg1": {"type": "integer", "description": "The arg."}}, "required": ["arg1"]}}}]<|tool_list_end|><|im_end|>
-<|im_start|>user
-Hey there!<|im_end|>
-<|im_start|>assistant
-)"), params.prompt);
-            assert_equals(true, params.grammar.empty());
-        }
-
-        {
-            auto params = common_chat_templates_apply(tmpls.get(), inputs_tools_forced_json_schema);
-            assert_equals(COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS, params.format);
-            assert_equals(true, params.grammar_lazy);
-            assert_equals(std::string(R"(<|im_start|>system
-List of tools: <|tool_list_start|>[{"type": "function", "function": {"name": "special_function", "description": "I'm special", "parameters": {"type": "object", "properties": {"arg1": {"type": "integer", "description": "The arg."}}, "required": ["arg1"]}}}]<|tool_list_end|><|im_end|>
-<|im_start|>user
-Hey there!<|im_end|>
-<|im_start|>assistant
-)"), params.prompt);
-            assert_equals(false, params.grammar.empty());
-        }
-
-        // Test parsing regular content
-        assert_msg_equals(message_assist,
-            test_chat_parse(
-                "Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS}));
-
-        // Test single tool call with JSON format
-        common_chat_msg msg_single_tool_call;
-        msg_single_tool_call.role = "assistant";
-        msg_single_tool_call.tool_calls.push_back({"special_function", "{\"arg1\":1}", ""});
-        assert_msg_equals(
-            msg_single_tool_call,
-            test_chat_parse(
-                "<|tool_call_start|>[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}]<|tool_call_end|>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS}));
-
-        // Test tool call with string argument
-        common_chat_msg msg_tool_call_string;
-        msg_tool_call_string.role = "assistant";
-        msg_tool_call_string.tool_calls.push_back({"get_weather", "{\"location\":\"Paris\"}", ""});
-        assert_msg_equals(
-            msg_tool_call_string,
-            test_chat_parse(
-                "<|tool_call_start|>[{\"name\": \"get_weather\", \"arguments\": {\"location\": \"Paris\"}}]<|tool_call_end|>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS}));
-
-        // Test tool call with multiple arguments
-        common_chat_msg msg_multi_args;
-        msg_multi_args.role = "assistant";
-        msg_multi_args.tool_calls.push_back({"calculate", "{\"x\":10,\"y\":20,\"operation\":\"add\"}", ""});
-        assert_msg_equals(
-            msg_multi_args,
-            test_chat_parse(
-                "<|tool_call_start|>[{\"name\": \"calculate\", \"arguments\": {\"x\": 10, \"y\": 20, \"operation\": \"add\"}}]<|tool_call_end|>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS}));
-
-        // Test multiple tool calls in single array
-        common_chat_msg msg_multiple_tools;
-        msg_multiple_tools.role = "assistant";
-        msg_multiple_tools.tool_calls.push_back({"get_weather", "{\"location\":\"Paris\"}", ""});
-        msg_multiple_tools.tool_calls.push_back({"get_time", "{\"timezone\":\"UTC\"}", ""});
-        assert_msg_equals(
-            msg_multiple_tools,
-            test_chat_parse(
-                "<|tool_call_start|>[{\"name\": \"get_weather\", \"arguments\": {\"location\": \"Paris\"}}, {\"name\": \"get_time\", \"arguments\": {\"timezone\": \"UTC\"}}]<|tool_call_end|>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS}));
-
-        // Test tool call with content before
-        common_chat_msg msg_content_before_tool;
-        msg_content_before_tool.role = "assistant";
-        msg_content_before_tool.content = "Let me check the weather for you.";
-        msg_content_before_tool.tool_calls.push_back({"get_weather", "{\"location\":\"Paris\"}", ""});
-        assert_msg_equals(
-            msg_content_before_tool,
-            test_chat_parse(
-                "Let me check the weather for you.<|tool_call_start|>[{\"name\": \"get_weather\", \"arguments\": {\"location\": \"Paris\"}}]<|tool_call_end|>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS}));
-
-        // Test tool call with content after
-        common_chat_msg msg_content_after_tool;
-        msg_content_after_tool.role = "assistant";
-        msg_content_after_tool.content = "Here's the result.";
-        msg_content_after_tool.tool_calls.push_back({"get_weather", "{\"location\":\"Paris\"}", ""});
-        assert_msg_equals(
-            msg_content_after_tool,
-            test_chat_parse(
-                "<|tool_call_start|>[{\"name\": \"get_weather\", \"arguments\": {\"location\": \"Paris\"}}]<|tool_call_end|>Here's the result.",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS}));
-
-        // Test tool call with newlines (common in LLM output)
-        common_chat_msg msg_tool_call_newlines;
-        msg_tool_call_newlines.role = "assistant";
-        msg_tool_call_newlines.tool_calls.push_back({"get_current_time", "{\"location\":\"Paris\"}", ""});
-        assert_msg_equals(
-            msg_tool_call_newlines,
-            test_chat_parse(
-                "<|tool_call_start|>[{\n    \"name\": \"get_current_time\",\n    \"arguments\": {\n        \"location\": \"Paris\"\n    }\n}]<|tool_call_end|>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS}));
-
-        // Note: LFM2 uses JSON format for tool calls: [{"name": "...", "arguments": {...}}]
-        // Unlike other formats, LFM2 template does not render tool calls in conversation history,
-        // so we don't use test_templates() for tool call generation. Instead, the parsing tests
-        // above verify edge cases and format variations for the tool call output format.
+        // single-quote normalization and tool call with inside quotes
+        tst.test(
+               "<seed:tool_call>\n"
+               "<function=edit>\n"
+               "<parameter=filename>\n"
+               "foo.cpp\n"
+               "</parameter>\n"
+               "<parameter=oldString>\n"
+               "def foo(arg = \"14\"):\n"
+               "    return arg + \"bar\"\n"
+               "\n"
+               "</parameter>\n"
+               "<parameter=newString>\n"
+               "def foo(arg = \"15\"):\n"
+               "    pass\n"
+               "\n"
+               "</parameter>\n"
+               "</function>\n"
+               "</seed:tool_call>")
+            .tools({
+                edit_tool
+        })
+            .expect_tool_calls({
+                { "edit", "{\"filename\": \"foo.cpp\", "
+                    "\"oldString\": \"def foo(arg = \\\"14\\\"):\\n    return arg + \\\"bar\\\"\\n\", "
+                    "\"newString\": \"def foo(arg = \\\"15\\\"):\\n    pass\\n\"}", {}
+                }
+            })
+            .run();
     }
 
     {
-        auto tmpls = read_templates("models/templates/MiniMax-M2.jinja");
-        std::vector<std::string> end_tokens{ "[e~[" };
+        // Qwen3-Coder (tool calling with XML-style format)
+        auto tst = peg_tester("models/templates/Qwen3-Coder.jinja", detailed_debug);
 
-        assert_equals(COMMON_CHAT_FORMAT_MINIMAX_M2, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
-        assert_equals(COMMON_CHAT_FORMAT_MINIMAX_M2, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
+        tst.test("Hello, world!\nWhat's up?").expect(message_assist).run();
 
-        // Test parsing regular content
-        assert_msg_equals(message_assist,
-            test_chat_parse(
-                "Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_MINIMAX_M2}));
+        tst.test(
+               "<tool_call>\n"
+               "<function=special_function>\n"
+               "<parameter=arg1>\n"
+               "1\n"
+               "</parameter>\n"
+               "</function>\n"
+               "</tool_call>")
+            .tools({ special_function_tool })
+            .expect(message_assist_call)
+            .run();
 
-        // Test parsing content with thinking
-        assert_msg_equals(message_assist_thoughts,
-            test_chat_parse(
-                "<think>I'm\nthinking</think>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_MINIMAX_M2,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                }));
+        tst.test(
+               "<tool_call>\n"
+               "<function=special_function>\n"
+               "<parameter=arg1>\n"
+               "1\n"
+               "</parameter>\n"
+               "</function>\n"
+               "</tool_call>\n"
+               "<tool_call>\n"
+               "<function=special_function_with_opt>\n"
+               "<parameter=arg1>\n"
+               "1\n"
+               "</parameter>\n"
+               "<parameter=arg2>\n"
+               "2\n"
+               "</parameter>\n"
+               "</function>\n"
+               "</tool_call>")
+            .parallel_tool_calls(true)
+            .tools({
+                special_function_tool, special_function_tool_with_optional_param
+        })
+            .expect_tool_calls({
+                { "special_function", R"({"arg1": 1})", {} },
+                { "special_function_with_opt", R"({"arg1": 1, "arg2": 2})", {} },
+            })
+            .run();
 
-        // Test parsing tool calls
-        assert_msg_equals(message_assist_call,
-            test_chat_parse(
-                "<minimax:tool_call><invoke name=\"special_function\"><parameter name=\"arg1\">1</parameter></invoke></minimax:tool_call>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_MINIMAX_M2}));
+        // Test with code content (multiline)
+        tst.test(
+               "<tool_call>\n"
+               "<function=python>\n"
+               "<parameter=code>\n"
+               "def hello():\n"
+               "    print(\"Hello, world!\")\n"
+               "\n"
+               "hello()\n"
+               "</parameter>\n"
+               "</function>\n"
+               "</tool_call>")
+            .tools({
+                python_tool
+        })
+            .expect_tool_calls({
+                { "python", "{\"code\": \"def hello():\\n    print(\\\"Hello, world!\\\")\\n\\nhello()\"}", {} },
+            })
+            .run();
 
-        // Test parsing tool calls with thinking
-        assert_msg_equals(message_assist_call_thoughts,
-            test_chat_parse(
-                "<think>I'm\nthinking</think><minimax:tool_call><invoke name=\"special_function\"><parameter name=\"arg1\">1</parameter></invoke></minimax:tool_call>",
-                /* is_partial= */ false,
-                {
-                    /*  .format = */ COMMON_CHAT_FORMAT_MINIMAX_M2,
-                    /*  .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
-                }));
+        // Test with HTML tag content
+        tst.test(
+               "<tool_call>\n"
+               "<function=html>\n"
+               "<parameter=markup>\n"
+               "<html>\n"
+               " <head>\n"
+               "  <title>Hello!</title>\n"
+               " </head>\n"
+               "</html>\n"
+               "</parameter>\n"
+               "</function>\n"
+               "</tool_call>")
+            .tools({
+                html_tool
+        })
+            .expect_tool_calls({
+                { "html", "{\"markup\": \"<html>\\n <head>\\n  <title>Hello!</title>\\n </head>\\n</html>\"}", {} },
+            })
+            .run();
 
-        // Test tool calls with extra content
-        assert_msg_equals(message_assist_call_content,
-            test_chat_parse(
-                "<minimax:tool_call><invoke name=\"special_function\"><parameter name=\"arg1\">1</parameter></invoke></minimax:tool_call>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_MINIMAX_M2}
-            ));
-
-        // Test tool calls with extra content AND thinking
-        assert_msg_equals(message_assist_call_thoughts_content,
-            test_chat_parse(
-                "<think>I'm\nthinking</think><minimax:tool_call><invoke name=\"special_function\"><parameter name=\"arg1\">1</parameter></invoke></minimax:tool_call>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {
-                    /*  .format = */ COMMON_CHAT_FORMAT_MINIMAX_M2,
-                    /*  .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
-                }));
-
-        // Test streaming
-        test_parser_with_streaming(message_assist_call_thoughts_content,
-            "<think>I'm\nthinking\n</think>Hello, world!\nWhat's up?\n<minimax:tool_call><invoke name=\"special_function\"><parameter name=\"arg1\">1</parameter></invoke></minimax:tool_call>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
-                    /*  .format = */ COMMON_CHAT_FORMAT_MINIMAX_M2,
-                    /*  .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
-            }); });
-        test_parser_with_streaming(message_assist_call_thoughts_unparsed,
-            "<think>I'm\nthinking</think>\n\n<minimax:tool_call><invoke name=\"special_function\"><parameter name=\"arg1\">1</parameter></invoke></minimax:tool_call>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
-                    /*  .format = */ COMMON_CHAT_FORMAT_MINIMAX_M2,
-                    /*  .reasoning_format = */ COMMON_REASONING_FORMAT_NONE
-            }); });
-        test_parser_with_streaming(message_assist_call_thoughts_content,
-            "<think>I'm\nthinking\n</think>\n\nHello, world!\nWhat's up?\n\n<minimax:tool_call>\n<invoke name=\"special_function\">\n<parameter name=\"arg1\">1</parameter>\n</invoke>\n</minimax:tool_call>\n",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
-                    /*  .format = */ COMMON_CHAT_FORMAT_MINIMAX_M2,
-                    /*  .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
-            }); });
-        test_parser_with_streaming(message_assist_call_withopt,
-            "<minimax:tool_call>\n<invoke name=\"special_function_with_opt\">\n<parameter name=\"arg1\">1</parameter>\n<parameter name=\"arg2\">2</parameter>\n</invoke>\n</minimax:tool_call>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
-                    /*  .format = */ COMMON_CHAT_FORMAT_MINIMAX_M2,
-                    /*  .reasoning_format = */ COMMON_REASONING_FORMAT_NONE
-            }); });
-
-        // Test template generation for regular content
-        test_templates(tmpls.get(), end_tokens, message_assist, tools,
-                      "Hello, world!\nWhat's up?",
-                      /* expect_grammar_triggered= */ false);
-
-        // Test template generation for tool calls
-        test_templates(tmpls.get(), end_tokens, message_assist_call, tools,
-                      "<minimax:tool_call>\n<invoke name=\"special_function\">\n<parameter name=\"arg1\">1</parameter>\n</invoke>\n</minimax:tool_call>",
-                      /* expect_grammar_triggered= */ true,
-                      /* test_grammar_if_triggered= */ true,
-                      /* common_reasoning_format= */ COMMON_REASONING_FORMAT_NONE,
-                      /* ignore_whitespace_differences= */ true
-        );
-
-        // Test template generation for tools with optional parameters
-        test_templates(tmpls.get(), end_tokens, message_assist_call_noopt, tools,
-                      "<minimax:tool_call>\n<invoke name=\"special_function_with_opt\">\n<parameter name=\"arg1\">1</parameter>\n</invoke>\n</minimax:tool_call>",
-                      /* expect_grammar_triggered= */ true,
-                      /* test_grammar_if_triggered= */ true,
-                      /* common_reasoning_format= */ COMMON_REASONING_FORMAT_NONE,
-                      /* ignore_whitespace_differences= */ true
-        );
-        test_templates(tmpls.get(), end_tokens, message_assist_call_withopt, tools,
-                      "<minimax:tool_call>\n<invoke name=\"special_function_with_opt\">\n<parameter name=\"arg1\">1</parameter>\n<parameter name=\"arg2\">2</parameter>\n</invoke>\n</minimax:tool_call>",
-                      /* expect_grammar_triggered= */ true,
-                      /* test_grammar_if_triggered= */ true,
-                      /* common_reasoning_format= */ COMMON_REASONING_FORMAT_NONE,
-                      /* ignore_whitespace_differences= */ true
-        );
+        // Test with TODO list (array of objects)
+        tst.test(
+               "<tool_call>\n"
+               "<function=todo_list>\n"
+               "<parameter=todos>\n"
+               "[{\"item\": \"Check stuff\", \"selected\": false}, {\"item\": \"Prepare stuff\", \"selected\": true}]\n"
+               "</parameter>\n"
+               "</function>\n"
+               "</tool_call>")
+            .tools({
+                todo_list
+        })
+            .expect_tool_calls({
+                { "todo_list", "{\"todos\": [{\"item\": \"Check stuff\", \"selected\": false}, {\"item\": \"Prepare stuff\", \"selected\": true}]}", {} },
+            })
+            .run();
+    }
+    {
+        auto tst = peg_tester("models/templates/deepseek-ai-DeepSeek-V3.1.jinja", detailed_debug);
+        tst.test(
+               "<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>get_time<｜tool▁sep｜>{\"city\": "
+               "\"XYZCITY\"}<｜tool▁call▁end｜><｜tool▁calls▁end｜>")
+            .tools({ get_time_tool })
+            .expect(message_with_tool_calls("get_time", "{\"city\":\"XYZCITY\"}"))
+            .run();
     }
 
     {
-        auto tmpls = read_templates("models/templates/GLM-4.6.jinja");
-        std::vector<std::string>   end_tokens{ "<|assistant|>", "<|observation|>" };
-
-        assert_equals(COMMON_CHAT_FORMAT_GLM_4_5, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
-        assert_equals(COMMON_CHAT_FORMAT_GLM_4_5, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
-
-        // Test parsing regular content
-        assert_msg_equals(message_assist,
-            test_chat_parse(
-                "Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_GLM_4_5}));
-
-        // Test parsing content with thinking
-        assert_msg_equals(message_assist_thoughts,
-            test_chat_parse(
-                "\n<think>I'm\nthinking</think>\nHello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_GLM_4_5,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                }), true);
-
-        // Test parsing tool calls
-        assert_msg_equals(message_assist_call,
-            test_chat_parse(
-                "\n<tool_call>special_function\n<arg_key>arg1</arg_key>\n<arg_value>1</arg_value>\n</tool_call>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_GLM_4_5}), true);
-
-        // Test parsing tool calls with thinking
-        assert_msg_equals(message_assist_call_thoughts,
-            test_chat_parse(
-                "\n<think>I'm\nthinking</think>\n<tool_call>special_function\n<arg_key>arg1</arg_key>\n<arg_value>1</arg_value>\n</tool_call>",
-                /* is_partial= */ false,
-                {
-                    /*  .format = */ COMMON_CHAT_FORMAT_GLM_4_5,
-                    /*  .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
-                }), true);
-
-        // Test tool calls with extra content
-        assert_msg_equals(message_assist_call_content,
-            test_chat_parse(
-                "\n<tool_call>special_function\n<arg_key>arg1</arg_key>\n<arg_value>1</arg_value>\n</tool_call>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_GLM_4_5}
-            ), true);
-
-        // Test tool calls with extra content AND thinking
-        assert_msg_equals(message_assist_call_thoughts_content,
-            test_chat_parse(
-                "\n<think>I'm\nthinking</think>Hello, world!\nWhat's up?\n<tool_call>special_function\n<arg_key>arg1</arg_key>\n<arg_value>1</arg_value>\n</tool_call>",
-                /* is_partial= */ false,
-                {
-                    /*  .format = */ COMMON_CHAT_FORMAT_GLM_4_5,
-                    /*  .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
-                }), true);
-
-        // Test streaming
-        test_parser_with_streaming(message_assist_call_thoughts_content,
-            "\n<think>I'm\nthinking</think>Hello, world!\nWhat's up?\n<tool_call>special_function\n<arg_key>arg1</arg_key>\n<arg_value>1</arg_value>\n</tool_call>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
-                    /*  .format = */ COMMON_CHAT_FORMAT_GLM_4_5,
-                    /*  .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
-            }); });
-        test_parser_with_streaming(message_assist_call_thoughts_unparsed,
-            "\n<think>I'm\nthinking</think>\n\n<tool_call>special_function\n<arg_key>arg1</arg_key>\n<arg_value>1</arg_value>\n</tool_call>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
-                    /*  .format = */ COMMON_CHAT_FORMAT_GLM_4_5,
-                    /*  .reasoning_format = */ COMMON_REASONING_FORMAT_NONE
-            }); });
-        test_parser_with_streaming(message_assist_call_withopt,
-            "\n<think></think>\n<tool_call>special_function_with_opt\n<arg_key>arg1</arg_key>\n<arg_value>1</arg_value>\n<arg_key>arg2</arg_key>\n<arg_value>2</arg_value>\n</tool_call>\n",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
-                    /*  .format = */ COMMON_CHAT_FORMAT_GLM_4_5,
-                    /*  .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
-            }); });
-            test_parser_with_streaming(
-                simple_assist_msg("", "", "complex_function", "{\"name\":\"John Doe\",\"age\":30,\"active\":true,\"score\":95.5}"),
-                "<tool_call>complex_function\n"
-                "<arg_key>name</arg_key>\n"
-                "<arg_value>John Doe</arg_value>\n"
-                "<arg_key>age</arg_key>\n"
-                "<arg_value>30</arg_value>\n"
-                "<arg_key>active</arg_key>\n"
-                "<arg_value>true</arg_value>\n"
-                "<arg_key>score</arg_key>\n"
-                "<arg_value>95.5</arg_value>\n"
-                "</tool_call>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_GLM_4_5}); });
-        test_parser_with_streaming(
-                simple_assist_msg("", "", "web_search", "{\"query\":\"\\\"From Zero\\\" Linkin Park album tracklist complete songs\",\"limit\":3,\"type\":\"text\"}"),
-                "<tool_call>web_search\n"
-                "<arg_key>query</arg_key>\n"
-                "<arg_value>\"From Zero\" Linkin Park album tracklist complete songs</arg_value>\n"
-                "<arg_key>limit</arg_key>\n"
-                "<arg_value>3</arg_value>\n"
-                "<arg_key>type</arg_key>\n"
-                "<arg_value>text</arg_value>\n"
-                "</tool_call>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_GLM_4_5}); });
-
-        // Test interleaved thinking
-        test_parser_with_streaming(simple_assist_msg("Hello, world!\n\nWhat's up?", "I'm\nthinkingThinking2", "special_function", "{\"arg1\": 1}"),
-            "\n<think>I'm\nthinking</think>Hello, world!\n<think>Thinking2</think>What's up?\n<tool_call>special_function\n<arg_key>arg1</arg_key>\n<arg_value>1</arg_value>\n</tool_call>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
-                    /*  .format = */ COMMON_CHAT_FORMAT_GLM_4_5,
-                    /*  .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
-            }); });
-        test_parser_with_streaming(simple_assist_msg("\n<think>I'm\nthinking</think>Hello, world!\n<think>Thinking2</think>What's up?", "", "special_function", "{\"arg1\": 1}"),
-            "\n<think>I'm\nthinking</think>Hello, world!\n<think>Thinking2</think>What's up?\n<tool_call>special_function\n<arg_key>arg1</arg_key>\n<arg_value>1</arg_value>\n</tool_call>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
-                    /*  .format = */ COMMON_CHAT_FORMAT_GLM_4_5,
-                    /*  .reasoning_format = */ COMMON_REASONING_FORMAT_NONE
-            }); });
-
-        // Test template generation for regular content
-        test_templates(tmpls.get(), end_tokens, message_assist, tools,
-                      "\n<think></think>\nHello, world!\nWhat's up?",
-                      /* expect_grammar_triggered= */ false);
-
-        // Test template generation for tool calls
-        test_templates(tmpls.get(), end_tokens, message_assist_call, tools,
-                      "\n<think></think>\n<tool_call>special_function\n<arg_key>arg1</arg_key>\n<arg_value>1</arg_value>\n</tool_call>\n",
-                      /* expect_grammar_triggered= */ true,
-                      /* test_grammar_if_triggered= */ false,
-                      /* common_reasoning_format= */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                      /* ignore_whitespace_differences= */ true
-        );
-
-        // Test template generation for tools with optional parameters
-        test_templates(tmpls.get(), end_tokens, message_assist_call_noopt, tools,
-                      "\n<think></think>\n<tool_call>special_function_with_opt\n<arg_key>arg1</arg_key>\n<arg_value>1</arg_value>\n</tool_call>\n",
-                      /* expect_grammar_triggered= */ true,
-                      /* test_grammar_if_triggered= */ false,
-                      /* common_reasoning_format= */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                      /* ignore_whitespace_differences= */ true
-        );
-        test_templates(tmpls.get(), end_tokens, message_assist_call_withopt, tools,
-                      "\n<think></think>\n<tool_call>special_function_with_opt\n<arg_key>arg1</arg_key>\n<arg_value>1</arg_value>\n<arg_key>arg2</arg_key>\n<arg_value>2</arg_value>\n</tool_call>\n",
-                      /* expect_grammar_triggered= */ true,
-                      /* test_grammar_if_triggered= */ false,
-                      /* common_reasoning_format= */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                      /* ignore_whitespace_differences= */ true
-        );
+        auto tst = peg_tester("models/templates/deepseek-ai-DeepSeek-V3.1.jinja", detailed_debug);
+        tst.test(
+               "REASONING</think><｜tool▁calls▁begin｜><｜tool▁call▁begin｜>get_time<｜tool▁sep｜>{\"city\": "
+               "\"Tokyo\"}<｜tool▁call▁end｜><｜tool▁calls▁end｜>")
+            .enable_thinking(true)
+            .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+            .tools({ get_time_tool })
+            .expect(message_with_tool_calls_and_reasoning("get_time", "{\"city\":\"Tokyo\"}", "REASONING"))
+            .run();
     }
 
     {
-        auto tmpls = read_templates("models/templates/Kimi-K2-Thinking.jinja");
-        std::vector<std::string> end_tokens{ "<|im_end|>" };
+        auto tst = peg_tester("models/templates/deepseek-ai-DeepSeek-V3.1.jinja", detailed_debug);
+        tst.test(
+               "REASONING</think>CONTENT<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>get_time<｜tool▁sep｜>{\"city\": "
+               "\"Paris\"}<｜tool▁call▁end｜><｜tool▁call▁begin｜>get_weather<｜tool▁sep｜>{\"city\": "
+               "\"Paris\"}<｜tool▁call▁end｜><｜tool▁calls▁end｜>")
+            .tools({
+                get_time_tool, get_weather_tool
+        })
+            .enable_thinking(true)
+            .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+            .parallel_tool_calls(true)
+            .expect(message_with_reasoning_content_and_multiple_tool_calls(
+                "REASONING", "CONTENT",
+                { { "get_time", "{\"city\":\"Paris\"}" }, { "get_weather", "{\"city\":\"Paris\"}" } }))
+            .run();
+    }
 
-        assert_equals(COMMON_CHAT_FORMAT_KIMI_K2, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
-        assert_equals(COMMON_CHAT_FORMAT_KIMI_K2, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
+    {
+        auto tst = peg_tester("models/templates/deepseek-ai-DeepSeek-V3.1.jinja", detailed_debug);
+        tst.test("REASONING</think>\nCONTENT")
+            .enable_thinking(true)
+            .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+            .expect(simple_assist_msg("CONTENT", "REASONING\n"))
+            .run();
+    }
 
-        // Test parsing regular content
-        assert_msg_equals(message_assist,
-            test_chat_parse(
-                "Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_KIMI_K2}));
+    {
+        auto tst = peg_tester("models/templates/deepseek-ai-DeepSeek-V3.1.jinja", detailed_debug);
+        tst.test("CONTENT").expect(simple_assist_msg("CONTENT", "")).run();
+    }
 
-        // Test parsing content with thinking
-        assert_msg_equals(message_assist_thoughts,
-            test_chat_parse(
-                "<think>I'm\nthinking</think>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
+    // GLM-4.6 tests - format: <tool_call>function_name\n<arg_key>...</arg_key>\n<arg_value>...</arg_value>\n</tool_call>
+    {
+        auto tst = peg_tester("models/templates/GLM-4.6.jinja");
+        tst.test(
+               "<tool_call>special_function\n"
+               "<arg_key>arg1</arg_key>\n<arg_value>1</arg_value>\n"
+               "</tool_call>")
+            .tools({ special_function_tool })
+            .expect(message_assist_call)
+            .run();
+    }
+
+    // GLM-4.7-Flash tests - format: <tool_call>function_name<arg_key>...</arg_key><arg_value>...</arg_value></tool_call>
+    // Note: Template uses forced-open thinking mode (prompt ends with <think>)
+    {
+        auto tst = peg_tester("models/templates/GLM-4.7-Flash.jinja", detailed_debug);
+
+        // Pure content (no reasoning)
+        tst.test("Hello, world!\nWhat's up?")
+            .enable_thinking(false)
+            .expect(message_assist)
+            .run();
+
+        // Reasoning with content (forced-open mode - input starts after <think>)
+        tst.test("I'm\nthinking</think>Hello, world!\nWhat's up?")
+            .enable_thinking(true)
+            .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+            .expect(message_assist_thoughts)
+            .run();
+
+        // Tool call without reasoning
+        tst.test(
+               "<tool_call>special_function"
+               "<arg_key>arg1</arg_key><arg_value>1</arg_value>"
+               "</tool_call>")
+            .enable_thinking(false)
+            .tools({ special_function_tool })
+            .expect(message_assist_call)
+            .run();
+
+        // Tool call with reasoning (forced-open mode)
+        tst.test(
+               "I'm\nthinking</think>"
+               "<tool_call>special_function"
+               "<arg_key>arg1</arg_key><arg_value>1</arg_value>"
+               "</tool_call>")
+            .enable_thinking(true)
+            .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+            .tools({ special_function_tool })
+            .expect(message_assist_call_thoughts)
+            .run();
+
+        // String argument starting with '[' - should NOT be treated as JSON array
+        // This tests the fix for Godot scene files and similar content
+        tst.test(
+               "<tool_call>html"
+               "<arg_key>markup</arg_key><arg_value>[gd_scene load_steps=3 format=3]</arg_value>"
+               "</tool_call>")
+            .enable_thinking(false)
+            .tools({ html_tool })
+            .expect_tool_calls({
+                { "html", "{\"markup\": \"[gd_scene load_steps=3 format=3]\"}", {} },
+            })
+            .run();
+
+        // Multiple tool calls
+        // Note: Parallel tool calls streaming test skipped - the KEY_VALUE_TAGS format has
+        // partial parsing edge cases when function names share common prefixes (special_function vs special_function_with_opt)
+        // The grammar and full parsing work correctly, but incremental streaming detection needs more work.
+    }
+
+    // Kimi-K2-Thinking tests - FUNC_PREFIXED_INDEXED format
+    {
+        auto tst = peg_tester("models/templates/Kimi-K2-Thinking.jinja", detailed_debug);
+        tst.test(
+               "<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>"
+               "{\"arg1\": 1}<|tool_call_end|><|tool_calls_section_end|>")
+            .tools({ special_function_tool })
+            .expect(message_assist_call)
+            .run();
+    }
+
+    // Apertus-8B-Instruct tests - FUNC_NAME_AS_KEY format
+    // Format: <|tools_prefix|>[{"function_name": {...arguments...}}]<|tools_suffix|>
+    {
+        auto tst = peg_tester("models/templates/Apertus-8B-Instruct.jinja", detailed_debug);
+        tst.test("<|tools_prefix|>[{\"special_function\": {\"arg1\": 1}}]<|tools_suffix|>")
+            .tools({ special_function_tool })
+            .expect(message_assist_call)
+            .run();
+    }
+
+    // MiniMax-M2 tests - XML invoke format with parameter tags
+    // Format: <minimax:tool_call><invoke name="func"><parameter name="key">value</parameter></invoke></minimax:tool_call>
+    {
+        auto tst = peg_tester("models/templates/MiniMax-M2.jinja", detailed_debug);
+        tst.test(
+               "<minimax:tool_call>\n<invoke name=\"special_function\"><parameter "
+               "name=\"arg1\">1</parameter></invoke>\n</minimax:tool_call>")
+            .tools({ special_function_tool })
+            .expect(message_assist_call)
+            .run();
+    }
+
+    // NVIDIA-Nemotron-Nano-v2 tests - <TOOLCALL>...</TOOLCALL> format
+    // Format: <TOOLCALL>[{"name": "func", "arguments": {...}}]</TOOLCALL>
+    {
+        auto tst = peg_tester("models/templates/NVIDIA-Nemotron-Nano-v2.jinja", detailed_debug);
+        tst.test("<TOOLCALL>[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}]</TOOLCALL>")
+            .tools({ special_function_tool })
+            .expect(message_assist_call)
+            .run();
+    }
+
+    // CohereForAI-c4ai-command-r7b (uses START_RESPONSE/END_RESPONSE, START_THINKING/END_THINKING, START_ACTION/END_ACTION)
+    {
+        auto tst = peg_tester("models/templates/CohereForAI-c4ai-command-r7b-12-2024-tool_use.jinja", detailed_debug);
+        tst.test("<|START_RESPONSE|>Hello, world!\nWhat's up?<|END_RESPONSE|>").expect(message_assist).run();
+        tst.test(
+               "<|START_THINKING|>I'm\nthinking<|END_THINKING|>"
+               "<|START_ACTION|>[\n"
+               "    {\"tool_call_id\": \"0\", \"tool_name\": \"special_function\", \"parameters\": {\"arg1\": 1}}\n"
+               "]<|END_ACTION|>")
+            .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+            .tools({ special_function_tool })
+            .expect(message_assist_thoughts_call_idx)
+            .run();
+    }
+    // CohereForAI-c4ai-command-r-plus (uses markdown code block format)
+    {
+        auto tst = peg_tester("models/templates/CohereForAI-c4ai-command-r-plus-tool_use.jinja", detailed_debug);
+        tst.test("<|CHATBOT_TOKEN|>Hello, world!\nWhat's up?<|END_OF_TURN_TOKEN|>").expect(message_assist).run();
+        // Tool calls: Action: followed by JSON code block
+        tst.test(
+               "Action:\n"
+               "```json\n"
+               "[{\"tool_name\": \"special_function\", \"parameters\": {\"arg1\": 1}}]\n"
+               "```")
+            .tools({ special_function_tool })
+            .expect(message_assist_call)
+            .run();
+    }
+
+    // mistralai-Mistral-Nemo-Instruct-2407.jinja
+    {
+        auto tst = peg_tester("models/templates/mistralai-Mistral-Nemo-Instruct-2407.jinja", detailed_debug);
+        tst.test("Hello, world!\nWhat's up?").expect(message_assist).run();
+        tst.test("[TOOL_CALLS][{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}, \"id\": \"123456789\"}]")
+            .tools({ special_function_tool })
+            .expect(message_assist_call_id)
+            .run();
+    }
+    {
+        auto tst = peg_tester("models/templates/meetkai-functionary-medium-v3.1.jinja", detailed_debug);
+        tst.test("Hello, world!\nWhat's up?").expect(message_assist).run();
+        tst.test("<function=special_function>{\"arg1\": 1}</function>")
+            .tools({ special_function_tool })
+            .expect(message_assist_call)
+            .run();
+    }
+    // Functionary v3.2 - recipient-based format: >>>recipient\n{content}
+    {
+        auto tst = peg_tester("models/templates/meetkai-functionary-medium-v3.2.jinja", detailed_debug);
+        tst.test(">>>all\nHello, world!\nWhat's up?").expect(message_assist).run();
+        tst.test(">>>special_function\n{\"arg1\": 1}")
+            .tools({ special_function_tool })
+            .expect(message_assist_call)
+            .run();
+    }
+
+    // FireFunction
+    {
+        auto tst = peg_tester("models/templates/fireworks-ai-llama-3-firefunction-v2.jinja", detailed_debug);
+        tst.test("Hello, world!\nWhat's up?").expect(message_assist).run();
+        tst.test(" functools[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}]")
+            .tools({ special_function_tool })
+            .expect(message_assist_call)
+            .run();
+    }
+
+    // DeepSeek R1 Distill Llama 8B - reasoning tests only (forced open thinking)
+    // Note: Template uses forced-open mode (prompt ends with <think>), so input shouldn't include opening tag
+    {
+        auto tst = peg_tester("models/templates/deepseek-ai-DeepSeek-R1-Distill-Llama-8B.jinja", detailed_debug);
+        tst.test("Hello, world!\nWhat's up?")
+            .enable_thinking(true)  // Forced open
+            .expect(message_assist)
+            .run();
+        tst.test("I'm\nthinking</think>Hello, world!\nWhat's up?")
+            .enable_thinking(true)
+            .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+            .expect(message_assist_thoughts)
+            .run();
+    }
+    // llama-cpp DeepSeek R1 template (always forced-open thinking)
+    {
+        auto tst = peg_tester("models/templates/llama-cpp-deepseek-r1.jinja", detailed_debug);
+        tst.test("Hello, world!\nWhat's up?").expect(message_assist).run();
+        tst.test("I'm\nthinking</think>Hello, world!\nWhat's up?")
+            .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+            .expect(message_assist_thoughts)
+            .run();
+        tst.test(
+               "<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>function<｜tool▁sep｜>special_function\n"
+               "```json\n{\"arg1\": 1}\n```<｜tool▁call▁end｜><｜tool▁calls▁end｜>")
+            .tools({ special_function_tool })
+            .parallel_tool_calls(true)
+            .expect(message_assist_call)
+            .run();
+    }
+    // DeepSeek R1 Distill Qwen 32B - reasoning tests only (forced open thinking)
+    // Note: Template uses forced-open mode (prompt ends with <think>), so input shouldn't include opening tag
+    {
+        auto tst = peg_tester("models/templates/deepseek-ai-DeepSeek-R1-Distill-Qwen-32B.jinja", detailed_debug);
+        tst.test("Hello, world!\nWhat's up?").enable_thinking(true).expect(message_assist).run();
+        tst.test("I'm\nthinking</think>Hello, world!\nWhat's up?")
+            .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+            .expect(message_assist_thoughts)
+            .run();
+        tst.test(
+               "<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>function<｜tool▁sep｜>special_function\n"
+               "```json\n{\"arg1\": 1}\n```<｜tool▁call▁end｜><｜tool▁calls▁end｜>")
+            .tools({ special_function_tool })
+            .expect(message_assist_call)
+            .run();
+    }
+    // Kimi-K2 (moonshotai) - FUNC_PREFIXED_INDEXED format
+    {
+        auto tst = peg_tester("models/templates/moonshotai-Kimi-K2.jinja", detailed_debug);
+        tst.test("Hello, world!\nWhat's up?").expect(message_assist).run();
+        tst.test(
+               "<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>"
+               "{\"arg1\": 1}<|tool_call_end|><|tool_calls_section_end|>")
+            .tools({ special_function_tool })
+            .expect(message_assist_call)
+            .run();
+    }
+    // Kimi-K2-Instruct - FUNC_PREFIXED_INDEXED format
+    {
+        auto tst = peg_tester("models/templates/Kimi-K2-Instruct.jinja", detailed_debug);
+        tst.test("Hello, world!\nWhat's up?").expect(message_assist).run();
+        tst.test(
+               "<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>"
+               "{\"arg1\": 1}<|tool_call_end|><|tool_calls_section_end|>")
+            .tools({ special_function_tool })
+            .expect(message_assist_call)
+            .run();
+    }
+
+    // MiMo-VL / Hermes 3 / Qwen 2.5 (Common <tool_call> JSON format)
+    for (const auto & path :
+         { "models/templates/MiMo-VL.jinja", "models/templates/NousResearch-Hermes-3-Llama-3.1-8B-tool_use.jinja",
+           "models/templates/Qwen-Qwen2.5-7B-Instruct.jinja" }) {
+        auto tst = peg_tester(path, detailed_debug);
+        tst.test("Hello, world!\nWhat's up?").expect(message_assist).run();
+        tst.test("<tool_call>\n{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n</tool_call>")
+            .tools({ special_function_tool })
+            .expect(message_assist_call)
+            .run();
+    }
+
+    // Apriel 1.5
+    {
+        auto tst = peg_tester("models/templates/unsloth-Apriel-1.5.jinja", detailed_debug);
+        tst.test("Hello, world!\nWhat's up?").expect(message_assist).run();
+        tst.test("<tool_calls>[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}]</tool_calls>")
+            .tools({ special_function_tool })
+            .expect(message_assist_call)
+            .run();
+    }
+
+    // Apriel 1.6 Thinker (reasoning-only support)
+    {
+        auto tst = peg_tester("models/templates/Apriel-1.6-15b-Thinker-fixed.jinja", detailed_debug);
+        tst.test("Hello, world!\nWhat's up?").expect(message_assist).run();
+
+        // Implicit reasoning start (forced open)
+        tst.test("I'm\nthinking\n[BEGIN FINAL RESPONSE]\nHello, world!\nWhat's up?")
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .expect(message_assist_thoughts)
+            .run();
+
+        // Reasoning + Tool calls
+        tst.test(
+               "I'm\nthinking\n[BEGIN FINAL RESPONSE]\n<tool_calls>[{\"name\": \"special_function\", \"arguments\": "
+               "{\"arg1\": 1}}]</tool_calls>")
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .tools({ special_function_tool })
+            .expect(message_assist_call_thoughts)
+            .run();
+    }
+
+    // Mistral Small 3.2 - FUNC_BRACKET_TAG format: [TOOL_CALLS]func_name[CALL_ID]id[ARGS]{...}
+    {
+        auto tst = peg_tester("models/templates/Mistral-Small-3.2-24B-Instruct-2506.jinja", detailed_debug);
+        tst.test("Hello, world!\nWhat's up?").expect(message_assist).run();
+        tst.test("[TOOL_CALLS]special_function[CALL_ID]123456789[ARGS]{\"arg1\": 1}")
+            .tools({ special_function_tool })
+            .expect(message_assist_call_id)
+            .run();
+    }
+    // Devstral - FUNC_BRACKET_TAG format (no ID marker): [TOOL_CALLS]func_name[ARGS]{...}
+    {
+        auto tst = peg_tester("models/templates/unsloth-mistral-Devstral-Small-2507.jinja", detailed_debug);
+        tst.test("Hello, world!\nWhat's up?").expect(message_assist).run();
+        tst.test("[TOOL_CALLS]special_function[ARGS]{\"arg1\": 1}")
+            .tools({ special_function_tool })
+            .expect(message_assist_call)
+            .run();
+        tst.test("Hello, world!\nWhat's up?[TOOL_CALLS]special_function[ARGS]{\"arg1\": 1}")
+            .tools({ special_function_tool })
+            .expect(message_assist_call_content)
+            .run();
+    }
+
+    {
+        // Llama 3.1
+        auto tst = peg_tester("models/templates/meta-llama-Llama-3.1-8B-Instruct.jinja", detailed_debug);
+        tst.test("Hello, world!\nWhat's up?").tools({ special_function_tool }).expect(message_assist).run();
+    }
+
+    {
+        // Llama 3.2
+        auto tst = peg_tester("models/templates/meta-llama-Llama-3.2-3B-Instruct.jinja", detailed_debug);
+        tst.test("Hello, world!\nWhat's up?").tools({ special_function_tool }).expect(message_assist).run();
+    }
+
+    {
+        // Llama 3.3
+        auto tst = peg_tester("models/templates/meta-llama-Llama-3.3-70B-Instruct.jinja", detailed_debug);
+        tst.test("Hello, world!\nWhat's up?").tools({ python_tool }).expect(message_assist).run();
+    }
+
+    // GPT-OSS format tests
+    {
+        auto tst = peg_tester("models/templates/openai-gpt-oss-120b.jinja", detailed_debug);
+
+        // Basic content only - final channel
+        tst.test("<|channel|>final<|message|>Hello, world!\nWhat's up?").expect(message_assist).run();
+
+        // Basic content only - commentary channel
+        tst.test("<|channel|>commentary<|message|>Hello, world!\nWhat's up?").expect(message_assist).run();
+
+        // Analysis channel (reasoning) with final channel (content)
+        tst.test(
+               "<|channel|>analysis<|message|>I'm\nthinking<|end|>\n<|channel|>final<|message|>Hello, world!\nWhat's "
+               "up?")
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .expect(message_assist_thoughts)
+            .run();
+
+        // Analysis channel only (partial) - still works when reasoning format is set
+        tst.test("<|channel|>analysis<|message|>I'm\nthinking")
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .is_partial(true)
+            .expect_reasoning("I'm\nthinking")
+            .run();
+
+        // Reasoning format none - reasoning stays in content
+        tst.test(
+               "<|channel|>analysis<|message|>I'm\nthinking<|end|>\n<|channel|>final<|message|>Hello, world!\nWhat's "
+               "up?")
+            .reasoning_format(COMMON_REASONING_FORMAT_NONE)
+            .expect_content(
+                "<|channel|>analysis<|message|>I'm\nthinking<|end|>Hello, world!\nWhat's up?")
+            .run();
+
+        // Tool call with recipient in role header: " to=functions.NAME<|channel|>analysis<|message|>JSON"
+        tst.test(" to=functions.special_function<|channel|>analysis<|message|>{\"arg1\": 1}")
+            .tools({ special_function_tool })
+            .expect(message_assist_call)
+            .run();
+
+        // Tool call with recipient in channel header: "<|channel|>analysis to=functions.NAME<|message|>JSON"
+        tst.test("<|channel|>analysis to=functions.special_function<|message|>{\"arg1\": 1}")
+            .tools({ special_function_tool })
+            .expect(message_assist_call)
+            .run();
+
+        // Tool call with constraint: " to=functions.NAME<|channel|>analysis <|constrain|>json<|message|>JSON"
+        tst.test(" to=functions.special_function<|channel|>analysis <|constrain|>json<|message|>{\"arg1\": 1}")
+            .tools({ special_function_tool })
+            .expect(message_assist_call)
+            .run();
+
+        // Tool call in commentary channel (channel header variant)
+        tst.test("<|channel|>commentary to=functions.special_function<|message|>{\"arg1\": 1}")
+            .tools({ special_function_tool })
+            .expect(message_assist_call)
+            .run();
+
+        // Tool call with reasoning + content (analysis first, then tool call)
+        tst.test(
+               "<|channel|>analysis<|message|>I'm\nthinking<|end|>\n"
+               "<|start|>assistant to=functions.special_function<|channel|>analysis<|message|>{\"arg1\": 1}")
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .tools({ special_function_tool })
+            .expect(message_assist_call_thoughts)
+            .run();
+
+        // Tool calling with extra channel before
+        tst.test(
+                "<|channel|>analysis<|message|>I'm\nthinking<|end|><|start|>assistant<|channel|>commentary"
+                " to=functions.special_function <|message|>{\"arg1\": 1}")
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .tools({ special_function_tool })
+            .expect(message_assist_call_thoughts)
+            .run();
+
+        // Reasoning after final channel
+        // Tool calling after final channel
+        tst.test(
+            "<|channel|>final<|message|><|end|>"
+            "<|start|>assistant<|channel|>analysis<|message|>Thinking about edit..."
+        )
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .expect_reasoning("Thinking about edit...")
+            .expect_content("")
+            .run();
+
+        // Tool calling after final channel
+        tst.test(
+            "<|channel|>final<|message|><|end|>"
+            "<|start|>assistant<|channel|>analysis<|message|>Thinking about edit...<|end|>"
+            "<|start|>assistant<|channel|>commentary to=functions.edit <|constrain|>json"
+            "<|message|>{\"filePath\": \"file.js\", \"oldString\": \"if (part < railCount - 1) {\", \"newString\": \"if (part < 4) {\", \"replaceAll\": false}"
+            )
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .tools({
                 {
-                    /* .format = */ COMMON_CHAT_FORMAT_KIMI_K2,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                }));
-
-        // Test parsing tool calls
-        assert_msg_equals(message_assist_call,
-            test_chat_parse(
-                "<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>{\"arg1\": 1}<|tool_call_end|><|tool_calls_section_end|>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_KIMI_K2}));
-
-        // Test parsing tool calls with thinking
-        assert_msg_equals(message_assist_call_thoughts,
-            test_chat_parse(
-                "<think>I'm\nthinking</think><|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>{\"arg1\": 1}<|tool_call_end|><|tool_calls_section_end|>",
-                /* is_partial= */ false,
-                {
-                    /*  .format = */ COMMON_CHAT_FORMAT_KIMI_K2,
-                    /*  .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
-                }));
-
-        // Test tool calls with extra content
-        assert_msg_equals(message_assist_call_content,
-            test_chat_parse(
-                "<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>{\"arg1\": 1}<|tool_call_end|><|tool_calls_section_end|>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_KIMI_K2}
-            ));
-
-        // Test tool calls with extra content AND thinking
-        assert_msg_equals(message_assist_call_thoughts_content,
-            test_chat_parse(
-                "<think>I'm\nthinking</think><|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>{\"arg1\": 1}<|tool_call_end|><|tool_calls_section_end|>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {
-                    /*  .format = */ COMMON_CHAT_FORMAT_KIMI_K2,
-                    /*  .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
-                }));
-
-        // Test streaming
-        test_parser_with_streaming(message_assist_call_thoughts_content,
-            "<think>I'm\nthinking\n</think>Hello, world!\nWhat's up?\n<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>{\"arg1\": 1}<|tool_call_end|><|tool_calls_section_end|>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
-                    /*  .format = */ COMMON_CHAT_FORMAT_KIMI_K2,
-                    /*  .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
-            }); });
-        test_parser_with_streaming(message_assist_call_thoughts_unparsed,
-            "<think>I'm\nthinking</think>\n\n<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>{\"arg1\": 1}<|tool_call_end|><|tool_calls_section_end|>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
-                    /*  .format = */ COMMON_CHAT_FORMAT_KIMI_K2,
-                    /*  .reasoning_format = */ COMMON_REASONING_FORMAT_NONE
-            }); });
-        test_parser_with_streaming(message_assist_call_thoughts_content,
-            "<think>I'm\nthinking\n</think>\n\nHello, world!\nWhat's up?\n\n<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>{\"arg1\": 1}<|tool_call_end|><|tool_calls_section_end|>\n",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
-                    /*  .format = */ COMMON_CHAT_FORMAT_KIMI_K2,
-                    /*  .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
-            }); });
-        test_parser_with_streaming(message_assist_call_withopt,
-            "<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function_with_opt:0<|tool_call_argument_begin|>{\"arg1\": 1, \"arg2\": 2}<|tool_call_end|><|tool_calls_section_end|>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
-                    /*  .format = */ COMMON_CHAT_FORMAT_KIMI_K2,
-                    /*  .reasoning_format = */ COMMON_REASONING_FORMAT_NONE
-            }); });
-        test_parser_with_streaming(simple_assist_msg("Hello, world!\nWhat's up?", "I'm\nthinking", "special_function", "{\"arg1\": \"123456\"}"),
-            "<think>I'm\nthinking</think>Hello, world!\nWhat's up?\n<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>{\"arg1\": \"123456\"}<|tool_call_end|><|tool_calls_section_end|>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
-                    /*  .format = */ COMMON_CHAT_FORMAT_KIMI_K2,
-                    /*  .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
-            }); });
-        test_parser_with_streaming(simple_assist_msg("Hello, world!\nWhat's up?", "I'm\nthinking", "special_function", "{\"arg1\": [1, 2, \"345\", 6]}"),
-            "<think>I'm\nthinking</think>Hello, world!\nWhat's up?\n<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>{\"arg1\": [1, 2, \"345\", 6]}<|tool_call_end|><|tool_calls_section_end|>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
-                    /*  .format = */ COMMON_CHAT_FORMAT_KIMI_K2,
-                    /*  .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
-            }); });
-        test_parser_with_streaming(simple_assist_msg("Hello, world!\nWhat's up?", "I'm\nthinking", "special_function", "{\"arg1\": {\"12\": 34, \"5\": [67, 8], \"9\": \"10\"}}"),
-            "<think>I'm\nthinking</think>Hello, world!\nWhat's up?\n<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>{\"arg1\": {\"12\": 34, \"5\": [67, 8], \"9\": \"10\"}}<|tool_call_end|><|tool_calls_section_end|>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
-                    /*  .format = */ COMMON_CHAT_FORMAT_KIMI_K2,
-                    /*  .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
-            }); });
-        test_parser_with_streaming(
-                simple_assist_msg("", "", "complex_function", "{\"name\":\"John Doe\",\"age\":30,\"active\":true,\"score\":95.5}"),
-                "<|tool_calls_section_begin|><|tool_call_begin|>functions.complex_function:0<|tool_call_argument_begin|>"
-                "{\"name\": \"John Doe\", \"age\": 30, \"active\": true, \"score\": 95.5}"
-                "<|tool_call_end|><|tool_calls_section_end|>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_KIMI_K2}); });
-        test_parser_with_streaming(
-                simple_assist_msg("", "", "web_search", "{\"query\":\"\\\"From Zero\\\" Linkin Park album tracklist complete songs\",\"limit\":3,\"type\":\"text\"}"),
-                "<|tool_calls_section_begin|><|tool_call_begin|>functions.web_search:0<|tool_call_argument_begin|>"
-                "{\"query\":\"\\\"From Zero\\\" Linkin Park album tracklist complete songs\",\"limit\":3,\"type\":\"text\"}"
-                "<|tool_call_end|><|tool_calls_section_end|>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_KIMI_K2}); });
-        test_parser_with_streaming(
-                simple_assist_msg("", "", "read_file", "{\"args\": [{\"path\": \"src/providers/ThemeProvider.tsx\"}, {\"path\": \"src/components/Header.tsx\"}, {\"path\": \"src/components/ThemeToggle.tsx\"}, {\"path\": \"src/app/globals.css\"}, {\"path\": \"src/app/layout.tsx\"}]}"),
-                "<|tool_calls_section_begin|><|tool_call_begin|>functions.read_file:0<|tool_call_argument_begin|>"
-                "{\"args\": [{\"path\": \"src/providers/ThemeProvider.tsx\"}, {\"path\": \"src/components/Header.tsx\"}, {\"path\": \"src/components/ThemeToggle.tsx\"}, {\"path\": \"src/app/globals.css\"}, {\"path\": \"src/app/layout.tsx\"}]}"
-                "<|tool_call_end|><|tool_calls_section_end|>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_KIMI_K2}); });
-        test_parser_with_streaming(
-                simple_assist_msg(
-                        "Let me start by examining the relevant files to understand the current implementation.", "",
-                        "read_file",
-                        "{\"files\": [{\"path\": \"src/app/Partners.tsx\", \"line_ranges\": [\"1-100\"]}]}"),
-                "Let me start by examining the relevant files to understand the current implementation."
-                "<|tool_calls_section_begin|><|tool_call_begin|>functions.read_file:0<|tool_call_argument_begin|>"
-                "{\"files\":[{\"path\":\"src/app/Partners.tsx\",\"line_ranges\":[\"1-100\"]}]}"
-                "<|tool_call_end|><|tool_calls_section_end|>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_KIMI_K2}); });
-        auto multi_tool_msg = simple_assist_msg("Let me call multiple tools.", "I'm thinking.");
-        multi_tool_msg.tool_calls.push_back({ "read_file", "{\"files\": [{\"path\": \"src/app/Partners.tsx\", \"line_ranges\": [\"1-100\"]}]}", "" });
-        multi_tool_msg.tool_calls.push_back({ "web_search", "{\"query\":\"\\\"From Zero\\\" Linkin Park album tracklist complete songs\",\"limit\":3,\"type\":\"text\"}", "" });
-        multi_tool_msg.tool_calls.push_back({ "complex_function", "{\"name\": \"John Doe\", \"age\": 30, \"active\": true, \"score\": 95.5}", "" });
-        multi_tool_msg.tool_calls.push_back({ "emoji_function", "{\"message\":\"Hello! 👋 🌟 🚀 Testing emojis: 😀😃😄😁 and symbols: ∑∏∆∇\"}", "" });
-        test_parser_with_streaming(multi_tool_msg,
-                "<think>I'm thinking.</think>Let me call multiple tools."
-                "<|tool_calls_section_begin|>"
-                "<|tool_call_begin|>functions.read_file:0<|tool_call_argument_begin|>"
-                "{\"files\":[{\"path\":\"src/app/Partners.tsx\",\"line_ranges\":[\"1-100\"]}]}"
-                "<|tool_call_end|>"
-                "<|tool_call_begin|>functions.web_search:1<|tool_call_argument_begin|>"
-                "{\"query\":\"\\\"From Zero\\\" Linkin Park album tracklist complete songs\",\"limit\":3,\"type\":\"text\"}"
-                "<|tool_call_end|>"
-                "<|tool_call_begin|>functions.complex_function:2<|tool_call_argument_begin|>"
-                "{\"name\": \"John Doe\", \"age\": 30, \"active\": true, \"score\": 95.5}"
-                "<|tool_call_end|>"
-                "<|tool_call_begin|>functions.emoji_function:3<|tool_call_argument_begin|>"
-                "{\"message\":\"Hello! 👋 🌟 🚀 Testing emojis: 😀😃😄😁 and symbols: ∑∏∆∇\"}"
-                "<|tool_call_end|>"
-                "<|tool_calls_section_end|>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
-                    COMMON_CHAT_FORMAT_KIMI_K2,
-                    COMMON_REASONING_FORMAT_DEEPSEEK
-            }); });
-        test_parser_with_streaming(
-                simple_assist_msg("", "I'm thinking", "complex_function_in_think", "{\"name\":\"John Doe\",\"age\":30,\"active\":true,\"score\":95.5}"),
-                "<think>I'm thinking<|tool_calls_section_begin|><|tool_call_begin|>functions.complex_function_in_think:0<|tool_call_argument_begin|>"
-                "{\"name\": \"John Doe\", \"age\": 30, \"active\": true, \"score\": 95.5}"
-                "<|tool_call_end|><|tool_calls_section_end|>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
-                    COMMON_CHAT_FORMAT_KIMI_K2,
-                    COMMON_REASONING_FORMAT_DEEPSEEK
-            }); });
-        test_parser_with_streaming(
-                simple_assist_msg("Hello", "I'm thinkingI'm still thinking", "complex_function_in_think", "{\"name\":\"John Doe\",\"age\":30,\"active\":true,\"score\":95.5}"),
-                "<think>I'm thinking<|tool_calls_section_begin|><|tool_call_begin|>functions.complex_function_in_think:0<|tool_call_argument_begin|>"
-                "{\"name\": \"John Doe\", \"age\": 30, \"active\": true, \"score\": 95.5}"
-                "<|tool_call_end|><|tool_calls_section_end|>I'm still thinking</think>Hello",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
-                    COMMON_CHAT_FORMAT_KIMI_K2,
-                    COMMON_REASONING_FORMAT_DEEPSEEK
-            }); });
-
-        // Test template rendering
-        common_chat_templates_inputs conversation_with_tools = inputs_tools;
-        conversation_with_tools.messages.push_back(simple_assist_msg("Let's do it", "Think first", "complex_function", "{\"name\":\"John Doe\",\"age\":30,\"active\":true,\"score\":95.5}"));
-        conversation_with_tools.messages.push_back({
-            "tool",
-            "Tool response 1",
-            /* .content_parts = */ {},
-            /* .tool_calls = */ {},
-            /* .reasoning_content = */ "",
-            /* .tool_name = */ "complex_function",
-            /* .tool_call_id = */ "",
-        });
-        conversation_with_tools.messages.push_back(simple_assist_msg("Continue", "Think next", "web_search", "{\"query\":\"\\\"From Zero\\\" Linkin Park album tracklist complete songs\",\"limit\":3,\"type\":\"text\"}"));
-        conversation_with_tools.messages.push_back({
-            "tool",
-            "Tool response 2",
-            /* .content_parts = */ {},
-            /* .tool_calls = */ {},
-            /* .reasoning_content = */ "",
-            /* .tool_name = */ "web_search",
-            /* .tool_call_id = */ "",
-        });
-        conversation_with_tools.messages.push_back(simple_assist_msg("CC", "Think last", "read_file", "{\"args\": [{\"path\": \"src/providers/ThemeProvider.tsx\"}, {\"path\": \"src/components/Header.tsx\"}, {\"path\": \"src/components/ThemeToggle.tsx\"}, {\"path\": \"src/app/globals.css\"}, {\"path\": \"src/app/layout.tsx\"}]}"));
-        conversation_with_tools.messages.push_back({
-            "tool",
-            "Tool response 3",
-            /* .content_parts = */ {},
-            /* .tool_calls = */ {},
-            /* .reasoning_content = */ "",
-            /* .tool_name = */ "read_file",
-            /* .tool_call_id = */ "",
-        });
-        assert_equals(common_chat_templates_apply(tmpls.get(), conversation_with_tools).prompt, std::string("<|im_system|>tool_declare<|im_middle|>[{\"type\": \"function\", \"function\": {\"name\": \"special_function\", \"description\": \"I'm special\", \"parameters\": {\"type\": \"object\", \"properties\": {\"arg1\": {\"type\": \"integer\", \"description\": \"The arg.\"}}, \"required\": [\"arg1\"]}}}]<|im_end|><|im_system|>system<|im_middle|>You are Kimi, an AI assistant created by Moonshot AI.<|im_end|><|im_user|>user<|im_middle|>Hey there!<|im_end|><|im_assistant|>assistant<|im_middle|><think>Think first</think>Let's do it<|tool_calls_section_begin|><|tool_call_begin|>functions.complex_function:0<|tool_call_argument_begin|>{\"name\":\"John Doe\",\"age\":30,\"active\":true,\"score\":95.5}<|tool_call_end|><|tool_calls_section_end|><|im_end|><|im_system|>complex_function<|im_middle|>## Return of functions.complex_function:0\nTool response 1<|im_end|><|im_assistant|>assistant<|im_middle|><think>Think next</think>Continue<|tool_calls_section_begin|><|tool_call_begin|>functions.web_search:1<|tool_call_argument_begin|>{\"query\":\"\\\"From Zero\\\" Linkin Park album tracklist complete songs\",\"limit\":3,\"type\":\"text\"}<|tool_call_end|><|tool_calls_section_end|><|im_end|><|im_system|>web_search<|im_middle|>## Return of functions.web_search:1\nTool response 2<|im_end|><|im_assistant|>assistant<|im_middle|><think>Think last</think>CC<|tool_calls_section_begin|><|tool_call_begin|>functions.read_file:2<|tool_call_argument_begin|>{\"args\": [{\"path\": \"src/providers/ThemeProvider.tsx\"}, {\"path\": \"src/components/Header.tsx\"}, {\"path\": \"src/components/ThemeToggle.tsx\"}, {\"path\": \"src/app/globals.css\"}, {\"path\": \"src/app/layout.tsx\"}]}<|tool_call_end|><|tool_calls_section_end|><|im_end|><|im_system|>read_file<|im_middle|>## Return of functions.read_file:2\nTool response 3<|im_end|><|im_assistant|>assistant<|im_middle|>"));
-
-        // Test template generation for regular content
-        test_templates(tmpls.get(), end_tokens, message_assist, tools,
-                      "<think></think>Hello, world!\nWhat's up?",
-                      /* expect_grammar_triggered= */ false);
-
-        // Test template generation for tool calls
-        test_templates(tmpls.get(), end_tokens, message_assist_call, tools,
-                      "<think></think><|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>{\"arg1\": 1}<|tool_call_end|><|tool_calls_section_end|>",
-                      /* expect_grammar_triggered= */ true,
-                      /* test_grammar_if_triggered= */ true,
-                      /* common_reasoning_format= */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                      /* ignore_whitespace_differences= */ true
-        );
-
-        // Test template generation for tools with optional parameters
-        test_templates(tmpls.get(), end_tokens, message_assist_call_noopt, tools,
-                      "<think></think><|tool_calls_section_begin|><|tool_call_begin|>functions.special_function_with_opt:0<|tool_call_argument_begin|>{\"arg1\": 1}<|tool_call_end|><|tool_calls_section_end|>",
-                      /* expect_grammar_triggered= */ true,
-                      /* test_grammar_if_triggered= */ true,
-                      /* common_reasoning_format= */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                      /* ignore_whitespace_differences= */ true
-        );
-        test_templates(tmpls.get(), end_tokens, message_assist_call_withopt, tools,
-                      "<think></think><|tool_calls_section_begin|><|tool_call_begin|>functions.special_function_with_opt:0<|tool_call_argument_begin|>{\"arg1\": 1, \"arg2\": 2}<|tool_call_end|><|tool_calls_section_end|>",
-                      /* expect_grammar_triggered= */ true,
-                      /* test_grammar_if_triggered= */ true,
-                      /* common_reasoning_format= */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                      /* ignore_whitespace_differences= */ true
-        );
-    }
-
-    // Test Qwen3-Coder XML format
-    {
-        // Basic XML tool call parsing
-        assert_msg_equals(
-            message_assist_call,
-            test_chat_parse(
-                "<tool_call>\n"
-                "  <function=special_function>\n"
-                "    <parameter=arg1>\n"
-                "      1\n"
-                "    </parameter>\n"
-                "  </function>\n"
-                "</tool_call>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}));
-
-        // Multiple parameters with different types
-        common_chat_msg expected_multi_param;
-        expected_multi_param.role = "assistant";
-        expected_multi_param.tool_calls = {
-            { "complex_function", "{\"name\":\"John Doe\",\"age\":30,\"active\":true,\"score\":95.5}", "" }
-        };
-
-        test_parser_with_streaming(expected_multi_param,
-                "<tool_call>\n"
-                "  <function=complex_function>\n"
-                "    <parameter=name>\n"
-                "      John Doe\n"
-                "    </parameter>\n"
-                "    <parameter=age>\n"
-                "      30\n"
-                "    </parameter>\n"
-                "    <parameter=active>\n"
-                "      true\n"
-                "    </parameter>\n"
-                "    <parameter=score>\n"
-                "      95.5\n"
-                "    </parameter>\n"
-                "  </function>\n"
-                "</tool_call>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });
-
-        // Special characters and Unicode
-        common_chat_msg expected_special_chars;
-        expected_special_chars.role = "assistant";
-        expected_special_chars.tool_calls = {
-            { "unicode_function", "{\"message\":\"Hello 世界! 🌍 Special chars: @#$%^&*()\"}", "" }
-        };
-
-        test_parser_with_streaming(expected_special_chars,
-                "<tool_call>\n"
-                "  <function=unicode_function>\n"
-                "    <parameter=message>\n"
-                "      Hello 世界! 🌍 Special chars: @#$%^&*()\n"
-                "    </parameter>\n"
-                "  </function>\n"
-                "</tool_call>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });
-
-        // Multiline content with newlines and indentation
-        common_chat_msg expected_multiline;
-        expected_multiline.role = "assistant";
-        expected_multiline.tool_calls = {
-            { "code_function", "{\"code\":\"def hello():\\n    print(\\\"Hello, World!\\\")\\n    return True\"}", "" }
-        };
-
-        test_parser_with_streaming(expected_multiline,
-                "<tool_call>\n"
-                "  <function=code_function>\n"
-                "    <parameter=code>\n"
-                "def hello():\n"
-                "    print(\"Hello, World!\")\n"
-                "    return True\n"
-                "    </parameter>\n"
-                "  </function>\n"
-                "</tool_call>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });
-
-        // JSON object as parameter value
-        common_chat_msg expected_json_param;
-        expected_json_param.role = "assistant";
-        expected_json_param.tool_calls = {
-            { "json_function", "{\"config\":{\"host\":\"localhost\",\"port\":8080,\"ssl\":false}}", "" }
-        };
-
-        test_parser_with_streaming(
-            expected_json_param,
-                "<tool_call>\n"
-                "  <function=json_function>\n"
-                "    <parameter=config>\n"
-                "      {\"host\": \"localhost\", \"port\": 8080, \"ssl\": false}\n"
-                "    </parameter>\n"
-                "  </function>\n"
-                "</tool_call>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });
-
-        // Array as parameter value
-        common_chat_msg expected_array_param;
-        expected_array_param.role = "assistant";
-        expected_array_param.tool_calls = {
-            { "array_function", "{\"items\":[\"apple\",\"banana\",\"cherry\"]}", "" }
-        };
-
-        test_parser_with_streaming(
-            expected_array_param,
-                "<tool_call>\n"
-                "  <function=array_function>\n"
-                "    <parameter=items>\n"
-                "      [\"apple\", \"banana\", \"cherry\"]\n"
-                "    </parameter>\n"
-                "  </function>\n"
-                "</tool_call>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });
-
-        // Empty parameter
-        common_chat_msg expected_empty_param;
-        expected_empty_param.role = "assistant";
-        expected_empty_param.tool_calls = {
-            { "empty_function", "{\"empty_param\":\"\"}", "" }
-        };
-
-        test_parser_with_streaming(
-            expected_empty_param,
-                "<tool_call>\n"
-                "  <function=empty_function>\n"
-                "    <parameter=empty_param>\n"
-                "    </parameter>\n"
-                "  </function>\n"
-                "</tool_call>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });
-
-        // Boolean values (true/false)
-        common_chat_msg expected_boolean;
-        expected_boolean.role = "assistant";
-        expected_boolean.tool_calls = {
-            { "boolean_function", "{\"enabled\":true,\"debug\":false}", "" }
-        };
-
-        test_parser_with_streaming(
-            expected_boolean,
-                "<tool_call>\n"
-                "  <function=boolean_function>\n"
-                "    <parameter=enabled>\n"
-                "      true\n"
-                "    </parameter>\n"
-                "    <parameter=debug>\n"
-                "      false\n"
-                "    </parameter>\n"
-                "  </function>\n"
-                "</tool_call>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });
-
-        // Null value
-        common_chat_msg expected_null;
-        expected_null.role = "assistant";
-        expected_null.tool_calls = {
-            { "null_function", "{\"optional_param\":null}", "" }
-        };
-
-        test_parser_with_streaming(
-            expected_null,
-                "<tool_call>\n"
-                "  <function=null_function>\n"
-                "    <parameter=optional_param>\n"
-                "      null\n"
-                "    </parameter>\n"
-                "  </function>\n"
-                "</tool_call>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });
-
-        // Negative numbers and scientific notation
-        common_chat_msg expected_numbers;
-        expected_numbers.role = "assistant";
-        expected_numbers.tool_calls = {
-            { "math_function", "{\"negative\":-42,\"decimal\":-3.14,\"scientific\":1.23e-4}", "" }
-        };
-
-        test_parser_with_streaming(
-            expected_numbers,
-                "<tool_call>\n"
-                "  <function=math_function>\n"
-                "    <parameter=negative>\n"
-                "      -42\n"
-                "    </parameter>\n"
-                "    <parameter=decimal>\n"
-                "      -3.14\n"
-                "    </parameter>\n"
-                "    <parameter=scientific>\n"
-                "      1.23e-4\n"
-                "    </parameter>\n"
-                "  </function>\n"
-                "</tool_call>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });
-
-        // XML-like content in parameters (should be escaped)
-        common_chat_msg expected_xml_content;
-        expected_xml_content.role = "assistant";
-        expected_xml_content.tool_calls = {
-            { "xml_function", "{\"xml_content\":\"<root><item>value</item></root>\"}", "" }
-        };
-
-        test_parser_with_streaming(
-            expected_xml_content,
-                "<tool_call>\n"
-                "  <function=xml_function>\n"
-                "    <parameter=xml_content>\n"
-                "      <root><item>value</item></root>\n"
-                "    </parameter>\n"
-                "  </function>\n"
-                "</tool_call>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });
-
-        // Quotes and escape characters
-        common_chat_msg expected_quotes;
-        expected_quotes.role = "assistant";
-        expected_quotes.tool_calls = {
-            { "quote_function", "{\"message\":\"She said \\\"Hello!\\\" and left.\"}", "" }
-        };
-
-        test_parser_with_streaming(
-            expected_quotes,
-                "<tool_call>\n"
-                "  <function=quote_function>\n"
-                "    <parameter=message>\n"
-                "      She said \"Hello!\" and left.\n"
-                "    </parameter>\n"
-                "  </function>\n"
-                "</tool_call>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });
-
-        // Long parameter value (simplified)
-        std::string long_text = "This is a long text parameter that should test the parser's ability to handle larger amounts of text data.";
-
-        common_chat_msg expected_long_text;
-        expected_long_text.role = "assistant";
-        expected_long_text.tool_calls = {
-            { "long_function", "{\"long_text\":\"" + long_text + "\"}", "" }
-        };
-
-        test_parser_with_streaming(
-            expected_long_text,
-                "<tool_call>\n"
-                "  <function=long_function>\n"
-                "    <parameter=long_text>\n"
-                "      " + long_text + "\n"
-                "    </parameter>\n"
-                "  </function>\n"
-                "</tool_call>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });
-
-        // Mixed content with text before and after tool call
-        common_chat_msg expected_mixed_content;
-        expected_mixed_content.role = "assistant";
-        expected_mixed_content.content = "I'll help you search for products. ";
-        expected_mixed_content.tool_calls = {
-            { "search_function", "{\"query\":\"laptops\"}", "" }
-        };
-
-        test_parser_with_streaming(
-            expected_mixed_content,
-                "I'll help you search for products. <tool_call>\n"
-                "  <function=search_function>\n"
-                "    <parameter=query>\n"
-                "      laptops\n"
-                "    </parameter>\n"
-                "  </function>\n"
-                "</tool_call>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });
-
-        // Compact format (no extra whitespace)
-        common_chat_msg expected_compact;
-        expected_compact.role = "assistant";
-        expected_compact.tool_calls = {
-            { "compact_function", "{\"param\":\"value\"}", "" }
-        };
-
-        test_parser_with_streaming(
-            expected_compact,
-                "<tool_call><function=compact_function><parameter=param>value</parameter></function></tool_call>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });
-
-        // Function name with underscores and numbers
-        common_chat_msg expected_complex_name;
-        expected_complex_name.role = "assistant";
-        expected_complex_name.tool_calls = {
-            { "get_user_data_v2", "{\"user_id\":12345}", "" }
-        };
-
-        test_parser_with_streaming(
-            expected_complex_name,
-                "<tool_call>\n"
-                "  <function=get_user_data_v2>\n"
-                "    <parameter=user_id>\n"
-                "      12345\n"
-                "    </parameter>\n"
-                "  </function>\n"
-                "</tool_call>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });
-
-        // Parameter names with underscores and numbers
-        common_chat_msg expected_complex_params;
-        expected_complex_params.role = "assistant";
-        expected_complex_params.tool_calls = {
-            { "test_function", "{\"param_1\":\"value1\",\"param_2_name\":\"value2\",\"param3\":123}", "" }
-        };
-
-        test_parser_with_streaming(
-            expected_complex_params,
-                "<tool_call>\n"
-                "  <function=test_function>\n"
-                "    <parameter=param_1>\n"
-                "      value1\n"
-                "    </parameter>\n"
-                "    <parameter=param_2_name>\n"
-                "      value2\n"
-                "    </parameter>\n"
-                "    <parameter=param3>\n"
-                "      123\n"
-                "    </parameter>\n"
-                "  </function>\n"
-                "</tool_call>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });
-
-        // Very deeply nested XML content in parameter
-        common_chat_msg expected_deep_xml;
-        expected_deep_xml.role = "assistant";
-        expected_deep_xml.tool_calls = {
-            { "xml_parser", "{\"xml\":\"<root><level1><level2><level3>deep content</level3></level2></level1></root>\"}", "" }
-        };
-
-        test_parser_with_streaming(
-            expected_deep_xml,
-                "<tool_call>\n"
-                "  <function=xml_parser>\n"
-                "    <parameter=xml>\n"
-                "      <root><level1><level2><level3>deep content</level3></level2></level1></root>\n"
-                "    </parameter>\n"
-                "  </function>\n"
-                "</tool_call>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });
-
-        // Parameter with only whitespace
-        common_chat_msg expected_whitespace_param;
-        expected_whitespace_param.role = "assistant";
-        expected_whitespace_param.tool_calls = {
-            { "whitespace_function", "{\"spaces\":\"\"}", "" }
-        };
-
-        test_parser_with_streaming(
-            expected_whitespace_param,
-                "<tool_call>\n"
-                "  <function=whitespace_function>\n"
-                "    <parameter=spaces>\n"
-                "      \n"
-                "    </parameter>\n"
-                "  </function>\n"
-                "</tool_call>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });
-
-        // Parameter with tabs and mixed whitespace
-        common_chat_msg expected_mixed_whitespace;
-        expected_mixed_whitespace.role = "assistant";
-        expected_mixed_whitespace.tool_calls = {
-            { "tab_function", "{\"content\":\"line1\\n\\tindented line\\n    spaces\"}", "" }
-        };
-
-        test_parser_with_streaming(
-            expected_mixed_whitespace,
-                "<tool_call>\n"
-                "  <function=tab_function>\n"
-                "    <parameter=content>\n"
-                "line1\n"
-                "\tindented line\n"
-                "    spaces\n"
-                "    </parameter>\n"
-                "  </function>\n"
-                "</tool_call>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });
-
-        // Control characters and special Unicode
-        common_chat_msg expected_control_chars;
-        expected_control_chars.role = "assistant";
-        expected_control_chars.tool_calls = {
-            { "control_function", "{\"text\":\"Line1\\nLine2\\tTabbed\\rCarriage return\"}", "" }
-        };
-
-        test_parser_with_streaming(
-            expected_control_chars,
-                "<tool_call>\n"
-                "  <function=control_function>\n"
-                "    <parameter=text>\n"
-                "Line1\nLine2\tTabbed\rCarriage return\n"
-                "    </parameter>\n"
-                "  </function>\n"
-                "</tool_call>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });
-
-        // Emoji and extended Unicode characters
-        common_chat_msg expected_emoji;
-        expected_emoji.role = "assistant";
-        expected_emoji.tool_calls = {
-            { "emoji_function", "{\"message\":\"Hello! 👋 🌟 🚀 Testing emojis: 😀😃😄😁 and symbols: ∑∏∆∇\"}", "" }
-        };
-
-        test_parser_with_streaming(
-            expected_emoji,
-                "<tool_call>\n"
-                "  <function=emoji_function>\n"
-                "    <parameter=message>\n"
-                "      Hello! 👋 🌟 🚀 Testing emojis: 😀😃😄😁 and symbols: ∑∏∆∇\n"
-                "    </parameter>\n"
-                "  </function>\n"
-                "</tool_call>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });
-
-        // Mathematical expressions and formulas
-        common_chat_msg expected_math;
-        expected_math.role = "assistant";
-        expected_math.tool_calls = {
-            { "math_function", "{\"formula\":\"E = mc² and ∫f(x)dx = F(x) + C\"}", "" }
-        };
-
-        test_parser_with_streaming(
-            expected_math,
-                "<tool_call>\n"
-                "  <function=math_function>\n"
-                "    <parameter=formula>\n"
-                "      E = mc² and ∫f(x)dx = F(x) + C\n"
-                "    </parameter>\n"
-                "  </function>\n"
-                "</tool_call>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });
-
-        // SQL injection-like content (should be safely escaped)
-        common_chat_msg expected_sql;
-        expected_sql.role = "assistant";
-        expected_sql.tool_calls = {
-            { "sql_function", "{\"query\":\"SELECT * FROM users WHERE id = 1; DROP TABLE users; --\"}", "" }
-        };
-
-        test_parser_with_streaming(
-            expected_sql,
-                "<tool_call>\n"
-                "  <function=sql_function>\n"
-                "    <parameter=query>\n"
-                "      SELECT * FROM users WHERE id = 1; DROP TABLE users; --\n"
-                "    </parameter>\n"
-                "  </function>\n"
-                "</tool_call>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });
-
-        // HTML/XML injection content
-        common_chat_msg expected_html;
-        expected_html.role = "assistant";
-        expected_html.tool_calls = {
-            { "html_function", "{\"content\":\"<script>alert('xss')</script><img src=x onerror=alert(1)>\"}", "" }
-        };
-
-        test_parser_with_streaming(
-            expected_html,
-                "<tool_call>\n"
-                "  <function=html_function>\n"
-                "    <parameter=content>\n"
-                "      <script>alert('xss')</script><img src=x onerror=alert(1)>\n"
-                "    </parameter>\n"
-                "  </function>\n"
-                "</tool_call>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });
-
-        // Binary-like content (base64)
-        common_chat_msg expected_binary;
-        expected_binary.role = "assistant";
-        expected_binary.tool_calls = {
-            { "binary_function", "{\"data\":\"SGVsbG8gV29ybGQhIFRoaXMgaXMgYmFzZTY0IGVuY29kZWQgdGV4dC4=\"}", "" }
-        };
-
-        test_parser_with_streaming(
-            expected_binary,
-                "<tool_call>\n"
-                "  <function=binary_function>\n"
-                "    <parameter=data>\n"
-                "      SGVsbG8gV29ybGQhIFRoaXMgaXMgYmFzZTY0IGVuY29kZWQgdGV4dC4=\n"
-                "    </parameter>\n"
-                "  </function>\n"
-                "</tool_call>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });
-
-        // Very large numbers (should be parsed as scientific notation)
-        common_chat_msg expected_large_numbers;
-        expected_large_numbers.role = "assistant";
-        expected_large_numbers.tool_calls = {
-            { "number_function", "{\"big_int\":1e+60}", "" }  // Large number becomes scientific notation
-        };
-
-        test_parser_with_streaming(
-            expected_large_numbers,
-                "<tool_call>\n"
-                "  <function=number_function>\n"
-                "    <parameter=big_int>\n"
-                "      999999999999999999999999999999999999999999999999999999999999\n"
-                "    </parameter>\n"
-                "  </function>\n"
-                "</tool_call>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });
-    }
-
-    {
-        // Qwen3-Coder template
-        auto tmpls = read_templates("models/templates/Qwen3-Coder.jinja");
-        common_chat_templates_inputs inputs;
-        inputs.messages = { message_user };
-
-        common_chat_tool qwen_union_tool {
-            /* .name = */ "qwen_union",
-            /* .description = */ "Test tool for union/anyOf handling",
-            /* .parameters = */ R"({
-                "type": "object",
-                "properties": {
-                    "priority": { "type": ["number", "null"] },
-                    "maybe_text": { "anyOf": [ { "type": "string" } ] },
-                    "config": { "anyOf": [ { "type": "object" }, { "type": "null" } ] }
-                },
-                "required": []
-            })",
-        };
-        inputs.tools = { qwen_union_tool };
-
-        auto params = common_chat_templates_apply(tmpls.get(), inputs);
-        assert_equals(COMMON_CHAT_FORMAT_QWEN3_CODER_XML, params.format);
-        assert_equals(false, params.grammar.empty());
-
-        // Grammar should compile successfully
-        auto grammar = build_grammar(params.grammar);
-        GGML_ASSERT(grammar && "Failed to build Qwen3-Coder grammar with union types");
-    }
-}
-
-static void test_template_output_peg_parsers() {
-    printf("[%s]\n", __func__);
-
-    // JSON schemas
-    const char * invoice_schema = R"({
-        "type": "object",
-        "properties": {
-            "amount": {"type": "number"},
-            "date": {"type": "string"}
-        }
-    })";
-
-    {
-        // Ministral-3-14B-Reasoning-2512
-        auto tmpls = read_templates("models/templates/mistralai-Ministral-3-14B-Reasoning-2512.jinja");
-
-        // Test basic message
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input = "Hello, world!\nWhat's up?";
-            t.expect = message_assist;
-        });
-
-        // Test basic message and reasoning with reasoning_format = none
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input = "[THINK]I'm\nthinking[/THINK]Hello, world!\nWhat's up?";
-            t.expect.content = "[THINK]I'm\nthinking[/THINK]Hello, world!\nWhat's up?";
-        });
-
-        // Test basic message and reasoning with reasoning_format = auto
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input = "[THINK]I'm\nthinking[/THINK]Hello, world!\nWhat's up?";
-            t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
-
-            t.expect = message_assist_thoughts;
-        });
-
-        // Test tool call
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input = R"([TOOL_CALLS]special_function[ARGS]{"arg1":1})";
-            t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
-            t.params.tools = {special_function_tool};
-
-            t.expect = message_assist_call;
-        });
-
-        // Test tool call with reasoning
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input = "[THINK]I'm\nthinking[/THINK]"
-                      R"([TOOL_CALLS]special_function[ARGS]{"arg1":1})";
-            t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
-            t.params.tools = {special_function_tool};
-
-            t.expect = message_assist_call_thoughts;
-        });
-
-        // Test parallel tool calls
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input = R"([TOOL_CALLS]special_function[ARGS]{"arg1": 1})"
-                      R"([TOOL_CALLS]special_function_with_opt[ARGS]{"arg1": 1, "arg2": 2})";
-            t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
-            t.params.parallel_tool_calls = true;
-            t.params.tools = {special_function_tool, special_function_tool_with_optional_param};
-
-            t.expect.tool_calls = {{
-                /* .name = */      "special_function",
-                /* .arguments = */ R"({"arg1": 1})",
-                /* .id = */        {},
-            }, {
-                /* .name = */      "special_function_with_opt",
-                /* .arguments = */ R"({"arg1": 1, "arg2": 2})",
-                /* .id = */        {},
-            }};
-        });
-
-        // Test response format
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input = "[THINK]I need to output the invoice details in JSON[/THINK]"
-                      "```json\n"
-                      R"({"amount": 123.45, "date": "2025-12-03"})"
-                      "\n```";
-            t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
-            t.params.json_schema = invoice_schema;
-
-            t.expect.reasoning_content = "I need to output the invoice details in JSON";
-            t.expect.content =R"({"amount": 123.45, "date": "2025-12-03"})";
-        });
-    }
-
-    {
-        // NVIDIA Nemotron-3 Nano
-        auto tmpls = read_templates("models/templates/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16.jinja");
-
-        // Test basic message
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input = "Hello, world!\nWhat's up?";
-            t.expect = message_assist;
-        });
-
-        // Test basic message and reasoning with reasoning_format = none
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input = "I'm\nthinking\n</think>\nHello, world!\nWhat's up?";
-            t.expect.content = "I'm\nthinking\n</think>\nHello, world!\nWhat's up?";
-        });
-
-        // Test basic message and reasoning with reasoning_format = auto
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input = "I'm\nthinking\n</think>\nHello, world!\nWhat's up?";
-            t.params.enable_thinking = true;
-            t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
-
-            t.expect = message_assist_thoughts;
-        });
-
-        // Test tool call
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input =
-                "<tool_call>\n"
-                "<function=special_function>\n"
-                "<parameter=arg1>\n"
-                "1\n"
-                "</parameter>\n"
-                "</function>\n"
-                "</tool_call>";
-            t.params.enable_thinking = false;
-            t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
-            t.params.tools = {special_function_tool};
-
-            t.expect = message_assist_call;
-        });
-
-        // Test tool call with reasoning
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input =
-                "I'm\nthinking\n</think>\n"
-                "<tool_call>\n"
-                "<function=special_function>\n"
-                "<parameter=arg1>\n"
-                "1\n"
-                "</parameter>\n"
-                "</function>\n"
-                "</tool_call>";
-            t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
-            t.params.tools = {special_function_tool};
-
-            t.expect = message_assist_call_thoughts;
-        });
-
-        // Test parallel tool calls
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input =
-                "<tool_call>\n"
-                "<function=special_function>\n"
-                "<parameter=arg1>\n"
-                "1\n"
-                "</parameter>\n"
-                "</function>\n"
-                "</tool_call>\n"
-                "<tool_call>\n"
-                "<function=special_function_with_opt>\n"
-                "<parameter=arg1>\n"
-                "1\n"
-                "</parameter>\n"
-                "<parameter=arg2>\n"
-                "2\n"
-                "</parameter>\n"
-                "</function>\n"
-                "</tool_call>";
-            t.params.enable_thinking = false;
-            t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
-            t.params.parallel_tool_calls = true;
-            t.params.tools = {special_function_tool, special_function_tool_with_optional_param};
-
-            t.expect.tool_calls = {{
-                /* .name = */      "special_function",
-                /* .arguments = */ R"({"arg1": 1})",
-                /* .id = */        {},
-            }, {
-                /* .name = */      "special_function_with_opt",
-                /* .arguments = */ R"({"arg1": 1, "arg2": 2})",
-                /* .id = */        {},
-            }};
-        });
-
-        // Test tool call with string parameter
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input =
-                "<tool_call>\n"
-                "<function=python>\n"
-                "<parameter=code>\n"
-                "def hello():\n"
-                "    print(\"Hello, world!\")\n"
-                "\n"
-                "hello()\n"
-                "</parameter>\n"
-                "</function>\n"
-                "</tool_call>";
-            t.params.enable_thinking = false;
-            t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
-            t.params.tools = {python_tool};
-
-            t.expect.tool_calls = {{
-                /* .name = */      "python",
-                /* .arguments = */ "{\"code\": \"def hello():\\n    print(\\\"Hello, world!\\\")\\n\\nhello()\"}",
-                /* .id = */        {},
-            }};
-        });
-
-        // Test tool call with string parameter and no closing </parameter> tag
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input =
-                "<tool_call>\n"
-                "<function=python>\n"
-                "<parameter=code>\n"
-                "def hello():\n"
-                "    print(\"Hello, world!\")\n"
-                "\n"
-                "hello()\n"
-                "</function>\n"
-                "</tool_call>";
-            t.params.enable_thinking = false;
-            t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
-            t.params.tools = {python_tool};
-
-            t.expect.tool_calls = {{
-                /* .name = */      "python",
-                /* .arguments = */ "{\"code\": \"def hello():\\n    print(\\\"Hello, world!\\\")\\n\\nhello()\"}",
-                /* .id = */        {},
-            }};
-        });
-
-        // Test response format
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input =
-              "I need to output the invoice details in JSON\n"
-              "</think>\n"
-              R"({"amount": 123.45, "date": "2025-12-03"})";
-            t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
-            t.params.json_schema = invoice_schema;
-
-            t.expect.reasoning_content = "I need to output the invoice details in JSON";
-            t.expect.content = R"({"amount": 123.45, "date": "2025-12-03"})";
-        });
-    }
-
-    {
-        // Solar-Open-100B
-        auto tmpls = read_templates("models/templates/upstage-Solar-Open-100B.jinja");
-
-        // Test basic message
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input = "<|content|>Hello, world!\nWhat's up?";
-            t.expect = message_assist;
-        });
-
-        // Test basic message and reasoning
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input = "<|think|>I'm\nthinking<|end|><|begin|>assistant<|content|>Hello, world!\nWhat's up?";
-            t.expect = message_assist_thoughts;
-        });
-
-        // Test basic message and reasoning_effort = low
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input = "<|content|>Hello, world!\nWhat's up?";
-            t.params.chat_template_kwargs["reasoning_effort"] = "\"low\"";
-            t.expect = message_assist;
-        });
-
-        // Test tool call
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input = "<|tool_calls|>"
-                      "<|tool_call:begin|>123456789"
-                      "<|tool_call:name|>special_function"
-                      "<|tool_call:args|>{\"arg1\":1}"
-                      "<|tool_call:end|>";
-
-            t.params.chat_template_kwargs["reasoning_effort"] = "\"low\"";
-            t.params.tools = {special_function_tool};
-            t.expect = message_assist_call_id;
-        });
-
-        // Test tool call with reasoning
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input = "<|think|>I'm\nthinking<|end|>"
-                      "<|begin|>assistant<|tool_calls|>"
-                      "<|tool_call:begin|>0"
-                      "<|tool_call:name|>special_function"
-                      "<|tool_call:args|>{\"arg1\":1}"
-                      "<|tool_call:end|>";
-
-            t.params.tools = {special_function_tool};
-            t.expect = message_assist_thoughts_call_idx;
-        });
-
-        // Test tool call with reasoning and tool_choice = required
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input = "<|think|>I'm\nthinking<|end|>"
-                      "<|begin|>assistant<|tool_calls|>"
-                      "<|tool_call:begin|>0"
-                      "<|tool_call:name|>special_function"
-                      "<|tool_call:args|>{\"arg1\":1}"
-                      "<|tool_call:end|>";
-
-            t.params.tools = {special_function_tool};
-            t.params.tool_choice = COMMON_CHAT_TOOL_CHOICE_REQUIRED;
-            t.expect = message_assist_thoughts_call_idx;
-        });
-
-        // Test tool call without reasoning and tool_choice = required
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input = "<|tool_calls|>"
-                      "<|tool_call:begin|>0"
-                      "<|tool_call:name|>special_function"
-                      "<|tool_call:args|>{\"arg1\":1}"
-                      "<|tool_call:end|>";
-
-            t.params.tools = {special_function_tool};
-            t.params.tool_choice = COMMON_CHAT_TOOL_CHOICE_REQUIRED;
-            t.params.chat_template_kwargs["reasoning_effort"] = "\"low\"";
-            t.expect = message_assist_call_idx;
-        });
-
-        // Test parallel tool calls
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input = "<|think|>I'm\nthinking<|end|>"
-                      "<|begin|>assistant<|tool_calls|>"
-                      "<|tool_call:begin|>0"
-                      "<|tool_call:name|>special_function"
-                      "<|tool_call:args|>{\"arg1\":1}"
-                      "<|tool_call:end|>"
-                      "<|tool_call:begin|>1"
-                      "<|tool_call:name|>special_function_with_opt"
-                      "<|tool_call:args|>{\"arg1\": 1, \"arg2\": 2}"
-                      "<|tool_call:end|>";
-
-            t.params.parallel_tool_calls = true;
-            t.params.tools = {special_function_tool, special_function_tool_with_optional_param};
-
-            t.expect.reasoning_content = "I'm\nthinking";
-            t.expect.tool_calls = {{
-                /* .name = */      "special_function",
-                /* .arguments = */ R"({"arg1": 1})",
-                /* .id = */        "0",
-            }, {
-                /* .name = */      "special_function_with_opt",
-                /* .arguments = */ R"({"arg1": 1, "arg2": 2})",
-                /* .id = */        "1",
-            }};
-        });
-
-        // Test response format
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input = "<|think|>I need to output the invoice details in JSON<|end|>"
-                      "<|begin|>assistant<|content|>"
-                      R"({"amount": 123.45, "date": "2025-12-03"})";
-
-            t.params.json_schema = invoice_schema;
-
-            t.expect.reasoning_content = "I need to output the invoice details in JSON";
-            t.expect.content =R"({"amount": 123.45, "date": "2025-12-03"})";
-        });
-
-        // Test response format no reasoning
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input = "<|content|>"
-                      R"({"amount": 123.45, "date": "2025-12-03"})";
-
-            t.params.chat_template_kwargs["reasoning_effort"] = "\"low\"";
-            t.params.json_schema = invoice_schema;
-
-            t.expect.content =R"({"amount": 123.45, "date": "2025-12-03"})";
-        });
+                    /* .name = */ "edit",
+                    /* .description = */ "Edit a file",
+                    /* .parameters = */ R"({
+                        "type": "object",
+                        "properties": {
+                            "oldString": {
+                                "type": "string",
+                                "description": "Old string to replace."
+                            },
+                            "newString": {
+                                "type": "string",
+                                "description": "New replacement string."
+                            },
+                            "replaceAll": {
+                                "type": "boolean",
+                                "description": "Whether to replace all occurences."
+                            }
+                        },
+                        "required": ["oldString", "newString"]
+                    })",
+                }
+            })
+            .expect_reasoning("Thinking about edit...")
+            .expect_tool_calls({
+                { "edit", R"({"filePath": "file.js", "oldString": "if (part < railCount - 1) {", "newString": "if (part < 4) {", "replaceAll": false})", {} }
+            })
+            .run();
+
+        // Parallel tool calls
+        tst.test(
+               " to=functions.special_function<|channel|>analysis<|message|>{\"arg1\": 1}\n"
+               "<|start|>assistant to=functions.special_function_with_opt<|channel|>analysis<|message|>{\"arg1\": 1, "
+               "\"arg2\": 2}")
+            .parallel_tool_calls(true)
+            .tools({
+                special_function_tool, special_function_tool_with_optional_param
+        })
+            .expect_tool_calls({
+                { "special_function", R"({"arg1": 1})", {} },
+                { "special_function_with_opt", R"({"arg1": 1, "arg2": 2})", {} },
+            })
+            .run();
     }
 }
 
 static void test_msg_diffs_compute() {
-    printf("[%s]\n", __func__);
+    LOG_DBG("%s\n", __func__);
     {
         common_chat_msg msg1;
 
@@ -3940,9 +2293,7 @@ static void test_msg_diffs_compute() {
         common_chat_msg_diff diff;
         diff.content_delta = "Hello, world!";
 
-        assert_equals(
-            {diff},
-            common_chat_msg_diff::compute_diffs(msg1, msg2));
+        assert_equals({ diff }, common_chat_msg_diff::compute_diffs(msg1, msg2));
     }
     {
         common_chat_msg msg1;
@@ -3954,37 +2305,35 @@ static void test_msg_diffs_compute() {
         common_chat_msg_diff diff;
         diff.content_delta = " world!";
 
-        assert_equals(
-            {diff},
-            common_chat_msg_diff::compute_diffs(msg1, msg2));
+        assert_equals({ diff }, common_chat_msg_diff::compute_diffs(msg1, msg2));
     }
     {
         common_chat_msg msg0;
 
         common_chat_msg msg1;
-        msg1.tool_calls = { { "special_function", "{\"ar", /* .id = */ "123" } };
+        msg1.tool_calls = {
+            { "special_function", "{\"ar", /* .id = */ "123" }
+        };
 
         common_chat_msg msg2;
-        msg2.tool_calls = { { "special_function", "{\"arg1\": 1}", /* .id = */ "123" } };
+        msg2.tool_calls = {
+            { "special_function", "{\"arg1\": 1}", /* .id = */ "123" }
+        };
 
         common_chat_msg_diff diff01;
-        diff01.tool_call_index = 0;
-        diff01.tool_call_delta.name = "special_function";
-        diff01.tool_call_delta.id = "123";
+        diff01.tool_call_index           = 0;
+        diff01.tool_call_delta.name      = "special_function";
+        diff01.tool_call_delta.id        = "123";
         diff01.tool_call_delta.arguments = "{\"ar";
 
-        assert_equals(
-            {diff01},
-            common_chat_msg_diff::compute_diffs(msg0, msg1));
+        assert_equals({ diff01 }, common_chat_msg_diff::compute_diffs(msg0, msg1));
 
         common_chat_msg_diff diff12;
-        diff12.tool_call_index = 0;
+        diff12.tool_call_index           = 0;
         // Note: neither id nor name change here.
         diff12.tool_call_delta.arguments = "g1\": 1}";
 
-        assert_equals(
-            {diff12},
-            common_chat_msg_diff::compute_diffs(msg1, msg2));
+        assert_equals({ diff12 }, common_chat_msg_diff::compute_diffs(msg1, msg2));
     }
     {
         common_chat_msg msg0;
@@ -3996,68 +2345,81 @@ static void test_msg_diffs_compute() {
         };
 
         common_chat_msg_diff diff1;
-        diff1.tool_call_index = 0;
-        diff1.tool_call_delta.name = "f1";
-        diff1.tool_call_delta.id = "123";
+        diff1.tool_call_index           = 0;
+        diff1.tool_call_delta.name      = "f1";
+        diff1.tool_call_delta.id        = "123";
         diff1.tool_call_delta.arguments = "{\"arg1\": 1}";
 
         common_chat_msg_diff diff2;
-        diff2.tool_call_index = 1;
-        diff2.tool_call_delta.name = "f2";
-        diff2.tool_call_delta.id = "222";
+        diff2.tool_call_index           = 1;
+        diff2.tool_call_delta.name      = "f2";
+        diff2.tool_call_delta.id        = "222";
         diff2.tool_call_delta.arguments = "{\"arg2\": 2}";
 
-        assert_equals(
-            {diff1, diff2},
-            common_chat_msg_diff::compute_diffs(msg0, msg2));
+        assert_equals({ diff1, diff2 }, common_chat_msg_diff::compute_diffs(msg0, msg2));
     }
 }
 
 int main(int argc, char ** argv) {
     common_log_set_verbosity_thold(999);
+    bool detailed_debug    = false;
+    bool only_run_filtered = false;
 
-    // try {
-#ifndef _WIN32
-        if (argc > 1) {
-            common_chat_templates_inputs inputs;
-            common_chat_msg msg;
-            msg.role = "user";
-            msg.content = "Hey";
-            inputs.messages = {msg};
-            inputs.tools = { special_function_tool };
-
-            std::cout << "| Template | Format |\n";
-            std::cout << "|----------|--------|\n";
-
-            for (int i = 1; i < argc; i++) {
-                try {
-                    std::string path = argv[i];
-                    if (path.rfind(".jinja") != path.size() - 6) {
-                        std::cerr << "Skipping non-jinja file: " << path << '\n';
-                        continue;
-                    }
-                    auto tmpls = read_templates(path);
-                    auto parts  = string_split(path, "/");
-                    auto name   = parts[parts.size() - 1];
-                    auto format = common_chat_format_name(common_chat_templates_apply(tmpls.get(), inputs).format);
-                    std::cout << "| " << name << " | " << format << " |\n";
-                } catch (const std::exception & e) {
-                    std::cerr << "Failed to process " << argv[i] << ": " << e.what() << '\n';
-                }
-            }
-        } else
-#endif
-        {
-            test_msg_diffs_compute();
-            test_msgs_oaicompat_json_conversion();
-            test_tools_oaicompat_json_conversion();
-            test_template_output_parsers();
-            test_template_output_peg_parsers();
-            std::cout << "\n[chat] All tests passed!" << '\n';
+    // Check for --template flag
+    for (int i = 1; i < argc; i++) {
+        std::string arg = argv[i];
+        if (arg == "--template" && i + 1 < argc) {
+            g_template_filter = argv[++i];
+            // Only run PEG parser tests with the filter
+            only_run_filtered = true;
         }
+        if (arg == "--detailed") {
+            detailed_debug = true;
+        }
+    }
+
+    if (only_run_filtered) {
+        test_template_output_peg_parsers(detailed_debug);
+        std::cout << "\n[chat] All template tests passed!" << '\n';
         return 0;
-    // } catch (const std::exception & e) {
-    //     std::cerr << "Error: " << e.what() << '\n';
-    //     return 1;
-    // }
+    }
+
+#ifndef _WIN32
+    if (argc > 1) {
+        common_chat_templates_inputs inputs;
+        common_chat_msg              msg;
+        msg.role        = "user";
+        msg.content     = "Hey";
+        inputs.messages = { msg };
+        inputs.tools    = { special_function_tool };
+
+        std::cout << "| Template | Format |\n";
+        std::cout << "|----------|--------|\n";
+
+        for (int i = 1; i < argc; i++) {
+            try {
+                std::string path = argv[i];
+                if (path.rfind(".jinja") != path.size() - 6) {
+                    std::cerr << "Skipping non-jinja file: " << path << '\n';
+                    continue;
+                }
+                auto         tmpls  = read_templates(path);
+                auto         parts  = string_split(path, "/");
+                const auto & name   = parts[parts.size() - 1];
+                const auto * format = common_chat_format_name(common_chat_templates_apply(tmpls.get(), inputs).format);
+                std::cout << "| " << name << " | " << format << " |\n";
+            } catch (const std::exception & e) {
+                std::cerr << "Failed to process " << argv[i] << ": " << e.what() << '\n';
+            }
+        }
+    } else
+#endif
+    {
+        test_msg_diffs_compute();
+        test_msgs_oaicompat_json_conversion();
+        test_tools_oaicompat_json_conversion();
+        test_template_output_peg_parsers(detailed_debug);
+        std::cout << "\n[chat] All tests passed!" << '\n';
+    }
+    return 0;
 }
diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt
index 518f8b9ae7..7c63b3aae5 100644
--- a/tools/CMakeLists.txt
+++ b/tools/CMakeLists.txt
@@ -26,6 +26,7 @@ else()
         add_subdirectory(server)
     endif()
     add_subdirectory(tokenize)
+    add_subdirectory(parser)
     add_subdirectory(tts)
     add_subdirectory(mtmd)
     if (GGML_RPC)
diff --git a/tools/parser/CMakeLists.txt b/tools/parser/CMakeLists.txt
new file mode 100644
index 0000000000..4bf40a8717
--- /dev/null
+++ b/tools/parser/CMakeLists.txt
@@ -0,0 +1,8 @@
+set(TARGET llama-debug-template-parser)
+add_executable(${TARGET} debug-template-parser.cpp)
+target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
+target_compile_features(${TARGET} PRIVATE cxx_std_17)
+
+if(LLAMA_TOOLS_INSTALL)
+    install(TARGETS ${TARGET} RUNTIME)
+endif()
diff --git a/tools/parser/debug-template-parser.cpp b/tools/parser/debug-template-parser.cpp
new file mode 100644
index 0000000000..551d2bcf9d
--- /dev/null
+++ b/tools/parser/debug-template-parser.cpp
@@ -0,0 +1,531 @@
+#include "../src/llama-grammar.h"
+#include "chat-auto-parser.h"
+#include "chat.h"
+#include "common.h"
+#include "gguf.h"
+#include "log.h"
+
+#include <fstream>
+#include <sstream>
+#include <string>
+
+#include "nlohmann/json.hpp"
+
+using json = nlohmann::ordered_json;
+
+enum class output_mode {
+    ANALYSIS,  // Only output analysis results (default)
+    TEMPLATE,  // Only output rendered template
+    BOTH       // Output both
+};
+
+enum class input_message_type {
+    NONE,                    // Don't render any message scenarios (only analysis)
+    CONTENT_ONLY,            // Simple assistant message with content
+    REASONING_CONTENT,       // Message with reasoning_content + content
+    TOOL_CALL_ONLY,          // Message with tool_calls only
+    CONTENT_TOOL_CALL,       // Message with content + tool_calls
+    REASONING_TOOL_CALL,     // Message with reasoning_content + tool_calls
+    CONTENT_FAKE_TOOL_CALL,  // Message with content but no actual tool_calls (for testing)
+    ALL                      // Render all scenarios
+};
+
+struct debug_options {
+    std::string      template_path;
+    bool             with_tools        = true;
+    bool             generation_prompt = true;
+    bool             enable_reasoning  = true;
+    output_mode       mode             = output_mode::BOTH;
+    input_message_type input_message     = input_message_type::NONE;
+};
+
+static std::string read_file(const std::string & path) {
+    std::ifstream fin(path, std::ios::binary);
+    if (!fin.is_open()) {
+        throw std::runtime_error("Could not open file: " + path);
+    }
+    std::ostringstream buf;
+    buf << fin.rdbuf();
+    return buf.str();
+}
+
+static std::string read_gguf_chat_template(const std::string & path) {
+    struct gguf_init_params params = { /*no_alloc =*/true,  // We only need metadata, not tensor data
+                                       /*ctx=*/nullptr };
+
+    struct gguf_context * ctx = gguf_init_from_file(path.c_str(), params);
+    if (ctx == nullptr) {
+        throw std::runtime_error("Could not open GGUF file: " + path);
+    }
+
+    const char * key    = "tokenizer.chat_template";
+    int64_t      key_id = gguf_find_key(ctx, key);
+
+    if (key_id == -1) {
+        gguf_free(ctx);
+        throw std::runtime_error("GGUF file does not contain chat template key: " + std::string(key));
+    }
+
+    const char * template_str = gguf_get_val_str(ctx, key_id);
+    if (template_str == nullptr) {
+        gguf_free(ctx);
+        throw std::runtime_error("GGUF file contains chat template key but value is null");
+    }
+
+    std::string result = template_str;
+    gguf_free(ctx);
+    return result;
+}
+
+static void print_usage(const char * program_name) {
+    LOG_ERR("Usage: %s <template_or_gguf_path> [options]\n", program_name);
+    LOG_ERR("\nOptions:\n");
+    LOG_ERR("  --no-tools              Disable tool definitions\n");
+    LOG_ERR("  --generation-prompt=0|1 Set add_generation_prompt (default: 1)\n");
+    LOG_ERR("  --enable-reasoning=0|1  Enable reasoning parsing (default: 1)\n");
+    LOG_ERR("  --output=MODE           Output mode: analysis, template, both (default: both)\n");
+    LOG_ERR("  --input-message=TYPE    Message type to render:\n");
+    LOG_ERR("                          content_only, reasoning_content, tool_call_only,\n");
+    LOG_ERR("                          content_tool_call, reasoning_tool_call,\n");
+    LOG_ERR("                          content_fake_tool_call, all\n");
+    LOG_ERR("\nExamples:\n");
+    LOG_ERR("  %s template.jinja --input-message=all --generation-prompt=1\n", program_name);
+    LOG_ERR("  %s template.jinja --output=template --input-message=tool_call_only\n", program_name);
+}
+
+static bool parse_bool_option(const std::string & value) {
+    return value == "1" || value == "true" || value == "yes";
+}
+
+static bool parse_options(int argc, char ** argv, debug_options & opts) {
+    if (argc < 2) {
+        print_usage(argv[0]);
+        return false;
+    }
+
+    opts.template_path = argv[1];
+
+    for (int i = 2; i < argc; ++i) {
+        std::string arg = argv[i];
+
+        if (arg == "--no-tools") {
+            opts.with_tools = false;
+        } else if (arg.rfind("--generation-prompt=", 0) == 0) {
+            opts.generation_prompt = parse_bool_option(arg.substr(20));
+        } else if (arg.rfind("--enable-reasoning=", 0) == 0) {
+            opts.enable_reasoning = parse_bool_option(arg.substr(19));
+        } else if (arg.rfind("--output=", 0) == 0) {
+            std::string mode = arg.substr(9);
+            if (mode == "analysis") {
+                opts.mode = output_mode::ANALYSIS;
+            } else if (mode == "template") {
+                opts.mode = output_mode::TEMPLATE;
+            } else if (mode == "both") {
+                opts.mode = output_mode::BOTH;
+            } else {
+                LOG_ERR("Unknown output mode: %s\n", mode.c_str());
+                return false;
+            }
+        } else if (arg.rfind("--input-message=", 0) == 0) {
+            std::string type = arg.substr(16);
+            if (type == "content_only") {
+                opts.input_message = input_message_type::CONTENT_ONLY;
+            } else if (type == "reasoning_content") {
+                opts.input_message = input_message_type::REASONING_CONTENT;
+            } else if (type == "tool_call_only") {
+                opts.input_message = input_message_type::TOOL_CALL_ONLY;
+            } else if (type == "content_tool_call") {
+                opts.input_message = input_message_type::CONTENT_TOOL_CALL;
+            } else if (type == "reasoning_tool_call") {
+                opts.input_message = input_message_type::REASONING_TOOL_CALL;
+            } else if (type == "content_fake_tool_call") {
+                opts.input_message = input_message_type::CONTENT_FAKE_TOOL_CALL;
+            } else if (type == "all") {
+                opts.input_message = input_message_type::ALL;
+            } else {
+                LOG_ERR("Unknown input message type: %s\n", type.c_str());
+                return false;
+            }
+        } else {
+            LOG_ERR("Unknown option: %s\n", arg.c_str());
+            print_usage(argv[0]);
+            return false;
+        }
+    }
+
+    return true;
+}
+
+static json build_user_message() {
+    return json{
+        { "role",    "user"                               },
+        { "content", "Hello, please help me with a task." }
+    };
+}
+
+static json build_content_only_message() {
+    return json{
+        { "role",    "assistant"                                   },
+        { "content", "Hello! I'm here to help you with your task." }
+    };
+}
+
+static json build_reasoning_content_message() {
+    return json{
+        { "role",              "assistant"                                                               },
+        { "content",           "Hello! I'm here to help you with your task."                             },
+        { "reasoning_content", "The user is greeting me and asking for help. I should respond politely." }
+    };
+}
+
+static json build_tool_call_only_message() {
+    return json{
+        { "role",       "assistant"      },
+        { "content",    nullptr          },
+        { "tool_calls",
+         json::array({ json{
+              { "type", "function" },
+              { "function", json{ { "name", "test_function_name" },
+                                  { "arguments", json::object({ { "param1", "value1" }, { "param2", "value2" } }) } } },
+              { "id", "123456789" } } }) }
+    };
+}
+
+static json build_content_tool_call_message() {
+    return json{
+        { "role",       "assistant"                                                                              },
+        { "content",    "I'll help you by calling a function."                                                   },
+        { "tool_calls",
+         json::array({ json{
+              { "type", "function" },
+              { "function",
+                json{ { "name", "test_function_name" },
+                      { "arguments", json::object({ { "param1", "value1" }, { "param2", "value2" } }) } } } } }) }
+    };
+}
+
+static json build_reasoning_tool_call_message() {
+    return json{
+        { "role",              "assistant"                                                                       },
+        { "content",           nullptr                                                                           },
+        { "reasoning_content", "I need to call a function to help with this task."                               },
+        { "tool_calls",
+         json::array({ json{
+              { "type", "function" },
+              { "function",
+                json{ { "name", "test_function_name" },
+                      { "arguments", json::object({ { "param1", "value1" }, { "param2", "value2" } }) } } } } }) }
+    };
+}
+
+static json build_content_fake_tool_call_message() {
+    // This message has content but NO tool_calls field
+    // It's used to test if a template renders tool definitions but not tool calls
+    return json{
+        { "role",    "assistant"                            },
+        { "content", "I'll help you by calling a function." }
+    };
+}
+
+static json build_tools_definition() {
+    json parameters_schema                    = json::object();
+    parameters_schema["type"]                 = "object";
+    parameters_schema["properties"]           = json::object();
+    parameters_schema["properties"]["param1"] = json::object({
+        { "type",        "string"          },
+        { "description", "First parameter" }
+    });
+    parameters_schema["properties"]["param2"] = json::object({
+        { "type",        "string"           },
+        { "description", "Second parameter" }
+    });
+    parameters_schema["required"]             = json::array({ "param1", "param2" });
+
+    return json::array({
+        json{ { "type", "function" },
+             { "function", json{ { "name", "test_function_name" },
+                                  { "description", "A test function for debugging" },
+                                  { "parameters", parameters_schema } } } }
+    });
+}
+
+static void render_scenario(const common_chat_template & tmpl,
+                            const std::string &          scenario_name,
+                            const json &                 messages,
+                            const json &                 tools,
+                            bool                         add_generation_prompt,
+                            bool                         enable_thinking) {
+    LOG_ERR("\n=== Scenario: %s ===\n", scenario_name.c_str());
+    LOG_ERR("add_generation_prompt: %s, enable_thinking: %s\n", add_generation_prompt ? "true" : "false",
+            enable_thinking ? "true" : "false");
+
+    // When add_generation_prompt is true, add a trailing user message to trigger the prompt
+    json final_messages = messages;
+    if (add_generation_prompt && !messages.empty() && messages.back().value("role", "") == "assistant") {
+        final_messages.push_back(json{
+            { "role",    "user" },
+            { "content", "Now please continue with another response." }
+        });
+    }
+
+    LOG_ERR("Messages:\n%s\n", final_messages.dump(2).c_str());
+
+    try {
+        templates_params inputs;
+        inputs.messages                         = final_messages;
+        inputs.add_generation_prompt            = add_generation_prompt;
+        inputs.extra_context["enable_thinking"] = enable_thinking;
+
+        if (!tools.is_null() && tools.is_array() && !tools.empty()) {
+            inputs.tools = tools;
+        }
+
+        std::string output = common_chat_template_direct_apply(tmpl, inputs);
+
+        LOG_ERR("\n--- Rendered Output ---\n");
+        LOG_ERR("%s\n", output.c_str());
+        LOG_ERR("--- End Output (length: %zu) ---\n", output.length());
+    } catch (const std::exception & e) {
+        LOG_ERR("Rendering failed: %s\n", e.what());
+    }
+}
+
+static void render_all_scenarios(const common_chat_template & tmpl,
+                                 const json &                 tools,
+                                 bool                         add_generation_prompt,
+                                 bool                         enable_thinking,
+                                 input_message_type             message_type) {
+    json user_msg = build_user_message();
+
+    auto render_if = [&](input_message_type type, const std::string & name, const json & assistant_msg) {
+        if (message_type == input_message_type::ALL || message_type == type) {
+            json messages = json::array({ user_msg, assistant_msg });
+            render_scenario(tmpl, name, messages, tools, add_generation_prompt, enable_thinking);
+        }
+    };
+
+    render_if(input_message_type::CONTENT_ONLY, "content_only", build_content_only_message());
+    render_if(input_message_type::REASONING_CONTENT, "reasoning_content", build_reasoning_content_message());
+    render_if(input_message_type::TOOL_CALL_ONLY, "tool_call_only", build_tool_call_only_message());
+    render_if(input_message_type::CONTENT_TOOL_CALL, "content_tool_call", build_content_tool_call_message());
+    render_if(input_message_type::REASONING_TOOL_CALL, "reasoning_tool_call", build_reasoning_tool_call_message());
+    render_if(input_message_type::CONTENT_FAKE_TOOL_CALL, "content_fake_tool_call",
+              build_content_fake_tool_call_message());
+
+    // Also render with add_generation_prompt=true to show the prompt ending
+    if (message_type == input_message_type::ALL) {
+        LOG_ERR("\n\n=== Generation Prompt Scenarios (add_generation_prompt=true) ===\n");
+
+        json prompt_messages = json::array({ user_msg });
+        render_scenario(tmpl, "generation_prompt_only", prompt_messages, tools, true, enable_thinking);
+
+        // With enable_thinking toggled
+        render_scenario(tmpl, "generation_prompt_thinking_disabled", prompt_messages, tools, true, false);
+    }
+}
+
+static const char * reasoning_mode_to_str(content_structure::reasoning_mode_type mode) {
+    switch (mode) {
+        case content_structure::REASONING_NONE:
+            return "NONE";
+        case content_structure::REASONING_OPTIONAL:
+            return "OPTIONAL";
+        case content_structure::REASONING_FORCED_OPEN:
+            return "FORCED_OPEN";
+    }
+    return "UNKNOWN";
+}
+
+static const char * content_mode_to_str(content_structure::content_mode_type mode) {
+    switch (mode) {
+        case content_structure::CONTENT_PLAIN:
+            return "PLAIN";
+        case content_structure::CONTENT_ALWAYS_WRAPPED:
+            return "ALWAYS_WRAPPED";
+        case content_structure::CONTENT_WRAPPED_WITH_REASONING:
+            return "WRAPPED_WITH_REASONING";
+    }
+    return "UNKNOWN";
+}
+
+static const char * function_format_to_str(enum tool_call_structure::function_format fmt) {
+    switch (fmt) {
+        case tool_call_structure::FUNC_JSON_OBJECT:
+            return "JSON_OBJECT";
+        case tool_call_structure::FUNC_TAG_WITH_NAME:
+            return "TAG_WITH_NAME";
+        case tool_call_structure::FUNC_TAG_NAME_ONLY:
+            return "TAG_NAME_ONLY";
+        case tool_call_structure::FUNC_PREFIXED_INDEXED:
+            return "PREFIXED_INDEXED";
+        case tool_call_structure::FUNC_NAME_AS_KEY:
+            return "NAME_AS_KEY";
+        case tool_call_structure::FUNC_BRACKET_TAG:
+            return "BRACKET_TAG";
+        case tool_call_structure::FUNC_RECIPIENT_BASED:
+            return "RECIPIENT_BASED";
+        case tool_call_structure::FUNC_MARKDOWN_CODE_BLOCK:
+            return "MARKDOWN_CODE_BLOCK";
+    }
+    return "UNKNOWN";
+}
+
+static const char * argument_format_to_str(enum tool_call_structure::argument_format fmt) {
+    switch (fmt) {
+        case tool_call_structure::ARGS_JSON:
+            return "JSON";
+        case tool_call_structure::ARGS_TAGGED:
+            return "TAGGED";
+        case tool_call_structure::ARGS_KEY_VALUE_TAGS:
+            return "KEY_VALUE_TAGS";
+    }
+    return "UNKNOWN";
+}
+
+int main(int argc, char ** argv) {
+    // Set log level to most verbose to capture all debug output
+    common_log_set_verbosity_thold(99);
+
+    debug_options opts;
+    if (!parse_options(argc, argv, opts)) {
+        return 1;
+    }
+
+    std::string template_source;
+    try {
+        // Check if the file is a GGUF file
+        if (opts.template_path.size() >= 5 &&
+            opts.template_path.compare(opts.template_path.size() - 5, 5, ".gguf") == 0) {
+            template_source = read_gguf_chat_template(opts.template_path);
+        } else {
+            template_source = read_file(opts.template_path);
+        }
+    } catch (const std::exception & e) {
+        LOG_ERR("Error reading template: %s\n", e.what());
+        return 1;
+    }
+
+    LOG_ERR("Analyzing template: %s\n", opts.template_path.c_str());
+    LOG_ERR("Options: with_tools=%s, generation_prompt=%s, enable_reasoning=%s\n", opts.with_tools ? "true" : "false",
+            opts.generation_prompt ? "true" : "false", opts.enable_reasoning ? "true" : "false");
+
+    try {
+        common_chat_template chat_template(template_source, "", "");
+
+        // Build tools definition
+        json tools = opts.with_tools ? build_tools_definition() : json();
+
+        // Render template scenarios if requested
+        if (opts.input_message != input_message_type::NONE &&
+            (opts.mode == output_mode::TEMPLATE || opts.mode == output_mode::BOTH)) {
+            LOG_ERR("\n");
+            LOG_ERR("================================================================================\n");
+            LOG_ERR("                         TEMPLATE RENDERING OUTPUT\n");
+            LOG_ERR("================================================================================\n");
+
+            render_all_scenarios(chat_template, tools, opts.generation_prompt, opts.enable_reasoning,
+                                 opts.input_message);
+        }
+
+        // Output analysis if requested
+        if (opts.mode == output_mode::ANALYSIS || opts.mode == output_mode::BOTH) {
+            LOG_ERR("\n");
+            LOG_ERR("================================================================================\n");
+            LOG_ERR("                           TEMPLATE ANALYSIS\n");
+            LOG_ERR("================================================================================\n");
+
+            template_analysis_result analysis = template_analyzer::analyze_template(chat_template);
+
+            LOG_ERR("\n=== Analysis Results ===\n");
+
+            LOG_ERR("\n--- Content Structure (Phase 1) ---\n");
+            LOG_ERR("reasoning_mode: %s\n", reasoning_mode_to_str(analysis.content.reasoning_mode));
+            LOG_ERR("reasoning_start: '%s'\n", analysis.content.reasoning_start.c_str());
+            LOG_ERR("reasoning_end: '%s'\n", analysis.content.reasoning_end.c_str());
+            LOG_ERR("content_mode: %s\n", content_mode_to_str(analysis.content.content_mode));
+            LOG_ERR("content_start: '%s'\n", analysis.content.content_start.c_str());
+            LOG_ERR("content_end: '%s'\n", analysis.content.content_end.c_str());
+
+            LOG_ERR("\n--- Tool Structure (Phase 2) ---\n");
+            LOG_ERR("supports_tools: %s\n", analysis.tools.supports_tools ? "true" : "false");
+            LOG_ERR("function_format: %s\n", function_format_to_str(analysis.tools.function_format));
+            LOG_ERR("argument_format: %s\n", argument_format_to_str(analysis.tools.argument_format));
+            LOG_ERR("tool_section_start: '%s'\n", analysis.tools.tool_section_start.c_str());
+            LOG_ERR("tool_section_end: '%s'\n", analysis.tools.tool_section_end.c_str());
+            LOG_ERR("function_prefix: '%s'\n", analysis.tools.function_prefix.c_str());
+            LOG_ERR("function_suffix: '%s'\n", analysis.tools.function_suffix.c_str());
+            LOG_ERR("function_close: '%s'\n", analysis.tools.function_close.c_str());
+            LOG_ERR("arg_prefix: '%s'\n", analysis.tools.arg_prefix.c_str());
+            LOG_ERR("arg_suffix: '%s'\n", analysis.tools.arg_suffix.c_str());
+            LOG_ERR("arg_close: '%s'\n", analysis.tools.arg_close.c_str());
+            LOG_ERR("name_field: '%s'\n", analysis.tools.name_field.c_str());
+            LOG_ERR("args_field: '%s'\n", analysis.tools.args_field.c_str());
+            LOG_ERR("id_field: '%s'\n", analysis.tools.id_field.c_str());
+
+            // Additional fields for special formats
+            if (analysis.tools.function_format == tool_call_structure::FUNC_PREFIXED_INDEXED) {
+                LOG_ERR("\n--- Prefixed-Indexed Format Details ---\n");
+                LOG_ERR("per_call_start: '%s'\n", analysis.tools.per_call_start.c_str());
+                LOG_ERR("function_namespace: '%s'\n", analysis.tools.function_namespace.c_str());
+                LOG_ERR("args_marker: '%s'\n", analysis.tools.args_marker.c_str());
+                LOG_ERR("per_call_end: '%s'\n", analysis.tools.per_call_end.c_str());
+            }
+            if (analysis.tools.function_format == tool_call_structure::FUNC_BRACKET_TAG) {
+                LOG_ERR("\n--- Bracket-Tag Format Details ---\n");
+                LOG_ERR("per_call_start: '%s'\n", analysis.tools.per_call_start.c_str());
+                LOG_ERR("id_marker: '%s'\n", analysis.tools.id_marker.c_str());
+                LOG_ERR("args_marker: '%s'\n", analysis.tools.args_marker.c_str());
+            }
+
+            // Generate Parser
+            templates_params params;
+            params.messages = json::array();
+            params.reasoning_format =
+                opts.enable_reasoning ? COMMON_REASONING_FORMAT_DEEPSEEK : COMMON_REASONING_FORMAT_NONE;
+            params.enable_thinking       = opts.enable_reasoning;
+            params.add_generation_prompt = opts.generation_prompt;
+
+            if (opts.with_tools) {
+                params.tools       = tools;
+                params.tool_choice = COMMON_CHAT_TOOL_CHOICE_AUTO;
+            } else {
+                params.tools       = json();
+                params.tool_choice = COMMON_CHAT_TOOL_CHOICE_NONE;
+            }
+            params.parallel_tool_calls = false;
+
+            auto parser_data = universal_peg_generator::generate_parser(analysis, chat_template, params);
+
+            LOG_ERR("\n=== Generated Parser ===\n");
+            LOG_ERR("%s\n", json::parse(parser_data.parser).dump(4).c_str());
+
+            LOG_ERR("\n=== Generated Grammar ===\n");
+            LOG_ERR("%s\n", parser_data.grammar.c_str());
+
+            LOG_ERR("\n=== Generated Lazy Grammar ===\n");
+            LOG_ERR("%d\n", parser_data.grammar_lazy);
+
+            LOG_ERR("\n=== Generated Grammar Triggers ===\n");
+            for (const common_grammar_trigger & cgt : parser_data.grammar_triggers) {
+                LOG_ERR("Token: %d | Type: %d | Value: %s\n", cgt.token, cgt.type, cgt.value.c_str());
+            }
+
+            LOG_ERR("\n=== Preserved Tokens ===\n");
+            for (const std::string & token : parser_data.preserved_tokens) {
+                LOG_ERR("  '%s'\n", token.c_str());
+            }
+
+            LOG_ERR("\n=== Verifying created grammar ===\n");
+            auto * grammar = llama_grammar_init_impl(nullptr, parser_data.grammar.c_str(), "root",
+                                                     parser_data.grammar_lazy, nullptr, 0, nullptr, 0);
+            if (grammar != nullptr) {
+                LOG_ERR("\n=== Grammar successfully created ===\n");
+            }
+        }
+    } catch (const std::exception & e) {
+        LOG_ERR("Analysis failed: %s\n", e.what());
+        return 1;
+    }
+
+    return 0;
+}
diff --git a/tools/server/server-context.cpp b/tools/server/server-context.cpp
index 7f9c3c566b..b6f0333a38 100644
--- a/tools/server/server-context.cpp
+++ b/tools/server/server-context.cpp
@@ -15,6 +15,7 @@
 #include <cstddef>
 #include <cinttypes>
 #include <memory>
+#include <stdexcept>
 #include <filesystem>
 
 // fix problem with std::min and std::max
@@ -2741,7 +2742,15 @@ private:
 
                 slot.i_batch = -1;
 
-                common_sampler_accept(slot.smpl.get(), id, true);
+                try {
+                    common_sampler_accept(slot.smpl.get(), id, true);
+                } catch (std::runtime_error & e) {
+                    SLT_ERR(slot, "Error when accepting token for sampler: %s\n", e.what());
+                    send_error(slot, std::string("Error when accepting token for sampler: ") + e.what(), ERROR_TYPE_SERVER);
+                    slot.release();
+                    slot.i_batch = -1;
+                    continue; // continue loop of slots
+                }
 
                 // here we have synchronized the llama_context (due to the sampling above), so we can do time measurement
                 const int64_t t_current = ggml_time_us();
diff --git a/tools/server/server-task.cpp b/tools/server/server-task.cpp
index 2d25db63b7..a2bc514bf0 100644
--- a/tools/server/server-task.cpp
+++ b/tools/server/server-task.cpp
@@ -1,12 +1,12 @@
-#include "server-common.h"
 #include "server-task.h"
 
-#include "common.h"
-#include "llama.h"
 #include "chat.h"
+#include "common.h"
+#include "json-schema-to-grammar.h"
+#include "llama.h"
 #include "sampling.h"
 #include "speculative.h"
-#include "json-schema-to-grammar.h"
+#include "server-common.h"
 
 using json = nlohmann::ordered_json;
 
@@ -18,8 +18,8 @@ json task_params::format_logit_bias(const std::vector<llama_logit_bias> & logit_
     json data = json::array();
     for (const auto & lb : logit_bias) {
         data.push_back(json{
-            {"bias", lb.bias},
-            {"token", lb.token},
+            { "bias",  lb.bias  },
+            { "token", lb.token },
         });
     }
     return data;
@@ -34,41 +34,44 @@ json task_params::to_json(bool only_metrics) const {
 
     json lora = json::array();
     for (auto & it : this->lora) {
-        lora.push_back({{"id", it.first}, {"scale", it.second}});
+        lora.push_back({
+            { "id",    it.first  },
+            { "scale", it.second }
+        });
     }
 
     if (only_metrics) {
-        return json {
-            {"seed",                      sampling.seed},
-            {"temperature",               sampling.temp},
-            {"dynatemp_range",            sampling.dynatemp_range},
-            {"dynatemp_exponent",         sampling.dynatemp_exponent},
-            {"top_k",                     sampling.top_k},
-            {"top_p",                     sampling.top_p},
-            {"min_p",                     sampling.min_p},
-            {"top_n_sigma",               sampling.top_n_sigma},
-            {"xtc_probability",           sampling.xtc_probability},
-            {"xtc_threshold",             sampling.xtc_threshold},
-            {"typical_p",                 sampling.typ_p},
-            {"repeat_last_n",             sampling.penalty_last_n},
-            {"repeat_penalty",            sampling.penalty_repeat},
-            {"presence_penalty",          sampling.penalty_present},
-            {"frequency_penalty",         sampling.penalty_freq},
-            {"dry_multiplier",            sampling.dry_multiplier},
-            {"dry_base",                  sampling.dry_base},
-            {"dry_allowed_length",        sampling.dry_allowed_length},
-            {"dry_penalty_last_n",        sampling.dry_penalty_last_n},
-            {"mirostat",                  sampling.mirostat},
-            {"mirostat_tau",              sampling.mirostat_tau},
-            {"mirostat_eta",              sampling.mirostat_eta},
-            {"max_tokens",                n_predict},
-            {"n_predict",                 n_predict}, // TODO: deduplicate?
-            {"n_keep",                    n_keep},
-            {"n_discard",                 n_discard},
-            {"ignore_eos",                sampling.ignore_eos},
-            {"stream",                    stream},
-            {"n_probs",                   sampling.n_probs},
-            {"min_keep",                  sampling.min_keep},
+        return json{
+            { "seed",                 sampling.seed                                                        },
+            { "temperature",          sampling.temp                                                        },
+            { "dynatemp_range",       sampling.dynatemp_range                                              },
+            { "dynatemp_exponent",    sampling.dynatemp_exponent                                           },
+            { "top_k",                sampling.top_k                                                       },
+            { "top_p",                sampling.top_p                                                       },
+            { "min_p",                sampling.min_p                                                       },
+            { "top_n_sigma",          sampling.top_n_sigma                                                 },
+            { "xtc_probability",      sampling.xtc_probability                                             },
+            { "xtc_threshold",        sampling.xtc_threshold                                               },
+            { "typical_p",            sampling.typ_p                                                       },
+            { "repeat_last_n",        sampling.penalty_last_n                                              },
+            { "repeat_penalty",       sampling.penalty_repeat                                              },
+            { "presence_penalty",     sampling.penalty_present                                             },
+            { "frequency_penalty",    sampling.penalty_freq                                                },
+            { "dry_multiplier",       sampling.dry_multiplier                                              },
+            { "dry_base",             sampling.dry_base                                                    },
+            { "dry_allowed_length",   sampling.dry_allowed_length                                          },
+            { "dry_penalty_last_n",   sampling.dry_penalty_last_n                                          },
+            { "mirostat",             sampling.mirostat                                                    },
+            { "mirostat_tau",         sampling.mirostat_tau                                                },
+            { "mirostat_eta",         sampling.mirostat_eta                                                },
+            { "max_tokens",           n_predict                                                            },
+            { "n_predict",            n_predict                                                            }, // TODO: deduplicate?
+            { "n_keep",               n_keep                                                               },
+            { "n_discard",            n_discard                                                            },
+            { "ignore_eos",           sampling.ignore_eos                                                  },
+            { "stream",               stream                                                               },
+            { "n_probs",              sampling.n_probs                                                     },
+            { "min_keep",             sampling.min_keep                                                    },
             {"chat_format",               common_chat_format_name(chat_parser_params.format)},
             {"reasoning_format",          common_reasoning_format_name(chat_parser_params.reasoning_format)},
             {"reasoning_in_content",      chat_parser_params.reasoning_in_content},
@@ -95,44 +98,44 @@ json task_params::to_json(bool only_metrics) const {
         grammar_triggers.push_back(ct.to_json());
     }
 
-    return json {
-        {"seed",                      sampling.seed},
-        {"temperature",               sampling.temp},
-        {"dynatemp_range",            sampling.dynatemp_range},
-        {"dynatemp_exponent",         sampling.dynatemp_exponent},
-        {"top_k",                     sampling.top_k},
-        {"top_p",                     sampling.top_p},
-        {"min_p",                     sampling.min_p},
-        {"top_n_sigma",               sampling.top_n_sigma},
-        {"xtc_probability",           sampling.xtc_probability},
-        {"xtc_threshold",             sampling.xtc_threshold},
-        {"typical_p",                 sampling.typ_p},
-        {"repeat_last_n",             sampling.penalty_last_n},
-        {"repeat_penalty",            sampling.penalty_repeat},
-        {"presence_penalty",          sampling.penalty_present},
-        {"frequency_penalty",         sampling.penalty_freq},
-        {"dry_multiplier",            sampling.dry_multiplier},
-        {"dry_base",                  sampling.dry_base},
-        {"dry_allowed_length",        sampling.dry_allowed_length},
-        {"dry_penalty_last_n",        sampling.dry_penalty_last_n},
-        {"dry_sequence_breakers",     sampling.dry_sequence_breakers},
-        {"mirostat",                  sampling.mirostat},
-        {"mirostat_tau",              sampling.mirostat_tau},
-        {"mirostat_eta",              sampling.mirostat_eta},
-        {"stop",                      antiprompt},
-        {"max_tokens",                n_predict},
-        {"n_predict",                 n_predict}, // TODO: deduplicate?
-        {"n_keep",                    n_keep},
-        {"n_discard",                 n_discard},
-        {"ignore_eos",                sampling.ignore_eos},
-        {"stream",                    stream},
-        {"logit_bias",                format_logit_bias(sampling.logit_bias)},
-        {"n_probs",                   sampling.n_probs},
-        {"min_keep",                  sampling.min_keep},
-        {"grammar",                   sampling.grammar},
-        {"grammar_lazy",              sampling.grammar_lazy},
-        {"grammar_triggers",          grammar_triggers},
-        {"preserved_tokens",          sampling.preserved_tokens},
+    return json{
+        { "seed",                  sampling.seed                                                        },
+        { "temperature",           sampling.temp                                                        },
+        { "dynatemp_range",        sampling.dynatemp_range                                              },
+        { "dynatemp_exponent",     sampling.dynatemp_exponent                                           },
+        { "top_k",                 sampling.top_k                                                       },
+        { "top_p",                 sampling.top_p                                                       },
+        { "min_p",                 sampling.min_p                                                       },
+        { "top_n_sigma",           sampling.top_n_sigma                                                 },
+        { "xtc_probability",       sampling.xtc_probability                                             },
+        { "xtc_threshold",         sampling.xtc_threshold                                               },
+        { "typical_p",             sampling.typ_p                                                       },
+        { "repeat_last_n",         sampling.penalty_last_n                                              },
+        { "repeat_penalty",        sampling.penalty_repeat                                              },
+        { "presence_penalty",      sampling.penalty_present                                             },
+        { "frequency_penalty",     sampling.penalty_freq                                                },
+        { "dry_multiplier",        sampling.dry_multiplier                                              },
+        { "dry_base",              sampling.dry_base                                                    },
+        { "dry_allowed_length",    sampling.dry_allowed_length                                          },
+        { "dry_penalty_last_n",    sampling.dry_penalty_last_n                                          },
+        { "dry_sequence_breakers", sampling.dry_sequence_breakers                                       },
+        { "mirostat",              sampling.mirostat                                                    },
+        { "mirostat_tau",          sampling.mirostat_tau                                                },
+        { "mirostat_eta",          sampling.mirostat_eta                                                },
+        { "stop",                  antiprompt                                                           },
+        { "max_tokens",            n_predict                                                            },
+        { "n_predict",             n_predict                                                            }, // TODO: deduplicate?
+        { "n_keep",                n_keep                                                               },
+        { "n_discard",             n_discard                                                            },
+        { "ignore_eos",            sampling.ignore_eos                                                  },
+        { "stream",                stream                                                               },
+        { "logit_bias",            format_logit_bias(sampling.logit_bias)                               },
+        { "n_probs",               sampling.n_probs                                                     },
+        { "min_keep",              sampling.min_keep                                                    },
+        { "grammar",               sampling.grammar                                                     },
+        { "grammar_lazy",          sampling.grammar_lazy                                                },
+        { "grammar_triggers",      grammar_triggers                                                     },
+        { "preserved_tokens",      sampling.preserved_tokens                                            },
         {"chat_format",               common_chat_format_name(chat_parser_params.format)},
         {"reasoning_format",          common_reasoning_format_name(chat_parser_params.reasoning_format)},
         {"reasoning_in_content",      chat_parser_params.reasoning_in_content},
@@ -156,21 +159,75 @@ json task_params::to_json(bool only_metrics) const {
 //
 // task_result_state
 //
-common_chat_msg task_result_state::update_chat_msg(
-        const std::string & text_added,
-        bool is_partial,
-        std::vector<common_chat_msg_diff> & diffs) {
+common_chat_msg task_result_state::update_chat_msg(const std::string &                 text_added,
+                                                   bool                                is_partial,
+                                                   std::vector<common_chat_msg_diff> & diffs,
+                                                   bool                                filter_tool_calls) {
     generated_text += text_added;
     auto msg_prv_copy = chat_msg;
     SRV_DBG("Parsing chat message: %s\n", generated_text.c_str());
-    auto new_msg = common_chat_parse(
-        generated_text,
-        is_partial,
-        chat_parser_params);
+    auto new_msg = common_chat_parse(generated_text, is_partial, chat_parser_params);
     if (!new_msg.empty()) {
         new_msg.set_tool_call_ids(generated_tool_call_ids, gen_tool_call_id);
-        chat_msg = new_msg;
-        diffs = common_chat_msg_diff::compute_diffs(msg_prv_copy, new_msg.empty() ? msg_prv_copy : new_msg);
+        chat_msg       = new_msg;
+        auto all_diffs = common_chat_msg_diff::compute_diffs(msg_prv_copy, chat_msg);
+
+        if (!filter_tool_calls) {
+            diffs = std::move(all_diffs);
+        } else {
+            for (auto & d : all_diffs) {
+                // If this is a new type of delta, flush all currently pending tool call names
+                for (size_t i = 0; i < chat_msg.tool_calls.size(); ++i) {
+                    if (sent_tool_call_names.count(i) || chat_msg.tool_calls[i].name.empty()) {
+                        continue;
+                    }
+                    if (d.tool_call_index != i || !d.tool_call_delta.arguments.empty()) {
+                        common_chat_msg_diff header;
+                        header.tool_call_index      = i;
+                        header.tool_call_delta.id   = chat_msg.tool_calls[i].id;
+                        header.tool_call_delta.name = chat_msg.tool_calls[i].name;
+                        diffs.push_back(std::move(header));
+                        sent_tool_call_names.insert(i);
+                    }
+                }
+
+                if (d.tool_call_index == std::string::npos) {
+                    diffs.push_back(std::move(d));
+                } else {
+                    size_t i = d.tool_call_index;
+                    if (sent_tool_call_names.count(i)) {
+                        if (!d.tool_call_delta.arguments.empty()) {
+                            d.tool_call_delta.name = "";
+                            d.tool_call_delta.id   = "";
+                            diffs.push_back(std::move(d));
+                        }
+                    } else {
+                        // Not sent yet.
+                        if (!d.tool_call_delta.arguments.empty() || !is_partial) {
+                            d.tool_call_delta.name = chat_msg.tool_calls[i].name;
+                            d.tool_call_delta.id   = chat_msg.tool_calls[i].id;
+                            diffs.push_back(std::move(d));
+                            sent_tool_call_names.insert(i);
+                        } else {
+                            // Suppress
+                        }
+                    }
+                }
+            }
+            // Final check at EOF
+            if (!is_partial) {
+                for (size_t i = 0; i < chat_msg.tool_calls.size(); ++i) {
+                    if (!sent_tool_call_names.count(i) && !chat_msg.tool_calls[i].name.empty()) {
+                        common_chat_msg_diff header;
+                        header.tool_call_index      = i;
+                        header.tool_call_delta.id   = chat_msg.tool_calls[i].id;
+                        header.tool_call_delta.name = chat_msg.tool_calls[i].name;
+                        diffs.push_back(std::move(header));
+                        sent_tool_call_names.insert(i);
+                    }
+                }
+            }
+        }
     }
     return chat_msg;
 }
@@ -179,11 +236,10 @@ common_chat_msg task_result_state::update_chat_msg(
 // server_task
 //
 
-task_params server_task::params_from_json_cmpl(
-        const llama_vocab * vocab,
-        const common_params & params_base,
-        const int n_ctx_slot,
-        const json & data) {
+task_params server_task::params_from_json_cmpl(const llama_vocab *   vocab,
+                                               const common_params & params_base,
+                                               const int             n_ctx_slot,
+                                               const json &          data) {
     task_params params;
 
     // Sampling parameter defaults are loaded from the global server context (but individual requests can still them)
@@ -213,8 +269,8 @@ task_params server_task::params_from_json_cmpl(
     params.n_cmpl           = json_value(data,       "n_cmpl",             json_value(data, "n", 1));
     params.n_cache_reuse    = json_value(data,       "n_cache_reuse",      defaults.n_cache_reuse);
     //params.t_max_prompt_ms  = json_value(data,       "t_max_prompt_ms",    defaults.t_max_prompt_ms); // TODO: implement
-    params.t_max_predict_ms = json_value(data,       "t_max_predict_ms",   defaults.t_max_predict_ms);
-    params.response_fields  = json_value(data,       "response_fields",    std::vector<std::string>());
+    params.t_max_predict_ms = json_value(data, "t_max_predict_ms", defaults.t_max_predict_ms);
+    params.response_fields  = json_value(data, "response_fields", std::vector<std::string>());
 
     params.sampling.top_k              = json_value(data, "top_k",               defaults.sampling.top_k);
     params.sampling.top_p              = json_value(data, "top_p",               defaults.sampling.top_p);
@@ -266,7 +322,7 @@ task_params server_task::params_from_json_cmpl(
     params.speculative.ngram_min_hits   = std::max(std::min(1, (int) params.speculative.ngram_min_hits),   1024);
 
     // Use OpenAI API logprobs only if n_probs wasn't provided
-    if (data.contains("logprobs") && params.sampling.n_probs == defaults.sampling.n_probs){
+    if (data.contains("logprobs") && params.sampling.n_probs == defaults.sampling.n_probs) {
         params.sampling.n_probs = json_value(data, "logprobs", defaults.sampling.n_probs);
     }
 
@@ -309,7 +365,8 @@ task_params server_task::params_from_json_cmpl(
         // Ref: https://github.com/oobabooga/text-generation-webui/blob/d1af7a41ade7bd3c3a463bfa640725edb818ebaf/extensions/openai/typing.py#L39
 
         if (data.contains("dry_sequence_breakers")) {
-            params.sampling.dry_sequence_breakers = json_value(data, "dry_sequence_breakers", std::vector<std::string>());
+            params.sampling.dry_sequence_breakers =
+                json_value(data, "dry_sequence_breakers", std::vector<std::string>());
             if (params.sampling.dry_sequence_breakers.empty()) {
                 throw std::runtime_error("Error: dry_sequence_breakers must be a non-empty array of strings");
             }
@@ -319,15 +376,15 @@ task_params server_task::params_from_json_cmpl(
     // process "json_schema" and "grammar"
     if (data.contains("json_schema") && !data.contains("grammar")) {
         try {
-            auto schema                  = json_value(data, "json_schema", json::object());
+            auto schema = json_value(data, "json_schema", json::object());
             SRV_DBG("JSON schema: %s\n", schema.dump(2).c_str());
-            params.sampling.grammar      = json_schema_to_grammar(schema);
+            params.sampling.grammar = json_schema_to_grammar(schema);
             SRV_DBG("Converted grammar: %s\n", params.sampling.grammar.c_str());
         } catch (const std::exception & e) {
             throw std::runtime_error(std::string("\"json_schema\": ") + e.what());
         }
     } else {
-        params.sampling.grammar      = json_value(data, "grammar", defaults.sampling.grammar);
+        params.sampling.grammar = json_value(data, "grammar", defaults.sampling.grammar);
         SRV_DBG("Grammar: %s\n", params.sampling.grammar.c_str());
         params.sampling.grammar_lazy = json_value(data, "grammar_lazy", defaults.sampling.grammar_lazy);
         SRV_DBG("Grammar lazy: %s\n", params.sampling.grammar_lazy ? "true" : "false");
@@ -346,9 +403,10 @@ task_params server_task::params_from_json_cmpl(
             reasoning_format = common_reasoning_format_from_name(data.at("reasoning_format").get<std::string>());
         }
         params.chat_parser_params.reasoning_format = reasoning_format;
-        params.chat_parser_params.reasoning_in_content = params.stream && (reasoning_format == COMMON_REASONING_FORMAT_DEEPSEEK_LEGACY);
+        params.chat_parser_params.reasoning_in_content =
+            params.stream && (reasoning_format == COMMON_REASONING_FORMAT_DEEPSEEK_LEGACY);
         params.chat_parser_params.thinking_forced_open = json_value(data, "thinking_forced_open", false);
-        params.chat_parser_params.parse_tool_calls = json_value(data, "parse_tool_calls", false);
+        params.chat_parser_params.parse_tool_calls     = json_value(data, "parse_tool_calls", false);
         if (data.contains("chat_parser")) {
             params.chat_parser_params.parser.load(data.at("chat_parser").get<std::string>());
         }
@@ -358,7 +416,8 @@ task_params server_task::params_from_json_cmpl(
         const auto preserved_tokens = data.find("preserved_tokens");
         if (preserved_tokens != data.end()) {
             for (const auto & t : *preserved_tokens) {
-                auto ids = common_tokenize(vocab, t.get<std::string>(), /* add_special= */ false, /* parse_special= */ true);
+                auto ids =
+                    common_tokenize(vocab, t.get<std::string>(), /* add_special= */ false, /* parse_special= */ true);
                 if (ids.size() == 1) {
                     SRV_DBG("Preserved token: %d\n", ids[0]);
                     params.sampling.preserved_tokens.insert(ids[0]);
@@ -377,18 +436,20 @@ task_params server_task::params_from_json_cmpl(
                     auto ids = common_tokenize(vocab, word, /* add_special= */ false, /* parse_special= */ true);
                     if (ids.size() == 1) {
                         auto token = ids[0];
-                        if (std::find(params.sampling.preserved_tokens.begin(), params.sampling.preserved_tokens.end(), (llama_token) token) == params.sampling.preserved_tokens.end()) {
-                            throw std::runtime_error("Grammar trigger word should be marked as preserved token: " + word);
+                        if (std::find(params.sampling.preserved_tokens.begin(), params.sampling.preserved_tokens.end(),
+                                      (llama_token) token) == params.sampling.preserved_tokens.end()) {
+                            throw std::runtime_error("Grammar trigger word should be marked as preserved token: " +
+                                                     word);
                         }
                         SRV_DBG("Grammar trigger token: %d (`%s`)\n", token, word.c_str());
                         common_grammar_trigger trigger;
-                        trigger.type = COMMON_GRAMMAR_TRIGGER_TYPE_TOKEN;
+                        trigger.type  = COMMON_GRAMMAR_TRIGGER_TYPE_TOKEN;
                         trigger.value = word;
                         trigger.token = token;
                         params.sampling.grammar_triggers.push_back(std::move(trigger));
                     } else {
                         SRV_DBG("Grammar trigger word: `%s`\n", word.c_str());
-                        params.sampling.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, word});
+                        params.sampling.grammar_triggers.push_back({ COMMON_GRAMMAR_TRIGGER_TYPE_WORD, word });
                     }
                 } else {
                     if (ct.value.type == COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN) {
@@ -428,12 +489,12 @@ task_params server_task::params_from_json_cmpl(
                     if (el[0].is_number_integer()) {
                         llama_token tok = el[0].get<llama_token>();
                         if (tok >= 0 && tok < n_vocab) {
-                            params.sampling.logit_bias.push_back({tok, bias});
+                            params.sampling.logit_bias.push_back({ tok, bias });
                         }
                     } else if (el[0].is_string()) {
                         auto toks = common_tokenize(vocab, el[0].get<std::string>(), false);
                         for (auto tok : toks) {
-                            params.sampling.logit_bias.push_back({tok, bias});
+                            params.sampling.logit_bias.push_back({ tok, bias });
                         }
                     }
                 }
@@ -441,8 +502,8 @@ task_params server_task::params_from_json_cmpl(
         } else if (logit_bias != data.end() && logit_bias->is_object()) {
             const int n_vocab = llama_vocab_n_tokens(vocab);
             for (const auto & el : logit_bias->items()) {
-                float bias;
-                const auto & key = el.key();
+                float        bias;
+                const auto & key   = el.key();
                 const auto & value = el.value();
                 if (value.is_number()) {
                     bias = value.get<float>();
@@ -452,16 +513,16 @@ task_params server_task::params_from_json_cmpl(
                     continue;
                 }
 
-                char *end;
+                char *      end;
                 llama_token tok = strtol(key.c_str(), &end, 10);
                 if (*end == 0) {
                     if (tok >= 0 && tok < n_vocab) {
-                        params.sampling.logit_bias.push_back({tok, bias});
+                        params.sampling.logit_bias.push_back({ tok, bias });
                     }
                 } else {
                     auto toks = common_tokenize(vocab, key, false);
                     for (auto tok : toks) {
-                        params.sampling.logit_bias.push_back({tok, bias});
+                        params.sampling.logit_bias.push_back({ tok, bias });
                     }
                 }
             }
@@ -469,9 +530,9 @@ task_params server_task::params_from_json_cmpl(
 
         params.sampling.ignore_eos = json_value(data, "ignore_eos", params_base.sampling.ignore_eos);
         if (params.sampling.ignore_eos) {
-            params.sampling.logit_bias.insert(
-                    params.sampling.logit_bias.end(),
-                    defaults.sampling.logit_bias_eog.begin(), defaults.sampling.logit_bias_eog.end());
+            params.sampling.logit_bias.insert(params.sampling.logit_bias.end(),
+                                              defaults.sampling.logit_bias_eog.begin(),
+                                              defaults.sampling.logit_bias_eog.end());
         }
     }
 
@@ -497,7 +558,7 @@ task_params server_task::params_from_json_cmpl(
         if (samplers != data.end()) {
             if (samplers->is_array()) {
                 params.sampling.samplers = common_sampler_types_from_names(*samplers, false);
-            } else if (samplers->is_string()){
+            } else if (samplers->is_string()) {
                 params.sampling.samplers = common_sampler_types_from_chars(samplers->get<std::string>());
             }
         } else {
@@ -518,21 +579,21 @@ task_params server_task::params_from_json_cmpl(
 
 json result_timings::to_json() const {
     json base = {
-        {"cache_n",                cache_n},
+        { "cache_n",                cache_n                },
 
-        {"prompt_n",               prompt_n},
-        {"prompt_ms",              prompt_ms},
-        {"prompt_per_token_ms",    prompt_per_token_ms},
-        {"prompt_per_second",      prompt_per_second},
+        { "prompt_n",               prompt_n               },
+        { "prompt_ms",              prompt_ms              },
+        { "prompt_per_token_ms",    prompt_per_token_ms    },
+        { "prompt_per_second",      prompt_per_second      },
 
-        {"predicted_n",            predicted_n},
-        {"predicted_ms",           predicted_ms},
-        {"predicted_per_token_ms", predicted_per_token_ms},
-        {"predicted_per_second",   predicted_per_second},
+        { "predicted_n",            predicted_n            },
+        { "predicted_ms",           predicted_ms           },
+        { "predicted_per_token_ms", predicted_per_token_ms },
+        { "predicted_per_second",   predicted_per_second   },
     };
 
     if (draft_n > 0) {
-        base["draft_n"] = draft_n;
+        base["draft_n"]          = draft_n;
         base["draft_n_accepted"] = draft_n_accepted;
     }
 
@@ -543,20 +604,24 @@ json result_timings::to_json() const {
 // result_prompt_progress
 //
 json result_prompt_progress::to_json() const {
-    return json {
-        {"total",     total},
-        {"cache",     cache},
-        {"processed", processed},
-        {"time_ms",   time_ms},
+    return json{
+        { "total",     total     },
+        { "cache",     cache     },
+        { "processed", processed },
+        { "time_ms",   time_ms   },
     };
 }
 
 static inline std::string stop_type_to_str(stop_type type) {
     switch (type) {
-        case STOP_TYPE_EOS:   return "eos";
-        case STOP_TYPE_WORD:  return "word";
-        case STOP_TYPE_LIMIT: return "limit";
-        default:              return "none";
+        case STOP_TYPE_EOS:
+            return "eos";
+        case STOP_TYPE_WORD:
+            return "word";
+        case STOP_TYPE_LIMIT:
+            return "limit";
+        default:
+            return "none";
     }
 }
 
@@ -569,36 +634,28 @@ json completion_token_output::to_json(bool post_sampling_probs) const {
     for (const auto & p : probs) {
         std::string txt(p.txt);
         txt.resize(validate_utf8(txt));
-        probs_for_token.push_back(json {
-            {"id",      p.tok},
-            {"token",   txt},
-            {"bytes",   str_to_bytes(p.txt)},
-            {
-                post_sampling_probs ? "prob" : "logprob",
-                post_sampling_probs ? p.prob : logarithm(p.prob)
-            },
+        probs_for_token.push_back(json{
+            { "id",                                     p.tok                                            },
+            { "token",                                  txt                                              },
+            { "bytes",                                  str_to_bytes(p.txt)                              },
+            { post_sampling_probs ? "prob" : "logprob", post_sampling_probs ? p.prob : logarithm(p.prob) },
         });
     }
     return probs_for_token;
 }
 
-json completion_token_output::probs_vector_to_json(const std::vector<completion_token_output> & probs, bool post_sampling_probs) {
+json completion_token_output::probs_vector_to_json(const std::vector<completion_token_output> & probs,
+                                                   bool                                         post_sampling_probs) {
     json out = json::array();
     for (const auto & p : probs) {
         std::string txt(p.text_to_send);
         txt.resize(validate_utf8(txt));
-        out.push_back(json {
-            {"id",           p.tok},
-            {"token",        txt},
-            {"bytes",        str_to_bytes(p.text_to_send)},
-            {
-                post_sampling_probs ? "prob" : "logprob",
-                post_sampling_probs ? p.prob : logarithm(p.prob)
-            },
-            {
-                post_sampling_probs ? "top_probs" : "top_logprobs",
-                p.to_json(post_sampling_probs)
-            },
+        out.push_back(json{
+            { "id",                                               p.tok                                            },
+            { "token",                                            txt                                              },
+            { "bytes",                                            str_to_bytes(p.text_to_send)                     },
+            { post_sampling_probs ? "prob" : "logprob",           post_sampling_probs ? p.prob : logarithm(p.prob) },
+            { post_sampling_probs ? "top_probs" : "top_logprobs", p.to_json(post_sampling_probs)                   },
         });
     }
     return out;
@@ -639,61 +696,58 @@ json server_task_result_cmpl_final::to_json() {
 }
 
 json server_task_result_cmpl_final::to_json_non_oaicompat() {
-    json res = json {
-        {"index",               index},
-        {"content",             content},
-        {"tokens",              tokens},
-        {"id_slot",             id_slot},
-        {"stop",                true},
-        {"model",               oaicompat_model},
-        {"tokens_predicted",    n_decoded},
-        {"tokens_evaluated",    n_prompt_tokens},
-        {"generation_settings", generation_params.to_json()},
-        {"prompt",              prompt},
-        {"has_new_line",        has_new_line},
-        {"truncated",           truncated},
-        {"stop_type",           stop_type_to_str(stop)},
-        {"stopping_word",       stopping_word},
-        {"tokens_cached",       n_tokens_cached},
-        {"timings",             timings.to_json()},
+    json res = json{
+        { "index",               index                       },
+        { "content",             content                     },
+        { "tokens",              tokens                      },
+        { "id_slot",             id_slot                     },
+        { "stop",                true                        },
+        { "model",               oaicompat_model             },
+        { "tokens_predicted",    n_decoded                   },
+        { "tokens_evaluated",    n_prompt_tokens             },
+        { "generation_settings", generation_params.to_json() },
+        { "prompt",              prompt                      },
+        { "has_new_line",        has_new_line                },
+        { "truncated",           truncated                   },
+        { "stop_type",           stop_type_to_str(stop)      },
+        { "stopping_word",       stopping_word               },
+        { "tokens_cached",       n_tokens_cached             },
+        { "timings",             timings.to_json()           },
     };
     if (!stream && !probs_output.empty()) {
-        res["completion_probabilities"] = completion_token_output::probs_vector_to_json(probs_output, post_sampling_probs);
+        res["completion_probabilities"] =
+            completion_token_output::probs_vector_to_json(probs_output, post_sampling_probs);
     }
     return response_fields.empty() ? res : json_get_nested_values(response_fields, res);
 }
 
 json server_task_result_cmpl_final::to_json_oaicompat() {
-    std::time_t t = std::time(0);
-    json logprobs = json(nullptr); // OAI default to null
+    std::time_t t        = std::time(0);
+    json        logprobs = json(nullptr);  // OAI default to null
     if (!stream && probs_output.size() > 0) {
         logprobs = json{
-            {"content", completion_token_output::probs_vector_to_json(probs_output, post_sampling_probs)},
+            { "content", completion_token_output::probs_vector_to_json(probs_output, post_sampling_probs) },
         };
     }
     json finish_reason = "length";
     if (stop == STOP_TYPE_WORD || stop == STOP_TYPE_EOS) {
         finish_reason = "stop";
     }
-    json res = json {
-        {"choices",            json::array({
-            json{
-                {"text",          content},
-                {"index",         index},
-                {"logprobs",      logprobs},
-                {"finish_reason", finish_reason},
-            }
-        })},
-        {"created",            t},
-        {"model",              oaicompat_model},
-        {"system_fingerprint", build_info},
-        {"object",             "text_completion"},
-        {"usage", json {
-            {"completion_tokens", n_decoded},
-            {"prompt_tokens",     n_prompt_tokens},
-            {"total_tokens",      n_decoded + n_prompt_tokens}
-        }},
-        {"id", oaicompat_cmpl_id}
+    json res = json{
+        { "choices",            json::array({ json{
+                         { "text", content },
+                         { "index", index },
+                         { "logprobs", logprobs },
+                         { "finish_reason", finish_reason },
+                     } })                                                },
+        { "created",            t                                                            },
+        { "model",              oaicompat_model                                              },
+        { "system_fingerprint", build_info                                                   },
+        { "object",             "text_completion"                                            },
+        { "usage",              json{ { "completion_tokens", n_decoded },
+                         { "prompt_tokens", n_prompt_tokens },
+                         { "total_tokens", n_decoded + n_prompt_tokens } } },
+        { "id",                 oaicompat_cmpl_id                                            }
     };
 
     // extra fields for debugging purposes
@@ -701,19 +755,19 @@ json server_task_result_cmpl_final::to_json_oaicompat() {
         res["__verbose"] = to_json_non_oaicompat();
     }
     if (timings.prompt_n >= 0) {
-        res.push_back({"timings", timings.to_json()});
+        res.push_back({ "timings", timings.to_json() });
     }
 
     return res;
 }
 
 json server_task_result_cmpl_final::to_json_oaicompat_chat() {
-    std::string finish_reason = "length";
+    std::string     finish_reason = "length";
     common_chat_msg msg;
     if (!oaicompat_msg.empty()) {
         msg = oaicompat_msg;
     } else {
-        msg.role = "assistant";
+        msg.role    = "assistant";
         msg.content = content;
     }
     if (stop == STOP_TYPE_WORD || stop == STOP_TYPE_EOS) {
@@ -728,24 +782,22 @@ json server_task_result_cmpl_final::to_json_oaicompat_chat() {
 
     if (!stream && probs_output.size() > 0) {
         choice["logprobs"] = json{
-            {"content", completion_token_output::probs_vector_to_json(probs_output, post_sampling_probs)},
+            { "content", completion_token_output::probs_vector_to_json(probs_output, post_sampling_probs) },
         };
     }
 
     std::time_t t = std::time(0);
 
-    json res = json {
-        {"choices",            json::array({choice})},
-        {"created",            t},
-        {"model",              oaicompat_model},
-        {"system_fingerprint", build_info},
-        {"object",             "chat.completion"},
-        {"usage", json {
-            {"completion_tokens", n_decoded},
-            {"prompt_tokens",     n_prompt_tokens},
-            {"total_tokens",      n_decoded + n_prompt_tokens}
-        }},
-        {"id", oaicompat_cmpl_id}
+    json res = json{
+        { "choices",            json::array({ choice })                                      },
+        { "created",            t                                                            },
+        { "model",              oaicompat_model                                              },
+        { "system_fingerprint", build_info                                                   },
+        { "object",             "chat.completion"                                            },
+        { "usage",              json{ { "completion_tokens", n_decoded },
+                         { "prompt_tokens", n_prompt_tokens },
+                         { "total_tokens", n_decoded + n_prompt_tokens } } },
+        { "id",                 oaicompat_cmpl_id                                            }
     };
 
     // extra fields for debugging purposes
@@ -753,14 +805,14 @@ json server_task_result_cmpl_final::to_json_oaicompat_chat() {
         res["__verbose"] = to_json_non_oaicompat();
     }
     if (timings.prompt_n >= 0) {
-        res.push_back({"timings", timings.to_json()});
+        res.push_back({ "timings", timings.to_json() });
     }
 
     return res;
 }
 
 json server_task_result_cmpl_final::to_json_oaicompat_chat_stream() {
-    std::time_t t = std::time(0);
+    std::time_t t             = std::time(0);
     std::string finish_reason = "length";
     if (stop == STOP_TYPE_WORD || stop == STOP_TYPE_EOS) {
         finish_reason = oaicompat_msg.tool_calls.empty() ? "stop" : "tool_calls";
@@ -785,40 +837,41 @@ json server_task_result_cmpl_final::to_json_oaicompat_chat_stream() {
     }
 
     deltas.push_back({
-        {"choices", json::array({
-            json {
-                {"finish_reason", finish_reason},
-                {"index", 0},
-                {"delta", json::object()},
-            },
-        })},
-        {"created",            t},
-        {"id",                 oaicompat_cmpl_id},
-        {"model",              oaicompat_model},
-        {"system_fingerprint", build_info},
-        {"object",             "chat.completion.chunk"},
+        { "choices",            json::array({
+                         json{
+                             { "finish_reason", finish_reason },
+                             { "index", 0 },
+                             { "delta", json::object() },
+                         },
+                     })             },
+        { "created",            t                       },
+        { "id",                 oaicompat_cmpl_id       },
+        { "model",              oaicompat_model         },
+        { "system_fingerprint", build_info              },
+        { "object",             "chat.completion.chunk" },
     });
 
     if (include_usage) {
         // OpenAI API spec for chat.completion.chunks specifies an empty `choices` array for the last chunk when including usage
         // https://platform.openai.com/docs/api-reference/chat_streaming/streaming#chat_streaming/streaming-choices
         deltas.push_back({
-            {"choices", json::array()},
-            {"created",            t},
-            {"id",                 oaicompat_cmpl_id},
-            {"model",              oaicompat_model},
-            {"system_fingerprint", build_info},
-            {"object",             "chat.completion.chunk"},
-            {"usage", json {
-                {"completion_tokens", n_decoded},
-                {"prompt_tokens",     n_prompt_tokens},
-                {"total_tokens",      n_decoded + n_prompt_tokens},
-            }},
+            { "choices",            json::array()           },
+            { "created",            t                       },
+            { "id",                 oaicompat_cmpl_id       },
+            { "model",              oaicompat_model         },
+            { "system_fingerprint", build_info              },
+            { "object",             "chat.completion.chunk" },
+            { "usage",
+             json{
+                  { "completion_tokens", n_decoded },
+                  { "prompt_tokens", n_prompt_tokens },
+                  { "total_tokens", n_decoded + n_prompt_tokens },
+              }                                             },
         });
     }
 
     if (timings.prompt_n >= 0) {
-        deltas.back().push_back({"timings", timings.to_json()});
+        deltas.back().push_back({ "timings", timings.to_json() });
     }
 
     // extra fields for debugging purposes
@@ -1021,7 +1074,7 @@ json server_task_result_cmpl_final::to_json_anthropic() {
     if (!oaicompat_msg.empty()) {
         msg = oaicompat_msg;
     } else {
-        msg.role = "assistant";
+        msg.role    = "assistant";
         msg.content = content;
     }
 
@@ -1036,16 +1089,16 @@ json server_task_result_cmpl_final::to_json_anthropic() {
 
     if (!msg.content.empty()) {
         content_blocks.push_back({
-            {"type", "text"},
-            {"text", msg.content}
+            { "type", "text"      },
+            { "text", msg.content }
         });
     }
 
     for (const auto & tool_call : msg.tool_calls) {
         json tool_use_block = {
-            {"type", "tool_use"},
-            {"id", tool_call.id},
-            {"name", tool_call.name}
+            { "type", "tool_use"     },
+            { "id",   tool_call.id   },
+            { "name", tool_call.name }
         };
 
         try {
@@ -1058,17 +1111,14 @@ json server_task_result_cmpl_final::to_json_anthropic() {
     }
 
     json res = {
-        {"id", oaicompat_cmpl_id},
-        {"type", "message"},
-        {"role", "assistant"},
-        {"content", content_blocks},
-        {"model", oaicompat_model},
-        {"stop_reason", stop_reason},
-        {"stop_sequence", stopping_word.empty() ? nullptr : json(stopping_word)},
-        {"usage", {
-            {"input_tokens", n_prompt_tokens},
-            {"output_tokens", n_decoded}
-        }}
+        { "id",            oaicompat_cmpl_id                                                       },
+        { "type",          "message"                                                               },
+        { "role",          "assistant"                                                             },
+        { "content",       content_blocks                                                          },
+        { "model",         oaicompat_model                                                         },
+        { "stop_reason",   stop_reason                                                             },
+        { "stop_sequence", stopping_word.empty() ? nullptr : json(stopping_word)                   },
+        { "usage",         { { "input_tokens", n_prompt_tokens }, { "output_tokens", n_decoded } } }
     };
 
     return res;
@@ -1163,31 +1213,27 @@ json server_task_result_cmpl_final::to_json_anthropic_stream() {
                 const auto & full_tool_call = oaicompat_msg.tool_calls[diff.tool_call_index];
 
                 events.push_back({
-                    {"event", "content_block_start"},
-                    {"data", {
-                        {"type", "content_block_start"},
-                        {"index", content_block_index},
-                        {"content_block", {
-                            {"type", "tool_use"},
-                            {"id", full_tool_call.id},
-                            {"name", full_tool_call.name}
-                        }}
-                    }}
+                    { "event", "content_block_start"              },
+                    { "data",
+                     { { "type", "content_block_start" },
+                        { "index", content_block_index },
+                        { "content_block",
+                          { { "type", "tool_use" },
+                            { "id", full_tool_call.id },
+                            { "name", full_tool_call.name } } } } }
                 });
                 tool_calls_started.insert(diff.tool_call_index);
             }
 
             if (!diff.tool_call_delta.arguments.empty()) {
                 events.push_back({
-                    {"event", "content_block_delta"},
-                    {"data", {
-                        {"type", "content_block_delta"},
-                        {"index", content_block_index},
-                        {"delta", {
-                            {"type", "input_json_delta"},
-                            {"partial_json", diff.tool_call_delta.arguments}
-                        }}
-                    }}
+                    { "event", "content_block_delta"                                 },
+                    { "data",
+                     { { "type", "content_block_delta" },
+                        { "index", content_block_index },
+                        { "delta",
+                          { { "type", "input_json_delta" },
+                            { "partial_json", diff.tool_call_delta.arguments } } } } }
                 });
             }
         }
@@ -1230,33 +1276,24 @@ json server_task_result_cmpl_final::to_json_anthropic_stream() {
     for (size_t i = 0; i < num_tool_calls; i++) {
         size_t content_block_index = (has_thinking ? 1 : 0) + (has_text ? 1 : 0) + i;
         events.push_back({
-            {"event", "content_block_stop"},
-            {"data", {
-                {"type", "content_block_stop"},
-                {"index", content_block_index}
-            }}
+            { "event", "content_block_stop"                                                   },
+            { "data",  { { "type", "content_block_stop" }, { "index", content_block_index } } }
         });
     }
 
     events.push_back({
-        {"event", "message_delta"},
-        {"data", {
-            {"type", "message_delta"},
-            {"delta", {
-                {"stop_reason", stop_reason},
-                {"stop_sequence", stopping_word.empty() ? nullptr : json(stopping_word)}
-            }},
-            {"usage", {
-                {"output_tokens", n_decoded}
-            }}
-        }}
+        { "event", "message_delta"                            },
+        { "data",
+         { { "type", "message_delta" },
+            { "delta",
+              { { "stop_reason", stop_reason },
+                { "stop_sequence", stopping_word.empty() ? nullptr : json(stopping_word) } } },
+            { "usage", { { "output_tokens", n_decoded } } } } }
     });
 
     events.push_back({
-        {"event", "message_stop"},
-        {"data", {
-            {"type", "message_stop"}
-        }}
+        { "event", "message_stop"                 },
+        { "data",  { { "type", "message_stop" } } }
     });
 
     return events;
@@ -1315,50 +1352,49 @@ json server_task_result_cmpl_partial::to_json() {
 
 json server_task_result_cmpl_partial::to_json_non_oaicompat() {
     // non-OAI-compat JSON
-    json res = json {
-        {"index",            index},
-        {"content",          content},
-        {"tokens",           tokens},
-        {"stop",             false},
-        {"id_slot",          id_slot},
-        {"tokens_predicted", n_decoded},
-        {"tokens_evaluated", n_prompt_tokens},
+    json res = json{
+        { "index",            index           },
+        { "content",          content         },
+        { "tokens",           tokens          },
+        { "stop",             false           },
+        { "id_slot",          id_slot         },
+        { "tokens_predicted", n_decoded       },
+        { "tokens_evaluated", n_prompt_tokens },
     };
     // populate the timings object when needed (usually for the last response or with timings_per_token enabled)
     if (timings.prompt_n > 0) {
-        res.push_back({"timings", timings.to_json()});
+        res.push_back({ "timings", timings.to_json() });
     }
     if (is_progress) {
-        res.push_back({"prompt_progress", progress.to_json()});
+        res.push_back({ "prompt_progress", progress.to_json() });
     }
     if (!prob_output.probs.empty()) {
-        res["completion_probabilities"] = completion_token_output::probs_vector_to_json({prob_output}, post_sampling_probs);
+        res["completion_probabilities"] =
+            completion_token_output::probs_vector_to_json({ prob_output }, post_sampling_probs);
     }
     return res;
 }
 
 json server_task_result_cmpl_partial::to_json_oaicompat() {
-    std::time_t t = std::time(0);
-    json logprobs = json(nullptr); // OAI default to null
+    std::time_t t        = std::time(0);
+    json        logprobs = json(nullptr);  // OAI default to null
     if (prob_output.probs.size() > 0) {
         logprobs = json{
-            {"content", completion_token_output::probs_vector_to_json({prob_output}, post_sampling_probs)},
+            { "content", completion_token_output::probs_vector_to_json({ prob_output }, post_sampling_probs) },
         };
     }
-    json res = json {
-        {"choices",            json::array({
-            json{
-                {"text",          content},
-                {"index",         index},
-                {"logprobs",      logprobs},
-                {"finish_reason", nullptr},
-            }
-        })},
-        {"created",            t},
-        {"model",              oaicompat_model},
-        {"system_fingerprint", build_info},
-        {"object",             "text_completion"},
-        {"id",                 oaicompat_cmpl_id}
+    json res = json{
+        { "choices",            json::array({ json{
+                         { "text", content },
+                         { "index", index },
+                         { "logprobs", logprobs },
+                         { "finish_reason", nullptr },
+                     } })     },
+        { "created",            t                 },
+        { "model",              oaicompat_model   },
+        { "system_fingerprint", build_info        },
+        { "object",             "text_completion" },
+        { "id",                 oaicompat_cmpl_id }
     };
 
     // extra fields for debugging purposes
@@ -1366,42 +1402,42 @@ json server_task_result_cmpl_partial::to_json_oaicompat() {
         res["__verbose"] = to_json_non_oaicompat();
     }
     if (timings.prompt_n >= 0) {
-        res.push_back({"timings", timings.to_json()});
+        res.push_back({ "timings", timings.to_json() });
     }
     if (is_progress) {
-        res.push_back({"prompt_progress", progress.to_json()});
+        res.push_back({ "prompt_progress", progress.to_json() });
     }
 
     return res;
 }
 
 json server_task_result_cmpl_partial::to_json_oaicompat_chat() {
-    bool first = n_decoded == 1;
-    std::time_t t = std::time(0);
-    json choices;
+    bool        first = n_decoded == 1;
+    std::time_t t     = std::time(0);
+    json        choices;
 
     std::vector<json> deltas;
-    auto add_delta = [&](const json & delta) {
+    auto              add_delta = [&](const json & delta) {
         deltas.push_back({
-            {"choices", json::array({
-                json {
-                    {"finish_reason", nullptr},
-                    {"index", index},
-                    {"delta", delta},
-                },
-            })},
-            {"created", t},
-            {"id", oaicompat_cmpl_id},
-            {"model", oaicompat_model},
-            {"system_fingerprint", build_info},
-            {"object", "chat.completion.chunk"},
+            { "choices",            json::array({
+                             json{
+                                              { "finish_reason", nullptr },
+                                              { "index", index },
+                                              { "delta", delta },
+                             },
+                         })         },
+            { "created",            t                       },
+            { "id",                 oaicompat_cmpl_id       },
+            { "model",              oaicompat_model         },
+            { "system_fingerprint", build_info              },
+            { "object",             "chat.completion.chunk" },
         });
     };
     // We have to send an initial update to conform to openai behavior
     if (first || is_progress) {
         add_delta({
-            {"role", "assistant"},
-            {"content", nullptr},
+            { "role",    "assistant" },
+            { "content", nullptr     },
         });
     }
 
@@ -1414,16 +1450,16 @@ json server_task_result_cmpl_partial::to_json_oaicompat_chat() {
         GGML_ASSERT(last_json.at("choices").size() >= 1);
 
         if (prob_output.probs.size() > 0) {
-            last_json.at("choices").at(0)["logprobs"] = json {
-                {"content", completion_token_output::probs_vector_to_json({prob_output}, post_sampling_probs)},
+            last_json.at("choices").at(0)["logprobs"] = json{
+                { "content", completion_token_output::probs_vector_to_json({ prob_output }, post_sampling_probs) },
             };
         }
 
         if (timings.prompt_n >= 0) {
-            last_json.push_back({"timings", timings.to_json()});
+            last_json.push_back({ "timings", timings.to_json() });
         }
         if (is_progress) {
-            last_json.push_back({"prompt_progress", progress.to_json()});
+            last_json.push_back({ "prompt_progress", progress.to_json() });
         }
     }
 
@@ -1564,23 +1600,18 @@ json server_task_result_cmpl_partial::to_json_anthropic() {
 
     if (first) {
         events.push_back({
-            {"event", "message_start"},
-            {"data", {
-                {"type", "message_start"},
-                {"message", {
-                    {"id", oaicompat_cmpl_id},
-                    {"type", "message"},
-                    {"role", "assistant"},
-                    {"content", json::array()},
-                    {"model", oaicompat_model},
-                    {"stop_reason", nullptr},
-                    {"stop_sequence", nullptr},
-                    {"usage", {
-                        {"input_tokens", n_prompt_tokens},
-                        {"output_tokens", 0}
-                    }}
-                }}
-            }}
+            { "event", "message_start"                                                                 },
+            { "data",
+             { { "type", "message_start" },
+                { "message",
+                  { { "id", oaicompat_cmpl_id },
+                    { "type", "message" },
+                    { "role", "assistant" },
+                    { "content", json::array() },
+                    { "model", oaicompat_model },
+                    { "stop_reason", nullptr },
+                    { "stop_sequence", nullptr },
+                    { "usage", { { "input_tokens", n_prompt_tokens }, { "output_tokens", 0 } } } } } } }
         });
     }
 
@@ -1662,30 +1693,26 @@ json server_task_result_cmpl_partial::to_json_anthropic() {
 
             if (!diff.tool_call_delta.name.empty()) {
                 events.push_back({
-                    {"event", "content_block_start"},
-                    {"data", {
-                        {"type", "content_block_start"},
-                        {"index", content_block_index},
-                        {"content_block", {
-                            {"type", "tool_use"},
-                            {"id", diff.tool_call_delta.id},
-                            {"name", diff.tool_call_delta.name}
-                        }}
-                    }}
+                    { "event", "content_block_start"                    },
+                    { "data",
+                     { { "type", "content_block_start" },
+                        { "index", content_block_index },
+                        { "content_block",
+                          { { "type", "tool_use" },
+                            { "id", diff.tool_call_delta.id },
+                            { "name", diff.tool_call_delta.name } } } } }
                 });
             }
 
             if (!diff.tool_call_delta.arguments.empty()) {
                 events.push_back({
-                    {"event", "content_block_delta"},
-                    {"data", {
-                        {"type", "content_block_delta"},
-                        {"index", content_block_index},
-                        {"delta", {
-                            {"type", "input_json_delta"},
-                            {"partial_json", diff.tool_call_delta.arguments}
-                        }}
-                    }}
+                    { "event", "content_block_delta"                                 },
+                    { "data",
+                     { { "type", "content_block_delta" },
+                        { "index", content_block_index },
+                        { "delta",
+                          { { "type", "input_json_delta" },
+                            { "partial_json", diff.tool_call_delta.arguments } } } } }
                 });
             }
         }
@@ -1745,28 +1772,28 @@ json server_task_result_error::to_json() {
 // server_task_result_metrics
 //
 json server_task_result_metrics::to_json() {
-    return json {
-        { "idle",                            n_idle_slots },
-        { "processing",                      n_processing_slots },
-        { "deferred",                        n_tasks_deferred },
-        { "t_start",                         t_start },
+    return json{
+        { "idle",                            n_idle_slots                    },
+        { "processing",                      n_processing_slots              },
+        { "deferred",                        n_tasks_deferred                },
+        { "t_start",                         t_start                         },
 
         { "n_prompt_tokens_processed_total", n_prompt_tokens_processed_total },
-        { "t_tokens_generation_total",       t_tokens_generation_total },
-        { "n_tokens_predicted_total",        n_tokens_predicted_total },
-        { "t_prompt_processing_total",       t_prompt_processing_total },
+        { "t_tokens_generation_total",       t_tokens_generation_total       },
+        { "n_tokens_predicted_total",        n_tokens_predicted_total        },
+        { "t_prompt_processing_total",       t_prompt_processing_total       },
 
-        { "n_tokens_max",                    n_tokens_max },
+        { "n_tokens_max",                    n_tokens_max                    },
 
-        { "n_prompt_tokens_processed",       n_prompt_tokens_processed },
-        { "t_prompt_processing",             t_prompt_processing },
-        { "n_tokens_predicted",              n_tokens_predicted },
-        { "t_tokens_generation",             t_tokens_generation },
+        { "n_prompt_tokens_processed",       n_prompt_tokens_processed       },
+        { "t_prompt_processing",             t_prompt_processing             },
+        { "n_tokens_predicted",              n_tokens_predicted              },
+        { "t_tokens_generation",             t_tokens_generation             },
 
-        { "n_decode_total",                  n_decode_total },
-        { "n_busy_slots_total",              n_busy_slots_total },
+        { "n_decode_total",                  n_decode_total                  },
+        { "n_busy_slots_total",              n_busy_slots_total              },
 
-        { "slots",                           slots_data },
+        { "slots",                           slots_data                      },
     };
 }
 
@@ -1775,25 +1802,21 @@ json server_task_result_metrics::to_json() {
 //
 json server_task_result_slot_save_load::to_json() {
     if (is_save) {
-        return json {
-            { "id_slot",   id_slot },
-            { "filename",  filename },
-            { "n_saved",   n_tokens },
-            { "n_written", n_bytes },
-            { "timings", {
-                { "save_ms", t_ms }
-            }},
+        return json{
+            { "id_slot",   id_slot                 },
+            { "filename",  filename                },
+            { "n_saved",   n_tokens                },
+            { "n_written", n_bytes                 },
+            { "timings",   { { "save_ms", t_ms } } },
         };
     }
 
-    return json {
-        { "id_slot",    id_slot },
-        { "filename",   filename },
-        { "n_restored", n_tokens },
-        { "n_read",     n_bytes },
-        { "timings", {
-            { "restore_ms", t_ms }
-        }},
+    return json{
+        { "id_slot",    id_slot                    },
+        { "filename",   filename                   },
+        { "n_restored", n_tokens                   },
+        { "n_read",     n_bytes                    },
+        { "timings",    { { "restore_ms", t_ms } } },
     };
 }
 
@@ -1801,8 +1824,8 @@ json server_task_result_slot_save_load::to_json() {
 // server_task_result_slot_erase
 //
 json server_task_result_slot_erase::to_json() {
-    return json {
-        { "id_slot",  id_slot },
+    return json{
+        { "id_slot",  id_slot  },
         { "n_erased", n_erased },
     };
 }
@@ -1814,13 +1837,13 @@ json server_task_result_slot_erase::to_json() {
 json server_task_result_get_lora::to_json() {
     json result = json::array();
     for (size_t i = 0; i < loras.size(); ++i) {
-        auto & lora = loras[i];
-        json entry = {
-            {"id",            i},
-            {"path",          lora.info.path},
-            {"scale",         lora.info.scale},
-            {"task_name",     lora.info.task_name},
-            {"prompt_prefix", lora.info.prompt_prefix},
+        auto & lora  = loras[i];
+        json   entry = {
+            { "id",            i                       },
+            { "path",          lora.info.path          },
+            { "scale",         lora.info.scale         },
+            { "task_name",     lora.info.task_name     },
+            { "prompt_prefix", lora.info.prompt_prefix },
         };
         if (!lora.alora_invocation_tokens.empty()) {
             entry["alora_invocation_string"] = lora.alora_invocation_string;
@@ -1836,7 +1859,9 @@ json server_task_result_get_lora::to_json() {
 //
 
 json server_task_result_apply_lora::to_json() {
-    return json {{ "success", true }};
+    return json{
+        { "success", true }
+    };
 }
 
 //
@@ -1894,7 +1919,7 @@ server_prompt * server_prompt_cache::alloc(const server_prompt & prompt, size_t
     } catch (const std::bad_alloc & e) {
         SRV_ERR("failed to allocate memory for prompt cache state: %s\n", e.what());
 
-        limit_size = std::max<size_t>(1, 0.4*size());
+        limit_size = std::max<size_t>(1, 0.4 * size());
 
         SRV_WRN(" - cache size limit reduced to %.3f MiB\n", limit_size / (1024.0 * 1024.0));
 
@@ -1905,16 +1930,19 @@ server_prompt * server_prompt_cache::alloc(const server_prompt & prompt, size_t
 
     // TODO: for some reason we can't copy server_tokens, so we have to do this workaround
     auto & cur = states.emplace_back();
-    cur = {
-        /*.tokens      =*/ server_tokens(prompt.tokens.get_text_tokens(), false),
-        /*.data        =*/ std::move(state_data),
-        /*.checkpoints =*/ prompt.checkpoints,
+    cur        = {
+        /*.tokens      =*/server_tokens(prompt.tokens.get_text_tokens(), false),
+        /*.data        =*/std::move(state_data),
+        /*.checkpoints =*/prompt.checkpoints,
     };
 
     return &cur;
 }
 
-bool server_prompt_cache::load(server_prompt & prompt, const server_tokens & tokens_new, llama_context * ctx, int32_t id_slot) {
+bool server_prompt_cache::load(server_prompt &       prompt,
+                               const server_tokens & tokens_new,
+                               llama_context *       ctx,
+                               int32_t               id_slot) {
     const int lcp_best = prompt.tokens.get_common_prefix(tokens_new);
 
     float f_keep_best = float(lcp_best) / prompt.tokens.size();
@@ -1948,7 +1976,7 @@ bool server_prompt_cache::load(server_prompt & prompt, const server_tokens & tok
         SRV_WRN(" - found better prompt with f_keep = %.3f, sim = %.3f\n", f_keep_best, sim_best);
 
         const size_t size = it_best->data.size();
-        const size_t n = llama_state_seq_set_data_ext(ctx, it_best->data.data(), size, id_slot, 0);
+        const size_t n    = llama_state_seq_set_data_ext(ctx, it_best->data.data(), size, id_slot, 0);
         if (n != size) {
             SRV_WRN("failed to restore state with size %zu\n", size);
 
@@ -1974,7 +2002,8 @@ void server_prompt_cache::update() {
                 break;
             }
 
-            SRV_WRN(" - cache size limit reached, removing oldest entry (size = %.3f MiB)\n", states.front().size() / (1024.0 * 1024.0));
+            SRV_WRN(" - cache size limit reached, removing oldest entry (size = %.3f MiB)\n",
+                    states.front().size() / (1024.0 * 1024.0));
 
             states.pop_front();
         }
@@ -1984,7 +2013,8 @@ void server_prompt_cache::update() {
     const float size_per_token = std::max<float>(1.0f, float(size()) / (std::max<size_t>(1, n_tokens())));
 
     // dynamically increase the token limit if it can fit in the memory limit
-    const size_t limit_tokens_cur = limit_size > 0 ? std::max<size_t>(limit_tokens, limit_size/size_per_token) : limit_tokens;
+    const size_t limit_tokens_cur =
+        limit_size > 0 ? std::max<size_t>(limit_tokens, limit_size / size_per_token) : limit_tokens;
 
     if (limit_tokens > 0) {
         while (states.size() > 1 && n_tokens() > limit_tokens_cur) {
@@ -1999,11 +2029,11 @@ void server_prompt_cache::update() {
         }
     }
 
-    SRV_WRN(" - cache state: %zu prompts, %.3f MiB (limits: %.3f MiB, %zu tokens, %zu est)\n",
-            states.size(), size() / (1024.0 * 1024.0), limit_size / (1024.0 * 1024.0), limit_tokens, limit_tokens_cur);
+    SRV_WRN(" - cache state: %zu prompts, %.3f MiB (limits: %.3f MiB, %zu tokens, %zu est)\n", states.size(),
+            size() / (1024.0 * 1024.0), limit_size / (1024.0 * 1024.0), limit_tokens, limit_tokens_cur);
 
     for (const auto & state : states) {
-        SRV_WRN("   - prompt %p: %7d tokens, checkpoints: %2zu, %9.3f MiB\n",
-                (const void *)&state, state.n_tokens(), state.checkpoints.size(), state.size() / (1024.0 * 1024.0));
+        SRV_WRN("   - prompt %p: %7d tokens, checkpoints: %2zu, %9.3f MiB\n", (const void *) &state, state.n_tokens(),
+                state.checkpoints.size(), state.size() / (1024.0 * 1024.0));
     }
 }
diff --git a/tools/server/server-task.h b/tools/server/server-task.h
index a69e8f1a3d..7ccaf3c31b 100644
--- a/tools/server/server-task.h
+++ b/tools/server/server-task.h
@@ -3,10 +3,10 @@
 #include "common.h"
 #include "llama.h"
 
-#include <string>
-#include <unordered_set>
 #include <list>
 #include <map>
+#include <string>
+#include <unordered_set>
 
 // TODO: prevent including the whole server-common.h as we only use server_tokens
 #include "server-common.h"
@@ -30,7 +30,7 @@ enum server_task_type {
 
 // TODO: change this to more generic "response_format" to replace the "format_response_*" in server-common
 enum task_response_type {
-    TASK_RESPONSE_TYPE_NONE, // llama.cpp native format
+    TASK_RESPONSE_TYPE_NONE,  // llama.cpp native format
     TASK_RESPONSE_TYPE_OAI_CHAT,
     TASK_RESPONSE_TYPE_OAI_CMPL,
     TASK_RESPONSE_TYPE_OAI_RESP,
@@ -48,22 +48,23 @@ enum stop_type {
 struct task_params {
     bool stream          = true;
     bool include_usage   = false;
-    bool cache_prompt    = true; // remember the prompt to avoid reprocessing all prompt
+    bool cache_prompt    = true;  // remember the prompt to avoid reprocessing all prompt
     bool return_tokens   = false;
     bool return_progress = false;
 
-    int32_t n_keep    =  0; // number of tokens to keep from initial prompt
-    int32_t n_discard =  0; // number of tokens after n_keep that may be discarded when shifting context, 0 defaults to half
-    int32_t n_predict = -1; // new tokens to predict
-    int32_t n_indent  =  0; // minimum line indentation for the generated text in number of whitespace characters
-    int32_t n_cmpl    =  1; // number of completions to generate from this prompt
+    int32_t n_keep = 0;  // number of tokens to keep from initial prompt
+    int32_t n_discard =
+        0;  // number of tokens after n_keep that may be discarded when shifting context, 0 defaults to half
+    int32_t n_predict = -1;     // new tokens to predict
+    int32_t n_indent  = 0;      // minimum line indentation for the generated text in number of whitespace characters
+    int32_t n_cmpl    = 1;      // number of completions to generate from this prompt
 
-    int32_t n_cache_reuse = 0; // min chunk size to attempt reusing from the cache via KV shifting (0 = disabled)
+    int32_t n_cache_reuse = 0;  // min chunk size to attempt reusing from the cache via KV shifting (0 = disabled)
 
-    int64_t t_max_prompt_ms  = -1; // TODO: implement
-    int64_t t_max_predict_ms = -1; // if positive, limit the generation phase to this time limit
+    int64_t t_max_prompt_ms  = -1;  // TODO: implement
+    int64_t t_max_predict_ms = -1;  // if positive, limit the generation phase to this time limit
 
-    std::map<int, float> lora; // mapping adapter ID -> scale
+    std::map<int, float> lora;      // mapping adapter ID -> scale
 
     std::vector<std::string> antiprompt;
     std::vector<std::string> response_fields;
@@ -71,7 +72,7 @@ struct task_params {
     bool timings_per_token   = false;
     bool post_sampling_probs = false;
 
-    struct common_params_sampling sampling;
+    struct common_params_sampling    sampling;
     struct common_params_speculative speculative;
 
     // response formatting
@@ -84,7 +85,7 @@ struct task_params {
     common_chat_parser_params chat_parser_params;
 
     // Embeddings
-    int32_t embd_normalize = 2; // (-1=none, 0=max absolute int16, 1=taxicab, 2=Euclidean/L2, >2=p-norm)
+    int32_t embd_normalize = 2;  // (-1=none, 0=max absolute int16, 1=taxicab, 2=Euclidean/L2, >2=p-norm)
 
     json format_logit_bias(const std::vector<llama_logit_bias> & logit_bias) const;
     json to_json(bool only_metrics = false) const;
@@ -95,9 +96,10 @@ struct task_result_state {
     // tracking diffs for partial tool calls
     std::vector<common_chat_msg_diff> diffs;
     common_chat_parser_params chat_parser_params;
-    common_chat_msg chat_msg;
-    std::string generated_text; // append new chunks of generated text here
-    std::vector<std::string> generated_tool_call_ids;
+    common_chat_msg                   chat_msg;
+    std::string                       generated_text;  // append new chunks of generated text here
+    std::vector<std::string>          generated_tool_call_ids;
+    std::unordered_set<size_t>        sent_tool_call_names;
 
     // for OpenAI Responses and Anthropic streaming API:
     // track output item / content block state across chunks
@@ -117,17 +119,17 @@ struct task_result_state {
         , oai_resp_message_id("msg_" + random_string()) {}
 
     // parse partial tool calls and update the internal state
-    common_chat_msg update_chat_msg(
-        const std::string & text_added,
-        bool is_partial,
-        std::vector<common_chat_msg_diff> & diffs);
+    common_chat_msg update_chat_msg(const std::string &                 text_added,
+                                    bool                                is_partial,
+                                    std::vector<common_chat_msg_diff> & diffs,
+                                    bool                                filter_tool_calls = false);
 };
 
 struct server_task {
-    int id = -1; // to be filled by server_queue
+    int id = -1;  // to be filled by server_queue
 
     // TODO @ngxson : remove this field and implement a mapping task_id -> idx in the response_reader
-    size_t index = 0; // used when there are multiple prompts (batch request)
+    size_t index = 0;  // used when there are multiple prompts (batch request)
 
     // used by SERVER_TASK_TYPE_CANCEL
     int id_target = -1;
@@ -157,13 +159,14 @@ struct server_task {
         std::string filename;
         std::string filepath;
     };
+
     slot_action slot_action;
 
     // used by SERVER_TASK_TYPE_METRICS
     bool metrics_reset_bucket = false;
 
     // used by SERVER_TASK_TYPE_SET_LORA
-    std::map<int, float> set_lora; // mapping adapter ID -> scale
+    std::map<int, float> set_lora;  // mapping adapter ID -> scale
 
     server_task() = default;
 
@@ -203,11 +206,10 @@ struct server_task {
         }
     }
 
-    static task_params params_from_json_cmpl(
-        const llama_vocab * vocab,
-        const common_params & params_base,
-        const int n_ctx_slot,
-        const json & data);
+    static task_params params_from_json_cmpl(const llama_vocab *   vocab,
+                                             const common_params & params_base,
+                                             const int             n_ctx_slot,
+                                             const json &          data);
 
     // utility function
     static std::unordered_set<int> get_list_id(const std::vector<server_task> & tasks) {
@@ -259,50 +261,53 @@ struct result_timings {
     int32_t cache_n = -1;
 
     int32_t prompt_n = -1;
-    double prompt_ms;
-    double prompt_per_token_ms;
-    double prompt_per_second;
+    double  prompt_ms;
+    double  prompt_per_token_ms;
+    double  prompt_per_second;
 
     int32_t predicted_n = -1;
-    double predicted_ms;
-    double predicted_per_token_ms;
-    double predicted_per_second;
+    double  predicted_ms;
+    double  predicted_per_token_ms;
+    double  predicted_per_second;
 
     // Optional speculative metrics - only included when > 0
-    int32_t draft_n = 0;
+    int32_t draft_n          = 0;
     int32_t draft_n_accepted = 0;
 
     json to_json() const;
 };
 
 struct result_prompt_progress {
-    int32_t total = 0;
-    int32_t cache = 0;
+    int32_t total     = 0;
+    int32_t cache     = 0;
     int32_t processed = 0;
-    int64_t time_ms = 0;
+    int64_t time_ms   = 0;
 
     json to_json() const;
 };
 
 struct server_task_result {
-    int id           = -1;
-    int id_slot      = -1;
+    int id      = -1;
+    int id_slot = -1;
 
     // TODO @ngxson : remove this field and implement a mapping task_id -> idx in the response_reader
-    size_t index = 0; // to be used for batched tasks
+    size_t index = 0;  // to be used for batched tasks
 
     virtual bool is_error() {
         // only used by server_task_result_error
         return false;
     }
+
     virtual bool is_stop() {
         // only used by server_task_result_cmpl_*
         return true;
     }
+
     virtual void update(task_result_state &) {
         // only used by server_task_result_cmpl_*
     }
-    virtual json to_json() = 0;
+
+    virtual json to_json()        = 0;
     virtual ~server_task_result() = default;
 };
 
@@ -311,13 +316,15 @@ using server_task_result_ptr = std::unique_ptr<server_task_result>;
 
 struct completion_token_output {
     llama_token tok;
-    float prob;
+    float       prob;
     std::string text_to_send;
+
     struct prob_info {
         llama_token tok;
         std::string txt;
-        float prob;
+        float       prob;
     };
+
     std::vector<prob_info> probs;
 
     json to_json(bool post_sampling_probs) const;
@@ -327,29 +334,28 @@ struct completion_token_output {
     static float logarithm(float x);
 
     static std::vector<unsigned char> str_to_bytes(const std::string & str);
-
 };
 
 struct server_task_result_cmpl_final : server_task_result {
-    std::string content;
+    std::string  content;
     llama_tokens tokens;
 
-    bool stream;
-    bool include_usage;
+    bool           stream;
+    bool           include_usage;
     result_timings timings;
-    std::string prompt;
+    std::string    prompt;
 
-    bool truncated;
-    int32_t n_decoded;
-    int32_t n_prompt_tokens;
-    int32_t n_tokens_cached;
-    bool has_new_line;
+    bool        truncated;
+    int32_t     n_decoded;
+    int32_t     n_prompt_tokens;
+    int32_t     n_tokens_cached;
+    bool        has_new_line;
     std::string stopping_word;
-    stop_type stop = STOP_TYPE_NONE;
+    stop_type   stop = STOP_TYPE_NONE;
 
-    bool post_sampling_probs;
+    bool                                 post_sampling_probs;
     std::vector<completion_token_output> probs_output;
-    std::vector<std::string>  response_fields;
+    std::vector<std::string>             response_fields;
 
     task_params generation_params;
 
@@ -358,7 +364,7 @@ struct server_task_result_cmpl_final : server_task_result {
     task_response_type res_type = TASK_RESPONSE_TYPE_NONE;
     std::string        oaicompat_model;
     std::string        oaicompat_cmpl_id;
-    common_chat_msg    oaicompat_msg; // to be populated by update()
+    common_chat_msg    oaicompat_msg;                       // to be populated by update()
 
     std::vector<common_chat_msg_diff> oaicompat_msg_diffs; // to be populated by update()
     bool is_updated = false;
@@ -369,7 +375,7 @@ struct server_task_result_cmpl_final : server_task_result {
     std::string oai_resp_message_id;
 
     virtual bool is_stop() override {
-        return true; // in stream mode, final responses are considered stop
+        return true;  // in stream mode, final responses are considered stop
     }
 
     virtual json to_json() override;
@@ -407,11 +413,11 @@ struct server_task_result_cmpl_partial : server_task_result {
     int32_t n_decoded;
     int32_t n_prompt_tokens;
 
-    bool post_sampling_probs;
-    bool is_progress = false;
+    bool                    post_sampling_probs;
+    bool                    is_progress = false;
     completion_token_output prob_output;
-    result_timings timings;
-    result_prompt_progress progress;
+    result_timings          timings;
+    result_prompt_progress  progress;
 
     // response formatting
     bool               verbose  = false;
@@ -435,7 +441,7 @@ struct server_task_result_cmpl_partial : server_task_result {
     bool anthropic_has_reasoning = false;
 
     virtual bool is_stop() override {
-        return false; // in stream mode, partial responses are not considered stop
+        return false;  // in stream mode, partial responses are not considered stop
     }
 
     virtual void update(task_result_state & state) override;
@@ -477,24 +483,22 @@ struct server_task_result_rerank : server_task_result {
 };
 
 struct server_task_result_error : server_task_result {
-    error_type err_type = ERROR_TYPE_SERVER;
+    error_type  err_type = ERROR_TYPE_SERVER;
     std::string err_msg;
 
     // for ERROR_TYPE_EXCEED_CONTEXT_SIZE
     int32_t n_prompt_tokens = 0;
     int32_t n_ctx           = 0;
 
-    virtual bool is_error() override {
-        return true;
-    }
+    virtual bool is_error() override { return true; }
 
     virtual json to_json() override;
 };
 
 struct server_task_result_metrics : server_task_result {
-    int n_idle_slots;
-    int n_processing_slots;
-    int n_tasks_deferred;
+    int     n_idle_slots;
+    int     n_processing_slots;
+    int     n_tasks_deferred;
     int64_t t_start;
 
     // TODO: somehow reuse server_metrics in the future, instead of duplicating the fields
@@ -523,7 +527,7 @@ struct server_task_result_metrics : server_task_result {
 
 struct server_task_result_slot_save_load : server_task_result {
     std::string filename;
-    bool is_save; // true = save, false = load
+    bool        is_save;  // true = save, false = load
 
     size_t n_tokens;
     size_t n_bytes;
@@ -541,9 +545,10 @@ struct server_task_result_slot_erase : server_task_result {
 struct server_task_result_get_lora : server_task_result {
     struct lora {
         common_adapter_lora_info info;
-        std::string  alora_invocation_string;
-        llama_tokens alora_invocation_tokens;
+        std::string              alora_invocation_string;
+        llama_tokens             alora_invocation_tokens;
     };
+
     std::vector<lora> loras;
 
     virtual json to_json() override;
@@ -559,9 +564,7 @@ struct server_prompt_checkpoint {
 
     std::vector<uint8_t> data;
 
-    size_t size() const {
-        return data.size();
-    }
+    size_t size() const { return data.size(); }
 };
 
 struct server_prompt {
@@ -581,22 +584,14 @@ struct server_prompt {
         return res;
     }
 
-    int n_tokens() const {
-        return tokens.size();
-    }
+    int n_tokens() const { return tokens.size(); }
 
-    server_prompt clone() const {
-        return server_prompt {
-            tokens.clone(),
-            data,
-            checkpoints
-        };
-    }
+    server_prompt clone() const { return server_prompt{ tokens.clone(), data, checkpoints }; }
 };
 
 struct server_prompt_cache {
     server_prompt_cache(int32_t limit_size_mib, size_t limit_tokens) {
-        this->limit_size   = 1024ull*1024ull*(limit_size_mib < 0 ? 0 : limit_size_mib);
+        this->limit_size   = 1024ull * 1024ull * (limit_size_mib < 0 ? 0 : limit_size_mib);
         this->limit_tokens = limit_tokens;
     }