From 9a3ac051577170a0df370df276ad1dd98e7adbb3 Mon Sep 17 00:00:00 2001 From: Piotr Wilkin Date: Fri, 23 Jan 2026 20:18:25 +0100 Subject: [PATCH 01/39] Add workaround for templates requiring non-null content --- common/chat.cpp | 13 ++++++++ common/jinja/caps.cpp | 69 ++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 78 insertions(+), 4 deletions(-) diff --git a/common/chat.cpp b/common/chat.cpp index 47a34d5822..0662e541ee 100644 --- a/common/chat.cpp +++ b/common/chat.cpp @@ -2960,6 +2960,15 @@ static void system_message_not_supported(json & messages) { } } +static void requires_non_null_content(json & messages) { + GGML_ASSERT(messages.is_array()); + for (auto & message : messages) { + if (message.contains("tool_calls") && !messages.contains("content")) { + messages["content"] = ""; + } + } +} + static void func_args_not_string(json & messages) { GGML_ASSERT(messages.is_array()); for (auto & message : messages) { @@ -3065,6 +3074,10 @@ static common_chat_params common_chat_templates_apply_jinja( workaround::system_message_not_supported(params.messages); } + if (!tmpl.original_caps().requires_non_null_content) { + workaround::requires_non_null_content(params.messages); + } + params.extra_context = json::object(); for (auto el : inputs.chat_template_kwargs) { params.extra_context[el.first] = json::parse(el.second); diff --git a/common/jinja/caps.cpp b/common/jinja/caps.cpp index dbaaed500a..261a5cf0e8 100644 --- a/common/jinja/caps.cpp +++ b/common/jinja/caps.cpp @@ -165,7 +165,7 @@ caps caps_get(jinja::program & prog) { {"content", "Assistant message"}, {"tool_calls", json::array({ { - {"id", "call1"}, + {"id", "call0001"}, {"type", "function"}, {"function", { {"name", "tool1"}, @@ -175,10 +175,10 @@ caps caps_get(jinja::program & prog) { }} }, { - {"id", "call2"}, + {"id", "call0002"}, {"type", "function"}, {"function", { - {"name", "tool2"}, + {"name", "tool1"}, {"arguments", { {"arg", "value"} }} @@ -199,7 +199,7 @@ caps caps_get(jinja::program & 
prog) { {"name", "tool"}, {"type", "function"}, {"function", { - {"name", "tool"}, + {"name", "tool1"}, {"description", "Tool description"}, {"parameters", { {"type", "object"}, @@ -243,6 +243,67 @@ caps caps_get(jinja::program & prog) { } ); + // case: requires non-null content in tool calls + caps_try_execute( + prog, + [&]() { + // messages + return json::array({ + { + {"role", "user"}, + {"content", "User message"}, + }, + { + {"role", "assistant"}, + {"tool_calls", json::array({ + { + {"id", "call0001"}, + {"type", "function"}, + {"function", { + {"name", "tool1"}, + {"arguments", { + {"arg", "value"} + }} + }} + }, + })} + }, + { + {"role", "user"}, + {"content", "User message"}, + }, + }); + }, + [&]() { + // tools + return json::array({ + { + {"name", "tool"}, + {"type", "function"}, + {"function", { + {"name", "tool1"}, + {"description", "Tool description"}, + {"parameters", { + {"type", "object"}, + {"properties", { + {"arg", { + {"type", "string"}, + {"description", "Arg description"}, + }}, + }}, + {"required", json::array({ "arg" })}, + }}, + }}, + }, + }); + }, + [&](bool success, value & messages, value & tools) { + if (!success) { + result.requires_non_null_content = true; + } + } + ); + // case: preserve reasoning content in chat history caps_try_execute( prog, From 96316496d5b38937e78783327aa6d53f0866b4fd Mon Sep 17 00:00:00 2001 From: Piotr Wilkin Date: Fri, 23 Jan 2026 20:38:19 +0100 Subject: [PATCH 02/39] Fix bad typo --- common/chat.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/common/chat.cpp b/common/chat.cpp index 0662e541ee..07114acf33 100644 --- a/common/chat.cpp +++ b/common/chat.cpp @@ -2963,8 +2963,8 @@ static void system_message_not_supported(json & messages) { static void requires_non_null_content(json & messages) { GGML_ASSERT(messages.is_array()); for (auto & message : messages) { - if (message.contains("tool_calls") && !messages.contains("content")) { - messages["content"] = ""; + if 
(message.contains("tool_calls") && !message.contains("content")) { + message["content"] = ""; } } } From 93f0cc05de42033ba53c96683567e0589a749478 Mon Sep 17 00:00:00 2001 From: Piotr Wilkin Date: Fri, 23 Jan 2026 20:49:39 +0100 Subject: [PATCH 03/39] Fix sanitizer warnings --- common/jinja/caps.cpp | 130 ++++++++++++++++++++++++------------------ 1 file changed, 73 insertions(+), 57 deletions(-) diff --git a/common/jinja/caps.cpp b/common/jinja/caps.cpp index 261a5cf0e8..329ccaaa8c 100644 --- a/common/jinja/caps.cpp +++ b/common/jinja/caps.cpp @@ -244,65 +244,81 @@ caps caps_get(jinja::program & prog) { ); // case: requires non-null content in tool calls - caps_try_execute( - prog, - [&]() { - // messages - return json::array({ - { - {"role", "user"}, - {"content", "User message"}, - }, - { - {"role", "assistant"}, - {"tool_calls", json::array({ - { - {"id", "call0001"}, - {"type", "function"}, - {"function", { - {"name", "tool1"}, - {"arguments", { - {"arg", "value"} - }} - }} + if (result.supports_tool_calls) { + caps_try_execute( + prog, + [&]() { + // messages + return json::array({ + { + { "role", "user" }, + { "content", "User message" }, + }, + { + { "role", "assistant" }, + { "tool_calls", + json::array({ + { + { "id", "call0001" }, + { "type", "function" }, + { "function", + { + { "name", "tool1" }, + { "arguments", + { + { "arg", "value" } + } + } + } + } + }, + }) + } + }, + { + { "role", "user" }, + { "content", "User message" }, + }, + }); + }, + [&]() { + // tools + return json::array({ + { + { "name", "tool" }, + { "type", "function" }, + { "function", + { + { "name", "tool1" }, + { "description", "Tool description" }, + { "parameters", + { + { "type", "object" }, + { "properties", + { + { "arg", + { + { "type", "string" }, + { "description", "Arg description" }, + } + }, + } + }, + { "required", json::array({ "arg" }) }, + } + }, + } }, - })} - }, - { - {"role", "user"}, - {"content", "User message"}, - }, - }); - }, - [&]() { - // tools - return 
json::array({ - { - {"name", "tool"}, - {"type", "function"}, - {"function", { - {"name", "tool1"}, - {"description", "Tool description"}, - {"parameters", { - {"type", "object"}, - {"properties", { - {"arg", { - {"type", "string"}, - {"description", "Arg description"}, - }}, - }}, - {"required", json::array({ "arg" })}, - }}, - }}, - }, - }); - }, - [&](bool success, value & messages, value & tools) { - if (!success) { - result.requires_non_null_content = true; + }, + }); + }, + [&](bool success, value & /* messages */, value & /* tools */) { + if (!success) { + result.requires_non_null_content = true; + } } - } - ); + ); + } // case: preserve reasoning content in chat history caps_try_execute( From 571805b348d8018f74bf1220dc41574bd2ac392a Mon Sep 17 00:00:00 2001 From: Piotr Wilkin Date: Fri, 23 Jan 2026 20:53:08 +0100 Subject: [PATCH 04/39] Make call IDs nine-character --- common/jinja/caps.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/common/jinja/caps.cpp b/common/jinja/caps.cpp index 329ccaaa8c..745f17d50f 100644 --- a/common/jinja/caps.cpp +++ b/common/jinja/caps.cpp @@ -165,7 +165,7 @@ caps caps_get(jinja::program & prog) { {"content", "Assistant message"}, {"tool_calls", json::array({ { - {"id", "call0001"}, + {"id", "call00001"}, {"type", "function"}, {"function", { {"name", "tool1"}, @@ -175,7 +175,7 @@ caps caps_get(jinja::program & prog) { }} }, { - {"id", "call0002"}, + {"id", "call00002"}, {"type", "function"}, {"function", { {"name", "tool1"}, @@ -259,7 +259,7 @@ caps caps_get(jinja::program & prog) { { "tool_calls", json::array({ { - { "id", "call0001" }, + { "id", "call00001" }, { "type", "function" }, { "function", { From 7e6f75a414478bfdc37e73352bfa973c37a852a4 Mon Sep 17 00:00:00 2001 From: Piotr Wilkin Date: Sat, 13 Dec 2025 15:56:54 +0100 Subject: [PATCH 05/39] THE GIANT AUTOPARSER SQUISH --- common/CMakeLists.txt | 8 +- common/chat-auto-parser-analyzer.cpp | 1461 ++++++ common/chat-auto-parser-generator.cpp | 
250 + common/chat-auto-parser-helpers.cpp | 1419 ++++++ common/chat-auto-parser-helpers.h | 133 + common/chat-auto-parser.h | 183 + common/chat-parser-xml-toolcall.cpp | 879 ---- common/chat-parser-xml-toolcall.h | 45 - common/chat-parser.cpp | 1669 ------- common/chat-parser.h | 133 - common/chat-peg-parser.cpp | 982 +++- common/chat-peg-parser.h | 167 +- common/chat.cpp | 2837 ++--------- common/chat.h | 246 +- common/jinja/value.cpp | 38 +- common/jinja/value.h | 2 + common/peg-parser.cpp | 1353 +++-- common/peg-parser.h | 6 + docs/autoparser.md | 513 ++ docs/development/parsing.md | 10 +- .../Apriel-1.6-15b-Thinker-fixed.jinja | 173 + .../templates/Bielik-11B-v3.0-Instruct.jinja | 77 + models/templates/GLM-4.7-Flash.jinja | 86 + models/templates/LFM2-8B-A1B.jinja | 47 + models/templates/Qwen3-Coder.jinja | 4 +- ...seek-ai-DeepSeek-R1-Distill-Llama-8B.jinja | 45 +- ...seek-ai-DeepSeek-R1-Distill-Qwen-32B.jinja | 48 +- .../templates/deepseek-ai-DeepSeek-V3.1.jinja | 72 +- models/templates/moonshotai-Kimi-K2.jinja | 86 +- models/templates/unsloth-Apriel-1.5.jinja | 6 +- scripts/server-bench.py | 2 +- scripts/server-test-model.py | 202 + scripts/snapdragon/qdc/tests/test_bench.py | 2 +- src/models/models.h | 120 +- tests/CMakeLists.txt | 4 +- tests/test-backend-ops.cpp | 2 + tests/test-chat-parser.cpp | 617 --- tests/test-chat-peg-parser.cpp | 1048 ++-- tests/test-chat-template.cpp | 680 --- tests/test-chat.cpp | 4348 +++++------------ tools/CMakeLists.txt | 1 + tools/parser/CMakeLists.txt | 8 + tools/parser/debug-template-parser.cpp | 531 ++ tools/server/server-context.cpp | 11 +- tools/server/server-task.cpp | 904 ++-- tools/server/server-task.h | 175 +- 46 files changed, 10290 insertions(+), 11343 deletions(-) create mode 100644 common/chat-auto-parser-analyzer.cpp create mode 100644 common/chat-auto-parser-generator.cpp create mode 100644 common/chat-auto-parser-helpers.cpp create mode 100644 common/chat-auto-parser-helpers.h create mode 100644 
common/chat-auto-parser.h delete mode 100644 common/chat-parser-xml-toolcall.cpp delete mode 100644 common/chat-parser-xml-toolcall.h delete mode 100644 common/chat-parser.cpp delete mode 100644 common/chat-parser.h create mode 100644 docs/autoparser.md create mode 100755 models/templates/Apriel-1.6-15b-Thinker-fixed.jinja create mode 100644 models/templates/Bielik-11B-v3.0-Instruct.jinja create mode 100644 models/templates/GLM-4.7-Flash.jinja create mode 100644 models/templates/LFM2-8B-A1B.jinja create mode 100644 scripts/server-test-model.py delete mode 100644 tests/test-chat-parser.cpp delete mode 100644 tests/test-chat-template.cpp create mode 100644 tools/parser/CMakeLists.txt create mode 100644 tools/parser/debug-template-parser.cpp diff --git a/common/CMakeLists.txt b/common/CMakeLists.txt index 295ae9ea25..689fd367da 100644 --- a/common/CMakeLists.txt +++ b/common/CMakeLists.txt @@ -48,10 +48,10 @@ add_library(${TARGET} STATIC arg.cpp arg.h base64.hpp - chat-parser.cpp - chat-parser.h - chat-parser-xml-toolcall.h - chat-parser-xml-toolcall.cpp + chat-auto-parser-analyzer.cpp + chat-auto-parser-generator.cpp + chat-auto-parser-helpers.cpp + chat-auto-parser.h chat-peg-parser.cpp chat-peg-parser.h chat.cpp diff --git a/common/chat-auto-parser-analyzer.cpp b/common/chat-auto-parser-analyzer.cpp new file mode 100644 index 0000000000..db6aa2c547 --- /dev/null +++ b/common/chat-auto-parser-analyzer.cpp @@ -0,0 +1,1461 @@ +#include "chat-auto-parser-helpers.h" +#include "chat-auto-parser.h" +#include "chat.h" +#include "log.h" +#include "nlohmann/json.hpp" + +using json = nlohmann::ordered_json; + +template_analysis_result template_analyzer::analyze_template(const common_chat_template & tmpl) { + LOG_DBG("=== STARTING UNIFIED TEMPLATE ANALYSIS ===\n"); + + template_analysis_result result; + + // Phase 1: Analyze content and reasoning structure (no tools involved) + result.content = analyze_content_structure(tmpl); + + // Phase 2: Analyze tool call structure 
(layered on Phase 1) + result.tools = analyze_tool_structure(tmpl, result.content); + + // Post-processing: Extract reasoning markers from tool_section_start if Phase 1 didn't detect them + // Some templates (like Command-R7B) include reasoning markers in tool outputs but not in prompts + if (result.content.reasoning_start.empty() && !result.tools.tool_section_start.empty()) { + // Known reasoning end marker patterns that might be embedded in tool_section_start + std::vector> reasoning_patterns = { + { "<|START_THINKING|>", "<|END_THINKING|>" }, + { "<|START_THOUGHT|>", "<|END_THOUGHT|>" }, + { "<|START_REASON|>", "<|END_REASON|>" }, + { "", "" }, + { "", "" }, + }; + + for (const auto & [start_marker, end_marker] : reasoning_patterns) { + size_t end_pos = result.tools.tool_section_start.find(end_marker); + if (end_pos != std::string::npos) { + // Found reasoning end marker in tool_section_start + // Extract it and clean up tool_section_start + result.content.reasoning_start = start_marker; + result.content.reasoning_end = end_marker; + result.content.reasoning_mode = content_structure::REASONING_OPTIONAL; + + // Clean up tool_section_start: remove everything before and including the end marker + size_t after_end = end_pos + end_marker.length(); + if (after_end < result.tools.tool_section_start.length()) { + result.tools.tool_section_start = result.tools.tool_section_start.substr(after_end); + // Trim leading whitespace + size_t first_non_ws = result.tools.tool_section_start.find_first_not_of(" \t\n\r"); + if (first_non_ws != std::string::npos && first_non_ws > 0) { + result.tools.tool_section_start = result.tools.tool_section_start.substr(first_non_ws); + } + } + + LOG_DBG("Post-processing: Extracted reasoning markers from tool_section_start\n"); + LOG_DBG(" reasoning_start: '%s', reasoning_end: '%s'\n", result.content.reasoning_start.c_str(), + result.content.reasoning_end.c_str()); + LOG_DBG(" cleaned tool_section_start: '%s'\n", 
result.tools.tool_section_start.c_str()); + break; + } + } + } + + // Post-processing: Detect content markers for recipient-based format + // For recipient-based format, content is prefixed with tool_call_start_marker + recipient_name + \n + // (e.g., ">>>all\n"). We need to detect and extract this as the content_start marker. + if (result.tools.function_format == tool_call_structure::FUNC_RECIPIENT_BASED && + result.content.content_start.empty() && !result.tools.tool_section_start.empty()) { + // Render template with content only (no tools) to detect the content marker + templates_params inputs; + inputs.messages = { + { { "role", "user" }, { "content", "Hello" } }, + { { "role", "assistant" }, { "content", "ACTUAL_CONTENT_HERE" } } + }; + inputs.add_generation_prompt = true; + + std::string output; + try { + output = common_chat_template_direct_apply(tmpl, inputs); + } catch (...) { + output = ""; + } + + if (!output.empty()) { + // Find where the actual content starts + size_t content_pos = output.find("ACTUAL_CONTENT_HERE"); + + if (content_pos != std::string::npos) { + // For recipient-based format, find the last occurrence of tool_call_start_marker + // before the content. The marker is from that position to the content (including the newline). 
+ size_t marker_pos = output.rfind(result.tools.tool_section_start, content_pos); + + if (marker_pos != std::string::npos && marker_pos < content_pos) { + // Find the newline after the marker + size_t newline_pos = output.find('\n', marker_pos); + + if (newline_pos != std::string::npos && newline_pos < content_pos) { + // Extract everything up to and including the newline after the marker + std::string detected_marker = output.substr(marker_pos, newline_pos - marker_pos + 1); + + // Verify the marker starts with tool_call_start_marker + if (detected_marker.find(result.tools.tool_section_start) == 0) { + result.content.content_start = detected_marker; + result.content.content_mode = content_structure::CONTENT_ALWAYS_WRAPPED; + LOG_DBG("Post-processing: Detected recipient-based content marker: '%s'\n", + result.content.content_start.c_str()); + } + } + } + } + } + } + + // Collect preserved tokens from both phases + collect_preserved_tokens(result); + + LOG_DBG("=== UNIFIED TEMPLATE ANALYSIS COMPLETE ===\n"); + LOG_DBG("Content structure:\n"); + LOG_DBG(" reasoning_mode: %d\n", static_cast(result.content.reasoning_mode)); + LOG_DBG(" reasoning_start: '%s'\n", result.content.reasoning_start.c_str()); + LOG_DBG(" reasoning_end: '%s'\n", result.content.reasoning_end.c_str()); + LOG_DBG(" content_mode: %d\n", static_cast(result.content.content_mode)); + LOG_DBG(" content_start: '%s'\n", result.content.content_start.c_str()); + LOG_DBG(" content_end: '%s'\n", result.content.content_end.c_str()); + LOG_DBG("Tool structure:\n"); + LOG_DBG(" supports_tools: %s\n", result.tools.supports_tools ? 
"true" : "false"); + LOG_DBG(" function_format: %d\n", static_cast(result.tools.function_format)); + LOG_DBG(" argument_format: %d\n", static_cast(result.tools.argument_format)); + LOG_DBG(" tool_section_start: '%s'\n", result.tools.tool_section_start.c_str()); + LOG_DBG(" tool_section_end: '%s'\n", result.tools.tool_section_end.c_str()); + + return result; +} + +content_structure template_analyzer::analyze_content_structure(const common_chat_template & tmpl) { + LOG_DBG("=== PHASE 1: ANALYZING CONTENT STRUCTURE ===\n"); + + content_structure cs; + + // Step 1: Detect reasoning markers by toggling enable_thinking + detect_reasoning_markers(tmpl, cs); + + // Step 2: Detect content wrapping markers + detect_content_markers(tmpl, cs); + + // Step 3: Determine reasoning mode (NONE, OPTIONAL, FORCED_OPEN) + templates_params inputs; + inputs.messages = { + { { "role", "user" }, { "content", "Hello" } } + }; + inputs.add_generation_prompt = true; + inputs.enable_thinking = true; + + std::string prompt; + try { + prompt = common_chat_template_direct_apply(tmpl, inputs); + } catch (...) 
{ + LOG_DBG("Failed to render template for reasoning mode detection\n"); + return cs; + } + + cs.reasoning_mode = detect_reasoning_mode(cs, prompt); + + LOG_DBG("Phase 1 complete: reasoning_mode=%d, content_mode=%d\n", static_cast(cs.reasoning_mode), + static_cast(cs.content_mode)); + + return cs; +} + +void template_analyzer::detect_reasoning_markers(const common_chat_template & tmpl, content_structure & cs) { + LOG_DBG("=== DETECTING REASONING MARKERS ===\n"); + + // Method 1: Compare outputs with reasoning_content field present vs absent + json reasoning_msg = { + { "role", "assistant" }, + { "content", "CONTENT_MARKER" }, + { "reasoning_content", "THOUGHT_MARKER" } + }; + + json base_msg = { + { "role", "assistant" }, + { "content", "CONTENT_MARKER" } + }; + + templates_params inputs; + + inputs.messages = { reasoning_msg }; + std::string reasoning_output; + try { + reasoning_output = common_chat_template_direct_apply(tmpl, inputs); + } catch (...) { + LOG_DBG("Failed to render template with reasoning_content\n"); + reasoning_output = ""; + } + + inputs.messages = { base_msg }; + std::string base_output; + try { + base_output = common_chat_template_direct_apply(tmpl, inputs); + } catch (...) 
{ + LOG_DBG("Failed to render base template\n"); + base_output = ""; + } + + // If outputs differ and we can find THOUGHT_MARKER, extract the reasoning markers + if (!reasoning_output.empty() && reasoning_output != base_output) { + size_t thought_pos = reasoning_output.find("THOUGHT_MARKER"); + size_t content_pos = reasoning_output.find("CONTENT_MARKER"); + + if (thought_pos != std::string::npos && content_pos != std::string::npos && content_pos > thought_pos) { + // Extract what's between THOUGHT_MARKER and CONTENT_MARKER as the end marker + size_t thought_end = thought_pos + strlen("THOUGHT_MARKER"); + cs.reasoning_end = reasoning_output.substr(thought_end, content_pos - thought_end); + + // Find what's before THOUGHT_MARKER by comparing with base_output + size_t diff_start = 0; + while (diff_start < base_output.length() && diff_start < reasoning_output.length() && + base_output[diff_start] == reasoning_output[diff_start]) { + diff_start++; + } + + // If diff_start is in the middle of a tag (previous char is '<'), back up to include it + // This handles cases like base="" vs reasoning="" where both share '<' + if (diff_start > 0 && diff_start < reasoning_output.length() && + reasoning_output[diff_start - 1] == '<') { + diff_start--; + } + + if (diff_start < thought_pos) { + cs.reasoning_start = reasoning_output.substr(diff_start, thought_pos - diff_start); + } + + trim_whitespace(cs.reasoning_start); + trim_whitespace(cs.reasoning_end); + + // If we found reasoning_end but not reasoning_start, try to derive it from reasoning_end + // For example: -> , -> <|START_THINKING|> + if (cs.reasoning_start.empty() && !cs.reasoning_end.empty()) { + // First, try to derive directly from the closing tag format + if (cs.reasoning_end.length() > 3 && cs.reasoning_end[0] == '<' && cs.reasoning_end[1] == '/') { + // Standard XML closing tag like -> + size_t tag_end_pos = cs.reasoning_end.find('>'); + if (tag_end_pos != std::string::npos) { + std::string tag_name = 
cs.reasoning_end.substr(2, tag_end_pos - 2); + cs.reasoning_start = "<" + tag_name + ">"; + LOG_DBG("Method 1: Derived reasoning_start from closing tag format\n"); + LOG_DBG(" start: '%s', end: '%s'\n", cs.reasoning_start.c_str(), cs.reasoning_end.c_str()); + } + } else if (cs.reasoning_end.find("<|END_") == 0 || cs.reasoning_end.find("<|/") == 0) { + // Special token format like <|END_THINKING|> -> <|START_THINKING|> + // or <|/think|> -> <|think|> + if (cs.reasoning_end.find("<|END_") == 0) { + std::string core = cs.reasoning_end.substr(6); // Remove "<|END_" + cs.reasoning_start = "<|START_" + core; + } else { + std::string core = cs.reasoning_end.substr(3); // Remove "<|/" + cs.reasoning_start = "<|" + core; + } + LOG_DBG("Method 1: Derived reasoning_start from special token format\n"); + LOG_DBG(" start: '%s', end: '%s'\n", cs.reasoning_start.c_str(), cs.reasoning_end.c_str()); + } + } + + if (!cs.reasoning_start.empty()) { + LOG_DBG("Method 1: Found reasoning markers via reasoning_content field\n"); + LOG_DBG(" start: '%s', end: '%s'\n", cs.reasoning_start.c_str(), cs.reasoning_end.c_str()); + } + } + } + + // Method 2: Compare prompts with enable_thinking true vs false + if (cs.reasoning_start.empty()) { + LOG_DBG("Method 1 failed, trying Method 2 (enable_thinking toggle)\n"); + + json user_msg = { + { "role", "user" }, + { "content", "Hello" } + }; + + templates_params inputs_prompt; + inputs_prompt.messages = { user_msg }; + inputs_prompt.add_generation_prompt = true; + inputs_prompt.enable_thinking = false; + std::string prompt_no_think; + try { + prompt_no_think = common_chat_template_direct_apply(tmpl, inputs_prompt); + } catch (...) { + prompt_no_think = ""; + } + + inputs_prompt.enable_thinking = true; + std::string prompt_think; + try { + prompt_think = common_chat_template_direct_apply(tmpl, inputs_prompt); + } catch (...) 
{ + prompt_think = ""; + } + + if (!prompt_think.empty() && prompt_think != prompt_no_think) { + // Find the difference - this should be the reasoning start marker + size_t diff_pos = 0; + while (diff_pos < prompt_no_think.length() && diff_pos < prompt_think.length() && + prompt_no_think[diff_pos] == prompt_think[diff_pos]) { + diff_pos++; + } + + // Check which direction has extra content + if (prompt_think.length() > prompt_no_think.length()) { + // Normal case: enable_thinking=true adds content (e.g., at the end) + std::string diff = prompt_think.substr(diff_pos); + + // Only use if it looks like a tag + if (diff.find('<') != std::string::npos || diff.find('[') != std::string::npos) { + cs.reasoning_start = diff; + cs.reasoning_end = create_closing_tag(diff); + trim_whitespace(cs.reasoning_start); + trim_whitespace(cs.reasoning_end); + + LOG_DBG("Method 2: Found reasoning markers via enable_thinking toggle\n"); + LOG_DBG(" start: '%s', end: '%s'\n", cs.reasoning_start.c_str(), cs.reasoning_end.c_str()); + } + } else { + // Reverse case: enable_thinking=false adds content (e.g., GLM-4.6 adds ) + // This means the template adds an empty thinking block when thinking is disabled + std::string diff = prompt_no_think.substr(diff_pos); + + // Look for adjacent opening and closing tags like + size_t open_start = diff.find('<'); + if (open_start != std::string::npos) { + size_t open_end = diff.find('>', open_start); + if (open_end != std::string::npos) { + std::string opening_tag = diff.substr(open_start, open_end - open_start + 1); + // Skip if it looks like a role marker + if (opening_tag.find("assistant") == std::string::npos && + opening_tag.find("user") == std::string::npos && + opening_tag.find("system") == std::string::npos) { + std::string expected_close = create_closing_tag(opening_tag); + // Check if the closing tag follows immediately (empty thinking block) + size_t close_pos = diff.find(expected_close, open_end + 1); + if (close_pos != std::string::npos) { + 
// Verify only whitespace between tags + std::string between = diff.substr(open_end + 1, close_pos - open_end - 1); + bool only_ws = true; + for (char c : between) { + if (!std::isspace(static_cast(c))) { + only_ws = false; + break; + } + } + if (only_ws) { + cs.reasoning_start = opening_tag; + cs.reasoning_end = expected_close; + trim_whitespace(cs.reasoning_start); + trim_whitespace(cs.reasoning_end); + + LOG_DBG("Method 2: Found reasoning markers via enable_thinking toggle (reverse)\n"); + LOG_DBG(" start: '%s', end: '%s'\n", cs.reasoning_start.c_str(), + cs.reasoning_end.c_str()); + } + } + } + } + } + } + } + } + + // Method 3: Check if the prompt ends with an unclosed reasoning tag + if (cs.reasoning_start.empty()) { + LOG_DBG("Method 2 failed, trying Method 3 (prompt ending with open tag)\n"); + + json user_msg = { + { "role", "user" }, + { "content", "Hello" } + }; + + templates_params inputs_prompt; + inputs_prompt.messages = { user_msg }; + inputs_prompt.add_generation_prompt = true; + inputs_prompt.enable_thinking = true; + + std::string prompt; + try { + prompt = common_chat_template_direct_apply(tmpl, inputs_prompt); + } catch (...) 
{ + prompt = ""; + } + + if (!prompt.empty()) { + // Save trailing whitespace before trimming + std::string trailing_ws; + size_t end_pos = prompt.length(); + while (end_pos > 0 && (prompt[end_pos - 1] == '\n' || prompt[end_pos - 1] == '\r')) { + trailing_ws = prompt[end_pos - 1] + trailing_ws; + end_pos--; + } + + trim_trailing_newlines(prompt); + + // Find the last tag in the prompt + size_t last_open_angle = prompt.rfind('<'); + size_t last_close_angle = prompt.rfind('>'); + + // Check for closed tags at the end + if (last_open_angle != std::string::npos && last_close_angle != std::string::npos && + last_close_angle == prompt.length() - 1 && last_close_angle > last_open_angle) { + std::string tag = prompt.substr(last_open_angle); + + // Check if this looks like a reasoning tag (not a role marker) + std::vector blacklisted_tags = { + "<|CHATBOT_TOKEN|>", "<|SYSTEM_TOKEN|>", "<|USER_TOKEN|>", "<|ASSISTANT_TOKEN|>", "<|im_start|>", + "<|im_end|>", "<|start_of_role|>", "<|end_of_role|>", "<|end_of_text|>", "<|end|>", + "<|assistant|>", "<|user|>", "<|system|>", "", "", + "" + }; + + bool is_blacklisted = false; + for (const auto & blacklisted : blacklisted_tags) { + if (tag == blacklisted) { + is_blacklisted = true; + break; + } + } + + // Check if it looks like a thinking/reasoning tag + std::string lower_tag = tag; + std::transform(lower_tag.begin(), lower_tag.end(), lower_tag.begin(), ::tolower); + bool looks_like_reasoning = lower_tag.find("think") != std::string::npos || + lower_tag.find("reason") != std::string::npos || + lower_tag.find("thought") != std::string::npos; + + if (!is_blacklisted && looks_like_reasoning) { + // Check if the detected tag is a close tag (starts with when thinking is disabled + bool is_close_tag = (tag.size() > 2 && tag[0] == '<' && tag[1] == '/'); + + if (is_close_tag) { + // The tag is a close tag (e.g., ) + // Derive the open tag by removing the '/' + std::string tag_name = extract_tag_name(tag); // Returns "/think" for + if 
(!tag_name.empty() && tag_name[0] == '/') { + tag_name = tag_name.substr(1); // Remove leading '/' + } + cs.reasoning_start = "<" + tag_name + ">"; + cs.reasoning_end = tag; + trim_whitespace(cs.reasoning_start); + trim_whitespace(cs.reasoning_end); + + LOG_DBG("Method 3: Found reasoning markers via prompt ending with CLOSE tag\n"); + LOG_DBG(" start: '%s', end: '%s'\n", cs.reasoning_start.c_str(), cs.reasoning_end.c_str()); + + // Note: The prompt ends with the close tag, meaning thinking is disabled. + // The reasoning_mode will be set in detect_reasoning_mode() which will + // correctly identify this as NOT forced open since the prompt ends with + // the end marker, not the start marker. + } else { + // Standard case: open tag at the end (e.g., ) + cs.reasoning_start = tag + trailing_ws; + cs.reasoning_end = create_closing_tag(tag) + trailing_ws; + trim_whitespace(cs.reasoning_start); + trim_whitespace(cs.reasoning_end); + + LOG_DBG("Method 3: Found reasoning markers via prompt ending with tag\n"); + LOG_DBG(" start: '%s', end: '%s'\n", cs.reasoning_start.c_str(), cs.reasoning_end.c_str()); + } + } + } + } + } + + // Method 4: Look for adjacent opening/closing tag pairs with common content in prompt + // This detects patterns like , <|START_THINKING|><|END_THINKING|>, [think][/think] + if (cs.reasoning_start.empty()) { + LOG_DBG("Method 3 failed, trying Method 4 (adjacent tag pairs with common content)\n"); + + json user_msg = { + { "role", "user" }, + { "content", "Hello" } + }; + + templates_params inputs_prompt; + inputs_prompt.messages = { user_msg }; + inputs_prompt.add_generation_prompt = true; + // Try with thinking disabled - templates may output empty thinking blocks + inputs_prompt.enable_thinking = false; + + std::string prompt; + try { + prompt = common_chat_template_direct_apply(tmpl, inputs_prompt); + } catch (...) { + prompt = ""; + } + + if (!prompt.empty()) { + // Look for patterns like or ... 
where tag1 and tag2 share a common word + // Common patterns: + // + // <|START_THINKING|><|END_THINKING|> + // [think][/think] + + // Find potential tag pairs by looking for closing tags that immediately follow opening tags + // Pattern: opening tag followed by closing tag with same keyword + std::vector> tag_patterns = { + // (opening pattern, closing pattern, keyword to match) + { "<|START_", "<|END_", "THINKING" }, + { "<|START_", "<|END_", "THOUGHT" }, + { "<|START_", "<|END_", "REASON" }, + { "", "", "" }, + { "", "", "" }, + { "", "", "" }, + { "[think]", "[/think]", "" }, + { "[THINK]", "[/THINK]", "" }, + { "", "", "" }, + { "", "", "" }, + { "<|think|>", "<|/think|>", "" }, + }; + + for (const auto & [open_prefix, close_prefix, keyword] : tag_patterns) { + size_t open_pos = prompt.find(open_prefix); + if (open_pos == std::string::npos) { + continue; + } + + std::string start_tag; + std::string end_tag; + + if (!keyword.empty()) { + // Pattern like <|START_THINKING|><|END_THINKING|> + std::string full_open = open_prefix + keyword; + size_t full_open_pos = prompt.find(full_open); + if (full_open_pos == std::string::npos) { + continue; + } + + // Find the end of this tag (look for |> or >) + size_t tag_end = prompt.find("|>", full_open_pos + full_open.length()); + if (tag_end == std::string::npos) { + tag_end = prompt.find('>', full_open_pos + full_open.length()); + } + if (tag_end == std::string::npos) { + continue; + } + + start_tag = + prompt.substr(full_open_pos, tag_end - full_open_pos + (prompt[tag_end] == '|' ? 
2 : 1)); + + // Look for the corresponding end tag + std::string expected_close = close_prefix + keyword; + size_t close_pos = prompt.find(expected_close, tag_end); + if (close_pos == std::string::npos) { + continue; + } + + // Find end of close tag + size_t close_end = prompt.find("|>", close_pos + expected_close.length()); + if (close_end == std::string::npos) { + close_end = prompt.find('>', close_pos + expected_close.length()); + } + if (close_end == std::string::npos) { + continue; + } + + end_tag = prompt.substr(close_pos, close_end - close_pos + (prompt[close_end] == '|' ? 2 : 1)); + } else { + // Simple pattern like + start_tag = open_prefix; + size_t close_pos = prompt.find(close_prefix, open_pos + start_tag.length()); + if (close_pos == std::string::npos) { + continue; + } + end_tag = close_prefix; + } + + // Verify the tags are adjacent or nearly adjacent (only whitespace between) + size_t start_end_pos = prompt.find(start_tag) + start_tag.length(); + size_t end_start_pos = prompt.find(end_tag, start_end_pos); + if (end_start_pos != std::string::npos) { + std::string between = prompt.substr(start_end_pos, end_start_pos - start_end_pos); + // Allow only whitespace between the tags (empty thinking block) + bool only_whitespace = true; + for (char c : between) { + if (!std::isspace(static_cast(c))) { + only_whitespace = false; + break; + } + } + + if (only_whitespace) { + cs.reasoning_start = start_tag; + cs.reasoning_end = end_tag; + LOG_DBG("Method 4: Found reasoning markers via adjacent tag pairs\n"); + LOG_DBG(" start: '%s', end: '%s'\n", cs.reasoning_start.c_str(), cs.reasoning_end.c_str()); + break; + } + } + } + } + } + + if (cs.reasoning_start.empty()) { + LOG_DBG("No reasoning markers detected\n"); + } +} + +void template_analyzer::detect_content_markers(const common_chat_template & tmpl, content_structure & cs) { + LOG_DBG("=== DETECTING CONTENT MARKERS ===\n"); + + // Render template with a unique content marker + json user_msg = { + { "role", 
"user" }, + { "content", "Hello" } + }; + json assistant_msg = { + { "role", "assistant" }, + { "content", "UNIQUE_CONTENT_12345" } + }; + + templates_params inputs; + inputs.messages = { user_msg, assistant_msg }; + // Try with thinking enabled first (some templates only wrap content when reasoning is present) + inputs.extra_context["thinking"] = true; + inputs.enable_thinking = true; + + std::string output_with_thinking; + try { + output_with_thinking = common_chat_template_direct_apply(tmpl, inputs); + } catch (...) { + output_with_thinking = ""; + } + + // Also render without thinking + inputs.extra_context["thinking"] = false; + inputs.enable_thinking = false; + + std::string output_no_thinking; + try { + output_no_thinking = common_chat_template_direct_apply(tmpl, inputs); + } catch (...) { + output_no_thinking = ""; + } + + // Check both outputs for content markers + auto find_content_markers = [&](const std::string & output) -> std::pair { + size_t marker_pos = output.find("UNIQUE_CONTENT_12345"); + if (marker_pos == std::string::npos) { + return { "", "" }; + } + + // Known content marker patterns + std::vector> patterns = { + { "<|START_RESPONSE|>", "<|END_RESPONSE|>" }, + { "<|response|>", "<|/response|>" }, + { "", "" }, + { "", "" }, + { "", "" }, + { "<|CHATBOT_TOKEN|>", "<|END_OF_TURN_TOKEN|>" }, + }; + + for (const auto & [start_pattern, end_pattern] : patterns) { + size_t start_pos = output.rfind(start_pattern, marker_pos); + if (start_pos != std::string::npos) { + // Check that there's only whitespace between the start pattern and our marker + std::string between = + output.substr(start_pos + start_pattern.length(), marker_pos - start_pos - start_pattern.length()); + size_t first_non_ws = between.find_first_not_of(" \t\n\r"); + if (first_non_ws == std::string::npos) { + // Found valid start marker, look for end marker + size_t marker_end = marker_pos + strlen("UNIQUE_CONTENT_12345"); + size_t end_pos = output.find(end_pattern, marker_end); + if 
(end_pos != std::string::npos) { + std::string after = output.substr(marker_end, end_pos - marker_end); + size_t first_non_ws_after = after.find_first_not_of(" \t\n\r"); + if (first_non_ws_after == std::string::npos) { + return { start_pattern, end_pattern }; + } + } + } + } + } + + return { "", "" }; + }; + + auto [start_with_thinking, end_with_thinking] = find_content_markers(output_with_thinking); + auto [start_no_thinking, end_no_thinking] = find_content_markers(output_no_thinking); + + if (!start_with_thinking.empty() && !start_no_thinking.empty()) { + // Content is always wrapped + cs.content_mode = content_structure::CONTENT_ALWAYS_WRAPPED; + cs.content_start = start_with_thinking; + cs.content_end = end_with_thinking; + LOG_DBG("Content markers found in both thinking modes (ALWAYS_WRAPPED)\n"); + } else if (!start_with_thinking.empty() && start_no_thinking.empty()) { + // Content is wrapped only when reasoning is present + cs.content_mode = content_structure::CONTENT_WRAPPED_WITH_REASONING; + cs.content_start = start_with_thinking; + cs.content_end = end_with_thinking; + LOG_DBG("Content markers found only with thinking enabled (WRAPPED_WITH_REASONING)\n"); + } else if (!start_no_thinking.empty()) { + // Unusual: content wrapped without thinking but not with? 
Use what we found + cs.content_mode = content_structure::CONTENT_ALWAYS_WRAPPED; + cs.content_start = start_no_thinking; + cs.content_end = end_no_thinking; + LOG_DBG("Content markers found only without thinking (treating as ALWAYS_WRAPPED)\n"); + } else { + cs.content_mode = content_structure::CONTENT_PLAIN; + LOG_DBG("No content markers detected (PLAIN)\n"); + } + + LOG_DBG("Content markers: start='%s', end='%s'\n", cs.content_start.c_str(), cs.content_end.c_str()); +} + +content_structure::reasoning_mode_type template_analyzer::detect_reasoning_mode(const content_structure & cs, + const std::string & prompt) { + LOG_DBG("=== DETECTING REASONING MODE ===\n"); + + // If both markers are empty, mode is NONE + if (cs.reasoning_start.empty() && cs.reasoning_end.empty()) { + LOG_DBG("No reasoning markers, mode=REASONING_NONE\n"); + return content_structure::REASONING_NONE; + } + + // Handle case with end marker but no start marker (implicit start) + if (cs.reasoning_start.empty() && !cs.reasoning_end.empty()) { + LOG_DBG("Reasoning end marker present but no start marker, mode=REASONING_FORCED_OPEN\n"); + return content_structure::REASONING_FORCED_OPEN; + } + + // Check if the prompt ends with the reasoning start marker (forced open) + std::string trimmed_prompt = prompt; + trim_trailing_newlines(trimmed_prompt); + + std::string trimmed_marker = cs.reasoning_start; + trim_whitespace(trimmed_marker); + + if (string_ends_with(trimmed_prompt, trimmed_marker)) { + LOG_DBG("Prompt ends with reasoning start marker, mode=REASONING_FORCED_OPEN\n"); + return content_structure::REASONING_FORCED_OPEN; + } + + // Otherwise, reasoning is optional + LOG_DBG("Reasoning markers present but not forced, mode=REASONING_OPTIONAL\n"); + return content_structure::REASONING_OPTIONAL; +} + +tool_call_structure template_analyzer::analyze_tool_structure(const common_chat_template & tmpl, + const content_structure & content) { + (void) content; // May be used in future for better tool detection 
+ + LOG_DBG("=== PHASE 2: ANALYZING TOOL STRUCTURE ===\n"); + + tool_call_structure ts; + + // Use differential analysis to detect tool patterns + // This now includes a robust test that renders two payloads: + // 1. Tool definitions + content only + // 2. Tool definitions + content + tool calls + // If outputs are identical, the template doesn't support tool calls + auto discovered = analyze_by_differential(tmpl); + auto format = determine_format_from_patterns(discovered); + + // Strip EOS tokens from discovered patterns (handles both standard <|eos|> and fullwidth <|end▁of▁sentence|>) + if (!discovered.tool_call_closer.empty()) { + LOG_DBG("Before stripping: tool_call_closer='%s' (len=%zu)\n", discovered.tool_call_closer.c_str(), + discovered.tool_call_closer.length()); + discovered.tool_call_closer = strip_eos_token(discovered.tool_call_closer); + LOG_DBG("After stripping: tool_call_closer='%s'\n", discovered.tool_call_closer.c_str()); + } + if (!discovered.tool_call_end_marker.empty()) { + discovered.tool_call_end_marker = strip_eos_token(discovered.tool_call_end_marker); + } + + if (format == FORMAT_UNKNOWN) { + LOG_DBG("Template does not support tool calls (differential analysis returned no patterns)\n"); + ts.supports_tools = false; + return ts; + } + + // Propagate requires_nonnull_content flag from differential analysis + ts.requires_nonnull_content = discovered.requires_nonnull_content; + if (ts.requires_nonnull_content) { + LOG_DBG("Template requires non-null content (renders null as 'None')\n"); + } + + // Check if minja reports tool call support (for informational purposes) + auto caps = tmpl.original_caps(); + if (!caps.supports_tool_calls) { + LOG_DBG("Note: minja caps indicate no tool support, but differential analysis found patterns\n"); + } + + if (format == FORMAT_JSON_NATIVE) { + analyze_json_format(ts, discovered); + } else if (format == FORMAT_XML_CONSTRUCTED) { + analyze_xml_format(ts, discovered); + } else if (format == FORMAT_BRACKET_TAG) { 
+ analyze_bracket_tag_format(ts, discovered); + } else if (format == FORMAT_RECIPIENT_BASED) { + analyze_recipient_based_format(ts, discovered); + } else if (format == FORMAT_MARKDOWN_CODE_BLOCK) { + analyze_markdown_code_block_format(ts, discovered); + } + + return ts; +} + +void template_analyzer::collect_preserved_tokens(template_analysis_result & result) { + LOG_DBG("=== COLLECTING PRESERVED TOKENS ===\n"); + + std::vector tokens; + + // Add reasoning markers + if (!result.content.reasoning_start.empty()) { + tokens.push_back(result.content.reasoning_start); + } + if (!result.content.reasoning_end.empty()) { + tokens.push_back(result.content.reasoning_end); + } + + // Add content markers + if (!result.content.content_start.empty()) { + tokens.push_back(result.content.content_start); + } + if (!result.content.content_end.empty()) { + tokens.push_back(result.content.content_end); + } + + // Add tool section markers + if (!result.tools.tool_section_start.empty()) { + tokens.push_back(result.tools.tool_section_start); + } + if (!result.tools.tool_section_end.empty()) { + tokens.push_back(result.tools.tool_section_end); + } + + // Add function markers for tag-based formats + if (result.tools.function_format == tool_call_structure::FUNC_TAG_WITH_NAME) { + if (!result.tools.function_prefix.empty()) { + tokens.push_back(result.tools.function_prefix); + } + if (!result.tools.function_close.empty()) { + tokens.push_back(result.tools.function_close); + } + } + + // Add markers for prefixed-indexed formats (e.g., Kimi-K2) + if (result.tools.function_format == tool_call_structure::FUNC_PREFIXED_INDEXED) { + if (!result.tools.per_call_start.empty()) { + tokens.push_back(result.tools.per_call_start); + } + if (!result.tools.args_marker.empty()) { + tokens.push_back(result.tools.args_marker); + } + if (!result.tools.per_call_end.empty()) { + tokens.push_back(result.tools.per_call_end); + } + } + + // Add argument markers for tagged formats + if (result.tools.argument_format == 
tool_call_structure::ARGS_TAGGED) { + if (!result.tools.arg_prefix.empty()) { + tokens.push_back(result.tools.arg_prefix); + } + if (!result.tools.arg_close.empty()) { + tokens.push_back(result.tools.arg_close); + } + } + + // Add markers for markdown code block format (Cohere Command-R Plus) + if (result.tools.function_format == tool_call_structure::FUNC_MARKDOWN_CODE_BLOCK) { + if (!result.tools.code_block_marker.empty()) { + tokens.push_back(result.tools.code_block_marker); + } + if (!result.tools.tool_section_end.empty()) { + tokens.push_back(result.tools.tool_section_end); // Closing code fence ``` + } + } + + result.preserved_tokens = tokens; + LOG_DBG("Collected %zu preserved tokens\n", tokens.size()); +} + +void template_analyzer::analyze_json_format(tool_call_structure & ts, const internal_discovered_pattern & discovered) { + ts.supports_tools = true; + ts.function_format = tool_call_structure::FUNC_JSON_OBJECT; + ts.argument_format = tool_call_structure::ARGS_JSON; + ts.tool_section_start = discovered.tool_call_start_marker; + ts.tool_section_end = discovered.tool_call_end_marker; + ts.name_field = discovered.tool_name_field; + ts.args_field = discovered.tool_args_field; + ts.id_field = discovered.tool_id_field; + + // Check for FUNC_NAME_AS_KEY format (e.g. 
Apertus: {"function_name": args}) + // This is characterized by the opener ending in {" and no explicit name field found yet + if (!discovered.tool_call_opener.empty() && discovered.tool_call_opener.length() >= 2 && + discovered.tool_call_opener.substr(discovered.tool_call_opener.length() - 2) == "{\"") { + LOG_DBG("Detected FUNC_NAME_AS_KEY format from tool_call_opener ending in '{\"' \n"); + ts.function_format = tool_call_structure::FUNC_NAME_AS_KEY; + } + + // For JSON_NATIVE format, clean up tool_section_end to only include the closing tag + // The differential analysis may include JSON closing braces (e.g., "}}\n") + // but the parser handles JSON separately, so we only need the tag marker + if (!ts.tool_section_end.empty()) { + size_t tag_start = ts.tool_section_end.find("', tag_start); + if (tag_end != std::string::npos) { + // Check if there is a closing bracket ']' before the tag + size_t bracket_pos = ts.tool_section_end.rfind(']', tag_start); + if (bracket_pos != std::string::npos) { + // Include the bracket + ts.tool_section_end = ts.tool_section_end.substr(bracket_pos, tag_end - bracket_pos + 1); + } else { + ts.tool_section_end = ts.tool_section_end.substr(tag_start, tag_end - tag_start + 1); + } + } + } else { + // Try other closing patterns like ]<|END_ACTION|> + tag_start = ts.tool_section_end.find("<|"); + if (tag_start != std::string::npos) { + size_t tag_end = ts.tool_section_end.find("|>", tag_start); + if (tag_end != std::string::npos) { + // Include the opening bracket if present + size_t bracket_pos = ts.tool_section_end.rfind(']', tag_start); + if (bracket_pos != std::string::npos && bracket_pos + 1 == tag_start) { + ts.tool_section_end = ts.tool_section_end.substr(bracket_pos, tag_end - bracket_pos + 2); + } else { + ts.tool_section_end = ts.tool_section_end.substr(tag_start, tag_end - tag_start + 2); + } + } + } + } + } +} + +void template_analyzer::analyze_xml_format(tool_call_structure & ts, const internal_discovered_pattern & 
discovered) {
    // Interior of analyze_xml_format: classifies tag/prefix-style tool-call
    // templates (XML-ish tags, <|…|> token markers, prefixed-indexed Kimi-K2
    // style, DeepSeek-R1 fenced-JSON style) and derives the start/end/prefix
    // markers the streaming parser will use.
    // NOTE(review): this patch copy was damaged by markup stripping — several
    // string literals and one whole statement region below are missing; restore
    // from the upstream patch before compiling.
    ts.supports_tools = true;
    ts.function_format = tool_call_structure::FUNC_TAG_WITH_NAME;
    ts.tool_section_start = discovered.tool_call_start_marker;
    ts.tool_section_end = discovered.tool_call_end_marker;

    // Extract function tag patterns
    if (!discovered.function_opener.empty()) {
        char first = discovered.function_opener[0];
        if (first != '<' && first != '{' && first != '[') {
            // Non-XML/JSON prefix format (e.g., ">>>", "##", etc.)
            // Function name follows prefix directly, ends with newline
            ts.function_prefix = discovered.function_opener;
            ts.function_suffix = "\n"; // Function name typically ends with newline
            ts.function_close = ""; // No closing tag for prefix formats
        } else {
            size_t eq_pos = discovered.function_opener.find('=');
            if (eq_pos != std::string::npos) {
                // Check if there's a quote after the equals sign
                if (eq_pos + 1 < discovered.function_opener.length() &&
                    (discovered.function_opener[eq_pos + 1] == '"' || discovered.function_opener[eq_pos + 1] == '\'')) {
                    ts.function_prefix = discovered.function_opener.substr(0, eq_pos + 2);
                } else {
                    ts.function_prefix = discovered.function_opener.substr(0, eq_pos + 1);
                }
                ts.function_suffix = discovered.function_name_suffix;

                // NOTE(review): the tag examples in the next comments were
                // stripped by the markup mangling — the "like {args}" and
                // "like ..." fragments originally contained literal tags.
                // For formats like {args}, where function_prefix
                // IS the section start (no separate wrapper), tool_section_end is the function close.
                // But for nested formats like ...,
                // the function_close is separate from tool_section_end.
                // We detect the non-nested case when tool_section_start matches function_prefix
                // (or tool_section_start was already cleared because it matched).
                bool section_start_matches_prefix = ts.tool_section_start.empty() ||
                                                    ts.tool_section_start.find(ts.function_prefix) == 0 ||
                                                    ts.function_prefix.find(ts.tool_section_start) == 0;
                // NOTE(review): the statement below is truncated — markup stripping
                // swallowed the rest of this condition, its body, the closing braces
                // and the start of the else-branch comment (which referenced
                // <|tool_call_begin|>functions.name:0<|tool_call_argument_begin|>).
                // Kept byte-identical; must be restored from the upstream patch.
                if (section_start_matches_prefix && ts.function_prefix.find('<') == 0 && !ts.tool_section_end.empty() &&
                    ts.tool_section_end.find("functions.name:0<|tool_call_argument_begin|>
                size_t namespace_dot = discovered.function_opener.rfind('.');
                bool has_namespace =
                    (namespace_dot != std::string::npos && namespace_dot == discovered.function_opener.length() - 1);

                bool has_index =
                    (!discovered.function_name_suffix.empty() && discovered.function_name_suffix[0] == ':' &&
                     discovered.function_name_suffix.length() > 1 &&
                     // NOTE(review): cast template argument (likely <unsigned char>) was stripped
                     std::isdigit(static_cast(discovered.function_name_suffix[1])));

                if (has_namespace && has_index) {
                    LOG_DBG("Detected FUNC_PREFIXED_INDEXED format: namespace ends with '.', suffix has ':N' index\n");
                    ts.function_format = tool_call_structure::FUNC_PREFIXED_INDEXED;

                    // Split function_opener into per_call_start and function_namespace
                    // e.g., "<|tool_call_begin|>functions." -> "<|tool_call_begin|>" + "functions."
                    // Find where the namespace starts (after the last '>' before the '.')
                    size_t namespace_start = discovered.function_opener.rfind('>');
                    if (namespace_start != std::string::npos && namespace_start < namespace_dot) {
                        ts.per_call_start = discovered.function_opener.substr(0, namespace_start + 1);
                        ts.function_namespace = discovered.function_opener.substr(namespace_start + 1);
                    } else {
                        // Fallback: namespace is just the part ending with '.'
                        ts.per_call_start = discovered.function_opener.substr(0, namespace_dot);
                        ts.function_namespace = ".";
                    }

                    // Extract args_marker from function_name_suffix
                    // Format: ":0<|some_marker|>" -> index is ":0", args_marker is "<|some_marker|>"
                    size_t args_marker_start = discovered.function_name_suffix.find('<');
                    if (args_marker_start != std::string::npos) {
                        size_t args_marker_end = discovered.function_name_suffix.find('>', args_marker_start);
                        if (args_marker_end != std::string::npos) {
                            ts.args_marker = discovered.function_name_suffix.substr(
                                args_marker_start, args_marker_end - args_marker_start + 1);
                        }
                    }

                    // Derive per_call_end from tool_call_closer by finding corresponding end marker
                    // tool_call_closer contains per_call_end + tool_section_end
                    // We find per_call_end by looking for a marker that structurally matches per_call_start
                    if (!discovered.tool_call_closer.empty() && !ts.per_call_start.empty()) {
                        // Extract structural pattern from per_call_start
                        // e.g., "<|tool_call_begin|>" -> look for "<|tool_call_...|>" in closer
                        size_t start_marker_begin = ts.per_call_start.find("<|");
                        size_t start_marker_end = ts.per_call_start.rfind("|>");
                        if (start_marker_begin != std::string::npos && start_marker_end != std::string::npos) {
                            // Find the base pattern (e.g., "<|tool_call" from "<|tool_call_begin|>")
                            std::string start_content = ts.per_call_start.substr(
                                start_marker_begin + 2, start_marker_end - start_marker_begin - 2);
                            // Find a related marker in the closer
                            size_t closer_pos = discovered.tool_call_closer.find("<|");
                            while (closer_pos != std::string::npos) {
                                size_t closer_end = discovered.tool_call_closer.find("|>", closer_pos);
                                if (closer_end != std::string::npos) {
                                    std::string candidate =
                                        discovered.tool_call_closer.substr(closer_pos, closer_end - closer_pos + 2);
                                    // Check if this marker shares a common prefix with per_call_start
                                    // (ignoring _begin vs _end suffix differences)
                                    std::string candidate_content = candidate.substr(2, candidate.length() - 4);
                                    // Find common prefix between start_content and candidate_content
                                    size_t common_len = 0;
                                    while (common_len < start_content.length() &&
                                           common_len < candidate_content.length() &&
                                           start_content[common_len] == candidate_content[common_len]) {
                                        common_len++;
                                    }
                                    // If substantial overlap (>50%), this is likely the per_call_end
                                    if (common_len > start_content.length() / 2 &&
                                        candidate_content.find("end") != std::string::npos) {
                                        ts.per_call_end = candidate;
                                        break;
                                    }
                                }
                                closer_pos = discovered.tool_call_closer.find("<|", closer_pos + 1);
                            }
                        }
                    }

                    // Derive tool_section_end from tool_section_start by finding matching end marker
                    // For FUNC_PREFIXED_INDEXED, we always derive this to get the correct marker
                    // (the default discovered.tool_call_end_marker may contain extra content)
                    if (!ts.tool_section_start.empty()) {
                        size_t start_marker_begin = ts.tool_section_start.find("<|");
                        size_t start_marker_end = ts.tool_section_start.rfind("|>");
                        if (start_marker_begin != std::string::npos && start_marker_end != std::string::npos) {
                            std::string start_content = ts.tool_section_start.substr(
                                start_marker_begin + 2, start_marker_end - start_marker_begin - 2);
                            size_t closer_pos = discovered.tool_call_closer.find("<|");
                            while (closer_pos != std::string::npos) {
                                size_t closer_end = discovered.tool_call_closer.find("|>", closer_pos);
                                if (closer_end != std::string::npos) {
                                    std::string candidate =
                                        discovered.tool_call_closer.substr(closer_pos, closer_end - closer_pos + 2);
                                    std::string candidate_content = candidate.substr(2, candidate.length() - 4);
                                    size_t common_len = 0;
                                    while (common_len < start_content.length() &&
                                           common_len < candidate_content.length() &&
                                           start_content[common_len] == candidate_content[common_len]) {
                                        common_len++;
                                    }
                                    if (common_len > start_content.length() / 2 &&
                                        candidate_content.find("end") != std::string::npos) {
                                        ts.tool_section_end = candidate;
                                        break;
                                    }
                                }
                                closer_pos = discovered.tool_call_closer.find("<|", closer_pos + 1);
                            }
                        }
                    }

                    LOG_DBG(
                        "FUNC_PREFIXED_INDEXED: per_call_start='%s', namespace='%s', args_marker='%s', "
                        "per_call_end='%s'\n",
                        ts.per_call_start.c_str(), ts.function_namespace.c_str(), ts.args_marker.c_str(),
                        ts.per_call_end.c_str());
                } else {
                    // Other formats like <|tool_call_begin|>name (non-indexed)
                    // Use function_opener as default, but try to use full tool_call_opener if it contains more
                    ts.function_prefix = discovered.function_opener;
                    LOG_DBG("Initial function_prefix: '%s', tool_call_opener: '%s', tool_section_start: '%s'\n",
                            ts.function_prefix.c_str(), discovered.tool_call_opener.c_str(),
                            ts.tool_section_start.c_str());
                    if (!ts.tool_section_start.empty() &&
                        discovered.tool_call_opener.find(ts.tool_section_start) == 0) {
                        std::string remainder = discovered.tool_call_opener.substr(ts.tool_section_start.length());
                        LOG_DBG("Derived remainder: '%s'\n", remainder.c_str());
                        if (remainder.length() > ts.function_prefix.length()) {
                            ts.function_prefix = remainder;
                        }
                    }
                    ts.function_suffix = discovered.function_name_suffix;
                    ts.function_close = discovered.function_closer;
                }
            }
        }
    }

    // Fix for templates where tool_section_start matches function_prefix (double wrapping)
    // e.g. Functionary: tool_section_start="<|tool▁call▁begin|>function
    // We need to derive tool_section_end from the outer marker pattern
    if (ts.function_suffix.find("```") != std::string::npos && !ts.tool_section_start.empty()) {
        // Check if tool_section_start contains nested markers (both outer and per-call)
        // Pattern: ...
        // We look for "calls" pattern which indicates an outer container
        size_t calls_pos = ts.tool_section_start.find("calls");
        if (calls_pos != std::string::npos && calls_pos < ts.tool_section_start.length()) {
            // Find where the outer marker ends (after the first >)
            size_t first_close = ts.tool_section_start.find('>', calls_pos);
            if (first_close != std::string::npos && first_close < ts.tool_section_start.length() - 1) {
                // Extract the outer marker (e.g., "<|tool▁calls▁begin|>")
                std::string outer_start = ts.tool_section_start.substr(0, first_close + 1);
                // Derive the outer end marker by replacing "begin" with "end"
                size_t begin_pos = outer_start.find("begin");
                if (begin_pos != std::string::npos) {
                    std::string outer_end =
                        outer_start.substr(0, begin_pos) + "end" + outer_start.substr(begin_pos + 5);
                    ts.tool_section_end = outer_end;

                    // Strip outer marker from function_prefix and function_opener if they were combined
                    if (ts.tool_section_start.find(outer_start) == 0) {
                        std::string remainder = ts.tool_section_start.substr(outer_start.length());
                        // Trim leading whitespace from remainder
                        size_t first_non_ws = remainder.find_first_not_of(" \t\n\r");
                        if (first_non_ws != std::string::npos && first_non_ws > 0) {
                            remainder = remainder.substr(first_non_ws);
                        }

                        // Concatenate with existing function_prefix (e.g. separator tag)
                        // but avoid double-concatenation if already present
                        if (!remainder.empty() && ts.function_prefix.find(remainder) == std::string::npos) {
                            ts.function_prefix = remainder + ts.function_prefix;
                        }
                    }

                    // Update tool_section_start to be just the outer marker
                    ts.tool_section_start = outer_start;

                    // Check if there's a fence in tool_call_closer that should be in function_close
                    // (DeepSeek R1 wraps JSON in markdown blocks within the custom tags)
                    if (discovered.tool_call_closer.find("```") != std::string::npos) {
                        size_t fence_pos = discovered.tool_call_closer.find("```");
                        // Include leading newlines if present before the fence
                        while (fence_pos > 0 && (discovered.tool_call_closer[fence_pos - 1] == '\n' ||
                                                 discovered.tool_call_closer[fence_pos - 1] == '\r')) {
                            fence_pos--;
                        }
                        ts.function_close = discovered.tool_call_closer.substr(fence_pos);

                        // Clip function_close to not include tool_section_end (if they were combined in differential analysis)
                        if (!ts.tool_section_end.empty()) {
                            size_t end_pos = ts.function_close.find(ts.tool_section_end);
                            if (end_pos != std::string::npos) {
                                ts.function_close = ts.function_close.substr(0, end_pos);
                            }
                        }

                        // Further trim any trailing EOS or prompt garbage
                        ts.function_close = strip_eos_token(ts.function_close);
                        size_t prompt_garbage = ts.function_close.find("<|");
                        if (prompt_garbage != std::string::npos && prompt_garbage > 0 &&
                            ts.function_close.substr(prompt_garbage).find("Assistant") != std::string::npos) {
                            ts.function_close = ts.function_close.substr(0, prompt_garbage);
                        }
                    }
                }
            }
        }
    }

    // General cleanup for tool_section_end when tool_section_start uses token markers (<|...|> or <|...|>)
    // If tool_section_start contains a token marker with "begin" and tool_section_end is messy (contains }
    // or multiple markers), derive tool_section_end by finding matching end marker in tool_call_closer
    if (!ts.tool_section_start.empty() &&
        !discovered.tool_call_closer.empty()) {
        // Check if tool_section_start contains a token marker
        size_t start_opener_pos = find_token_opener(ts.tool_section_start, 0);
        size_t start_closer_pos = find_token_closer(ts.tool_section_start, start_opener_pos);
        if (start_opener_pos != std::string::npos && start_closer_pos != std::string::npos) {
            size_t opener_len = get_token_opener_length(ts.tool_section_start, start_opener_pos);
            // Extract the token content (between opener and closer)
            std::string start_content = ts.tool_section_start.substr(start_opener_pos + opener_len,
                                                                     start_closer_pos - start_opener_pos - opener_len);

            // Check if tool_section_end needs cleanup (starts with } or contains multiple markers)
            bool needs_cleanup = false;
            if (!ts.tool_section_end.empty() && ts.tool_section_end[0] == '}') {
                needs_cleanup = true;
            }
            // Count tokens in tool_section_end
            size_t token_count = 0;
            size_t pos = 0;
            while ((pos = find_token_opener(ts.tool_section_end, pos)) != std::string::npos) {
                token_count++;
                pos += get_token_opener_length(ts.tool_section_end, pos);
            }
            if (token_count > 1) {
                needs_cleanup = true;
            }

            if (needs_cleanup) {
                // Find matching end marker in tool_call_closer
                // Look for a token that has similar content but with "end" instead of "begin"
                pos = 0;
                while ((pos = find_token_opener(discovered.tool_call_closer, pos)) != std::string::npos) {
                    size_t end_closer_pos = find_token_closer(discovered.tool_call_closer, pos);
                    if (end_closer_pos != std::string::npos) {
                        size_t op_len = get_token_opener_length(discovered.tool_call_closer, pos);
                        size_t cl_len = get_token_closer_length(discovered.tool_call_closer, end_closer_pos);
                        std::string candidate = discovered.tool_call_closer.substr(pos, end_closer_pos + cl_len - pos);
                        std::string candidate_content =
                            discovered.tool_call_closer.substr(pos + op_len, end_closer_pos - pos - op_len);

                        // Check if this candidate matches our start marker structure
                        // Start content might be "tool▁calls▁begin" and candidate might be "tool▁calls▁end"
                        size_t begin_in_start = start_content.find("begin");
                        size_t end_in_candidate = candidate_content.find("end");
                        if (begin_in_start != std::string::npos && end_in_candidate != std::string::npos) {
                            // Check if they share a common prefix (e.g., "tool▁calls▁")
                            std::string start_base = start_content.substr(0, begin_in_start);
                            std::string cand_base = candidate_content.substr(0, end_in_candidate);
                            if (start_base == cand_base) {
                                ts.tool_section_end = candidate;
                                LOG_DBG(
                                    "Derived tool_section_end='%s' from tool_section_start='%s' using token matching\n",
                                    ts.tool_section_end.c_str(), ts.tool_section_start.c_str());
                                break;
                            }
                        }
                    }
                    pos += get_token_opener_length(discovered.tool_call_closer, pos);
                }
            }
        }
    }

    // Determine argument format
    if (!discovered.parameter_key_prefix.empty() && discovered.parameter_key_prefix.find('<') != std::string::npos) {
        ts.argument_format = tool_call_structure::ARGS_TAGGED;
        ts.arg_prefix = discovered.parameter_key_prefix;
        ts.arg_suffix = discovered.parameter_key_suffix;
        ts.arg_close = discovered.parameter_closer;
        ts.arg_separator = discovered.argument_separator;

        // Check for specific GLM-4 style key-value tags
        // NOTE(review): the tag literals in the next comments and in the two
        // find("") calls below (likely <arg_value> / </arg_key>) were stripped
        // by the markup mangling; find("") always matches, so this branch is
        // currently broken — restore the literals from the upstream patch.
        // Format: key\nvalue
        // Analyzer detects suffix as: \n
        if (ts.arg_suffix.find("") != std::string::npos) {
            ts.argument_format = tool_call_structure::ARGS_KEY_VALUE_TAGS;

            // Clean up suffix to be just the key closer
            size_t val_opener = ts.arg_suffix.find("");
            if (val_opener != std::string::npos) {
                // Extract just the part (trimming whitespace/newlines before )
                std::string key_closer = ts.arg_suffix.substr(0, val_opener);
                // Trim trailing whitespace/newlines
                while (!key_closer.empty() &&
                       (key_closer.back() == '\n' || key_closer.back() == '\r' || key_closer.back() == ' ')) {
                    key_closer.pop_back();
                }
                ts.arg_suffix = key_closer;
            }
        }
    } else {
        ts.argument_format =
tool_call_structure::ARGS_JSON; + } + + LOG_DBG("%s: final markers: section_start='%s', section_end='%s', prefix='%s', close='%s'\n", __func__, + ts.tool_section_start.c_str(), ts.tool_section_end.c_str(), ts.function_prefix.c_str(), + ts.function_close.c_str()); +} + +void template_analyzer::analyze_bracket_tag_format(tool_call_structure & ts, + const internal_discovered_pattern & discovered) { + // Bracket-tag format: [TOOL_CALLS]name[CALL_ID]id[ARGS]{...} (Mistral Small 3.2) + ts.supports_tools = true; + ts.function_format = tool_call_structure::FUNC_BRACKET_TAG; + ts.argument_format = tool_call_structure::ARGS_JSON; + + // The function_opener contains the bracket tag before the function name (e.g., "[TOOL_CALLS]") + // Each tool call starts with this tag, so it's the per_call_start, not a section wrapper + // tool_section_start/end should be empty since there's no overall section wrapper + ts.tool_section_start = ""; + ts.tool_section_end = ""; + ts.per_call_start = discovered.function_opener; + + // Extract markers from function_name_suffix (e.g., "[CALL_ID]call_0001[ARGS]" or just "[ARGS]") + // Pattern: [ID_MARKER]...[ARGS_MARKER] or just [ARGS_MARKER] + if (!discovered.function_name_suffix.empty()) { + // Find all bracket tags in the suffix + std::vector tags; + size_t pos = 0; + while ((pos = discovered.function_name_suffix.find('[', pos)) != std::string::npos) { + size_t end = discovered.function_name_suffix.find(']', pos); + if (end != std::string::npos) { + tags.push_back(discovered.function_name_suffix.substr(pos, end - pos + 1)); + pos = end + 1; + } else { + break; + } + } + + // Classify tags: args marker contains "ARG", id marker contains "ID" or "CALL" + for (const auto & tag : tags) { + std::string upper_tag = tag; + for (auto & c : upper_tag) { + c = static_cast(std::toupper(static_cast(c))); + } + if (upper_tag.find("ARG") != std::string::npos) { + ts.args_marker = tag; + } else if (upper_tag.find("ID") != std::string::npos || 
upper_tag.find("CALL") != std::string::npos) { + ts.id_marker = tag; + } + } + } + + LOG_DBG("FUNC_BRACKET_TAG: per_call_start='%s', id_marker='%s', args_marker='%s'\n", ts.per_call_start.c_str(), + ts.id_marker.c_str(), ts.args_marker.c_str()); +} + +void template_analyzer::analyze_recipient_based_format(tool_call_structure & ts, + const internal_discovered_pattern & discovered) { + // Recipient-based format (Functionary v3.2): >>>recipient\n{content} + // where recipient is either "all" (for content) or a function name (for tools) + ts.supports_tools = true; + ts.function_format = tool_call_structure::FUNC_RECIPIENT_BASED; + ts.argument_format = tool_call_structure::ARGS_JSON; // Python dict format, parse as JSON + + // The tool_call_start_marker is used as the recipient delimiter + ts.tool_section_start = discovered.tool_call_start_marker; + ts.tool_section_end = ""; + + // For recipient-based format, content is wrapped in tool_call_start_marker + "all\n" + // This needs to be detected and stripped. We detect this by checking if the + // content_start marker (from phase 1 analysis) starts with tool_call_start_marker + // If not already detected, infer it from the pattern. 
+ // Note: This is set on the ContentStructure result, not ToolCallStructure + // The caller (analyze_template) will have the ContentStructure to modify + + LOG_DBG("FUNC_RECIPIENT_BASED: delimiter='%s'\n", ts.tool_section_start.c_str()); +} + +void template_analyzer::analyze_markdown_code_block_format(tool_call_structure & ts, + const internal_discovered_pattern & discovered) { + // Markdown code block format (Cohere Command-R Plus): + // Action: + // ```json + // [ + // { + // "tool_name": "...", + // "parameters": {...} + // } + // ] + // ``` + ts.supports_tools = true; + ts.function_format = tool_call_structure::FUNC_MARKDOWN_CODE_BLOCK; + ts.argument_format = tool_call_structure::ARGS_JSON; + + // Extract the code block marker (e.g., "Action:") + // The tool_call_start_marker should contain "Action:" followed by newline + if (!discovered.tool_call_start_marker.empty()) { + // Extract just the marker text (e.g., "Action:") + // The marker may be followed by whitespace/newline in the template + size_t marker_end = discovered.tool_call_start_marker.find_first_of(" \n\r\t"); + if (marker_end != std::string::npos) { + ts.code_block_marker = discovered.tool_call_start_marker.substr(0, marker_end); + } else { + ts.code_block_marker = discovered.tool_call_start_marker; + } + } + + // Extract the code block language (e.g., "json") + // For Command-R Plus format: Action:\n```json\n[...] 
+ // The code fence is in tool_call_opener (before the function name), not function_name_suffix + if (!discovered.function_name_suffix.empty() && discovered.function_name_suffix.find("```") != std::string::npos) { + // Format: ```json or ```json\n + size_t code_fence_pos = discovered.function_name_suffix.find("```"); + size_t lang_start = code_fence_pos + 3; + // Find the end of the language identifier (newline, space, or end of string) + size_t lang_end = discovered.function_name_suffix.find_first_of(" \n\r\t", lang_start); + if (lang_end != std::string::npos && lang_end > lang_start) { + ts.code_block_language = discovered.function_name_suffix.substr(lang_start, lang_end - lang_start); + } else { + // No language identifier after ```, will use "json" as default + ts.code_block_language = "json"; + } + } else if (!discovered.tool_call_opener.empty() && discovered.tool_call_opener.find("```") != std::string::npos) { + // Code fence is in tool_call_opener (before the function name) + // Format: Action:\n```json\n[... 
+ size_t code_fence_pos = discovered.tool_call_opener.find("```"); + size_t lang_start = code_fence_pos + 3; + // Find the end of the language identifier (newline, space, or end of string) + size_t lang_end = discovered.tool_call_opener.find_first_of(" \n\r\t", lang_start); + if (lang_end != std::string::npos && lang_end > lang_start) { + ts.code_block_language = discovered.tool_call_opener.substr(lang_start, lang_end - lang_start); + } else { + // No language identifier after ```, will use "json" as default + ts.code_block_language = "json"; + } + } else { + // Default to "json" if no code fence found + ts.code_block_language = "json"; + } + + // The tool_section_end should be the closing code fence: ``` + if (!discovered.tool_call_closer.empty() && discovered.tool_call_closer.find("```") != std::string::npos) { + // Extract just the closing code fence (may have trailing content) + size_t fence_pos = discovered.tool_call_closer.find("```"); + size_t fence_end = fence_pos + 3; + // Include any non-newline characters after ``` (like language identifier if present) + while (fence_end < discovered.tool_call_closer.length() && discovered.tool_call_closer[fence_end] != '\n' && + discovered.tool_call_closer[fence_end] != '\r') { + fence_end++; + } + ts.tool_section_end = discovered.tool_call_closer.substr(fence_pos, fence_end - fence_pos); + } else { + // Default closing code fence + ts.tool_section_end = "```"; + } + + // JSON array format for function calls + ts.name_field = discovered.tool_name_field; + ts.args_field = discovered.tool_args_field; + ts.id_field = discovered.tool_id_field; + + LOG_DBG("FUNC_MARKDOWN_CODE_BLOCK: marker='%s', language='%s', section_end='%s'\n", ts.code_block_marker.c_str(), + ts.code_block_language.c_str(), ts.tool_section_end.c_str()); +} diff --git a/common/chat-auto-parser-generator.cpp b/common/chat-auto-parser-generator.cpp new file mode 100644 index 0000000000..0f4d153d06 --- /dev/null +++ b/common/chat-auto-parser-generator.cpp @@ 
-0,0 +1,250 @@ +#include "chat-auto-parser-helpers.h" +#include "chat-auto-parser.h" +#include "chat-peg-parser.h" +#include "chat.h" +#include "json-schema-to-grammar.h" +#include "log.h" +#include "nlohmann/json.hpp" + +#include + +using json = nlohmann::ordered_json; + +common_chat_params universal_peg_generator::generate_parser(const template_analysis_result & analysis, + const common_chat_template & tmpl, + const struct templates_params & inputs) { + common_chat_params data; + + try { + LOG_DBG("%s\n", __func__); + + // Patch messages if template requires non-null content + // Some templates (e.g., iquest) render null as "None" when concatenating strings + std::optional messages_override; + if (analysis.tools.requires_nonnull_content && !inputs.messages.empty()) { + LOG_DBG("Patching null content to empty string (template requires non-null content)\n"); + json patched_messages = inputs.messages; + for (auto & msg : patched_messages) { + if (msg.contains("content") && msg["content"].is_null()) { + msg["content"] = ""; + } + } + messages_override = patched_messages; + } + + if (inputs.messages.empty()) { + // Some templates don't handle empty messages well - always leave something in + json message = { + { { "role", "user" }, { "content", "Hello" } } + }; + messages_override.emplace(message); + } + + // Calculate prompt first to detect forced thinking + data.prompt = common_chat_template_direct_apply(tmpl, inputs, messages_override); + + // Determine if thinking is forced open based on prompt ending + bool thinking_forced_open = false; + if (analysis.content.reasoning_mode == content_structure::REASONING_FORCED_OPEN) { + if (inputs.enable_thinking) { + thinking_forced_open = true; + LOG_DBG("Thinking forced open based on template analysis\n"); + } else { + // Template ends with reasoning start marker but thinking is disabled + // Append the end marker to close it + data.prompt += analysis.content.reasoning_end; + LOG_DBG("Appended reasoning end marker since 
thinking is disabled\n"); + } + } + data.thinking_forced_open = thinking_forced_open; + + // Build the unified parser + auto arena = build_parser(analysis, tmpl, inputs, thinking_forced_open); + data.parser = arena.save(); + + // Determine format + bool has_tools = + inputs.tools.is_array() && !inputs.tools.empty() && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE; + + if (has_tools && analysis.tools.supports_tools) { + // Unified format that handles both JSON and tagged tool calls + data.format = COMMON_CHAT_FORMAT_PEG_NATIVE; + LOG_DBG("Generated unified parser with tool support (format: PEG_NATIVE)\n"); + } else if (analysis.content.reasoning_mode != content_structure::REASONING_NONE) { + // Reasoning markers detected - use PEG parser to handle thinking blocks + data.format = COMMON_CHAT_FORMAT_PEG_NATIVE; + LOG_DBG("Generated unified parser for reasoning handling (format: PEG_NATIVE)\n"); + } else if (analysis.content.content_mode != content_structure::CONTENT_PLAIN) { + // Content markers detected - use PEG parser to strip them even without tools + data.format = COMMON_CHAT_FORMAT_PEG_NATIVE; + LOG_DBG("Generated unified parser for content marker stripping (format: PEG_NATIVE)\n"); + } else if (analysis.tools.function_format == tool_call_structure::FUNC_RECIPIENT_BASED) { + // Recipient-based format (e.g., Functionary v3.2): >>>recipient\n{content} + // Need PEG parser to handle recipient delimiter parsing + data.format = COMMON_CHAT_FORMAT_PEG_NATIVE; + LOG_DBG("Generated unified parser for recipient-based format (format: PEG_NATIVE)\n"); + } else if (analysis.tools.function_format == tool_call_structure::FUNC_TAG_WITH_NAME) { + // Tag-with-name format (e.g., func_name\n{args} for Functionary) + // Need PEG parser to handle function name parsing + data.format = COMMON_CHAT_FORMAT_PEG_NATIVE; + LOG_DBG("Generated unified parser for tag-with-name format (format: PEG_NATIVE)\n"); + } else if (analysis.tools.function_format == 
tool_call_structure::FUNC_BRACKET_TAG) { + // Bracket-tag format (e.g., [TOOL_CALLS]name[CALL_ID]id[ARGS]{...} for Mistral Small 3.2) + // Need PEG parser to handle bracket tag parsing + data.format = COMMON_CHAT_FORMAT_PEG_NATIVE; + LOG_DBG("Generated unified parser for bracket-tag format (format: PEG_NATIVE)\n"); + } else if (analysis.tools.function_format == tool_call_structure::FUNC_PREFIXED_INDEXED) { + // Prefixed-indexed format (e.g., Kimi-K2) + // Need PEG parser to handle namespace and indexed format + data.format = COMMON_CHAT_FORMAT_PEG_NATIVE; + LOG_DBG("Generated unified parser for prefixed-indexed format (format: PEG_NATIVE)\n"); + } else { + data.format = COMMON_CHAT_FORMAT_CONTENT_ONLY; + LOG_DBG("Generated unified parser without tools or content markers (format: CONTENT_ONLY)\n"); + } + + // Determine trigger word for lazy grammar + std::string trigger_word; + if (!analysis.tools.tool_section_start.empty() || + analysis.tools.function_format == tool_call_structure::FUNC_RECIPIENT_BASED) { + trigger_word = analysis.tools.tool_section_start; + } else if (analysis.tools.function_format == tool_call_structure::FUNC_TAG_WITH_NAME) { + trigger_word = analysis.tools.function_prefix; + } else if (analysis.tools.function_format == tool_call_structure::FUNC_BRACKET_TAG || + analysis.tools.function_format == tool_call_structure::FUNC_PREFIXED_INDEXED) { + // For formats with per-call markers, use per_call_start as trigger + trigger_word = analysis.tools.per_call_start; + } + + // Build grammar for tool calls + data.grammar_lazy = analysis.tools.supports_tools && has_tools; + + // For FUNC_TAG_WITH_NAME with empty prefix (Functionary), disable lazy grammar + // since there's no clear trigger word - constrain from the start + if (analysis.tools.function_format == tool_call_structure::FUNC_TAG_WITH_NAME && + analysis.tools.function_prefix.empty()) { + data.grammar_lazy = false; + } + + if (data.grammar_lazy) { + if (!trigger_word.empty()) { + 
data.grammar_triggers.push_back({ COMMON_GRAMMAR_TRIGGER_TYPE_WORD, trigger_word }); + } + } + + // Build grammar + data.grammar = build_grammar([&](const common_grammar_builder & builder) { + if (inputs.tools.is_array()) { + for (const auto & tool : inputs.tools) { + if (!tool.contains("type") || tool.at("type") != "function" || !tool.contains("function")) { + continue; + } + const auto & function = tool.at("function"); + if (function.contains("parameters")) { + auto params = function.at("parameters"); + builder.resolve_refs(params); + } + } + } + arena.build_grammar(builder, data.grammar_lazy); + }); + + // Set preserved tokens from analysis + data.preserved_tokens = analysis.preserved_tokens; + + LOG_DBG("=== UNIFIED PEG PARSER GENERATION COMPLETED ===\n"); + + } catch (const std::exception & e) { + LOG_DBG("Unified parser generation failed: %s\n", e.what()); + throw; + } + + return data; +} + +common_peg_arena universal_peg_generator::build_parser(const template_analysis_result & analysis, + const common_chat_template & tmpl, + const struct templates_params & inputs, + bool thinking_forced_open) { + GGML_UNUSED(tmpl); + + auto parser = build_chat_peg_unified_parser([&](common_chat_peg_unified_builder & p) { + // Build reasoning block using ContentStructure + auto reasoning = p.build_reasoning_block(analysis.content, inputs.reasoning_format, thinking_forced_open); + + // Build content block using ContentStructure + // Note: we don't pass tool_section_start here because content-before-tools handling + // is done inline in each branch below with p.content(p.until(marker)) + auto content = p.build_content_block(analysis.content, inputs.reasoning_format); + + // Build tool section using ToolCallStructure (if applicable) + bool has_tools = + inputs.tools.is_array() && !inputs.tools.empty() && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE; + + if (has_tools && analysis.tools.supports_tools) { + bool force_calls = inputs.tool_choice == 
COMMON_CHAT_TOOL_CHOICE_REQUIRED; + auto tool_section = + p.build_tool_section(analysis.tools, inputs.tools, inputs.parallel_tool_calls, force_calls); + + // Compose: reasoning -> content before tools -> tool_section -> trailing content + // When thinking is forced open, the reasoning block expects . + // For tool-only messages (no thinking content), the model may output tools directly + // without the tag, so we need to make reasoning optional in that case. + // But if reasoning_format is NONE, the reasoning block is already eps() - don't wrap it + // in optional() as that would generate invalid grammar. + auto reasoning_for_tools = + (thinking_forced_open && inputs.reasoning_format != COMMON_REASONING_FORMAT_NONE) ? + p.optional(reasoning) : + reasoning; + + if (!analysis.tools.tool_section_start.empty()) { + // With section markers: look for start marker to delimit content + auto content_before_tools = p.content(p.until(analysis.tools.tool_section_start)); + return p.sequence({ reasoning_for_tools, p.space(), content_before_tools, p.space(), tool_section, + p.space(), p.optional(p.content(p.rest())), p.end() }); + } + if (analysis.tools.function_format == tool_call_structure::FUNC_TAG_WITH_NAME && + !analysis.tools.function_prefix.empty()) { + // Tag-with-name format (e.g., >>>func_name): content stops at function prefix + auto content_before_tools = p.content(p.until(analysis.tools.function_prefix)); + return p.sequence( + { reasoning_for_tools, p.space(), content_before_tools, p.space(), tool_section, p.end() }); + } + if (analysis.tools.function_format == tool_call_structure::FUNC_TAG_WITH_NAME) { + // Functionary-style format: tool call starts immediately (e.g., func_name\n{args}) + // No content before tools in this format - the entire output is the tool call + return p.sequence({ reasoning_for_tools, p.space(), tool_section, p.end() }); + } + if (analysis.tools.function_format == tool_call_structure::FUNC_BRACKET_TAG || + analysis.tools.function_format == 
tool_call_structure::FUNC_PREFIXED_INDEXED) { + // Bracket-tag (Mistral Small 3.2) or prefixed-indexed (Kimi-K2) format: + // Tool calls start with per_call_start marker (e.g., [TOOL_CALLS], <|tool_call_begin|>) + if (!analysis.tools.per_call_start.empty()) { + auto content_before_tools = p.content(p.until(analysis.tools.per_call_start)); + return p.sequence( + { reasoning_for_tools, p.space(), content_before_tools, p.space(), tool_section, p.end() }); + } + // Fallback: no content before tools + return p.sequence({ reasoning_for_tools, p.space(), tool_section, p.end() }); + } + if (analysis.tools.function_format == tool_call_structure::FUNC_MARKDOWN_CODE_BLOCK && + !analysis.tools.code_block_marker.empty()) { + // Markdown code block format (Cohere Command-R Plus): + // Content stops at the code_block_marker (e.g., "Action:") + auto content_before_tools = p.content(p.until(analysis.tools.code_block_marker)); + return p.sequence( + { reasoning_for_tools, p.space(), content_before_tools, p.space(), tool_section, p.end() }); + } + // No section markers (raw JSON format): content must stop at JSON object start + // Tool calls start with "{", so use that as a delimiter + auto content_before_tools = p.content(p.until("{")); + return p.sequence( + { reasoning_for_tools, p.space(), content_before_tools, p.space(), tool_section, p.end() }); + } + + // No tools - just reasoning (if any) followed by content + return p.sequence({ reasoning, p.space(), content, p.end() }); + }); + + return parser; +} diff --git a/common/chat-auto-parser-helpers.cpp b/common/chat-auto-parser-helpers.cpp new file mode 100644 index 0000000000..c63012c2a8 --- /dev/null +++ b/common/chat-auto-parser-helpers.cpp @@ -0,0 +1,1419 @@ +#include "chat-auto-parser-helpers.h" + +#include "chat-auto-parser.h" +#include "chat.h" +#include "log.h" + +#include "nlohmann/json.hpp" + +using json = nlohmann::ordered_json; + +bool string_ends_with(const std::string & str, const std::string & suffix) { + return 
// True when `str` ends with `suffix` (also true for an empty suffix).
bool string_ends_with(const std::string & str, const std::string & suffix) {
    return str.size() >= suffix.size() && str.compare(str.size() - suffix.size(), suffix.size(), suffix) == 0;
}

// Strip leading and trailing whitespace in place; an all-whitespace string becomes "".
void trim_whitespace(std::string & str) {
    if (str.empty()) {
        return;
    }
    size_t first = str.find_first_not_of(" \n\t\r");
    if (first == std::string::npos) {
        str.clear();
        return;
    }
    size_t last = str.find_last_not_of(" \n\t\r");
    str = str.substr(first, (last - first + 1));
}

// Remove any trailing '\n'/'\r' characters in place.
void trim_trailing_newlines(std::string & str) {
    while (!str.empty() && (str.back() == '\n' || str.back() == '\r')) {
        str.pop_back();
    }
}

// Number of characters that are not space/tab/newline/carriage-return.
size_t count_non_whitespace(const std::string & str) {
    size_t count = 0;
    for (char c : str) {
        if (c != ' ' && c != '\t' && c != '\n' && c != '\r') {
            count++;
        }
    }
    return count;
}

// Position of the last occurrence (at or before start_pos) of any character in
// `chars`, or npos if none occurs.
size_t find_last_of_any(const std::string & str, const std::string & chars, size_t start_pos) {
    size_t last_pos = std::string::npos;
    for (char c : chars) {
        size_t pos = str.rfind(c, start_pos);
        if (pos != std::string::npos && (last_pos == std::string::npos || pos > last_pos)) {
            last_pos = pos;
        }
    }
    return last_pos;
}

// Extract the tag name from an opening tag like "<name attr>"; "" if `tag`
// does not start with '<'.
std::string extract_tag_name(const std::string & tag) {
    if (tag.empty() || tag[0] != '<') {
        return "";
    }
    std::string tag_name = tag.substr(1);
    size_t end_bracket = tag_name.find_first_of(" >");
    if (end_bracket != std::string::npos) {
        tag_name = tag_name.substr(0, end_bracket);
    }
    return tag_name;
}

// Build the matching closing tag for "<name...>" -> "</name>" or "[NAME]" -> "[/NAME]".
// Returns "" for anything else.
// FIX: the "</" + name + ">" construction had been corrupted in transit
// (angle-bracket span stripped, leaving `return "";`); restored here.
std::string create_closing_tag(const std::string & opening_tag) {
    if (opening_tag.empty()) {
        return "";
    }
    if (opening_tag[0] == '<') {
        std::string name = extract_tag_name(opening_tag);
        return "</" + name + ">";
    }
    if (opening_tag.front() == '[' && opening_tag.back() == ']') {
        std::string name = opening_tag.substr(1, opening_tag.length() - 2);
        return "[/" + name + "]";
    }
    return "";
}

// Longest common prefix of all strings ("" for an empty input set).
std::string find_common_prefix(const std::vector<std::string> & strings) {
    if (strings.empty()) {
        return "";
    }
    if (strings.size() == 1) {
        return strings[0];
    }

    std::string common = strings[0];
    for (size_t i = 1; i < strings.size(); ++i) {
        const std::string & current = strings[i];
        std::string temp_common;
        for (size_t j = 0; j < common.length() && j < current.length(); ++j) {
            if (common[j] == current[j]) {
                temp_common += common[j];
            } else {
                break;
            }
        }
        common = temp_common;
    }
    return common;
}

// Longest common suffix of all strings ("" for an empty input set).
std::string find_common_suffix_generic(const std::vector<std::string> & strings) {
    if (strings.empty()) {
        return "";
    }
    if (strings.size() == 1) {
        return strings[0];
    }

    std::string common = strings[0];
    for (size_t i = 1; i < strings.size(); ++i) {
        const std::string & current = strings[i];
        std::string temp_common;
        size_t min_len = std::min(common.length(), current.length());
        for (size_t j = 0; j < min_len; ++j) {
            size_t pos_common = common.length() - j - 1;
            size_t pos_current = current.length() - j - 1;
            if (common[pos_common] == current[pos_current]) {
                temp_common = common[pos_common] + temp_common;
            } else {
                break;
            }
        }
        common = temp_common;
    }
    return common;
}

// Common prefix of all strings, truncated when it exceeds max_length: cut at the
// last delimiter if one exists, otherwise hard-truncate at max_length.
// NOTE(review): when a delimiter is found, the result may still exceed max_length
// (pos + 1 is not clamped) — preserved as-is; confirm intent before changing.
std::string find_common_substring_limited(const std::vector<std::string> & strings,
                                          size_t max_length,
                                          const std::string & delimiters) {
    std::string common = find_common_prefix(strings);
    if (common.length() > max_length) {
        size_t pos = find_last_of_any(common, delimiters, common.length() - 1);
        if (pos != std::string::npos && pos > 0) {
            return common.substr(0, pos + 1);
        }
        return common.substr(0, max_length);
    }
    return common;
}
json() : inputs.tools; + } + final_inputs.add_generation_prompt = inputs.add_generation_prompt; + final_inputs.extra_context = inputs.extra_context; + final_inputs.extra_context["enable_thinking"] = inputs.enable_thinking; + if (additional_context) { + final_inputs.extra_context.merge_patch(*additional_context); + } + + try { + return common_chat_template_direct_apply(tmpl, inputs); + } catch (const std::exception & e) { + LOG_ERR("Template application failed: %s\n", e.what()); + return ""; + } +} + +std::string adjust_to_token_boundary(const std::string & str) { + if (str.empty()) { + return str; + } + + // Check if the string ends in the middle of a <|...|> token + // Look for unmatched <| at the end + + // Find the last <| in the string + size_t last_open = str.rfind("<|"); + if (last_open == std::string::npos) { + return str; // No special tokens + } + + // Find if there's a |> after the last <| + size_t matching_close = str.find("|>", last_open + 2); + if (matching_close != std::string::npos) { + // The token is complete, return as-is + return str; + } + + // The string is truncated mid-token + // Truncate to just before the incomplete token + std::string result = str.substr(0, last_open); + + // Trim any trailing whitespace + while (!result.empty() && (result.back() == ' ' || result.back() == '\t' || result.back() == '\n')) { + result.pop_back(); + } + + return result; +} + +// Fullwidth vertical bar: | (U+FF5C) is 3 bytes in UTF-8: 0xEF 0xBD 0x9C +static const std::string FULLWIDTH_PIPE = "\xef\xbd\x9c"; // | +static const std::string TOKEN_OPENER_STD = "<|"; +static const std::string TOKEN_OPENER_FW = "<" + FULLWIDTH_PIPE; // <| +static const std::string TOKEN_CLOSER_STD = "|>"; +static const std::string TOKEN_CLOSER_FW = FULLWIDTH_PIPE + ">"; // |> + +size_t find_token_opener(const std::string & str, size_t start_pos) { + size_t pos_std = str.find(TOKEN_OPENER_STD, start_pos); + size_t pos_fw = str.find(TOKEN_OPENER_FW, start_pos); + + if (pos_std == 
std::string::npos) { + return pos_fw; + } + if (pos_fw == std::string::npos) { + return pos_std; + } + return std::min(pos_std, pos_fw); +} + +size_t find_token_closer(const std::string & str, size_t start_pos) { + size_t pos_std = str.find(TOKEN_CLOSER_STD, start_pos); + size_t pos_fw = str.find(TOKEN_CLOSER_FW, start_pos); + + if (pos_std == std::string::npos) { + return pos_fw; + } + if (pos_fw == std::string::npos) { + return pos_std; + } + return std::min(pos_std, pos_fw); +} + +size_t get_token_opener_length(const std::string & str, size_t pos) { + if (pos >= str.length()) { + return 0; + } + if (str.compare(pos, TOKEN_OPENER_FW.length(), TOKEN_OPENER_FW) == 0) { + return TOKEN_OPENER_FW.length(); // 4 bytes for <| + } + if (str.compare(pos, TOKEN_OPENER_STD.length(), TOKEN_OPENER_STD) == 0) { + return TOKEN_OPENER_STD.length(); // 2 bytes for <| + } + return 0; +} + +size_t get_token_closer_length(const std::string & str, size_t pos) { + if (pos >= str.length()) { + return 0; + } + if (str.compare(pos, TOKEN_CLOSER_FW.length(), TOKEN_CLOSER_FW) == 0) { + return TOKEN_CLOSER_FW.length(); // 4 bytes for |> + } + if (str.compare(pos, TOKEN_CLOSER_STD.length(), TOKEN_CLOSER_STD) == 0) { + return TOKEN_CLOSER_STD.length(); // 2 bytes for |> + } + return 0; +} + +std::string strip_eos_token(const std::string & str) { + if (str.empty()) { + return str; + } + + // Find the last token in the string + // We need to find a token that looks like an EOS marker + // Common patterns: + // - <|eot_id|>, <|eos|>, <|end|>, <|endoftext|> + // - <|end▁of▁sentence|> (DeepSeek fullwidth) + + size_t last_closer = std::string::npos; + size_t search_pos = str.length(); + + // Search backwards for the last token closer + while (search_pos > 0) { + // Check for fullwidth closer first (it's longer) + if (search_pos >= TOKEN_CLOSER_FW.length()) { + size_t check_pos = search_pos - TOKEN_CLOSER_FW.length(); + if (str.compare(check_pos, TOKEN_CLOSER_FW.length(), TOKEN_CLOSER_FW) == 0) { + 
last_closer = check_pos; + break; + } + } + // Check for standard closer + if (search_pos >= TOKEN_CLOSER_STD.length()) { + size_t check_pos = search_pos - TOKEN_CLOSER_STD.length(); + if (str.compare(check_pos, TOKEN_CLOSER_STD.length(), TOKEN_CLOSER_STD) == 0) { + last_closer = check_pos; + break; + } + } + search_pos--; + } + + if (last_closer == std::string::npos) { + return str; // No token closer found + } + + // Find the corresponding opener + size_t opener_search_start = (last_closer > 100) ? last_closer - 100 : 0; + size_t last_opener = std::string::npos; + size_t opener_len = 0; + + for (size_t pos = opener_search_start; pos < last_closer; pos++) { + size_t len = get_token_opener_length(str, pos); + if (len > 0) { + last_opener = pos; + opener_len = len; + } + } + + if (last_opener == std::string::npos) { + return str; // No matching opener found + } + + // Extract the token content to check if it's an EOS marker + size_t closer_len = get_token_closer_length(str, last_closer); + size_t content_start = last_opener + opener_len; + size_t content_length = last_closer - content_start; + + if (content_length == 0 || content_length > 50) { + return str; // Invalid or too long token content + } + + std::string token_content = str.substr(content_start, content_length); + + // Convert to lowercase for comparison (ASCII only, sufficient for EOS markers) + std::string lower_content; + for (char c : token_content) { + lower_content += (c >= 'A' && c <= 'Z') ? (c + 32) : c; + } + + // Check if this looks like an EOS token + // True EOS tokens: + // - <|eos|>, <|eot_id|>, <|end_of_text|>, <|endoftext|> + // - <|end▁of▁sentence|> (DeepSeek fullwidth) + // NOT EOS tokens (structural markers): + // - <|END_ACTION|>, <|TOOL_CALL_END|>, <|end_thinking|>, etc. 
+ + bool is_eos = false; + + // Check for specific EOS patterns + if (lower_content == "eos" || lower_content == "eot_id" || lower_content == "eot" || + lower_content == "end_of_text" || lower_content == "endoftext") { + is_eos = true; + } + // DeepSeek's end_of_sentence uses fullwidth underscore (▁) which is preserved in lower_content + // The token content would be "end▁of▁sentence" (with ▁ = U+2581) + else if (token_content.find("sentence") != std::string::npos || + token_content.find("\xe2\x96\x81of\xe2\x96\x81sentence") != std::string::npos) { + is_eos = true; + } + + if (!is_eos) { + return str; // Not an EOS token + } + + // Strip the EOS token + std::string result = str.substr(0, last_opener); + + LOG_DBG("Stripped EOS token '%s' from string\n", + str.substr(last_opener, last_closer + closer_len - last_opener).c_str()); + + return result; +} + +std::string find_string_difference(const std::string & base, const std::string & extended) { + size_t common_prefix = 0; + while (common_prefix < base.length() && common_prefix < extended.length() && + base[common_prefix] == extended[common_prefix]) { + common_prefix++; + } + return extended.substr(common_prefix); +} + +std::string extract_json_field_name(const std::string & opener, + const std::string & default_name, + const std::vector & candidates) { + for (const auto & candidate : candidates) { + std::string pattern = "\"" + candidate + "\""; + if (opener.find(pattern) != std::string::npos) { + LOG_DBG("Found JSON field name '%s' in opener\n", candidate.c_str()); + return candidate; + } + } + return default_name; +} + +std::string find_closing_pattern(const std::string & diff, size_t func_pos) { + std::vector closers = { "", " " }; + + std::string best_pattern; + size_t best_pos = std::string::npos; + + for (const auto & pattern : closers) { + size_t pos = diff.find(pattern, func_pos); + if (pos != std::string::npos) { + if (pos < best_pos) { + if (pattern == "', pos); + if (end_pos != std::string::npos) { + 
best_pattern = diff.substr(pos, end_pos - pos + 1); + best_pos = pos; + } + } else { + best_pattern = pattern; + best_pos = pos; + } + } + } + } + return best_pattern; +} + +std::string find_tool_call_start(const std::string & diff) { + std::vector start_patterns = { "<", "[", "{", "call", "func", "tool", "TOOL" }; + for (const auto & pattern : start_patterns) { + size_t pos = diff.find(pattern); + if (pos < 5) { + if (pattern == "<") { + size_t end_pos = diff.find('>', pos); + if (end_pos != std::string::npos) { + return diff.substr(pos, end_pos - pos + 1); + } + } + if (pattern == "[" || pattern == "{") { + size_t chunk_len = std::min(diff.length() - pos, (size_t) 60); + return diff.substr(pos, chunk_len); + } + + size_t end_pos = diff.find_first_of(">]} \n", pos); + if (end_pos != std::string::npos) { + if (diff[end_pos] == '>' || diff[end_pos] == ']' || diff[end_pos] == '}') { + return diff.substr(pos, end_pos - pos + 1); + } + return diff.substr(pos, end_pos - pos); + } + return diff.substr(pos, pattern.length()); + } + } + return ""; +} + +std::string find_tool_call_end(const std::string & diff, size_t func_pos) { + char opener_char = 0; + std::string start_tag_name; + + std::string openers = "[{<"; + size_t last_opener_pos = std::string::npos; + for (char c : openers) { + size_t p = diff.rfind(c, func_pos); + if (p != std::string::npos) { + if (last_opener_pos == std::string::npos || p > last_opener_pos) { + last_opener_pos = p; + opener_char = c; + } + } + } + + size_t unclosed_bracket = diff.rfind('[', func_pos); + if (unclosed_bracket != std::string::npos) { + size_t closer = diff.find(']', unclosed_bracket); + if (closer == std::string::npos || closer > func_pos) { + opener_char = '['; + } + } + + if (opener_char == '<') { + size_t tag_start = diff.find('<', last_opener_pos); + if (tag_start != std::string::npos) { + // Include '=' in search to handle style tags + // where the closing tag is , not + size_t tag_end = diff.find_first_of(" >=\n", 
tag_start); + if (tag_end != std::string::npos) { + start_tag_name = diff.substr(tag_start + 1, tag_end - (tag_start + 1)); + } + } + } + + if (!start_tag_name.empty()) { + std::string expected_closer = ""; + size_t pos = diff.find(expected_closer, func_pos); + if (pos != std::string::npos) { + if (opener_char == '[') { + size_t bracket_pos = diff.rfind(']', pos); + if (bracket_pos != std::string::npos && bracket_pos > func_pos) { + return diff.substr(bracket_pos, (pos + expected_closer.length()) - bracket_pos); + } + } + return expected_closer; + } + } + + std::vector end_patterns = { "", "```", "\n", " " }; + std::string best_pattern; + size_t best_pos = std::string::npos; + + auto is_structural = [](const std::string & s) { + if (s.empty()) { + return false; + } + return s[0] == ']' || s[0] == '}' || s[0] == '>' || (s.size() >= 2 && s.substr(0, 2) == "= 3 && s.substr(0, 3) == "```"); + }; + + for (const auto & pattern : end_patterns) { + size_t pos = diff.find(pattern, func_pos); + if (pos == std::string::npos) { + continue; + } + + bool current_is_struct = is_structural(pattern); + bool best_is_struct = is_structural(best_pattern); + + bool better = false; + if (best_pattern.empty()) { + better = true; + } else if (pos < best_pos) { + better = !(best_is_struct && !current_is_struct) && + !(opener_char == '[' && best_pattern[0] == ']' && pattern[0] == '}'); + } else { + if (!best_is_struct && current_is_struct && pos < best_pos + 400) { + better = true; + } else if (best_is_struct && current_is_struct && opener_char == '[' && pattern[0] == ']' && + best_pattern[0] == '}') { + if (pos < best_pos + 100) { + better = true; + } + } + } + + if (better) { + best_pattern = pattern; + best_pos = pos; + + if (current_is_struct && (pattern == "]" || pattern == "}" || pattern == "```")) { + size_t tag_start = diff.find('<', best_pos + pattern.length()); + if (tag_start != std::string::npos && tag_start < best_pos + pattern.length() + 5) { + size_t tag_end = diff.find('>', 
tag_start); + if (tag_end != std::string::npos) { + best_pattern = diff.substr(best_pos, tag_end - best_pos + 1); + } + } + } + } + } + + return best_pattern; +} + +std::string infer_tool_call_opener(const std::string & diff1, const std::string & diff2, const std::string & diff3) { + std::vector differences = { diff1, diff2, diff3 }; + return find_common_prefix(differences); +} + +std::string infer_tool_call_closer(const std::string & diff1, const std::string & diff2, const std::string & diff3) { + std::vector differences = { diff1, diff2, diff3 }; + return find_common_suffix_generic(differences); +} + +internal_discovered_pattern extract_patterns_from_differences(const std::string & tool1_diff, + const std::string & tool2_diff, + const std::string & tool3_diff, + const std::string & tool1_full) { + LOG_DBG("%s\n", __func__); + + internal_discovered_pattern patterns; + + size_t func1_pos = tool1_diff.rfind("test_function_name"); + size_t func2_pos = tool2_diff.rfind("test_function_name"); + + if (func1_pos != std::string::npos && func2_pos != std::string::npos) { + patterns.tool_call_opener = tool1_diff.substr(0, func1_pos); + + if (tool1_full.length() >= tool1_diff.length()) { + size_t diff_start = tool1_full.length() - tool1_diff.length(); + + if (diff_start > 0 && tool1_full[diff_start - 1] == '<' && !patterns.tool_call_opener.empty() && + patterns.tool_call_opener[0] != '<') { + patterns.tool_call_opener = "<" + patterns.tool_call_opener; + } + } + + if (func1_pos == 0 && !tool1_full.empty()) { + size_t func_in_full = tool1_full.rfind("test_function_name"); + if (func_in_full != std::string::npos && func_in_full > 0) { + // Look backwards from function name to find prefix pattern + // Find where the prefix ends (skip whitespace immediately before function name) + size_t prefix_end = func_in_full; + while (prefix_end > 0 && (tool1_full[prefix_end - 1] == ' ' || tool1_full[prefix_end - 1] == '\t')) { + prefix_end--; + } + + // Find where the prefix starts by 
looking for newline or alphanumeric boundary + size_t prefix_start = prefix_end; + while (prefix_start > 0) { + char c = tool1_full[prefix_start - 1]; + // Stop at newline + if (c == '\n' || c == '\r') { + break; + } + // Stop if we hit alphanumeric (probably content, not a prefix delimiter) + if (std::isalnum(static_cast(c)) || c == '_') { + prefix_start = prefix_end; // Reset - no valid prefix found + break; + } + prefix_start--; + } + + // Extract the prefix if we found something meaningful + if (prefix_start < prefix_end) { + std::string prefix = tool1_full.substr(prefix_start, prefix_end - prefix_start); + // Validate: prefix should contain non-whitespace and be reasonable length + bool has_content = false; + for (char c : prefix) { + if (c != ' ' && c != '\t' && c != '\n' && c != '\r') { + has_content = true; + break; + } + } + if (has_content && prefix.length() >= 2 && prefix.length() <= 20) { + LOG_DBG("Found prefix pattern in full output: '%s'\n", prefix.c_str()); + patterns.function_opener = prefix; + patterns.tool_call_start_marker = prefix; + } + } + } + } + + patterns.tool_name_field = extract_json_field_name(patterns.tool_call_opener, "name", + { "tool_name", "name", "function_name", "function" }); + + patterns.tool_args_field = + extract_json_field_name(patterns.tool_call_opener + tool1_diff.substr(func1_pos), "arguments", + { "parameters", "arguments", "args", "params", "input" }); + + patterns.tool_id_field = + extract_json_field_name(tool1_diff, "", { "tool_call_id", "tool_id", "id", "call_id" }); + + size_t param1_pos = tool2_diff.find("\"param1\""); + bool param_has_quotes = (param1_pos != std::string::npos); + size_t param2_pos = tool2_diff.find("\"param2\""); + size_t value1_pos = tool2_diff.find("\"value1\""); + + if (param1_pos == std::string::npos) { + param1_pos = tool2_diff.find("param1"); + } + if (param_has_quotes && param1_pos != std::string::npos) { + param1_pos++; + } + if (param2_pos == std::string::npos) { + param2_pos = 
tool2_diff.find("param2"); + } + if (param_has_quotes && param2_pos != std::string::npos) { + param2_pos++; + } + if (value1_pos == std::string::npos) { + value1_pos = tool2_diff.find("value1"); + } + // Only skip quote if value was actually found quoted + bool value_has_quotes = (value1_pos != std::string::npos && tool2_diff[value1_pos] == '"'); + if (value_has_quotes) { + value1_pos++; + } + + if (param1_pos != std::string::npos && value1_pos != std::string::npos) { + size_t search_start = (param1_pos > 20) ? param1_pos - 20 : 0; + std::string pre_param = tool2_diff.substr(search_start, param1_pos - search_start); + + size_t delim_pos = pre_param.find_last_of('\n'); + if (delim_pos == std::string::npos) { + delim_pos = pre_param.find_last_of('>'); + } + + if (delim_pos != std::string::npos) { + patterns.parameter_key_prefix = pre_param.substr(delim_pos + 1); + + // If prefix is empty after '>', check for GLM-style key-value tags + // Pattern: param1value1 + // In this case, the '>' ends the opening tag, and we should include the whole tag + if (patterns.parameter_key_prefix.empty() && delim_pos > 0) { + // Look for matching '<' before the '>' + size_t open_bracket = pre_param.rfind('<', delim_pos); + if (open_bracket != std::string::npos) { + // Extract the whole tag as the prefix + patterns.parameter_key_prefix = pre_param.substr(open_bracket); + } + } + } else { + size_t start_marker = pre_param.find_last_of("<{[ \""); + if (start_marker != std::string::npos) { + patterns.parameter_key_prefix = pre_param.substr(start_marker); + } else { + patterns.parameter_key_prefix = pre_param; + } + } + + trim_whitespace(patterns.parameter_key_prefix); + + size_t key_end = param1_pos + std::string("param1").length(); + if (value1_pos > key_end) { + patterns.parameter_key_suffix = tool2_diff.substr(key_end, value1_pos - key_end); + } + + size_t value1_end = value1_pos + std::string("value1").length(); + if (value1_end < tool2_diff.length()) { + // Try to find XML-style 
closing tag like + size_t close_start = tool2_diff.find("', close_start); + if (close_end != std::string::npos) { + patterns.parameter_closer = tool2_diff.substr(close_start, close_end - close_start + 1); + } + } + } + } + + const std::string & func_context = tool1_diff; + size_t open_pos = func_context.rfind('<', func1_pos); + if (open_pos != std::string::npos && open_pos < func1_pos) { + size_t close_pos = func_context.find('>', open_pos); + if (close_pos != std::string::npos && close_pos < func1_pos) { + bool is_adjacent = true; + for (size_t k = close_pos + 1; k < func1_pos; ++k) { + char c = func_context[k]; + if (c != ' ' && c != '\t' && c != '\n' && c != '\r') { + is_adjacent = false; + break; + } + } + if (is_adjacent) { + patterns.function_opener = func_context.substr(open_pos, close_pos - open_pos + 1); + } + } else { + patterns.function_opener = func_context.substr(open_pos, func1_pos - open_pos); + } + } + + if (func1_pos > 0 && patterns.function_opener.empty()) { + size_t prefix_end = func1_pos; + // Skip whitespace immediately before function name + while (prefix_end > 0 && (func_context[prefix_end - 1] == ' ' || func_context[prefix_end - 1] == '\t')) { + prefix_end--; + } + + // Find prefix start - look for newline or alphanumeric boundary + size_t prefix_start = prefix_end; + while (prefix_start > 0) { + char c = func_context[prefix_start - 1]; + if (c == '\n' || c == '\r') { + break; + } + if (std::isalnum(static_cast(c)) || c == '_') { + prefix_start = prefix_end; // Reset - no valid prefix + break; + } + prefix_start--; + } + + if (prefix_start < prefix_end) { + // ... 
+ } + } + + // Fallback: look for standard delimiters + if (patterns.function_opener.empty()) { + for (int i = (int) func1_pos - 1; i >= 0; i--) { + if (func_context[i] == '{' || func_context[i] == '[' || func_context[i] == '(' || + func_context[i] == '<') { + patterns.function_opener = func_context.substr(i, func1_pos - i); + break; + } + } + } + + size_t func_name_end = func1_pos + std::string("test_function_name").length(); + if (func_name_end < func_context.length()) { + char next_char = func_context[func_name_end]; + if (next_char == '>' || next_char == ']' || next_char == '}') { + patterns.function_name_suffix = std::string(1, next_char); + } else if (next_char == '"') { + if (func_name_end + 1 < func_context.length() && func_context[func_name_end + 1] == '>') { + patterns.function_name_suffix = "\">"; + } else { + patterns.function_name_suffix = "\""; + } + } else if (next_char == '<') { + // Check if it's an XML-like tag suffix (e.g. <|tool_call_argument_begin|>) + // But NOT if it's a closing tag (e.g., ) - that should be function_closer + if (func_name_end + 1 < func_context.length() && func_context[func_name_end + 1] == '/') { + // This is a closing tag like , not a suffix + // Leave function_name_suffix empty; function_closer will capture this + } else { + size_t tag_close = func_context.find('>', func_name_end); + if (tag_close != std::string::npos) { + // It seems to be a tag, use it as suffix + patterns.function_name_suffix = func_context.substr(func_name_end, tag_close - func_name_end + 1); + } + } + } else if (next_char == '[') { + // Bracket-tag format: [CALL_ID]id[ARGS] (Mistral Small 3.2 style) + // Find where the JSON arguments start (at '{') + size_t json_start = func_context.find('{', func_name_end); + if (json_start != std::string::npos) { + patterns.function_name_suffix = func_context.substr(func_name_end, json_start - func_name_end); + LOG_DBG("Found bracket-tag suffix: '%s'\n", patterns.function_name_suffix.c_str()); + } + } else if 
(next_char == ':') { + // Indexed format: function_name:0<|marker|> or function_name:0{args} + // Find where the suffix ends - either at a tag marker or at the JSON args start + size_t suffix_end = func_name_end + 1; + // Skip the index digits + while (suffix_end < func_context.length() && + std::isdigit(static_cast(func_context[suffix_end]))) { + suffix_end++; + } + if (suffix_end < func_context.length()) { + char after_index = func_context[suffix_end]; + if (after_index == '<') { + // There's a marker after the index (e.g., :0<|tool_call_argument_begin|>) + size_t tag_close = func_context.find('>', suffix_end); + if (tag_close != std::string::npos) { + patterns.function_name_suffix = + func_context.substr(func_name_end, tag_close - func_name_end + 1); + } else { + patterns.function_name_suffix = + func_context.substr(func_name_end, suffix_end - func_name_end); + } + } else { + // Just the index part (e.g., :0) + patterns.function_name_suffix = func_context.substr(func_name_end, suffix_end - func_name_end); + } + } + } else if (next_char == '\n' || next_char == '\r') { + // Check for markdown code block pattern (e.g., DeepSeek R1): \n```json\n{...}\n``` + size_t code_block_start = func_context.find("```", func_name_end); + if (code_block_start != std::string::npos && code_block_start < func_name_end + 10) { + // Found code block start after function name + // Skip the optional language tag (e.g., "json") + size_t newline_after_lang = func_context.find('\n', code_block_start + 3); + if (newline_after_lang != std::string::npos) { + // function_name_suffix should include everything up to (and including) the newline after language tag + patterns.function_name_suffix = + func_context.substr(func_name_end, newline_after_lang - func_name_end + 1); + LOG_DBG("Found markdown code block suffix: '%s'\n", patterns.function_name_suffix.c_str()); + } + } + } + } + + // Function closer + size_t search_start = func_name_end; + if (!patterns.function_name_suffix.empty()) { + 
search_start += patterns.function_name_suffix.length(); + } + patterns.function_closer = find_closing_pattern(func_context, search_start); + + // Fix for XML-style tag formats where function_closer was detected as "}" (JSON closing) + // but should be the actual tag closer (e.g., <|tool_call_end|> or <|tool▁call▁end|>) + if (patterns.function_closer == "}" && !patterns.function_opener.empty() && + patterns.function_opener[0] == '<') { + // This is an XML-style tag format, so the closer should be a tag, not just "}" + // Find the next tag marker after the search position + size_t next_tag = func_context.find('<', search_start); + if (next_tag != std::string::npos) { + // Handle both standard <|...|> and fullwidth <|...|> formats + size_t closer_pos = find_token_closer(func_context, next_tag); + if (closer_pos != std::string::npos) { + size_t closer_len = get_token_closer_length(func_context, closer_pos); + patterns.function_closer = func_context.substr(next_tag, closer_pos - next_tag + closer_len); + LOG_DBG("Adjusted function_closer from '}' to tag '%s' for XML-style format\n", + patterns.function_closer.c_str()); + } + } + } + + if (patterns.function_closer == "}" && !patterns.function_name_suffix.empty() && + patterns.function_name_suffix.find("```") != std::string::npos) { + // function_name_suffix contains a code block opener, look for the closing code block + size_t code_block_end = func_context.find("```", search_start); + if (code_block_end != std::string::npos) { + // Found closing code block, extract everything from ``` to end of tool call + // The closer should be \n``` (everything from ``` to the end marker) + size_t after_block = code_block_end + 3; + // Find the next tag marker (e.g., <|tool_call_end|>) + size_t next_tag = func_context.find('<', after_block); + if (next_tag != std::string::npos) { + size_t tag_end = func_context.find('>', next_tag); + if (tag_end != std::string::npos) { + // Don't include leading newline - the JSON args parser consumes 
trailing whitespace + // So start exactly at the ``` (code_block_end) + patterns.function_closer = func_context.substr(code_block_end, tag_end - code_block_end + 1); + LOG_DBG("Detected markdown code block args, adjusted function_closer to: '%s'\n", + patterns.function_closer.c_str()); + } + } + } + } + + // Tool call start marker + if (patterns.function_opener.length() > 0 && + patterns.tool_call_opener.length() > patterns.function_opener.length()) { + size_t opener_start = patterns.tool_call_opener.length() - patterns.function_opener.length(); + if (opener_start > 0) { + std::string before_func = patterns.tool_call_opener.substr(0, opener_start); + size_t last_bracket = before_func.find_last_of('['); + size_t tool_obj_brace = std::string::npos; + if (last_bracket != std::string::npos && last_bracket + 1 < before_func.length()) { + tool_obj_brace = before_func.find('{', last_bracket + 1); + } + + if (tool_obj_brace != std::string::npos) { + patterns.tool_call_start_marker = before_func.substr(0, tool_obj_brace); + } else if (last_bracket != std::string::npos) { + patterns.tool_call_start_marker = before_func.substr(0, last_bracket + 1); + } else { + patterns.tool_call_start_marker = before_func; + } + } + } else if (patterns.tool_call_start_marker.empty()) { + // Only search if not already set (e.g., by >>> prefix detection) + patterns.tool_call_start_marker = find_tool_call_start(tool1_diff); + } + + if (patterns.tool_call_opener.empty()) { + patterns.tool_call_opener = infer_tool_call_opener(tool1_diff, tool2_diff, tool3_diff); + if (func1_pos != std::string::npos && patterns.tool_call_opener.length() > func1_pos) { + patterns.tool_call_opener = patterns.tool_call_opener.substr(0, func1_pos); + } + } + if (patterns.tool_call_closer.empty()) { + patterns.tool_call_closer = infer_tool_call_closer(tool1_diff, tool2_diff, tool3_diff); + } + + patterns.tool_call_end_marker = find_tool_call_end(func_context, func1_pos); + + if (!patterns.tool_call_end_marker.empty() 
&& patterns.tool_call_end_marker.length() > 1) { + size_t eos_pos = patterns.tool_call_end_marker.find("<|"); + if (eos_pos == 1) { + // Check if there's a bracket/brace before the token + char first_char = patterns.tool_call_end_marker[0]; + if (first_char == ']' || first_char == '}') { + // Check if this is an actual EOS token (contains "eot_id" or "eos") + std::string token_content = patterns.tool_call_end_marker.substr(eos_pos); + if (token_content.find("eot_id") != std::string::npos || + token_content.find("eos") != std::string::npos) { + // This is an EOS token, strip it + patterns.tool_call_end_marker = patterns.tool_call_end_marker.substr(0, 1); + } + } + } + } + + // Trim whitespace + if (!patterns.tool_call_end_marker.empty()) { + size_t first = patterns.tool_call_end_marker.find_first_not_of(" \n\t"); + size_t last = patterns.tool_call_end_marker.find_last_not_of(" \n\t"); + if (first != std::string::npos && last != std::string::npos) { + patterns.tool_call_end_marker = patterns.tool_call_end_marker.substr(first, (last - first + 1)); + } + } + + // If tool_call_end_marker matches function_closer, it found the wrong tag. + // Use tool_call_closer instead which is derived from common suffix of diffs. + if (!patterns.function_closer.empty() && patterns.tool_call_end_marker == patterns.function_closer) { + if (!patterns.tool_call_closer.empty()) { + // Try to extract a proper closing tag from tool_call_closer + // Use rfind to get the LAST closing tag (e.g., not ) + size_t close_start = patterns.tool_call_closer.rfind("', close_start); + if (close_end != std::string::npos) { + patterns.tool_call_end_marker = + patterns.tool_call_closer.substr(close_start, close_end - close_start + 1); + } + } + } + } else if (patterns.tool_call_end_marker == ">" && !patterns.tool_call_closer.empty() && + patterns.tool_call_closer.length() > 3) { + // If the specific end marker is just ">", but the common suffix (tool_call_closer) is substantial (e.g. 
<|tool_calls_section_end|>) + // then prefer the common suffix, as finding ">" might just be hitting the end of the last function call + if (patterns.tool_call_closer.find(patterns.tool_call_end_marker) != std::string::npos) { + patterns.tool_call_end_marker = patterns.tool_call_closer; + } + } + + if (patterns.tool_call_start_marker.empty()) { + std::vector diffs = { tool1_diff, tool2_diff, tool3_diff }; + patterns.tool_call_start_marker = find_common_substring_limited(diffs, 20, " \n\t<[{"); + } + + // Truncate if needed, but skip if func_pos is 0 (marker found via full output) + if (func1_pos != std::string::npos && func1_pos > 0 && patterns.tool_call_start_marker.length() > func1_pos) { + std::string candidate = patterns.tool_call_start_marker.substr(0, func1_pos); + size_t last_opener = candidate.find_last_of("{["); + if (last_opener != std::string::npos) { + patterns.tool_call_start_marker = candidate.substr(0, last_opener); + } else { + patterns.tool_call_start_marker = candidate; + } + } + + // Ensure we don't truncate in the middle of <|...|> tokens + patterns.tool_call_start_marker = adjust_to_token_boundary(patterns.tool_call_start_marker); + patterns.tool_call_end_marker = adjust_to_token_boundary(patterns.tool_call_end_marker); + + // Final trim + if (!patterns.tool_call_start_marker.empty()) { + size_t first = patterns.tool_call_start_marker.find_first_not_of(" \n\t\r"); + size_t last = patterns.tool_call_start_marker.find_last_not_of(" \n\t\r"); + if (first != std::string::npos && last != std::string::npos) { + patterns.tool_call_start_marker = patterns.tool_call_start_marker.substr(first, (last - first + 1)); + } + } + } + + return patterns; +} + +internal_tool_format determine_format_from_patterns(const internal_discovered_pattern & patterns) { + LOG_DBG("%s\n", __func__); + + if (patterns.tool_call_opener.empty() && patterns.tool_call_closer.empty() && patterns.function_opener.empty() && + patterns.function_closer.empty() && 
patterns.parameter_opener.empty() && patterns.parameter_closer.empty() && + patterns.argument_separator.empty() && patterns.tool_call_start_marker.empty() && + patterns.tool_call_end_marker.empty()) { + LOG_DBG("All patterns are empty - template doesn't support tool calls\n"); + return FORMAT_UNKNOWN; + } + + // Check for markdown code block format (Cohere Command-R Plus) + // STRUCTURAL PATTERN: Action:\n```json\n[...]\n``` + // Key indicators: + // 1. tool_call_start_marker contains "Action:" or similar plain text marker + // 2. function_name_suffix or tool_call_closer contains "```" (markdown code fence) + // 3. tool_call_opener starts with "[" indicating JSON array + bool has_code_fence = false; + if (!patterns.function_name_suffix.empty() && patterns.function_name_suffix.find("```") != std::string::npos) { + has_code_fence = true; + } + if (!patterns.tool_call_closer.empty() && patterns.tool_call_closer.find("```") != std::string::npos) { + has_code_fence = true; + } + bool has_action_marker = false; + if (!patterns.tool_call_start_marker.empty()) { + std::string marker_lower = patterns.tool_call_start_marker; + std::transform(marker_lower.begin(), marker_lower.end(), marker_lower.begin(), ::tolower); + if (marker_lower.find("action") != std::string::npos) { + has_action_marker = true; + } + } + if (has_code_fence && has_action_marker) { + LOG_DBG("Detected MARKDOWN_CODE_BLOCK format (Action: + ```json code fence)\n"); + return FORMAT_MARKDOWN_CODE_BLOCK; + } + + // Check for recipient-based routing format (e.g., Functionary v3.2) + // STRUCTURAL PATTERN: The same marker is used for both content routing and tool routing + // Key indicators: + // 1. tool_call_start_marker == function_opener (same marker used for both) + // 2. No parameter markers (arguments are plain dict/JSON, not wrapped in tags) + // 3. No XML-style tags (differentiates from FUNC_TAG_WITH_NAME) + // 4. 
function_opener doesn't start with structural chars like {, [, < (differentiates from other formats) + if (!patterns.tool_call_start_marker.empty() && !patterns.function_opener.empty() && + patterns.tool_call_start_marker == patterns.function_opener) { + // Check this isn't an XML-tagged format (opener would start with '<') + if (patterns.function_opener[0] != '<' && patterns.function_opener[0] != '{' && + patterns.function_opener[0] != '[') { + // Check there are no parameter markers + if (patterns.parameter_opener.empty() && patterns.parameter_closer.empty()) { + LOG_DBG("Detected RECIPIENT_BASED format (tool_call_start_marker == function_opener = '%s')\n", + patterns.tool_call_start_marker.c_str()); + return FORMAT_RECIPIENT_BASED; + } + } + } + + if (!patterns.tool_call_opener.empty()) { + if (patterns.tool_call_opener.find("{\"name\":") != std::string::npos || + patterns.tool_call_opener.find("{"name":") != std::string::npos) { + LOG_DBG("Detected JSON_NATIVE format from tool_call_opener JSON structure\n"); + return FORMAT_JSON_NATIVE; + } + } + + if (!patterns.function_opener.empty() && patterns.function_opener.find('<') == 0) { + bool has_substantial_param_markers = false; + if (!patterns.parameter_opener.empty()) { + has_substantial_param_markers = (count_non_whitespace(patterns.parameter_opener) > 1); + } + if (!has_substantial_param_markers && !patterns.parameter_closer.empty()) { + has_substantial_param_markers = (count_non_whitespace(patterns.parameter_closer) > 1); + } + + if (!has_substantial_param_markers) { + if ((!patterns.tool_call_opener.empty() && (patterns.tool_call_opener.find('[') != std::string::npos || + patterns.tool_call_opener.find('{') != std::string::npos)) || + (!patterns.tool_call_start_marker.empty() && + (patterns.tool_call_start_marker.find('[') != std::string::npos || + patterns.tool_call_start_marker.find('{') != std::string::npos))) { + LOG_DBG("Detected JSON_NATIVE format (XML markers but JSON structure)\n"); + return 
FORMAT_JSON_NATIVE; + } + } + + LOG_DBG("Detected XML_CONSTRUCTED format from function_opener\n"); + return FORMAT_XML_CONSTRUCTED; + } + + if (!patterns.function_opener.empty() && patterns.function_opener.find('{') == 0) { + LOG_DBG("Detected JSON_NATIVE format from function_opener\n"); + return FORMAT_JSON_NATIVE; + } + + // Check for bracket-tag format: [TOOL_CALLS]name[CALL_ID]id[ARGS]{...} + // Detected when function_name_suffix contains bracket tags like [CALL_ID]...[ARGS] + if (!patterns.function_name_suffix.empty() && patterns.function_name_suffix.find('[') != std::string::npos && + patterns.function_name_suffix.find(']') != std::string::npos) { + LOG_DBG("Detected BRACKET_TAG format from function_name_suffix containing bracket tags\n"); + return FORMAT_BRACKET_TAG; + } + + if (!patterns.tool_call_start_marker.empty() && + (patterns.tool_call_start_marker.find('<') == 0 || patterns.tool_call_start_marker.find('[') == 0)) { + bool is_prefix_marker = + patterns.tool_call_start_marker.find("<|") == 0 || patterns.tool_call_start_marker.find("[|") == 0; + // Check for bracket-tag format: [TAG] style without | (e.g., [TOOL_CALLS]) + bool is_bracket_tag = patterns.tool_call_start_marker.find('[') == 0 && + patterns.tool_call_start_marker.find("[|") != 0 && + patterns.tool_call_start_marker.find(']') != std::string::npos; + if (is_bracket_tag) { + LOG_DBG("Detected BRACKET_TAG format from tool_call_start_marker\n"); + return FORMAT_BRACKET_TAG; + } + if (is_prefix_marker) { + LOG_DBG("Detected JSON_NATIVE format from tool_call_start_marker (instruction-based)\n"); + return FORMAT_JSON_NATIVE; + } + + LOG_DBG("Detected XML_CONSTRUCTED format from tool_call_start_marker\n"); + return FORMAT_XML_CONSTRUCTED; + } + + if (!patterns.tool_call_start_marker.empty() && patterns.tool_call_start_marker.find('{') == 0) { + LOG_DBG("Detected JSON_NATIVE format from tool_call_start_marker\n"); + return FORMAT_JSON_NATIVE; + } + + if (!patterns.tool_call_end_marker.empty() && 
patterns.tool_call_end_marker.find('>') == 0) { + LOG_DBG("Detected XML_CONSTRUCTED format from tool_call_end_marker\n"); + return FORMAT_XML_CONSTRUCTED; + } + + if (!patterns.tool_call_end_marker.empty() && patterns.tool_call_end_marker.find('}') == 0) { + LOG_DBG("Detected JSON_NATIVE format from tool_call_end_marker\n"); + return FORMAT_JSON_NATIVE; + } + + LOG_DBG("Format could not be determined from patterns\n"); + return FORMAT_UNKNOWN; +} + +internal_discovered_pattern analyze_by_differential(const common_chat_template & tmpl) { + internal_discovered_pattern patterns; + + try { + LOG_DBG("%s\n", __func__); + + auto caps = tmpl.original_caps(); + bool minja_supports_tool_calls = caps.supports_tool_calls; + if (!minja_supports_tool_calls) { + LOG_DBG("Template doesn't support standard tool calls (per minja caps detection)\n"); + } + + // Define tools for testing + json tools = { + { { "type", "function" }, + { "function", + { { "name", "test_function_name" }, + { "description", "A test function" }, + { "parameters", + { { "type", "object" }, + { "properties", + { { "param1", { { "type", "string" }, { "description", "First parameter" } } }, + { "param2", { { "type", "string" }, { "description", "Second parameter" } } } } }, + { "required", json::array({ "param1", "param2" }) } } } } } }, + { { "type", "function" }, + { "function", + { { "name", "another_test_function" }, + { "description", "Another test function" }, + { "parameters", + { { "type", "object" }, + { "properties", + { { "param1", { { "type", "string" }, { "description", "First parameter" } } } } }, + { "required", json::array({ "param1" }) } } } } } } + }; + + // Test payload 1: Tool definitions + user + assistant with content only (no tool calls) + json user_msg = { + { "role", "user" }, + { "content", "Please help me with a task." } + }; + + json assistant_content_only = { + { "role", "assistant" }, + { "content", "I'll help you with that task right away." 
} + }; + + // Test payload 2: Tool definitions + user + assistant with content + tool calls + json assistant_content_with_tool = { + { "role", "assistant" }, + { "content", "I'll help you with that task right away." }, + { "tool_calls", + json::array( + { { { "id", "call_0001" }, + { "type", "function" }, + { "function", + { { "name", "test_function_name" }, + { "arguments", json::object({ { "param1", "value1" }, { "param2", "value2" } }) } } } } }) } + }; + + // Also test with content = null + tool calls (some templates check for this) + json assistant_null_content_with_tool = { + { "role", "assistant" }, + { "content", nullptr }, + { "tool_calls", + json::array( + { { { "id", "call_0001" }, + { "type", "function" }, + { "function", + { { "name", "test_function_name" }, + { "arguments", json::object({ { "param1", "value1" }, { "param2", "value2" } }) } } } } }) } + }; + + struct templates_params inputs; + inputs.tools = tools; + inputs.add_generation_prompt = false; + + // Helper function to safely render template, handling null content issues + auto safe_render = [&](const json & messages) -> std::string { + try { + // First try with the original messages + inputs.messages = messages; + return common_chat_template_direct_apply(tmpl, inputs); + } catch (const std::exception & e) { + // If it fails, try replacing null content with empty string + json fixed_messages = messages; + for (auto & msg : fixed_messages) { + if (msg.contains("content") && msg["content"].is_null()) { + msg["content"] = ""; + } + } + inputs.messages = fixed_messages; + try { + return common_chat_template_direct_apply(tmpl, inputs); + } catch (...) 
{ + return ""; + } + } + }; + + // Render payload 1: content only + std::string output_content_only = safe_render({ user_msg, assistant_content_only }); + + // Render payload 2: content + tool calls + std::string output_content_with_tool = safe_render({ user_msg, assistant_content_with_tool }); + + // Render payload 3: null content + tool calls + std::string output_null_content_with_tool = safe_render({ user_msg, assistant_null_content_with_tool }); + + LOG_DBG("Output 1 (content only): %s\n", output_content_only.c_str()); + LOG_DBG("Output 2 (content + tools): %s\n", output_content_with_tool.c_str()); + LOG_DBG("Output 3 (null + tools): %s\n", output_null_content_with_tool.c_str()); + + // Check if the template renders tool calls in any scenario + // Test 1: content vs content+tool_calls (for templates that render both) + // Test 2: content vs null+tool_calls (for templates that only render tools when content is null) + bool renders_tool_calls_with_content = (output_content_only != output_content_with_tool); + bool renders_tool_calls_without_content = (output_content_only != output_null_content_with_tool); + + if (!renders_tool_calls_with_content && !renders_tool_calls_without_content) { + LOG_DBG("Template does NOT render tool calls in any scenario\n"); + // Return empty patterns to indicate no tool support + return patterns; + } + + LOG_DBG("Template renders tool calls, proceeding with differential analysis\n"); + + // If we get here, the template does support tool calls + // Use the original differential analysis approach but now we know it's valid + json base_msg = { + { "role", "assistant" }, + { "content", "MARKER" } + }; + + // Use nullptr for content to trigger tool_calls branch in templates that check "content is none" + // Include "id" field as some templates (e.g., Mistral Nemo) require it + json tool_msg1 = { + { "role", "assistant" }, + { "content", nullptr }, + { "tool_calls", + json::array( + { { { "id", "call_0001" }, + { "type", "function" }, + { 
"function", { { "name", "test_function_name" }, { "arguments", json::object() } } } } }) } + }; + + json tool_msg2 = { + { "role", "assistant" }, + { "content", nullptr }, + { "tool_calls", + json::array( + { { { "id", "call_0001" }, + { "type", "function" }, + { "function", + { { "name", "test_function_name" }, + { "arguments", json::object({ { "param1", "value1" }, { "param2", "value2" } }) } } } } }) } + }; + + json tool_msg3 = { + { "role", "assistant" }, + { "content", nullptr }, + { "tool_calls", + json::array( + { { { "id", "call_0001" }, + { "type", "function" }, + { "function", { { "name", "test_function_name" }, { "arguments", json::object() } } } }, + { { "id", "call_0002" }, + { "type", "function" }, + { "function", { { "name", "another_test_function" }, { "arguments", json::object() } } } } }) } + }; + + inputs.messages = { user_msg, base_msg }; + auto base_output = safe_render({ user_msg, base_msg }); + + inputs.messages = { user_msg, tool_msg1 }; + auto tool1_output = safe_render({ user_msg, tool_msg1 }); + + // Detect if template renders null content as "None" (Python/Jinja string representation) + // This happens when templates concatenate content without null checks, e.g.: + // {{ '<|im_start|>' + message.role + '\n' + content }} + // Check if "None" appears in the tool output where it shouldn't + if (tool1_output.find("None") != std::string::npos) { + // Verify this is actually from null content by checking if it goes away with empty string + json tool_msg1_empty_content = tool_msg1; + tool_msg1_empty_content["content"] = ""; + auto tool1_output_empty = safe_render({ user_msg, tool_msg1_empty_content }); + if (tool1_output_empty.find("None") == std::string::npos) { + LOG_DBG("Template renders null content as 'None', switching to empty string\n"); + patterns.requires_nonnull_content = true; + tool1_output = tool1_output_empty; + + // Update tool messages to use empty string instead of null + tool_msg1["content"] = ""; + tool_msg2["content"] = ""; 
+ tool_msg3["content"] = ""; + } + } + + inputs.messages = { user_msg, tool_msg2 }; + auto tool2_output = safe_render({ user_msg, tool_msg2 }); + + inputs.messages = { user_msg, tool_msg3 }; + auto tool3_output = safe_render({ user_msg, tool_msg3 }); + + std::string tool1_diff = find_string_difference(base_output, tool1_output); + std::string tool2_diff = find_string_difference(base_output, tool2_output); + std::string tool3_diff = find_string_difference(base_output, tool3_output); + + LOG_DBG("Tool1 diff length: %zu\n", tool1_diff.length()); + LOG_DBG("Tool2 diff length: %zu\n", tool2_diff.length()); + LOG_DBG("Tool3 diff length: %zu\n", tool3_diff.length()); + + if (tool1_diff.empty() && tool2_diff.empty() && tool3_diff.empty()) { + LOG_DBG("All diffs are empty - trying without add_generation_prompt\n"); + // Try with add_generation_prompt variations + json alternative_base_msg = { + { "role", "assistant" }, + { "content", "MARKER" } + }; + + templates_params alt_inputs; + alt_inputs.tools = tools; + alt_inputs.messages = { user_msg, alternative_base_msg }; + alt_inputs.add_generation_prompt = false; + auto alt_base = common_chat_template_direct_apply(tmpl, alt_inputs); + + alt_inputs.messages = { user_msg, tool_msg1 }; + auto alt_tool1 = common_chat_template_direct_apply(tmpl, alt_inputs); + + tool1_diff = find_string_difference(alt_base, alt_tool1); + if (!tool1_diff.empty()) { + // If we found a diff using the alternative approach, we must use the corresponding + // full output for pattern extraction (otherwise diff indices will be invalid) + tool1_output = alt_tool1; + + alt_inputs.messages = { user_msg, tool_msg2 }; + tool2_diff = find_string_difference(alt_base, common_chat_template_direct_apply(tmpl, inputs)); + alt_inputs.messages = { user_msg, tool_msg3 }; + tool3_diff = find_string_difference(alt_base, common_chat_template_direct_apply(tmpl, inputs)); + } + } + + patterns = extract_patterns_from_differences(tool1_diff, tool2_diff, tool3_diff, 
tool1_output); + +        LOG_DBG("=== ENDING TEMPLATE DIFFERENTIAL ANALYSIS ===\n"); + +    } catch (const std::exception & e) { +        LOG_DBG("Template differential analysis failed: %s\n", e.what()); +    } + +    return patterns; +} diff --git a/common/chat-auto-parser-helpers.h b/common/chat-auto-parser-helpers.h new file mode 100644 index 0000000000..5162b09fbe --- /dev/null +++ b/common/chat-auto-parser-helpers.h @@ -0,0 +1,133 @@ +#pragma once + +#include +#include +#include + +#include "chat.h" +#include "nlohmann/json.hpp" + +using json = nlohmann::ordered_json; + +namespace minja { +class chat_template; +} + +void trim_whitespace(std::string & str); +void trim_trailing_newlines(std::string & str); +size_t count_non_whitespace(const std::string & str); +size_t find_last_of_any(const std::string & str, const std::string & chars, size_t start_pos); + +std::string extract_tag_name(const std::string & tag); +std::string create_closing_tag(const std::string & opening_tag); + +std::string find_common_prefix(const std::vector & strings); +std::string find_common_suffix_generic(const std::vector & strings); +std::string find_common_substring_limited(const std::vector & strings, + size_t max_length, + const std::string & delimiters); + +bool string_ends_with(const std::string & str, const std::string & suffix); +std::string apply_template(common_chat_template & tmpl, + const struct templates_params & inputs, + const std::optional & messages_override = std::nullopt, + const std::optional & tools_override = std::nullopt, + const std::optional & additional_context = std::nullopt); + +// Adjust a marker string to ensure it ends at a complete <|...|> token boundary +// This prevents truncation mid-token +std::string adjust_to_token_boundary(const std::string & str); + +// Find the position of a token opener (<| or <｜) in a string +// Returns std::string::npos if not found +size_t find_token_opener(const std::string & str, size_t start_pos = 0); + +// Find the position of a token closer (|>
 or ｜>) in a string +// Returns std::string::npos if not found +size_t find_token_closer(const std::string & str, size_t start_pos = 0); + +// Get the length of the token opener at the given position (2 for <| or 4 for <｜) +// Returns 0 if no valid opener at position +size_t get_token_opener_length(const std::string & str, size_t pos); + +// Get the length of the token closer at the given position (2 for |> or 4 for ｜>) +// Returns 0 if no valid closer at position +size_t get_token_closer_length(const std::string & str, size_t pos); + +// Strip EOS/end-of-sentence tokens from the end of a string +// Handles both standard (<|eos|>, <|eot_id|>) and fullwidth (<｜end▁of▁sentence｜>) formats +std::string strip_eos_token(const std::string & str); + +// Internal structure for differential analysis (used during pattern extraction) +struct internal_discovered_pattern { +    std::string tool_call_opener; +    std::string tool_call_closer; +    std::string function_opener; +    std::string function_closer; +    std::string function_name_suffix; +    std::string parameter_opener; +    std::string parameter_closer; +    std::string argument_separator; +    std::string parameter_key_prefix; +    std::string parameter_key_suffix; +    std::string tool_call_start_marker; +    std::string tool_call_end_marker; +    std::string reasoning_start_marker; +    std::string reasoning_end_marker; +    std::string content_start_marker; +    std::string content_end_marker; +    std::string tool_name_field = "name"; +    std::string tool_args_field = "arguments"; +    std::string tool_id_field; +    // For markdown code block format (Cohere Command-R Plus) +    std::string code_block_marker; // e.g., "Action:" +    std::string code_block_language; // e.g., "json" +    // Flag: template renders null content as "None" string, requires empty string instead +    bool requires_nonnull_content = false; +}; + +// Internal enum for format classification +enum internal_tool_format { +    FORMAT_JSON_NATIVE, +    FORMAT_XML_CONSTRUCTED, +    FORMAT_BRACKET_TAG, //
[TOOL_CALLS]name[CALL_ID]id[ARGS]{...} (Mistral Small 3.2) + FORMAT_RECIPIENT_BASED, // >>>recipient\n{content} (Functionary v3.2) + FORMAT_MARKDOWN_CODE_BLOCK, // Action:\n```json\n[...]\n``` (Cohere Command-R Plus) + FORMAT_CONTENT_ONLY, + FORMAT_UNKNOWN +}; + +// Find the suffix that differentiates an extended string from a base string +std::string find_string_difference(const std::string & base, const std::string & extended); + +// Extract JSON field name from an opener string +std::string extract_json_field_name(const std::string & opener, + const std::string & default_name, + const std::vector & candidates); + +// Find a closing pattern in a string starting from a given position +std::string find_closing_pattern(const std::string & diff, size_t func_pos); + +// Find the tool call start marker in a difference string +std::string find_tool_call_start(const std::string & diff); + +// Find the tool call end marker in a difference string +std::string find_tool_call_end(const std::string & diff, size_t func_pos); + +// Infer the tool call opener from multiple difference strings +std::string infer_tool_call_opener(const std::string & diff1, const std::string & diff2, const std::string & diff3); + +// Infer the tool call closer from multiple difference strings +std::string infer_tool_call_closer(const std::string & diff1, const std::string & diff2, const std::string & diff3); + +// Extract patterns from differences between tool calls +internal_discovered_pattern extract_patterns_from_differences(const std::string & tool1_diff, + const std::string & tool2_diff, + const std::string & tool3_diff, + const std::string & tool1_full = ""); + +// Determine the format classification from discovered patterns +internal_tool_format determine_format_from_patterns(const internal_discovered_pattern & patterns); + +// Analyze template using differential analysis (internal use) +internal_discovered_pattern analyze_by_differential(const common_chat_template & tmpl); diff --git 
a/common/chat-auto-parser.h b/common/chat-auto-parser.h new file mode 100644 index 0000000000..6062f4d37a --- /dev/null +++ b/common/chat-auto-parser.h @@ -0,0 +1,183 @@ +#pragma once + +#include "chat.h" +#include "common.h" +#include "jinja/runtime.h" + +#include +#include +#include + +using json = nlohmann::ordered_json; + +// Phase 1 result: Content and reasoning structure (analyzed without tools) +struct content_structure { + // Reasoning handling mode + enum reasoning_mode_type { + REASONING_NONE, // No reasoning markers detected + REASONING_OPTIONAL, // ... may appear before content + REASONING_FORCED_OPEN, // Template ends with open reasoning tag (thinking_forced_open) + }; + + reasoning_mode_type reasoning_mode = REASONING_NONE; + std::string reasoning_start; // e.g., "", "<|START_THINKING|>" + std::string reasoning_end; // e.g., "", "<|END_THINKING|>" + + // Content wrapping mode + enum content_mode_type { + CONTENT_PLAIN, // No content markers + CONTENT_ALWAYS_WRAPPED, // ... always present + CONTENT_WRAPPED_WITH_REASONING, // Content wrapped only when reasoning present + }; + + content_mode_type content_mode = CONTENT_PLAIN; + std::string content_start; // e.g., "", "<|START_RESPONSE|>" + std::string content_end; // e.g., "", "<|END_RESPONSE|>" +}; + +// Phase 2 result: Tool call structure (layered on Phase 1) +struct tool_call_structure { + bool supports_tools = false; + + // Container markers (what wraps all tool calls) + std::string tool_section_start; // e.g., "", "[TOOL_CALLS]", "", "" + std::string tool_section_end; // e.g., "", "]", "", "" + + // Function format (how individual functions are structured) + enum function_format { + FUNC_JSON_OBJECT, // {"name": "X", "arguments": {...}} + FUNC_TAG_WITH_NAME, // {...} + FUNC_TAG_NAME_ONLY, // ... 
where X is function name (rare) + FUNC_PREFIXED_INDEXED, // <|tool_call_begin|>functions.X:0<|tool_call_argument_begin|>{...}<|tool_call_end|> + FUNC_NAME_AS_KEY, // [{"function_name": {...arguments...}}] (Apertus-style) + FUNC_BRACKET_TAG, // [TOOL_CALLS]X[CALL_ID]id[ARGS]{...} (Mistral Small 3.2 style) + FUNC_RECIPIENT_BASED, // >>>recipient\n{content} where recipient is "all" (content) or function name (tools) + FUNC_MARKDOWN_CODE_BLOCK, // Action:\n```json\n[...]\n``` (Cohere Command-R Plus style) + }; + + function_format function_format = FUNC_JSON_OBJECT; + + // For FUNC_JSON_OBJECT format - field names (may vary between templates) + std::string name_field = "name"; // Could be "tool_name", "function" + std::string args_field = "arguments"; // Could be "parameters", "params", "input" + std::string id_field; // Optional: "id", "tool_call_id", "" + + // For FUNC_TAG_WITH_NAME format + std::string function_prefix; // e.g., "" + std::string function_close; // e.g., "" + + // For FUNC_PREFIXED_INDEXED format (e.g., Kimi-K2) + std::string per_call_start; // e.g., "<|tool_call_begin|>" + std::string function_namespace; // e.g., "functions." 
(prefix before function name) + std::string args_marker; // e.g., "<|tool_call_argument_begin|>" + std::string per_call_end; // e.g., "<|tool_call_end|>" + + // For FUNC_BRACKET_TAG format (e.g., Mistral Small 3.2) + std::string id_marker; // e.g., "[CALL_ID]" - marker before tool call ID + + // For FUNC_MARKDOWN_CODE_BLOCK format (e.g., Cohere Command-R Plus) + std::string code_block_marker; // e.g., "Action:" - text marker before code block + std::string code_block_language; // e.g., "json" - language identifier in code fence + + // Argument format (how arguments are structured within a function) + enum argument_format { + ARGS_JSON, // Standard JSON object: {"key": "value", ...} + ARGS_TAGGED, // XML-style: value + ARGS_KEY_VALUE_TAGS, // keyvalue (GLM-4.6) + }; + + argument_format argument_format = ARGS_JSON; + + // For ARGS_TAGGED format + std::string arg_prefix; // e.g., "" + std::string arg_close; // e.g., "", "" + std::string arg_separator; // e.g., "", "\n" + + // Flag: template renders null content as "None" string, requires empty string instead + bool requires_nonnull_content = false; +}; + +// Combined result of unified template analysis +struct template_analysis_result { + content_structure content; + tool_call_structure tools; + + // Preserved tokens for tokenizer (union of all markers) + std::vector preserved_tokens; +}; + +// Template analyzer that uses two-phase differential analysis +class template_analyzer { + public: + // Main entry point: Unified two-phase analysis + static template_analysis_result analyze_template(const common_chat_template & tmpl); + + // Phase 1 - Analyze content and reasoning structure (no tools) + static content_structure analyze_content_structure(const common_chat_template & tmpl); + + // Phase 2 - Analyze tool call structure (layered on Phase 1) + static tool_call_structure analyze_tool_structure(const common_chat_template & tmpl, + const content_structure & content); + + private: + // Phase 1 detection helpers + static 
void detect_reasoning_markers(const common_chat_template & tmpl, content_structure & cs); + static void detect_content_markers(const common_chat_template & tmpl, content_structure & cs); + static content_structure::reasoning_mode_type detect_reasoning_mode(const content_structure & cs, + const std::string & prompt); + + // Phase 2 detection helpers + static void detect_tool_markers(const common_chat_template & tmpl, tool_call_structure & ts); + static void detect_function_format(const common_chat_template & tmpl, tool_call_structure & ts); + static void detect_argument_format(const common_chat_template & tmpl, tool_call_structure & ts); + + // Phase 2 helper methods + static void analyze_json_format(tool_call_structure & ts, const struct internal_discovered_pattern & discovered); + static void analyze_xml_format(tool_call_structure & ts, const struct internal_discovered_pattern & discovered); + static void analyze_bracket_tag_format(tool_call_structure & ts, + const struct internal_discovered_pattern & discovered); + static void analyze_recipient_based_format(tool_call_structure & ts, + const struct internal_discovered_pattern & discovered); + static void analyze_markdown_code_block_format(tool_call_structure & ts, + const struct internal_discovered_pattern & discovered); + + // Helper to collect preserved tokens from analysis result + static void collect_preserved_tokens(template_analysis_result & result); +}; + +struct templates_params { + json messages; + json tools; + common_chat_tool_choice tool_choice = COMMON_CHAT_TOOL_CHOICE_AUTO; + json json_schema; + bool parallel_tool_calls = true; + common_reasoning_format reasoning_format = COMMON_REASONING_FORMAT_AUTO; + bool stream = true; + std::string grammar; + bool add_generation_prompt = false; + bool enable_thinking = true; + std::chrono::system_clock::time_point now = std::chrono::system_clock::now(); + json extra_context; + bool add_bos = false; + bool add_eos = false; + bool is_inference = true; + bool 
add_inference = false; + bool mark_input = true; // whether to mark input strings in the jinja context +}; + +class universal_peg_generator { + public: + // Generate parser from analysis result + static common_chat_params generate_parser(const template_analysis_result & analysis, + const common_chat_template & tmpl, + const struct templates_params & inputs); + + private: + // Build unified parser (single code path for all formats) + static common_peg_arena build_parser(const template_analysis_result & analysis, + const common_chat_template & tmpl, + const struct templates_params & inputs, + bool thinking_forced_open); +}; diff --git a/common/chat-parser-xml-toolcall.cpp b/common/chat-parser-xml-toolcall.cpp deleted file mode 100644 index a80900ff8d..0000000000 --- a/common/chat-parser-xml-toolcall.cpp +++ /dev/null @@ -1,879 +0,0 @@ -#include "chat.h" -#include "chat-parser.h" -#include "common.h" -#include "json-partial.h" -#include "json-schema-to-grammar.h" -#include "log.h" -#include "regex-partial.h" - -using json = nlohmann::ordered_json; - -class xml_toolcall_syntax_exception : public std::runtime_error { - public: - xml_toolcall_syntax_exception(const std::string & message) : std::runtime_error(message) {} -}; - -template -inline void sort_uniq(std::vector &vec) { - std::sort(vec.begin(), vec.end()); - vec.erase(std::unique(vec.begin(), vec.end()), vec.end()); -} - -template -inline bool all_space(const T &str) { - return std::all_of(str.begin(), str.end(), [](unsigned char ch) { return std::isspace(ch); }); -} - -static size_t utf8_truncate_safe(const std::string_view s) { - size_t len = s.size(); - if (len == 0) return 0; - size_t i = len; - for (size_t back = 0; back < 4 && i > 0; ++back) { - --i; - unsigned char c = s[i]; - if ((c & 0x80) == 0) { - return len; - } else if ((c & 0xC0) == 0xC0) { - size_t expected_len = 0; - if ((c & 0xE0) == 0xC0) expected_len = 2; - else if ((c & 0xF0) == 0xE0) expected_len = 3; - else if ((c & 0xF8) == 0xF0) 
expected_len = 4; - else return i; - if (len - i >= expected_len) { - return len; - } else { - return i; - } - } - } - return len - std::min(len, size_t(3)); -} - -inline void utf8_truncate_safe_resize(std::string &s) { - s.resize(utf8_truncate_safe(s)); -} - -inline std::string_view utf8_truncate_safe_view(const std::string_view s) { - return s.substr(0, utf8_truncate_safe(s)); -} - -static std::optional try_find_2_literal_splited_by_spaces(common_chat_msg_parser & builder, const std::string & literal1, const std::string & literal2) { - if (literal1.size() == 0) return builder.try_find_literal(literal2); - const auto saved_pos = builder.pos(); - while (auto res = builder.try_find_literal(literal1)) { - builder.consume_spaces(); - const auto match_len = std::min(literal2.size(), builder.input().size() - builder.pos()); - if (builder.input().compare(builder.pos(), match_len, literal2, 0, match_len) == 0) { - if (res->prelude.size() != res->groups[0].begin - saved_pos) { - res->prelude = builder.str({saved_pos, res->groups[0].begin}); - } - builder.move_to(builder.pos() + match_len); - res->groups[0].end = builder.pos(); - GGML_ASSERT(res->groups[0].begin != res->groups[0].end); - return res; - } - builder.move_to(res->groups[0].begin + 1); - } - builder.move_to(saved_pos); - return std::nullopt; -} - -/** - * make a GBNF that accept any strings except those containing any of the forbidden strings. 
- */ -std::string make_gbnf_excluding(std::vector forbids) { - constexpr auto charclass_escape = [](unsigned char c) -> std::string { - if (c == '\\' || c == ']' || c == '^' || c == '-') { - std::string s = "\\"; - s.push_back((char)c); - return s; - } - if (isprint(c)) { - return std::string(1, (char)c); - } - char buf[16]; - snprintf(buf, 15, "\\x%02X", c); - return std::string(buf); - }; - constexpr auto build_expr = [charclass_escape](auto self, const std::vector& forbids, int l, int r, int depth) -> std::string { - std::vector>> children; - int i = l; - while (i < r) { - const std::string &s = forbids[i]; - if ((int)s.size() == depth) { - ++i; - continue; - } - unsigned char c = (unsigned char)s[depth]; - int j = i; - while (j < r && (int)forbids[j].size() > depth && - (unsigned char)forbids[j][depth] == c) { - ++j; - } - children.push_back({c, {i, j}}); - i = j; - } - std::vector alts; - if (!children.empty()) { - std::string cls; - for (auto &ch : children) cls += charclass_escape(ch.first); - alts.push_back(std::string("[^") + cls + "]"); - } - for (auto &ch : children) { - std::string childExpr = self(self, forbids, ch.second.first, ch.second.second, depth+1); - if (!childExpr.empty()) { - std::string quoted_ch = "\""; - if (ch.first == '\\') quoted_ch += "\\\\"; - else if (ch.first == '"') quoted_ch += "\\\""; - else if (isprint(ch.first)) quoted_ch.push_back(ch.first); - else { - char buf[16]; - snprintf(buf, 15, "\\x%02X", ch.first); - quoted_ch += buf; - } - quoted_ch += "\""; - std::string branch = quoted_ch + std::string(" ") + childExpr; - alts.push_back(branch); - } - } - if (alts.empty()) return ""; - std::ostringstream oss; - oss << "( "; - for (size_t k = 0; k < alts.size(); ++k) { - if (k) oss << " | "; - oss << alts[k]; - } - oss << " )"; - return oss.str(); - }; - if (forbids.empty()) return "( . 
)*"; - sort(forbids.begin(), forbids.end()); - std::string expr = build_expr(build_expr, forbids, 0, forbids.size(), 0); - if (expr.empty()) { - std::string cls; - for (auto &s : forbids) if (!s.empty()) cls += charclass_escape((unsigned char)s[0]); - expr = std::string("( [^") + cls + "] )"; - } - if (forbids.size() == 1) - return expr + "*"; - else - return std::string("( ") + expr + " )*"; -} - -/** - * Build grammar for xml-style tool call - * form.scope_start and form.scope_end can be empty. - * Requires data.format for model-specific hacks. - */ -void build_grammar_xml_tool_call(common_chat_params & data, const json & tools, const struct xml_tool_call_format & form) { - GGML_ASSERT(!form.tool_start.empty()); - GGML_ASSERT(!form.tool_sep.empty()); - GGML_ASSERT(!form.key_start.empty()); - GGML_ASSERT(!form.val_end.empty()); - GGML_ASSERT(!form.tool_end.empty()); - - std::string key_val_sep = form.key_val_sep; - if (form.key_val_sep2) { - key_val_sep += "\n"; - key_val_sep += *form.key_val_sep2; - } - GGML_ASSERT(!key_val_sep.empty()); - - if (tools.is_array() && !tools.empty()) { - data.grammar = build_grammar([&](const common_grammar_builder &builder) { - auto string_arg_val = form.last_val_end ? 
- builder.add_rule("string-arg-val", make_gbnf_excluding({form.val_end, *form.last_val_end})) : - builder.add_rule("string-arg-val", make_gbnf_excluding({form.val_end})); - - std::vector tool_rules; - for (const auto & tool : tools) { - if (!tool.contains("type") || tool.at("type") != "function" || !tool.contains("function")) { - LOG_WRN("Skipping tool without function: %s", tool.dump(2).c_str()); - continue; - } - const auto & function = tool.at("function"); - if (!function.contains("name") || !function.at("name").is_string()) { - LOG_WRN("Skipping invalid function (invalid name): %s", function.dump(2).c_str()); - continue; - } - if (!function.contains("parameters") || !function.at("parameters").is_object()) { - LOG_WRN("Skipping invalid function (invalid parameters): %s", function.dump(2).c_str()); - continue; - } - std::string name = function.at("name"); - auto parameters = function.at("parameters"); - builder.resolve_refs(parameters); - - struct parameter_rule { - std::string symbol_name; - bool is_required; - }; - std::vector arg_rules; - if (!parameters.contains("properties") || !parameters.at("properties").is_object()) { - LOG_WRN("Skipping invalid function (invalid properties): %s", function.dump(2).c_str()); - continue; - } else { - std::vector requiredParameters; - if (parameters.contains("required")) { - try { parameters.at("required").get_to(requiredParameters); } - catch (const std::runtime_error&) { - LOG_WRN("Invalid function required parameters, ignoring: %s", function.at("required").dump(2).c_str()); - } - } - sort_uniq(requiredParameters); - for (const auto & [key, value] : parameters.at("properties").items()) { - std::string quoted_key = key; - bool required = std::binary_search(requiredParameters.begin(), requiredParameters.end(), key); - if (form.key_start.back() == '"' && key_val_sep[0] == '"') { - quoted_key = gbnf_format_literal(key); - quoted_key = quoted_key.substr(1, quoted_key.size() - 2); - } - arg_rules.push_back(parameter_rule 
{builder.add_rule("func-" + name + "-kv-" + key, - gbnf_format_literal(form.key_start) + " " + - gbnf_format_literal(quoted_key) + " " + - gbnf_format_literal(key_val_sep) + " " + - ((value.contains("type") && value["type"].is_string() && value["type"] == "string" && (!form.raw_argval || *form.raw_argval)) ? - (form.raw_argval ? - string_arg_val : - "( " + string_arg_val + " | " + builder.add_schema(name + "-arg-" + key, value) + " )" - ) : - builder.add_schema(name + "-arg-" + key, value) - ) - ), required}); - } - } - - auto next_arg_with_sep = builder.add_rule(name + "-last-arg-end", form.last_val_end ? gbnf_format_literal(*form.last_val_end) : gbnf_format_literal(form.val_end)); - decltype(next_arg_with_sep) next_arg = "\"\""; - for (auto i = arg_rules.size() - 1; /* i >= 0 && */ i < arg_rules.size(); --i) { - std::string include_this_arg = arg_rules[i].symbol_name + " " + next_arg_with_sep; - next_arg = builder.add_rule(name + "-arg-after-" + std::to_string(i), arg_rules[i].is_required ? - include_this_arg : "( " + include_this_arg + " ) | " + next_arg - ); - include_this_arg = gbnf_format_literal(form.val_end) + " " + include_this_arg; - next_arg_with_sep = builder.add_rule(name + "-arg-after-" + std::to_string(i) + "-with-sep", arg_rules[i].is_required ? 
- include_this_arg : "( " + include_this_arg + " ) | " + next_arg_with_sep - ); - } - - std::string quoted_name = name; - if (form.tool_start.back() == '"' && form.tool_sep[0] == '"') { - quoted_name = gbnf_format_literal(name); - quoted_name = quoted_name.substr(1, quoted_name.size() - 2); - } - quoted_name = gbnf_format_literal(quoted_name); - // Kimi-K2 uses functions.{{ tool_call['function']['name'] }}:{{ loop.index }} as function name - if (data.format == COMMON_CHAT_FORMAT_KIMI_K2) { - quoted_name = "\"functions.\" " + quoted_name + " \":\" [0-9]+"; - } - tool_rules.push_back(builder.add_rule(name + "-call", - gbnf_format_literal(form.tool_start) + " " + - quoted_name + " " + - gbnf_format_literal(form.tool_sep) + " " + - next_arg - )); - } - - auto tool_call_once = builder.add_rule("root-tool-call-once", string_join(tool_rules, " | ")); - auto tool_call_more = builder.add_rule("root-tool-call-more", gbnf_format_literal(form.tool_end) + " " + tool_call_once); - auto call_end = builder.add_rule("root-call-end", form.last_tool_end ? gbnf_format_literal(*form.last_tool_end) : gbnf_format_literal(form.tool_end)); - auto tool_call_multiple_with_end = builder.add_rule("root-tool-call-multiple-with-end", tool_call_once + " " + tool_call_more + "* " + call_end); - builder.add_rule("root", - (form.scope_start.empty() ? "" : gbnf_format_literal(form.scope_start) + " ") + - tool_call_multiple_with_end + "?" + - (form.scope_end.empty() ? "" : " " + gbnf_format_literal(form.scope_end)) - ); - }); - - // grammar trigger for tool call - data.grammar_triggers.push_back({ COMMON_GRAMMAR_TRIGGER_TYPE_WORD, form.scope_start + form.tool_start }); - } -} - -/** - * Parse XML-Style tool call for given xml_tool_call_format. Return false for invalid syntax and get the position untouched. - * Throws xml_toolcall_syntax_exception if there is invalid syntax and cannot recover the original status for common_chat_msg_parser. 
- * form.scope_start, form.tool_sep and form.scope_end can be empty. - */ -inline bool parse_xml_tool_calls(common_chat_msg_parser & builder, const struct xml_tool_call_format & form) { - GGML_ASSERT(!form.tool_start.empty()); - GGML_ASSERT(!form.key_start.empty()); - GGML_ASSERT(!form.key_val_sep.empty()); - GGML_ASSERT(!form.val_end.empty()); - GGML_ASSERT(!form.tool_end.empty()); - - // Helper to choose return false or throw error - constexpr auto return_error = [](common_chat_msg_parser & builder, auto &start_pos, const bool &recovery) { - LOG_DBG("Failed to parse XML-Style tool call at position: %s\n", gbnf_format_literal(builder.consume_rest().substr(0, 20)).c_str()); - if (recovery) { - builder.move_to(start_pos); - return false; - } else throw xml_toolcall_syntax_exception("Tool call parsing failed with unrecoverable errors. Try using a grammar to constrain the model’s output."); - }; - // Drop substring from needle to end from a JSON - constexpr auto partial_json = [](std::string &json_str, std::string_view needle = "XML_TOOL_CALL_PARTIAL_FLAG") { - auto pos = json_str.rfind(needle); - if (pos == std::string::npos) { - return false; - } - for (auto i = pos + needle.size(); i < json_str.size(); ++i) { - unsigned char ch = static_cast(json_str[i]); - if (ch != '\'' && ch != '"' && ch != '}' && ch != ':' && !std::isspace(ch)) { - return false; - } - } - if (pos != 0 && json_str[pos - 1] == '"') { - --pos; - } - json_str.resize(pos); - return true; - }; - // Helper to generate a partial argument JSON - constexpr auto gen_partial_json = [partial_json](auto set_partial_arg, auto &arguments, auto &builder, auto &function_name) { - auto rest = builder.consume_rest(); - utf8_truncate_safe_resize(rest); - set_partial_arg(rest, "XML_TOOL_CALL_PARTIAL_FLAG"); - auto tool_str = arguments.dump(); - if (partial_json(tool_str)) { - if (builder.add_tool_call(function_name, "", tool_str)) { - return; - } - } - LOG_DBG("Failed to parse partial XML-Style tool call, fallback 
to non-partial: %s\n", tool_str.c_str()); - }; - // Helper to find a close (because there may be form.last_val_end or form.last_tool_end) - constexpr auto try_find_close = []( - common_chat_msg_parser & builder, - const std::string & end, - const std::optional & alt_end, - const std::string & end_next, - const std::optional & alt_end_next - ) { - auto saved_pos = builder.pos(); - auto tc = builder.try_find_literal(end); - auto val_end_size = end.size(); - if (alt_end) { - auto pos_1 = builder.pos(); - builder.move_to(saved_pos); - auto tc2 = try_find_2_literal_splited_by_spaces(builder, *alt_end, end_next); - if (alt_end_next) { - builder.move_to(saved_pos); - auto tc3 = try_find_2_literal_splited_by_spaces(builder, *alt_end, *alt_end_next); - if (tc3 && (!tc2 || tc2->prelude.size() > tc3->prelude.size())) { - tc2 = tc3; - } - } - if (tc2 && (!tc || tc->prelude.size() > tc2->prelude.size())) { - tc = tc2; - tc->groups[0].end = std::min(builder.input().size(), tc->groups[0].begin + alt_end->size()); - builder.move_to(tc->groups[0].end); - val_end_size = alt_end->size(); - } else { - builder.move_to(pos_1); - } - } - return std::make_pair(val_end_size, tc); - }; - // Helper to find a val_end or last_val_end, returns matched pattern size - const auto try_find_val_end = [try_find_close, &builder, &form]() { - return try_find_close(builder, form.val_end, form.last_val_end, form.tool_end, form.last_tool_end); - }; - // Helper to find a tool_end or last_tool_end, returns matched pattern size - const auto try_find_tool_end = [try_find_close, &builder, &form]() { - return try_find_close(builder, form.tool_end, form.last_tool_end, form.scope_end, std::nullopt); - }; - - bool recovery = true; - const auto start_pos = builder.pos(); - if (!all_space(form.scope_start)) { - if (auto tc = builder.try_find_literal(form.scope_start)) { - if (all_space(tc->prelude)) { - if (form.scope_start.size() != tc->groups[0].end - tc->groups[0].begin) - throw 
common_chat_msg_partial_exception("Partial literal: " + gbnf_format_literal(form.scope_start)); - } else { - builder.move_to(start_pos); - return false; - } - } else return false; - } - while (auto tc = builder.try_find_literal(form.tool_start)) { - if (!all_space(tc->prelude)) { - LOG_DBG("XML-Style tool call: Expected %s, but found %s, trying to match next pattern\n", - gbnf_format_literal(form.tool_start).c_str(), - gbnf_format_literal(tc->prelude).c_str() - ); - builder.move_to(tc->groups[0].begin - tc->prelude.size()); - break; - } - - // Find tool name - auto func_name = builder.try_find_literal(all_space(form.tool_sep) ? form.key_start : form.tool_sep); - if (!func_name) { - auto [sz, tc] = try_find_tool_end(); - func_name = tc; - } - if (!func_name) { - // Partial tool name not supported - throw common_chat_msg_partial_exception("incomplete tool_call"); - } - // If the model generate multiple tool call and the first tool call has no argument - if (func_name->prelude.find(form.tool_end) != std::string::npos || (form.last_tool_end ? func_name->prelude.find(*form.last_tool_end) != std::string::npos : false)) { - builder.move_to(func_name->groups[0].begin - func_name->prelude.size()); - auto [sz, tc] = try_find_tool_end(); - func_name = tc; - } - - // Parse tool name - builder.move_to(all_space(form.tool_sep) ? 
func_name->groups[0].begin : func_name->groups[0].end); - std::string function_name = string_strip(func_name->prelude); - // Kimi-K2 uses functions.{{ tool_call['function']['name'] }}:{{ loop.index }} as function name - if (builder.syntax().format == COMMON_CHAT_FORMAT_KIMI_K2) { - if (string_starts_with(function_name, "functions.")) { - static const std::regex re(":\\d+$"); - if (std::regex_search(function_name, re)) { - function_name = function_name.substr(10, function_name.rfind(":") - 10); - } - } - } - - // Argument JSON - json arguments = json::object(); - - // Helper to generate a partial argument JSON - const auto gen_partial_args = [&](auto set_partial_arg) { - gen_partial_json(set_partial_arg, arguments, builder, function_name); - }; - - // Parse all arg_key/arg_value pairs - while (auto tc = builder.try_find_literal(form.key_start)) { - if (!all_space(tc->prelude)) { - LOG_DBG("XML-Style tool call: Expected %s, but found %s, trying to match next pattern\n", - gbnf_format_literal(form.key_start).c_str(), - gbnf_format_literal(tc->prelude).c_str() - ); - builder.move_to(tc->groups[0].begin - tc->prelude.size()); - break; - } - if (tc->groups[0].end - tc->groups[0].begin != form.key_start.size()) { - auto tool_call_arg = arguments.dump(); - if (tool_call_arg.size() != 0 && tool_call_arg[tool_call_arg.size() - 1] == '}') { - tool_call_arg.resize(tool_call_arg.size() - 1); - } - builder.add_tool_call(function_name, "", tool_call_arg); - throw common_chat_msg_partial_exception("Partial literal: " + gbnf_format_literal(form.key_start)); - } - - // Parse arg_key - auto key_res = builder.try_find_literal(form.key_val_sep); - if (!key_res) { - gen_partial_args([&](auto &rest, auto &needle) {arguments[rest + needle] = "";}); - throw common_chat_msg_partial_exception("Expected " + gbnf_format_literal(form.key_val_sep) + " after " + gbnf_format_literal(form.key_start)); - } - if (key_res->groups[0].end - key_res->groups[0].begin != form.key_val_sep.size()) { - 
gen_partial_args([&](auto &, auto &needle) {arguments[key_res->prelude + needle] = "";}); - throw common_chat_msg_partial_exception("Partial literal: " + gbnf_format_literal(form.key_val_sep)); - } - auto &key = key_res->prelude; - recovery = false; - - // Parse arg_value - if (form.key_val_sep2) { - if (auto tc = builder.try_find_literal(*form.key_val_sep2)) { - if (!all_space(tc->prelude)) { - LOG_DBG("Failed to parse XML-Style tool call: Unexcepted %s between %s and %s\n", - gbnf_format_literal(tc->prelude).c_str(), - gbnf_format_literal(form.key_val_sep).c_str(), - gbnf_format_literal(*form.key_val_sep2).c_str() - ); - return return_error(builder, start_pos, false); - } - if (tc->groups[0].end - tc->groups[0].begin != form.key_val_sep2->size()) { - gen_partial_args([&](auto &, auto &needle) {arguments[key] = needle;}); - throw common_chat_msg_partial_exception("Partial literal: " + gbnf_format_literal(*form.key_val_sep2)); - } - } else { - gen_partial_args([&](auto &, auto &needle) {arguments[key] = needle;}); - throw common_chat_msg_partial_exception("Expected " + gbnf_format_literal(*form.key_val_sep2) + " after " + gbnf_format_literal(form.key_val_sep)); - } - } - auto val_start = builder.pos(); - - // Test if arg_val is a partial JSON - std::optional value_json = std::nullopt; - if (!form.raw_argval || !*form.raw_argval) { - try { value_json = builder.try_consume_json(); } - catch (const std::runtime_error&) { builder.move_to(val_start); } - // TODO: Delete this when json_partial adds top-level support for null/true/false - if (builder.pos() == val_start) { - const static std::regex number_regex(R"([0-9-][0-9]*(\.\d*)?([eE][+-]?\d*)?)"); - builder.consume_spaces(); - std::string_view sv = utf8_truncate_safe_view(builder.input()); - sv.remove_prefix(builder.pos()); - std::string rest = "a"; - if (sv.size() < 6) rest = sv; - if (string_starts_with("null", rest) || string_starts_with("true", rest) || string_starts_with("false", rest) || 
std::regex_match(sv.begin(), sv.end(), number_regex)) { - value_json = {123, {"123", "123"}}; - builder.consume_rest(); - } else { - builder.move_to(val_start); - } - } - } - - // If it is a JSON and followed by , parse as json - // cannot support streaming because it may be a plain text starting with JSON - if (value_json) { - auto json_end = builder.pos(); - builder.consume_spaces(); - if (builder.pos() == builder.input().size()) { - if (form.raw_argval && !*form.raw_argval && (value_json->json.is_string() || value_json->json.is_object() || value_json->json.is_array())) { - arguments[key] = value_json->json; - auto json_str = arguments.dump(); - if (!value_json->healing_marker.json_dump_marker.empty()) { - GGML_ASSERT(std::string::npos != json_str.rfind(value_json->healing_marker.json_dump_marker)); - json_str.resize(json_str.rfind(value_json->healing_marker.json_dump_marker)); - } else { - GGML_ASSERT(json_str.back() == '}'); - json_str.resize(json_str.size() - 1); - } - builder.add_tool_call(function_name, "", json_str); - } else { - gen_partial_args([&](auto &, auto &needle) {arguments[key] = needle;}); - } - LOG_DBG("Possible JSON arg_value: %s\n", value_json->json.dump().c_str()); - throw common_chat_msg_partial_exception("JSON arg_value detected. Waiting for more tokens for validations."); - } - builder.move_to(json_end); - auto [val_end_size, tc] = try_find_val_end(); - if (tc && all_space(tc->prelude) && value_json->healing_marker.marker.empty()) { - if (tc->groups[0].end - tc->groups[0].begin != val_end_size) { - gen_partial_args([&](auto &, auto &needle) {arguments[key] = needle;}); - LOG_DBG("Possible terminated JSON arg_value: %s\n", value_json->json.dump().c_str()); - throw common_chat_msg_partial_exception("Partial literal: " + gbnf_format_literal(form.val_end) + (form.last_val_end ? 
gbnf_format_literal(*form.last_val_end) : "")); - } else arguments[key] = value_json->json; - } else builder.move_to(val_start); - } - - // If not, parse as plain text - if (val_start == builder.pos()) { - if (auto [val_end_size, value_plain] = try_find_val_end(); value_plain) { - auto &value_str = value_plain->prelude; - if (form.trim_raw_argval) value_str = string_strip(value_str); - if (value_plain->groups[0].end - value_plain->groups[0].begin != val_end_size) { - gen_partial_args([&](auto &, auto &needle) {arguments[key] = value_str + needle;}); - throw common_chat_msg_partial_exception( - "Expected " + gbnf_format_literal(form.val_end) + - " after " + gbnf_format_literal(form.key_val_sep) + - (form.key_val_sep2 ? " " + gbnf_format_literal(*form.key_val_sep2) : "") - ); - } - arguments[key] = value_str; - } else { - if (form.trim_raw_argval) { - gen_partial_args([&](auto &rest, auto &needle) {arguments[key] = string_strip(rest) + needle;}); - } else { - gen_partial_args([&](auto &rest, auto &needle) {arguments[key] = rest + needle;}); - } - throw common_chat_msg_partial_exception( - "Expected " + gbnf_format_literal(form.val_end) + - " after " + gbnf_format_literal(form.key_val_sep) + - (form.key_val_sep2 ? 
" " + gbnf_format_literal(*form.key_val_sep2) : "") - ); - } - } - } - - // Consume closing tag - if (auto [tool_end_size, tc] = try_find_tool_end(); tc) { - if (!all_space(tc->prelude)) { - LOG_DBG("Failed to parse XML-Style tool call: Expected %s, but found %s\n", - gbnf_format_literal(form.tool_end).c_str(), - gbnf_format_literal(tc->prelude).c_str() - ); - return return_error(builder, start_pos, recovery); - } - if (tc->groups[0].end - tc->groups[0].begin == tool_end_size) { - // Add the parsed tool call - if (!builder.add_tool_call(function_name, "", arguments.dump())) { - throw common_chat_msg_partial_exception("Failed to add XML-Style tool call"); - } - recovery = false; - continue; - } - } - - auto tool_call_arg = arguments.dump(); - if (tool_call_arg.size() != 0 && tool_call_arg[tool_call_arg.size() - 1] == '}') { - tool_call_arg.resize(tool_call_arg.size() - 1); - } - builder.add_tool_call(function_name, "", tool_call_arg); - throw common_chat_msg_partial_exception("Expected " + gbnf_format_literal(form.tool_end) + " after " + gbnf_format_literal(form.val_end)); - } - if (auto tc = builder.try_find_literal(form.scope_end)) { - if (!all_space(tc->prelude)) { - LOG_DBG("Failed to parse XML-Style tool call: Expected %s, but found %s\n", - gbnf_format_literal(form.scope_end).c_str(), - gbnf_format_literal(tc->prelude).c_str() - ); - return return_error(builder, start_pos, recovery); - } - } else { - if (all_space(form.scope_end)) return true; - builder.consume_spaces(); - if (builder.pos() == builder.input().size()) - throw common_chat_msg_partial_exception("incomplete tool calls"); - LOG_DBG("Failed to parse XML-Style tool call: Expected %s, but found %s\n", - gbnf_format_literal(form.scope_end).c_str(), - gbnf_format_literal(builder.consume_rest()).c_str() - ); - return return_error(builder, start_pos, recovery); - } - - return true; -} - -/** - * Parse XML-Style tool call for given xml_tool_call_format. 
Return false for invalid syntax and get the position untouched. - * May cause std::runtime_error if there is invalid syntax because partial valid tool call is already sent out to client. - * form.scope_start, form.tool_sep and form.scope_end can be empty. - */ -bool common_chat_msg_parser::try_consume_xml_tool_calls(const struct xml_tool_call_format & form) { - auto pos = pos_; - auto tsize = result_.tool_calls.size(); - try { return parse_xml_tool_calls(*this, form); } - catch (const xml_toolcall_syntax_exception&) {} - move_to(pos); - result_.tool_calls.resize(tsize); - return false; -} - -/** - * Parse content uses reasoning and XML-Style tool call - * TODO: Note that form.allow_toolcall_in_think is not tested yet. If anyone confirms it works, this comment can be removed. - */ -inline void parse_msg_with_xml_tool_calls(common_chat_msg_parser & builder, const struct xml_tool_call_format & form, const std::string & start_think = "", const std::string & end_think = "") { - constexpr auto rstrip = [](std::string &s) { - s.resize(std::distance(s.begin(), std::find_if(s.rbegin(), s.rend(), [](unsigned char ch) { return !std::isspace(ch); }).base())); - }; - // Erase substring from l to r, along with additional spaces nearby - constexpr auto erase_spaces = [](auto &str, size_t l, size_t r) { - while (/* l > -1 && */ --l < str.size() && std::isspace(static_cast(str[l]))); - ++l; - while (++r < str.size() && std::isspace(static_cast(str[r]))); - if (l < r) str[l] = '\n'; - if (l + 1 < r) str[l + 1] = '\n'; - if (l != 0) l += 2; - str.erase(l, r - l); - return l; - }; - constexpr auto trim_suffix = [](std::string &content, std::initializer_list list) { - auto best_match = content.size(); - for (auto pattern: list) { - if (pattern.size() == 0) continue; - for (auto match_idx = content.size() - std::min(pattern.size(), content.size()); content.size() > match_idx; match_idx++) { - auto match_len = content.size() - match_idx; - if (content.compare(match_idx, match_len, 
pattern.data(), match_len) == 0 && best_match > match_idx) { - best_match = match_idx; - } - } - } - if (content.size() > best_match) { - content.erase(best_match); - } - }; - const auto trim_potential_partial_word = [&start_think, &end_think, &form, trim_suffix](std::string &content) { - return trim_suffix(content, { - start_think, end_think, form.scope_start, form.tool_start, form.tool_sep, form.key_start, - form.key_val_sep, form.key_val_sep2 ? form.key_val_sep2->c_str() : "", - form.val_end, form.last_val_end ? form.last_val_end->c_str() : "", - form.tool_end, form.last_tool_end ? form.last_tool_end->c_str() : "", - form.scope_end - }); - }; - - - // Trim leading spaces without affecting keyword matching - static const common_regex spaces_regex("\\s*"); - { - auto tc = builder.consume_regex(spaces_regex); - auto spaces = builder.str(tc.groups[0]); - auto s1 = spaces.size(); - trim_potential_partial_word(spaces); - auto s2 = spaces.size(); - builder.move_to(builder.pos() - (s1 - s2)); - } - - // Parse content - bool reasoning_unclosed = builder.syntax().thinking_forced_open; - std::string unclosed_reasoning_content(""); - for (;;) { - auto tc = try_find_2_literal_splited_by_spaces(builder, form.scope_start, form.tool_start); - std::string content; - std::string tool_call_start; - - if (tc) { - content = std::move(tc->prelude); - tool_call_start = builder.str(tc->groups[0]); - LOG_DBG("Matched tool start: %s\n", gbnf_format_literal(tool_call_start).c_str()); - } else { - content = builder.consume_rest(); - utf8_truncate_safe_resize(content); - } - - // Handle unclosed think block - if (reasoning_unclosed) { - if (auto pos = content.find(end_think); pos == std::string::npos && builder.pos() != builder.input().size()) { - unclosed_reasoning_content += content; - if (!(form.allow_toolcall_in_think && tc)) { - unclosed_reasoning_content += tool_call_start; - continue; - } - } else { - reasoning_unclosed = false; - std::string reasoning_content; - if (pos == 
std::string::npos) { - reasoning_content = std::move(content); - } else { - reasoning_content = content.substr(0, pos); - content.erase(0, pos + end_think.size()); - } - if (builder.pos() == builder.input().size() && all_space(content)) { - rstrip(reasoning_content); - trim_potential_partial_word(reasoning_content); - rstrip(reasoning_content); - if (reasoning_content.empty()) { - rstrip(unclosed_reasoning_content); - trim_potential_partial_word(unclosed_reasoning_content); - rstrip(unclosed_reasoning_content); - if (unclosed_reasoning_content.empty()) continue; - } - } - if (builder.syntax().reasoning_format == COMMON_REASONING_FORMAT_NONE || builder.syntax().reasoning_in_content) { - builder.add_content(start_think); - builder.add_content(unclosed_reasoning_content); - builder.add_content(reasoning_content); - if (builder.pos() != builder.input().size() || !all_space(content)) - builder.add_content(end_think); - } else { - builder.add_reasoning_content(unclosed_reasoning_content); - builder.add_reasoning_content(reasoning_content); - } - unclosed_reasoning_content.clear(); - } - } - - // Handle multiple think block - bool toolcall_in_think = false; - for (auto think_start = content.find(start_think); think_start != std::string::npos; think_start = content.find(start_think, think_start)) { - if (auto think_end = content.find(end_think, think_start + start_think.size()); think_end != std::string::npos) { - if (builder.syntax().reasoning_format != COMMON_REASONING_FORMAT_NONE && !builder.syntax().reasoning_in_content) { - auto reasoning_content = content.substr(think_start + start_think.size(), think_end - think_start - start_think.size()); - builder.add_reasoning_content(reasoning_content); - think_start = erase_spaces(content, think_start, think_end + end_think.size() - 1); - } else { - think_start = think_end + end_think.size() - 1; - } - } else { - // This start is in thinking block, skip this tool call - // This start is in thinking block - if 
(form.allow_toolcall_in_think) { - unclosed_reasoning_content = content.substr(think_start + start_think.size()); - } else { - unclosed_reasoning_content = content.substr(think_start + start_think.size()) + tool_call_start; - } - reasoning_unclosed = true; - content.resize(think_start); - toolcall_in_think = true; - } - } - - if (builder.syntax().reasoning_format != COMMON_REASONING_FORMAT_NONE && !builder.syntax().reasoning_in_content) { - rstrip(content); - // Handle unclosed token from content: delete all token - if (auto pos = content.rfind(end_think); pos != std::string::npos) { - while (pos != std::string::npos) { - pos = erase_spaces(content, pos, pos + end_think.size() - 1); - pos = content.rfind(end_think, pos); - } - } - // Strip if needed - if (content.size() > 0 && std::isspace(static_cast(content[0]))) { - content = string_strip(content); - } - } - - // remove potential partial suffix - if (builder.pos() == builder.input().size()) { - if (unclosed_reasoning_content.empty()) { - rstrip(content); - trim_potential_partial_word(content); - rstrip(content); - } else { - rstrip(unclosed_reasoning_content); - trim_potential_partial_word(unclosed_reasoning_content); - rstrip(unclosed_reasoning_content); - } - } - - // consume unclosed_reasoning_content if allow_toolcall_in_think is set - if (form.allow_toolcall_in_think && !unclosed_reasoning_content.empty()) { - if (builder.syntax().reasoning_format != COMMON_REASONING_FORMAT_NONE && !builder.syntax().reasoning_in_content) { - builder.add_reasoning_content(unclosed_reasoning_content); - } else { - if (content.empty()) { - content = start_think + unclosed_reasoning_content; - } else { - content += "\n\n" + start_think; - content += unclosed_reasoning_content; - } - } - unclosed_reasoning_content.clear(); - } - - // Add content - if (!content.empty()) { - // If there are multiple content blocks - if (builder.syntax().reasoning_format != COMMON_REASONING_FORMAT_NONE && !builder.syntax().reasoning_in_content && 
builder.result().content.size() != 0) { - builder.add_content("\n\n"); - } - builder.add_content(content); - } - - // This start is in thinking block and toolcall_in_think not set, skip this tool call - if (toolcall_in_think && !form.allow_toolcall_in_think) { - continue; - } - - // There is no tool call and all content is parsed - if (!tc) { - GGML_ASSERT(builder.pos() == builder.input().size()); - GGML_ASSERT(unclosed_reasoning_content.empty()); - if (!form.allow_toolcall_in_think) GGML_ASSERT(!reasoning_unclosed); - break; - } - - builder.move_to(tc->groups[0].begin); - if (builder.try_consume_xml_tool_calls(form)) { - auto end_of_tool = builder.pos(); - builder.consume_spaces(); - if (builder.pos() != builder.input().size()) { - builder.move_to(end_of_tool); - if (!builder.result().content.empty()) { - builder.add_content("\n\n"); - } - } - } else { - static const common_regex next_char_regex("."); - auto c = builder.str(builder.consume_regex(next_char_regex).groups[0]); - rstrip(c); - builder.add_content(c); - } - } -} - -/** - * Parse content uses reasoning and XML-Style tool call - */ -void common_chat_msg_parser::consume_reasoning_with_xml_tool_calls(const struct xml_tool_call_format & form, const std::string & start_think, const std::string & end_think) { - parse_msg_with_xml_tool_calls(*this, form, start_think, end_think); -} diff --git a/common/chat-parser-xml-toolcall.h b/common/chat-parser-xml-toolcall.h deleted file mode 100644 index b309fb6670..0000000000 --- a/common/chat-parser-xml-toolcall.h +++ /dev/null @@ -1,45 +0,0 @@ -#pragma once - -#include "chat.h" - -#include - -#include -#include -#include - - -// Sample config: -// MiniMax-M2 (left): \n\nvalue\n...\n... 
-// GLM 4.5 (right): function_name\nkey\nvalue\n -struct xml_tool_call_format { - std::string scope_start; // \n // \n // can be empty - std::string tool_start; // - std::string tool_sep; // \">\n // \n // can be empty only for parse_xml_tool_calls - std::string key_start; // - std::string key_val_sep; // \"> // \n - std::string val_end; // \n // \n - std::string tool_end; // \n // \n - std::string scope_end; // // // can be empty - // Set this if there can be dynamic spaces inside key_val_sep. - // e.g. key_val_sep= key_val_sep2= for GLM4.5 - std::optional key_val_sep2 = std::nullopt; - // Set true if argval should only be raw string. e.g. Hello "world" hi - // Set false if argval should only be json string. e.g. "Hello \"world\" hi" - // Defaults to std::nullopt, both will be allowed. - std::optional raw_argval = std::nullopt; - std::optional last_val_end = std::nullopt; - std::optional last_tool_end = std::nullopt; - bool trim_raw_argval = false; - bool allow_toolcall_in_think = false; -}; - -// make a GBNF that accept any strings except those containing any of the forbidden strings. -std::string make_gbnf_excluding(std::vector forbids); - -/** - * Build grammar for xml-style tool call - * form.scope_start and form.scope_end can be empty. - * Requires data.format for model-specific hacks. 
- */ -void build_grammar_xml_tool_call(common_chat_params & data, const nlohmann::ordered_json & tools, const struct xml_tool_call_format & form); diff --git a/common/chat-parser.cpp b/common/chat-parser.cpp deleted file mode 100644 index 29819e48d3..0000000000 --- a/common/chat-parser.cpp +++ /dev/null @@ -1,1669 +0,0 @@ -#include "chat-parser.h" -#include "chat-peg-parser.h" -#include "common.h" -#include "log.h" -#include "peg-parser.h" -#include "regex-partial.h" - -#include -#include -#include -#include -#include -#include -#include - -using json = nlohmann::ordered_json; - -static void parse_prefixed_json_tool_call_array(common_chat_msg_parser & builder, - const common_regex & prefix, - size_t rstrip_prefix = 0) { - static const std::vector> args_paths = { { "arguments" } }; - if (auto res = builder.try_find_regex(prefix)) { - builder.move_back(rstrip_prefix); - auto tool_calls = builder.consume_json_with_dumped_args(args_paths); - if (!builder.add_tool_calls(tool_calls.value) || tool_calls.is_partial) { - throw common_chat_msg_partial_exception("incomplete tool call array"); - } - } else { - builder.add_content(builder.consume_rest()); - } -} - -static std::string wrap_code_as_arguments(common_chat_msg_parser & builder, const std::string & code) { - std::string arguments; - if (builder.is_partial()) { - arguments = (json{ - { "code", code + builder.healing_marker() } - }) - .dump(); - auto idx = arguments.find(builder.healing_marker()); - if (idx != std::string::npos) { - arguments.resize(idx); - } - } else { - arguments = (json{ - { "code", code } - }) - .dump(); - } - return arguments; -} - -/** - * Takes a prefix regex that must have 1 group to capture the function name, a closing suffix, and expects json parameters in between. - * Aggregates the prefix, suffix and in-between text into the content. 
- */ -static void parse_json_tool_calls( - common_chat_msg_parser & builder, - const std::optional & block_open, - const std::optional & function_regex_start_only, - const std::optional & function_regex, - const common_regex & close_regex, - const std::optional & block_close, - bool allow_raw_python = false, - const std::function & get_function_name = - nullptr) { - auto parse_tool_calls = [&]() { - size_t from = std::string::npos; - auto first = true; - while (true) { - auto start_pos = builder.pos(); - auto res = function_regex_start_only && first ? builder.try_consume_regex(*function_regex_start_only) : - function_regex ? builder.try_find_regex(*function_regex, from) : - std::nullopt; - - if (res) { - std::string name; - if (get_function_name) { - name = get_function_name(*res); - } else { - GGML_ASSERT(res->groups.size() == 2); - name = builder.str(res->groups[1]); - } - first = false; - if (name.empty()) { - // get_function_name signalled us that we should skip this match and treat it as content. 
- from = res->groups[0].begin + 1; - continue; - } - from = std::string::npos; - - auto maybe_raw_python = name == "python" && allow_raw_python; - if (builder.input()[builder.pos()] == '{' || !maybe_raw_python) { - if (auto arguments = builder.try_consume_json_with_dumped_args({ {} })) { - if (!builder.add_tool_call(name, "", arguments->value) || arguments->is_partial) { - throw common_chat_msg_partial_exception("incomplete tool call"); - } - builder.consume_regex(close_regex); - } - continue; - } - if (maybe_raw_python) { - auto arguments = wrap_code_as_arguments(builder, builder.consume_rest()); - if (!builder.add_tool_call(name, "", arguments)) { - throw common_chat_msg_partial_exception("incomplete tool call"); - } - return; - } - throw common_chat_msg_partial_exception("incomplete tool call"); - } else { - builder.move_to(start_pos); - } - break; - } - if (block_close) { - builder.consume_regex(*block_close); - } - builder.consume_spaces(); - builder.add_content(builder.consume_rest()); - }; - if (block_open) { - if (auto res = builder.try_find_regex(*block_open)) { - parse_tool_calls(); - } else { - builder.add_content(builder.consume_rest()); - } - } else { - parse_tool_calls(); - } -} - -common_chat_msg_parser::common_chat_msg_parser(const std::string & input, bool is_partial, const common_chat_parser_params & syntax) - : input_(input), is_partial_(is_partial), syntax_(syntax) -{ - result_.role = "assistant"; - - while (true) { - std::string id = std::to_string(std::rand()); - if (input.find(id) == std::string::npos) { - healing_marker_ = id; - break; - } - } -} - -std::string common_chat_msg_parser::str(const common_string_range & rng) const { - GGML_ASSERT(rng.begin <= rng.end); - return input_.substr(rng.begin, rng.end - rng.begin); -} - -void common_chat_msg_parser::add_content(const std::string &content) { - result_.content += content; -} - -void common_chat_msg_parser::add_reasoning_content(const std::string &reasoning_content) { - 
result_.reasoning_content += reasoning_content; -} - -bool common_chat_msg_parser::add_tool_call(const std::string & name, const std::string & id, const std::string & arguments) { - if (name.empty()) { - return false; - } - - common_chat_tool_call tool_call; - tool_call.name = name; - tool_call.arguments = arguments; - tool_call.id = id; - - // LOG_DBG("Tool call arguments:\n\traw: %s\n\tresult: %s\n", arguments.c_str(), tool_call.arguments.c_str()); - result_.tool_calls.emplace_back(tool_call); - - return true; -} -bool common_chat_msg_parser::add_tool_call(const json & tool_call) { - std::string name = tool_call.contains("name") ? tool_call.at("name") : ""; - std::string id = tool_call.contains("id") ? tool_call.at("id") : ""; - std::string arguments = ""; - if (tool_call.contains("arguments")) { - if (tool_call.at("arguments").is_object()) { - arguments = tool_call.at("arguments").dump(); - } else { - arguments = tool_call.at("arguments"); - } - } - - return add_tool_call(name, id, arguments); -} - -bool common_chat_msg_parser::add_tool_calls(const json & arr) { - for (const auto & item : arr) { - if (!add_tool_call(item)) { - return false; - } - } - return true; -} - -bool common_chat_msg_parser::add_tool_call_short_form(const json & tool_call) { - if (!tool_call.is_object() || tool_call.size() != 1) { - return false; - } - - // Get the tool name (the single key in the object) - auto it = tool_call.begin(); - std::string name = it.key(); - - if (name.empty()) { - return false; - } - - // Get the arguments (the nested object) - const json & args_json = it.value(); - std::string arguments = ""; - - if (args_json.is_object()) { - arguments = args_json.dump(); - } else if (args_json.is_string()) { - arguments = args_json; - } else if (!args_json.is_null()) { - // For other types, convert to string representation - arguments = args_json.dump(); - } - - return add_tool_call(name, "", arguments); -} -void common_chat_msg_parser::finish() { - if (!is_partial_ && pos_ 
!= input_.size()) { - throw std::runtime_error("Unexpected content at end of input");// + input_.substr(pos_)); - } -} - -bool common_chat_msg_parser::consume_spaces() { - const auto length = input_.size(); - auto consumed = false; - while (pos_ < length && std::isspace(input_[pos_])) { - ++pos_; - consumed = true; - } - return consumed; -} - -bool common_chat_msg_parser::try_consume_literal(const std::string & literal) { - auto pos = pos_; - for (auto i = 0u; i < literal.size(); ++i) { - if (pos >= input_.size()) { - return false; - } - if (input_[pos] != literal[i]) { - return false; - } - ++pos; - } - pos_ = pos; - return true; -} - -std::optional common_chat_msg_parser::try_find_literal(const std::string & literal) { - auto idx = input_.find(literal, pos_); - if (idx != std::string::npos) { - find_regex_result res; - res.prelude = input_.substr(pos_, idx - pos_); - auto end = idx + literal.size(); - res.groups.emplace_back(common_string_range{idx, end}); - move_to(end); - return res; - } - if (is_partial_) { - idx = string_find_partial_stop(input_, literal); - if (idx != std::string::npos && idx >= pos_) { - find_regex_result res; - res.prelude = input_.substr(pos_, idx - pos_); - auto end = input_.size(); - res.groups.emplace_back(common_string_range{idx, end}); - move_to(end); - return res; - } - } - return std::nullopt; -} - -void common_chat_msg_parser::consume_literal(const std::string & literal) { - if (!try_consume_literal(literal)) { - throw common_chat_msg_partial_exception(literal); - } -} - -bool common_chat_msg_parser::try_parse_reasoning(const std::string & start_think, const std::string & end_think) { - std::string pending_reasoning_prefix; - - if (syntax_.reasoning_format == COMMON_REASONING_FORMAT_NONE) { - return false; - } - - auto set_reasoning_prefix = [&](size_t prefix_pos) { - if (!syntax_.thinking_forced_open || syntax_.reasoning_in_content) { - return; - } - if (prefix_pos + start_think.size() > input_.size()) { - 
pending_reasoning_prefix.clear(); - return; - } - // Capture the exact literal that opened the reasoning section so we can - // surface it back to callers. This ensures formats that force the - // reasoning tag open (e.g. DeepSeek R1) retain their original prefix - // instead of dropping it during parsing. - pending_reasoning_prefix = input_.substr(prefix_pos, start_think.size()); - }; - - auto handle_reasoning = [&](const std::string & reasoning, bool closed) { - auto stripped_reasoning = string_strip(reasoning); - if (stripped_reasoning.empty()) { - return; - } - if (syntax_.reasoning_in_content) { - add_content(syntax_.reasoning_format == COMMON_REASONING_FORMAT_DEEPSEEK ? "" : start_think); - add_content(stripped_reasoning); - if (closed) { - add_content(syntax_.reasoning_format == COMMON_REASONING_FORMAT_DEEPSEEK ? "" : end_think); - } - } else { - if (!pending_reasoning_prefix.empty()) { - add_reasoning_content(pending_reasoning_prefix); - pending_reasoning_prefix.clear(); - } - add_reasoning_content(stripped_reasoning); - } - }; - - const size_t saved_pos = pos_; - const size_t saved_content_size = result_.content.size(); - const size_t saved_reasoning_size = result_.reasoning_content.size(); - - auto restore_state = [&]() { - move_to(saved_pos); - result_.content.resize(saved_content_size); - result_.reasoning_content.resize(saved_reasoning_size); - }; - - // Allow leading whitespace to be preserved as content when reasoning is present at the start - size_t cursor = pos_; - size_t whitespace_end = cursor; - while (whitespace_end < input_.size() && std::isspace(static_cast(input_[whitespace_end]))) { - ++whitespace_end; - } - - if (whitespace_end >= input_.size()) { - restore_state(); - if (syntax_.thinking_forced_open) { - auto rest = input_.substr(saved_pos); - if (!rest.empty()) { - handle_reasoning(rest, /* closed */ !is_partial()); - } - move_to(input_.size()); - return true; - } - return false; - } - - cursor = whitespace_end; - const size_t remaining 
= input_.size() - cursor; - const size_t start_prefix = std::min(start_think.size(), remaining); - const bool has_start_tag = input_.compare(cursor, start_prefix, start_think, 0, start_prefix) == 0; - - if (has_start_tag && start_prefix < start_think.size()) { - move_to(input_.size()); - return true; - } - - if (has_start_tag) { - if (whitespace_end > pos_) { - add_content(input_.substr(pos_, whitespace_end - pos_)); - } - set_reasoning_prefix(cursor); - cursor += start_think.size(); - } else if (syntax_.thinking_forced_open) { - cursor = whitespace_end; - } else { - restore_state(); - return false; - } - while (true) { - if (cursor >= input_.size()) { - move_to(input_.size()); - return true; - } - - size_t end_pos = input_.find(end_think, cursor); - if (end_pos == std::string::npos) { - std::string_view remaining_view(input_.data() + cursor, input_.size() - cursor); - size_t partial_off = string_find_partial_stop(remaining_view, end_think); - size_t reasoning_end = partial_off == std::string::npos ? 
input_.size() : cursor + partial_off; - if (reasoning_end > cursor) { - handle_reasoning(input_.substr(cursor, reasoning_end - cursor), /* closed */ partial_off == std::string::npos && !is_partial()); - } - move_to(input_.size()); - return true; - } - - if (end_pos > cursor) { - handle_reasoning(input_.substr(cursor, end_pos - cursor), /* closed */ true); - } else { - handle_reasoning("", /* closed */ true); - } - - cursor = end_pos + end_think.size(); - - while (cursor < input_.size() && std::isspace(static_cast(input_[cursor]))) { - ++cursor; - } - - const size_t next_remaining = input_.size() - cursor; - if (next_remaining == 0) { - move_to(cursor); - return true; - } - - const size_t next_prefix = std::min(start_think.size(), next_remaining); - if (input_.compare(cursor, next_prefix, start_think, 0, next_prefix) == 0) { - if (next_prefix < start_think.size()) { - move_to(input_.size()); - return true; - } - set_reasoning_prefix(cursor); - cursor += start_think.size(); - continue; - } - - move_to(cursor); - return true; - } -} - -std::string common_chat_msg_parser::consume_rest() { - auto rest = input_.substr(pos_); - pos_ = input_.size(); - return rest; -} - -// Tries to find the regex, consumes it (pos right after it) and gives the prelude (right before it) and the groups to the callback. -std::optional common_chat_msg_parser::try_find_regex(const common_regex & regex, size_t from, bool add_prelude_to_content) { - auto m = regex.search(input_, from == std::string::npos ? 
pos_ : from); - if (m.type == COMMON_REGEX_MATCH_TYPE_NONE) { - return std::nullopt; - } - auto prelude = input_.substr(pos_, m.groups[0].begin - pos_); - pos_ = m.groups[0].end; - - if (add_prelude_to_content) { - add_content(prelude); - } - if (m.type == COMMON_REGEX_MATCH_TYPE_PARTIAL) { - if (is_partial()) { - throw common_chat_msg_partial_exception(regex.str()); - } - return std::nullopt; - } - return find_regex_result{prelude, m.groups}; -} - -common_chat_msg_parser::find_regex_result common_chat_msg_parser::consume_regex(const common_regex & regex) { - if (auto result = try_consume_regex(regex)) { - return *result; - } - throw common_chat_msg_partial_exception(regex.str()); -} - -std::optional common_chat_msg_parser::try_consume_regex(const common_regex & regex) { - auto m = regex.search(input_, pos_); - if (m.type == COMMON_REGEX_MATCH_TYPE_NONE) { - return std::nullopt; - } - if (m.type == COMMON_REGEX_MATCH_TYPE_PARTIAL) { - if (is_partial()) { - throw common_chat_msg_partial_exception(regex.str()); - } - return std::nullopt; - } - if (m.groups[0].begin != pos_) { - // Didn't match at the current position. 
- return std::nullopt; - } - pos_ = m.groups[0].end; - - return find_regex_result { - /* .prelude = */ "", - m.groups, - }; -} - -std::optional common_chat_msg_parser::try_consume_json() { - auto it = input_.cbegin() + pos_; - const auto end = input_.cend(); - common_json result; - if (!common_json_parse(it, end, healing_marker_, result)) { - return std::nullopt; - } - pos_ = std::distance(input_.cbegin(), it); - if (result.healing_marker.marker.empty()) { - // No healing marker, just return the parsed json - return result; - } - if (!is_partial()) { - throw common_chat_msg_partial_exception("JSON"); - } - return result; -} - -common_json common_chat_msg_parser::consume_json() { - if (auto result = try_consume_json()) { - return *result; - } - throw common_chat_msg_partial_exception("JSON"); -} - -common_chat_msg_parser::consume_json_result common_chat_msg_parser::consume_json_with_dumped_args( - const std::vector> & args_paths, - const std::vector> & content_paths -) { - if (auto result = try_consume_json_with_dumped_args(args_paths, content_paths)) { - return *result; - } - throw common_chat_msg_partial_exception("JSON"); -} - -std::optional common_chat_msg_parser::try_consume_json_with_dumped_args( - const std::vector> & args_paths, - const std::vector> & content_paths -) { - auto partial = try_consume_json(); - if (!partial) { - return std::nullopt; - } - auto is_arguments_path = [&](const std::vector & path) { - return std::find(args_paths.begin(), args_paths.end(), path) != args_paths.end(); - }; - auto is_content_path = [&](const std::vector & path) { - return std::find(content_paths.begin(), content_paths.end(), path) != content_paths.end(); - }; - - if (partial->healing_marker.marker.empty()) { - if (args_paths.empty()) { - // No arguments to dump, and JSON was parsed fully. - return consume_json_result { - partial->json, - /* .is_partial = */ false, - }; - } - if (is_arguments_path({})) { - // Entire JSON is the arguments and was parsed fully. 
- return consume_json_result { - partial->json.dump(/* indent */ -1, /* indent_char */ ' ', /* ensure_ascii */ true), - /* .is_partial = */ false, - }; - } - } - - LOG_DBG("Parsed partial JSON: %s (json_healing_marker: %s)\n", partial->json.dump().c_str(), partial->healing_marker.json_dump_marker.c_str()); - - auto found_healing_marker = false; - std::vector path; - std::function remove_unsupported_healings_and_dump_args = [&](const json & j) -> json { - if (is_arguments_path(path)) { - auto arguments = j.dump(/* indent */ -1, /* indent_char */ ' ', /* ensure_ascii */ true); - if (is_partial() && !partial->healing_marker.marker.empty()) { - auto idx = arguments.find(partial->healing_marker.json_dump_marker); - if (idx != std::string::npos) { - arguments.resize(idx); - found_healing_marker = true; - } - if (arguments == "\"") { - // This happens because of completing `:"$magic` after `"arguments"` - arguments = ""; - } - } - return arguments; - } - if (is_content_path(path)) { - if (!j.is_string()) { - throw std::runtime_error("Content path must be a string"); - } - std::string str = j; - auto idx = str.find(partial->healing_marker.marker); // not using json_dump_marker as we're inside a string - if (idx != std::string::npos) { - str.resize(idx); - found_healing_marker = true; - } - return str; - } - if (j.is_object()) { - auto obj = json::object(); - for (const auto & p : j.items()) { - const auto & key = p.key(); - const auto & value = p.value(); - const std::string key_str = key; // NOLINT - auto idx = key_str.find(healing_marker_); - if (idx != std::string::npos) { - found_healing_marker = true; - break; - } - path.push_back(key_str); - if (value.is_string()) { - const std::string value_str = value; - if (value_str.find(healing_marker_) != std::string::npos) { - found_healing_marker = true; - if (is_content_path(path)) { - if (partial->healing_marker.marker == partial->healing_marker.json_dump_marker) { - // The healing occurred inside the string: good. 
Otherwise we just ditch the entire key/value pair. - obj[key] = remove_unsupported_healings_and_dump_args(value); - } - } - break; - } - obj[key] = value; - } else { - obj[key] = remove_unsupported_healings_and_dump_args(value); - } - path.pop_back(); - } - return obj; - } - if (j.is_array()) { - auto arr = json::array(); - for (const auto & value : j) { - if (value.is_string()) { - std::string str = value; - auto idx = str.find(healing_marker_); - if (idx != std::string::npos) { - // Don't heal array values that aren't in the arguments. - found_healing_marker = true; - break; - } - } - arr.push_back(remove_unsupported_healings_and_dump_args(value)); - } - return arr; - } - return j; - }; - - auto cleaned = remove_unsupported_healings_and_dump_args(partial->json); - LOG_DBG("Cleaned up JSON %s to %s (json_healing_marker : '%s')\n", partial->json.dump().c_str(), cleaned.dump().c_str(), partial->healing_marker.json_dump_marker.c_str()); - return consume_json_result { - cleaned, - /* .is_partial = */ found_healing_marker, - }; -} - -void common_chat_msg_parser::clear_tools() { - result_.tool_calls.clear(); -} - -/** - * All common_chat_parse_* moved from chat.cpp to chat-parser.cpp below - * to reduce incremental compile time for parser changes. 
- */ -static void common_chat_parse_generic(common_chat_msg_parser & builder) { - if (!builder.syntax().parse_tool_calls) { - builder.add_content(builder.consume_rest()); - return; - } - static const std::vector> content_paths = { - {"response"}, - }; - static const std::vector> args_paths = { - {"tool_call", "arguments"}, - {"tool_calls", "arguments"}, - }; - auto data = builder.consume_json_with_dumped_args(args_paths, content_paths); - if (data.value.contains("tool_calls")) { - if (!builder.add_tool_calls(data.value.at("tool_calls")) || data.is_partial) { - throw common_chat_msg_partial_exception("incomplete tool calls"); - } - } else if (data.value.contains("tool_call")) { - if (!builder.add_tool_call(data.value.at("tool_call")) || data.is_partial) { - throw common_chat_msg_partial_exception("incomplete tool call"); - } - } else if (data.value.contains("response")) { - const auto & response = data.value.at("response"); - builder.add_content(response.is_string() ? response.template get() : response.dump(2)); - if (data.is_partial) { - throw common_chat_msg_partial_exception("incomplete response"); - } - } else { - throw common_chat_msg_partial_exception("Expected 'tool_call', 'tool_calls' or 'response' in JSON"); - } -} - -static void common_chat_parse_mistral_nemo(common_chat_msg_parser & builder) { - if (!builder.syntax().parse_tool_calls) { - builder.add_content(builder.consume_rest()); - return; - } - - static const common_regex prefix(regex_escape("[TOOL_CALLS]")); - parse_prefixed_json_tool_call_array(builder, prefix); -} - -static void common_chat_parse_magistral(common_chat_msg_parser & builder) { - builder.try_parse_reasoning("[THINK]", "[/THINK]"); - - if (!builder.syntax().parse_tool_calls) { - builder.add_content(builder.consume_rest()); - return; - } - - static const common_regex prefix(regex_escape("[TOOL_CALLS]")); - parse_prefixed_json_tool_call_array(builder, prefix); -} - -static void common_chat_parse_command_r7b(common_chat_msg_parser & 
builder) { - builder.try_parse_reasoning("<|START_THINKING|>", "<|END_THINKING|>"); - - static const common_regex start_action_regex("<\\|START_ACTION\\|>"); - static const common_regex end_action_regex("<\\|END_ACTION\\|>"); - static const common_regex start_response_regex("<\\|START_RESPONSE\\|>"); - static const common_regex end_response_regex("<\\|END_RESPONSE\\|>"); - - if (auto res = builder.try_find_regex(start_action_regex)) { - // If we didn't extract thoughts, prelude includes them. - auto tool_calls = builder.consume_json_with_dumped_args({{"parameters"}}); - for (const auto & tool_call : tool_calls.value) { - std::string name = tool_call.contains("tool_name") ? tool_call.at("tool_name") : ""; - std::string id = tool_call.contains("tool_call_id") ? tool_call.at("tool_call_id") : ""; - std::string arguments = tool_call.contains("parameters") ? tool_call.at("parameters") : ""; - if (!builder.add_tool_call(name, id, arguments) || tool_calls.is_partial) { - throw common_chat_msg_partial_exception("incomplete tool call"); - } - } - if (tool_calls.is_partial) { - throw common_chat_msg_partial_exception("incomplete tool call"); - } - builder.consume_regex(end_action_regex); - } else if (auto res = builder.try_find_regex(start_response_regex)) { - if (!builder.try_find_regex(end_response_regex)) { - builder.add_content(builder.consume_rest()); - throw common_chat_msg_partial_exception(end_response_regex.str()); - } - } else { - builder.add_content(builder.consume_rest()); - } -} - -static void common_chat_parse_llama_3_1(common_chat_msg_parser & builder, bool with_builtin_tools = false) { - builder.try_parse_reasoning("", ""); - - if (!builder.syntax().parse_tool_calls) { - builder.add_content(builder.consume_rest()); - return; - } - - static const common_regex function_regex( - "\\s*\\{\\s*(?:\"type\"\\s*:\\s*\"function\"\\s*,\\s*)?\"name\"\\s*:\\s*\"([^\"]+)\"\\s*,\\s*\"parameters\"\\s*: "); - static const common_regex close_regex("\\}\\s*"); - - static const 
common_regex function_name_regex("\\s*(\\w+)\\s*\\.\\s*call\\("); - static const common_regex arg_name_regex("\\s*(\\w+)\\s*=\\s*"); - - if (with_builtin_tools) { - static const common_regex builtin_call_regex("<\\|python_tag\\|>"); - if (auto res = builder.try_find_regex(builtin_call_regex)) { - auto fun_res = builder.consume_regex(function_name_regex); - auto function_name = builder.str(fun_res.groups[1]); - - common_healing_marker healing_marker; - json args = json::object(); - while (true) { - if (auto arg_res = builder.try_consume_regex(arg_name_regex)) { - auto arg_name = builder.str(arg_res->groups[1]); - auto partial = builder.consume_json(); - args[arg_name] = partial.json; - healing_marker.marker = partial.healing_marker.marker; - healing_marker.json_dump_marker = partial.healing_marker.json_dump_marker; - builder.consume_spaces(); - if (!builder.try_consume_literal(",")) { - break; - } - } else { - break; - } - } - builder.consume_literal(")"); - builder.consume_spaces(); - - auto arguments = args.dump(); - if (!builder.add_tool_call(function_name, "", arguments)) { - throw common_chat_msg_partial_exception("Incomplete tool call"); - } - return; - } - } - parse_json_tool_calls( - builder, - /* block_open= */ std::nullopt, - /* function_regex_start_only= */ function_regex, - /* function_regex= */ std::nullopt, - close_regex, - std::nullopt); - -} - -static void common_chat_parse_deepseek_r1(common_chat_msg_parser & builder) { - builder.try_parse_reasoning("", ""); - if (!builder.syntax().parse_tool_calls) { - builder.add_content(builder.consume_rest()); - return; - } - - static const common_regex tool_calls_begin("(?:<|tool▁calls▁begin|>|<|tool_calls_begin|>|<|tool calls begin|>|<|tool\\\\_calls\\\\_begin|>|<|tool▁calls|>)"); - static const common_regex tool_calls_end("<|tool▁calls▁end|>"); - static const common_regex function_regex("(?:<|tool▁call▁begin|>)?function<|tool▁sep|>([^\n]+)\n```json\n"); - static const common_regex 
close_regex("```[\\s\\r\\n]*<|tool▁call▁end|>"); - - parse_json_tool_calls( - builder, - /* block_open= */ tool_calls_begin, - /* function_regex_start_only= */ std::nullopt, - function_regex, - close_regex, - tool_calls_end); -} - -static void common_chat_parse_deepseek_v3_1_content(common_chat_msg_parser & builder) { - static const common_regex function_regex("(?:<|tool▁call▁begin|>)?([^\\n<]+)(?:<|tool▁sep|>)"); - - static const common_regex close_regex("(?:[\\s]*)?<|tool▁call▁end|>"); - static const common_regex tool_calls_begin("(?:<|tool▁calls▁begin|>|<|tool_calls_begin|>|<|tool calls begin|>|<|tool\\\\_calls\\\\_begin|>|<|tool▁calls|>)"); - static const common_regex tool_calls_end("<|tool▁calls▁end|>"); - - if (!builder.syntax().parse_tool_calls) { - LOG_DBG("%s: not parse_tool_calls\n", __func__); - builder.add_content(builder.consume_rest()); - return; - } - - LOG_DBG("%s: parse_tool_calls\n", __func__); - - parse_json_tool_calls( - builder, - /* block_open= */ tool_calls_begin, - /* function_regex_start_only= */ std::nullopt, - function_regex, - close_regex, - tool_calls_end); -} - -static void common_chat_parse_deepseek_v3_1(common_chat_msg_parser & builder) { - // DeepSeek V3.1 outputs reasoning content between "" and "" tags, followed by regular content - // First try to parse using the standard reasoning parsing method - LOG_DBG("%s: thinking_forced_open: %s\n", __func__, std::to_string(builder.syntax().thinking_forced_open).c_str()); - - auto start_pos = builder.pos(); - auto found_end_think = builder.try_find_literal(""); - builder.move_to(start_pos); - - if (builder.syntax().thinking_forced_open && !builder.is_partial() && !found_end_think) { - LOG_DBG("%s: no end_think, not partial, adding content\n", __func__); - common_chat_parse_deepseek_v3_1_content(builder); - } else if (builder.try_parse_reasoning("", "")) { - // If reasoning was parsed successfully, the remaining content is regular content - LOG_DBG("%s: parsed reasoning, adding content\n", 
__func__); - // <|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>NAME\n```json\nJSON\n```<|tool▁call▁end|><|tool▁calls▁end|> - common_chat_parse_deepseek_v3_1_content(builder); - } else { - if (builder.syntax().reasoning_format == COMMON_REASONING_FORMAT_NONE) { - LOG_DBG("%s: reasoning_format none, adding content\n", __func__); - common_chat_parse_deepseek_v3_1_content(builder); - return; - } - // If no reasoning tags found, check if we should treat everything as reasoning - if (builder.syntax().thinking_forced_open) { - // If thinking is forced open but no tags found, treat everything as reasoning - LOG_DBG("%s: thinking_forced_open, adding reasoning content\n", __func__); - builder.add_reasoning_content(builder.consume_rest()); - } else { - LOG_DBG("%s: no thinking_forced_open, adding content\n", __func__); - // <|tool▁call▁begin|>NAME<|tool▁sep|>JSON<|tool▁call▁end|> - common_chat_parse_deepseek_v3_1_content(builder); - } - } -} - -static void common_chat_parse_minimax_m2(common_chat_msg_parser & builder) { - static const xml_tool_call_format form { - /* form.scope_start = */ "", - /* form.tool_start = */ "", - /* form.key_start = */ "", - /* form.val_end = */ "", - /* form.tool_end = */ "", - /* form.scope_end = */ "", - }; - builder.consume_reasoning_with_xml_tool_calls(form, "", ""); -} - -static void common_chat_parse_qwen3_coder_xml(common_chat_msg_parser & builder) { - static const xml_tool_call_format form = ([]() { - xml_tool_call_format form {}; - form.scope_start = ""; - form.tool_start = "", ""); -} - -static void common_chat_parse_apriel_1_5(common_chat_msg_parser & builder) { - static const xml_tool_call_format form = ([]() { - xml_tool_call_format form {}; - form.scope_start = "["; - form.tool_start = "{\"name\": \""; - form.tool_sep = "\", \"arguments\": {"; - form.key_start = "\""; - form.key_val_sep = "\": "; - form.val_end = ", "; - form.tool_end = "}, "; - form.scope_end = "]"; - form.raw_argval = false; - form.last_val_end = ""; - 
form.last_tool_end = "}"; - return form; - })(); - builder.consume_reasoning_with_xml_tool_calls(form, "", ""); -} - -static void common_chat_parse_xiaomi_mimo(common_chat_msg_parser & builder) { - static const xml_tool_call_format form = ([]() { - xml_tool_call_format form {}; - form.scope_start = ""; - form.tool_start = "\n{\"name\": \""; - form.tool_sep = "\", \"arguments\": {"; - form.key_start = "\""; - form.key_val_sep = "\": "; - form.val_end = ", "; - form.tool_end = "}\n"; - form.scope_end = ""; - form.raw_argval = false; - form.last_val_end = ""; - return form; - })(); - builder.consume_reasoning_with_xml_tool_calls(form); -} - -static void common_chat_parse_gpt_oss(common_chat_msg_parser & builder) { - static const std::string constraint = "(?: (<\\|constrain\\|>)?([a-zA-Z0-9_-]+))"; - static const std::string recipient("(?: to=functions\\.([^<\\s]+))"); - - static const common_regex start_regex("<\\|start\\|>assistant"); - static const common_regex analysis_regex("<\\|channel\\|>analysis"); - static const common_regex final_regex("<\\|channel\\|>final" + constraint + "?"); - static const common_regex preamble_regex("<\\|channel\\|>commentary"); - static const common_regex tool_call1_regex(recipient + "<\\|channel\\|>(analysis|commentary)" + constraint + "?"); - static const common_regex tool_call2_regex("<\\|channel\\|>(analysis|commentary)" + recipient + constraint + "?"); - - auto consume_end = [&](bool include_end = false) { - if (auto res = builder.try_find_literal("<|end|>")) { - return res->prelude + (include_end ? 
builder.str(res->groups[0]) : ""); - } - return builder.consume_rest(); - }; - - auto handle_tool_call = [&](const std::string & name) { - if (auto args = builder.try_consume_json_with_dumped_args({{}})) { - if (builder.syntax().parse_tool_calls) { - if (!builder.add_tool_call(name, "", args->value) || args->is_partial) { - throw common_chat_msg_partial_exception("incomplete tool call"); - } - } else if (args->is_partial) { - throw common_chat_msg_partial_exception("incomplete tool call"); - } - } - }; - - auto regex_match = [](const common_regex & regex, const std::string & input) -> std::optional { - auto match = regex.search(input, 0, true); - if (match.type == COMMON_REGEX_MATCH_TYPE_FULL) { - return match; - } - return std::nullopt; - }; - - do { - auto header_start_pos = builder.pos(); - auto content_start = builder.try_find_literal("<|message|>"); - if (!content_start) { - throw common_chat_msg_partial_exception("incomplete header"); - } - - auto header = content_start->prelude; - - if (auto match = regex_match(tool_call1_regex, header)) { - auto group = match->groups[1]; - auto name = header.substr(group.begin, group.end - group.begin); - handle_tool_call(name); - continue; - } - - if (auto match = regex_match(tool_call2_regex, header)) { - auto group = match->groups[2]; - auto name = header.substr(group.begin, group.end - group.begin); - handle_tool_call(name); - continue; - } - - if (regex_match(analysis_regex, header)) { - builder.move_to(header_start_pos); - if (builder.syntax().reasoning_format == COMMON_REASONING_FORMAT_NONE || builder.syntax().reasoning_in_content) { - builder.add_content(consume_end(true)); - } else { - builder.try_parse_reasoning("<|channel|>analysis<|message|>", "<|end|>"); - } - continue; - } - - if(regex_match(final_regex, header) || regex_match(preamble_regex, header)) { - builder.add_content(consume_end()); - continue; - } - - // Possibly a malformed message, attempt to recover by rolling - // back to pick up the next 
<|start|> - LOG_DBG("%s: unknown header from message: %s\n", __func__, header.c_str()); - builder.move_to(header_start_pos); - } while (builder.try_find_regex(start_regex, std::string::npos, false)); - - auto remaining = builder.consume_rest(); - if (!remaining.empty()) { - LOG_DBG("%s: content after last message: %s\n", __func__, remaining.c_str()); - } -} - -static void common_chat_parse_glm_4_5(common_chat_msg_parser & builder) { - static const xml_tool_call_format form { - /* form.scope_start = */ "", - /* form.tool_start = */ "", - /* form.tool_sep = */ "", - /* form.key_start = */ "", - /* form.key_val_sep = */ "", - /* form.val_end = */ "", - /* form.tool_end = */ "", - /* form.scope_end = */ "", - /* form.key_val_sep2 = */ "", - }; - builder.consume_reasoning_with_xml_tool_calls(form, "", ""); -} - -static void common_chat_parse_firefunction_v2(common_chat_msg_parser & builder) { - if (!builder.syntax().parse_tool_calls) { - builder.add_content(builder.consume_rest()); - return; - } - static const common_regex prefix(regex_escape(" functools[")); - parse_prefixed_json_tool_call_array(builder, prefix, /* rstrip_prefix= */ 1); -} - -static void common_chat_parse_functionary_v3_2(common_chat_msg_parser & builder) { - static const common_regex function_regex_start_only(R"((\w+\n\{|python\n|all\n))"); - static const common_regex function_regex(R"(>>>(\w+\n\{|python\n|all\n))"); - static const common_regex close_regex(R"(\s*)"); - - parse_json_tool_calls( - builder, - std::nullopt, - function_regex_start_only, - function_regex, - close_regex, - std::nullopt, - /* allow_raw_python= */ true, - /* get_function_name= */ [&](const auto & res) -> std::string { - auto at_start = res.groups[0].begin == 0; - auto name = builder.str(res.groups[1]); - if (!name.empty() && name.back() == '{') { - // Unconsume the opening brace '{' to ensure the JSON parsing goes well. 
- builder.move_back(1); - } - auto idx = name.find_last_not_of("\n{"); - name = name.substr(0, idx + 1); - if (at_start && name == "all") { - return ""; - } - return name; - }); -} - -static void common_chat_parse_functionary_v3_1_llama_3_1(common_chat_msg_parser & builder) { - if (!builder.syntax().parse_tool_calls) { - builder.add_content(builder.consume_rest()); - return; - } - // This version of Functionary still supports the llama 3.1 tool call format for the python tool. - static const common_regex python_tag_regex(regex_escape("<|python_tag|>")); - - static const common_regex function_regex(R"()"); - static const common_regex close_regex(R"()"); - - parse_json_tool_calls( - builder, - /* block_open= */ std::nullopt, - /* function_regex_start_only= */ std::nullopt, - function_regex, - close_regex, - std::nullopt); - - if (auto res = builder.try_find_regex(python_tag_regex)) { - auto arguments = wrap_code_as_arguments(builder, builder.consume_rest()); - builder.add_tool_call("python", "", arguments); - return; - } -} - -static void common_chat_parse_hermes_2_pro(common_chat_msg_parser & builder) { - builder.try_parse_reasoning("", ""); - if (!builder.syntax().parse_tool_calls) { - builder.add_content(builder.consume_rest()); - return; - } - - static const common_regex open_regex( - "(?:" - "(```(?:xml|json)?\\n\\s*)?" // match 1 (block_start) - "(" // match 2 (open_tag) - "" - "|" - "|" - "|" - "|" - "|" - "|" - "|" - ")?" - "(\\s*\\{\\s*\"name\")" // match 3 (named tool call) - ")" - "|]+)>" // match 4 (function name) - "|" // match 5 (function name again) - ); - - while (auto res = builder.try_find_regex(open_regex)) { - const auto & block_start = res->groups[1]; - std::string block_end = block_start.empty() ? "" : "```"; - - const auto & open_tag = res->groups[2]; - std::string close_tag; - - if (!res->groups[3].empty()) { - builder.move_to(res->groups[3].begin); - close_tag = open_tag.empty() ? 
"" : "value) || tool_call->is_partial) { - throw common_chat_msg_partial_exception("incomplete tool call"); - } - builder.consume_spaces(); - builder.consume_literal(close_tag); - builder.consume_spaces(); - if (!block_end.empty()) { - builder.consume_literal(block_end); - builder.consume_spaces(); - } - } else { - throw common_chat_msg_partial_exception("failed to parse tool call"); - } - } else { - auto function_name = builder.str(res->groups[4]); - if (function_name.empty()) { - function_name = builder.str(res->groups[5]); - } - GGML_ASSERT(!function_name.empty()); - - close_tag = ""; - - if (auto arguments = builder.try_consume_json_with_dumped_args({{}})) { - if (!builder.add_tool_call(function_name, "", arguments->value) || arguments->is_partial) { - throw common_chat_msg_partial_exception("incomplete tool call"); - } - builder.consume_spaces(); - builder.consume_literal(close_tag); - builder.consume_spaces(); - if (!block_end.empty()) { - builder.consume_literal(block_end); - builder.consume_spaces(); - } - } - } - } - - builder.add_content(builder.consume_rest()); -} - -static void common_chat_parse_granite(common_chat_msg_parser & builder) { - // Parse thinking tags - static const common_regex start_think_regex(regex_escape("")); - static const common_regex end_think_regex(regex_escape("")); - // Granite models output partial tokens such as "<" and "groups[0].begin); - builder.try_find_regex(end_think_regex, std::string::npos, false); - // Restore position for try_parse_reasoning() - builder.move_to(res->groups[0].begin); - } - builder.try_parse_reasoning("", ""); - - // Parse response tags - static const common_regex start_response_regex(regex_escape("")); - static const common_regex end_response_regex(regex_escape("")); - // Granite models output partial tokens such as "<" and "")); - if (auto res = builder.try_find_regex(tool_call_regex)) { - builder.move_to(res->groups[0].end); - - // Expect JSON array of tool calls - if (auto tool_call = 
builder.try_consume_json_with_dumped_args({{{"arguments"}}})) { - if (!builder.add_tool_calls(tool_call->value) || tool_call->is_partial) { - throw common_chat_msg_partial_exception("incomplete tool call"); - } - } - } else { - builder.add_content(builder.consume_rest()); - } -} - -static void common_chat_parse_nemotron_v2(common_chat_msg_parser & builder) { - // Parse thinking tags - builder.try_parse_reasoning("", ""); - if (!builder.syntax().parse_tool_calls) { - builder.add_content(builder.consume_rest()); - return; - } - - // Look for tool calls - static const common_regex tool_call_regex(regex_escape("")); - if (auto res = builder.try_find_regex(tool_call_regex)) { - builder.move_to(res->groups[0].end); - - // Expect JSON array of tool calls - auto tool_calls_data = builder.consume_json(); - if (tool_calls_data.json.is_array()) { - if (!builder.try_consume_literal("")) { - throw common_chat_msg_partial_exception("Incomplete tool call"); - } - builder.add_tool_calls(tool_calls_data.json); - } else { - throw common_chat_msg_partial_exception("Incomplete tool call"); - } - } - builder.add_content(builder.consume_rest()); -} - -static void common_chat_parse_apertus(common_chat_msg_parser & builder) { - // Parse thinking tags - builder.try_parse_reasoning("<|inner_prefix|>", "<|inner_suffix|>"); - if (!builder.syntax().parse_tool_calls) { - builder.add_content(builder.consume_rest()); - return; - } - - // Look for tool calls - static const common_regex tool_call_regex(regex_escape("<|tools_prefix|>")); - if (auto res = builder.try_find_regex(tool_call_regex)) { - builder.move_to(res->groups[0].end); - - auto tool_calls_data = builder.consume_json(); - if (tool_calls_data.json.is_array()) { - builder.consume_spaces(); - if (!builder.try_consume_literal("<|tools_suffix|>")) { - throw common_chat_msg_partial_exception("Incomplete tool call"); - } - for (const auto & value : tool_calls_data.json) { - if (value.is_object()) { - builder.add_tool_call_short_form(value); 
- } - } - } else { - throw common_chat_msg_partial_exception("Incomplete tool call"); - } - } - builder.add_content(builder.consume_rest()); -} - - -static void common_chat_parse_lfm2(common_chat_msg_parser & builder) { - if (!builder.syntax().parse_tool_calls) { - builder.add_content(builder.consume_rest()); - return; - } - - // LFM2 format: <|tool_call_start|>[{"name": "get_current_time", "arguments": {"location": "Paris"}}]<|tool_call_end|> - static const common_regex tool_call_start_regex(regex_escape("<|tool_call_start|>")); - static const common_regex tool_call_end_regex(regex_escape("<|tool_call_end|>")); - - // Loop through all tool calls - while (auto res = builder.try_find_regex(tool_call_start_regex, std::string::npos, /* add_prelude_to_content= */ true)) { - builder.move_to(res->groups[0].end); - - // Parse JSON array format: [{"name": "...", "arguments": {...}}] - auto tool_calls_data = builder.consume_json(); - - // Consume end marker - builder.consume_spaces(); - if (!builder.try_consume_regex(tool_call_end_regex)) { - throw common_chat_msg_partial_exception("Expected <|tool_call_end|>"); - } - - // Process each tool call in the array - if (tool_calls_data.json.is_array()) { - for (const auto & tool_call : tool_calls_data.json) { - if (!tool_call.is_object()) { - throw common_chat_msg_partial_exception("Tool call must be an object"); - } - - if (!tool_call.contains("name")) { - throw common_chat_msg_partial_exception("Tool call missing 'name' field"); - } - - std::string function_name = tool_call.at("name"); - std::string arguments = "{}"; - - if (tool_call.contains("arguments")) { - if (tool_call.at("arguments").is_object()) { - arguments = tool_call.at("arguments").dump(); - } else if (tool_call.at("arguments").is_string()) { - arguments = tool_call.at("arguments"); - } - } - - if (!builder.add_tool_call(function_name, "", arguments)) { - throw common_chat_msg_partial_exception("Incomplete tool call"); - } - } - } else { - throw 
common_chat_msg_partial_exception("Expected JSON array for tool calls"); - } - - // Consume any trailing whitespace after this tool call - builder.consume_spaces(); - } - - // Consume any remaining content after all tool calls - auto remaining = builder.consume_rest(); - if (!string_strip(remaining).empty()) { - builder.add_content(remaining); - } -} - -static void common_chat_parse_seed_oss(common_chat_msg_parser & builder) { - static const xml_tool_call_format form { - /* form.scope_start = */ "", - /* form.tool_start = */ "", - /* form.key_start = */ "", - /* form.val_end = */ "", - /* form.tool_end = */ "", - /* form.scope_end = */ "", - }; - builder.consume_reasoning_with_xml_tool_calls(form, "", ""); -} - -static void common_chat_parse_solar_open(common_chat_msg_parser & builder) { - builder.try_parse_reasoning("<|think|>", "<|end|><|begin|>assistant<|content|>"); - - // TODO: Tool calling - - builder.add_content(builder.consume_rest()); -} - -static void common_chat_parse_exaone_moe_content(common_chat_msg_parser & builder) { - // 1) { "name": "...", "arguments": {...} } - // 2) { "id": "...", "type": "function", "function": { "name": "...", "arguments": {...} } } - static const common_regex tool_call_open(R"(]*>)"); - - if (!builder.syntax().parse_tool_calls) { - LOG_DBG("%s: not parse_tool_calls\n", __func__); - builder.add_content(builder.consume_rest()); - return; - } - - LOG_DBG("%s: parse_tool_calls\n", __func__); - - // Find all blocks - while (auto first = builder.try_find_regex(tool_call_open, std::string::npos, /* add_prelude_to_content= */ true)) { - builder.move_to(first->groups[0].end); - builder.consume_spaces(); - - builder.try_consume_literal("```json"); - builder.try_consume_literal("```"); - builder.consume_spaces(); - - // Consume JSON object - auto data = builder.consume_json(); - - builder.consume_spaces(); - builder.try_consume_literal("```"); - builder.consume_spaces(); - - if (!builder.try_consume_literal("")) { - throw 
common_chat_msg_partial_exception("incomplete tool call"); - } - builder.consume_spaces(); - - // Extract name and arguments - std::string name; - std::string id; - nlohmann::ordered_json arguments; - - const auto extract_args = [&](const nlohmann::ordered_json & obj) -> bool { - if (!obj.contains("name") || !obj.contains("arguments")) { - return false; - } - name = obj.at("name").get(); - arguments = obj.at("arguments"); - if (obj.contains("id") && obj.at("id").is_string()) { - id = obj.at("id").get(); - } - return true; - }; - - if (!extract_args(data.json)) { - if (data.json.contains("function") && data.json.at("function").is_object()) { - auto fn = data.json.at("function"); - extract_args(fn); - if (id.empty() && data.json.contains("id") && data.json.at("id").is_string()) { - id = data.json.at("id").get(); - } - } - } - - // If name is empty, treat the JSON object as content - if (name.empty()) { - LOG_DBG("%s: tool call missing name, treating as content\n", __func__); - builder.add_content(data.json.dump()); - continue; - } - - std::string args_str = arguments.dump(); - if (!builder.add_tool_call(name, id, args_str)) { - throw common_chat_msg_partial_exception("incomplete tool call"); - } - } - - builder.add_content(builder.consume_rest()); -} - -static void common_chat_parse_exaone_moe(common_chat_msg_parser & builder) { - LOG_DBG("%s: parsing exaone_moe\n", __func__); - // EXAONE MoE outputs reasoning content between "" and "" tags, followed by regular content - // First try to parse using the standard reasoning parsing method - LOG_DBG("%s: thinking_forced_open: %s\n", __func__, std::to_string(builder.syntax().thinking_forced_open).c_str()); - - auto start_pos = builder.pos(); - auto found_end_think = builder.try_find_literal(""); - builder.move_to(start_pos); - - if (builder.syntax().thinking_forced_open && !builder.is_partial() && !found_end_think) { - LOG_DBG("%s: no end_think, not partial, adding content\n", __func__); - 
common_chat_parse_exaone_moe_content(builder); - } else if (builder.try_parse_reasoning("", "")) { - // If reasoning was parsed successfully, the remaining content is regular content - LOG_DBG("%s: parsed reasoning, adding content\n", __func__); - common_chat_parse_exaone_moe_content(builder); - } else { - if (builder.syntax().reasoning_format == COMMON_REASONING_FORMAT_NONE) { - LOG_DBG("%s: reasoning_format none, adding content\n", __func__); - common_chat_parse_exaone_moe_content(builder); - return; - } - // If no reasoning tags found, check if we should treat everything as reasoning - if (builder.syntax().thinking_forced_open) { - // If thinking is forced open but no tags found, treat everything as reasoning - LOG_DBG("%s: thinking_forced_open, adding reasoning content\n", __func__); - builder.add_reasoning_content(builder.consume_rest()); - } else { - LOG_DBG("%s: no thinking_forced_open, adding content\n", __func__); - common_chat_parse_exaone_moe_content(builder); - } - } -} - -static void common_chat_parse_content_only(common_chat_msg_parser & builder) { - builder.try_parse_reasoning("", ""); - builder.add_content(builder.consume_rest()); -} - -static void common_chat_parse(common_chat_msg_parser & builder) { - LOG_DBG("Parsing input with format %s: %s\n", common_chat_format_name(builder.syntax().format), builder.input().c_str()); - - switch (builder.syntax().format) { - case COMMON_CHAT_FORMAT_CONTENT_ONLY: - common_chat_parse_content_only(builder); - break; - case COMMON_CHAT_FORMAT_GENERIC: - common_chat_parse_generic(builder); - break; - case COMMON_CHAT_FORMAT_MISTRAL_NEMO: - common_chat_parse_mistral_nemo(builder); - break; - case COMMON_CHAT_FORMAT_MAGISTRAL: - common_chat_parse_magistral(builder); - break; - case COMMON_CHAT_FORMAT_LLAMA_3_X: - common_chat_parse_llama_3_1(builder); - break; - case COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS: - common_chat_parse_llama_3_1(builder, /* with_builtin_tools= */ true); - break; - case 
COMMON_CHAT_FORMAT_DEEPSEEK_R1: - common_chat_parse_deepseek_r1(builder); - break; - case COMMON_CHAT_FORMAT_DEEPSEEK_V3_1: - common_chat_parse_deepseek_v3_1(builder); - break; - case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2: - common_chat_parse_functionary_v3_2(builder); - break; - case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1: - common_chat_parse_functionary_v3_1_llama_3_1(builder); - break; - case COMMON_CHAT_FORMAT_HERMES_2_PRO: - common_chat_parse_hermes_2_pro(builder); - break; - case COMMON_CHAT_FORMAT_FIREFUNCTION_V2: - common_chat_parse_firefunction_v2(builder); - break; - case COMMON_CHAT_FORMAT_COMMAND_R7B: - common_chat_parse_command_r7b(builder); - break; - case COMMON_CHAT_FORMAT_GRANITE: - common_chat_parse_granite(builder); - break; - case COMMON_CHAT_FORMAT_GPT_OSS: - common_chat_parse_gpt_oss(builder); - break; - case COMMON_CHAT_FORMAT_SEED_OSS: - common_chat_parse_seed_oss(builder); - break; - case COMMON_CHAT_FORMAT_NEMOTRON_V2: - common_chat_parse_nemotron_v2(builder); - break; - case COMMON_CHAT_FORMAT_APERTUS: - common_chat_parse_apertus(builder); - break; - case COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS: - common_chat_parse_lfm2(builder); - break; - case COMMON_CHAT_FORMAT_MINIMAX_M2: - common_chat_parse_minimax_m2(builder); - break; - case COMMON_CHAT_FORMAT_GLM_4_5: - common_chat_parse_glm_4_5(builder); - break; - case COMMON_CHAT_FORMAT_KIMI_K2: - common_chat_parse_kimi_k2(builder); - break; - case COMMON_CHAT_FORMAT_QWEN3_CODER_XML: - common_chat_parse_qwen3_coder_xml(builder); - break; - case COMMON_CHAT_FORMAT_APRIEL_1_5: - common_chat_parse_apriel_1_5(builder); - break; - case COMMON_CHAT_FORMAT_XIAOMI_MIMO: - common_chat_parse_xiaomi_mimo(builder); - break; - case COMMON_CHAT_FORMAT_SOLAR_OPEN: - common_chat_parse_solar_open(builder); - break; - case COMMON_CHAT_FORMAT_EXAONE_MOE: - common_chat_parse_exaone_moe(builder); - break; - default: - throw std::runtime_error(std::string("Unsupported format: ") + 
common_chat_format_name(builder.syntax().format)); - } - builder.finish(); -} - -common_chat_msg common_chat_parse(const std::string & input, bool is_partial, const common_chat_parser_params & syntax) { - if (syntax.format == COMMON_CHAT_FORMAT_PEG_SIMPLE || - syntax.format == COMMON_CHAT_FORMAT_PEG_NATIVE || - syntax.format == COMMON_CHAT_FORMAT_PEG_CONSTRUCTED) { - return common_chat_peg_parse(syntax.parser, input, is_partial, syntax); - } - common_chat_msg_parser builder(input, is_partial, syntax); - try { - common_chat_parse(builder); - } catch (const common_chat_msg_partial_exception & ex) { - LOG_DBG("Partial parse: %s\n", ex.what()); - if (!is_partial) { - builder.clear_tools(); - builder.move_to(0); - common_chat_parse_content_only(builder); - } - } - auto msg = builder.result(); - if (!is_partial) { - LOG_DBG("Parsed message: %s\n", common_chat_msgs_to_json_oaicompat({msg}).at(0).dump().c_str()); - } - return msg; -} - -common_chat_msg common_chat_peg_parse(const common_peg_arena & parser, const std::string & input, bool is_partial, const common_chat_parser_params & syntax) { - if (parser.empty()) { - throw std::runtime_error("Failed to parse due to missing parser definition."); - } - - LOG_DBG("Parsing input with format %s: %s\n", common_chat_format_name(syntax.format), input.c_str()); - - common_peg_parse_context ctx(input, is_partial); - auto result = parser.parse(ctx); - if (result.fail()) { - throw std::runtime_error(std::string("Failed to parse input at pos ") + std::to_string(result.end)); - } - - common_chat_msg msg; - msg.role = "assistant"; - - if (syntax.format == COMMON_CHAT_FORMAT_PEG_NATIVE) { - auto mapper = common_chat_peg_native_mapper(msg); - mapper.from_ast(ctx.ast, result); - } else if (syntax.format == COMMON_CHAT_FORMAT_PEG_CONSTRUCTED) { - auto mapper = common_chat_peg_constructed_mapper(msg); - mapper.from_ast(ctx.ast, result); - } else { - // Generic mapper - auto mapper = common_chat_peg_mapper(msg); - mapper.from_ast(ctx.ast, 
result); - } - if (!is_partial) { - LOG_DBG("Parsed message: %s\n", common_chat_msgs_to_json_oaicompat({msg}).at(0).dump().c_str()); - } - return msg; -} diff --git a/common/chat-parser.h b/common/chat-parser.h deleted file mode 100644 index 3ed9c30a2b..0000000000 --- a/common/chat-parser.h +++ /dev/null @@ -1,133 +0,0 @@ -#pragma once - -#include "chat.h" -#include "chat-parser-xml-toolcall.h" -#include "json-partial.h" -#include "regex-partial.h" - -#include - -#include -#include -#include - -class common_chat_msg_partial_exception : public std::runtime_error { - public: - common_chat_msg_partial_exception(const std::string & message) : std::runtime_error(message) {} -}; - -class common_chat_msg_parser { - std::string input_; - bool is_partial_; - common_chat_parser_params syntax_; // TODO: rename to params - std::string healing_marker_; - - size_t pos_ = 0; - common_chat_msg result_; - - public: - common_chat_msg_parser(const std::string & input, bool is_partial, const common_chat_parser_params & syntax); - const std::string & input() const { return input_; } - size_t pos() const { return pos_; } - const std::string & healing_marker() const { return healing_marker_; } - const bool & is_partial() const { return is_partial_; } - const common_chat_msg & result() const { return result_; } - const common_chat_parser_params & syntax() const { return syntax_; } - - void move_to(size_t pos) { - if (pos > input_.size()) { - throw std::runtime_error("Invalid position!"); - } - pos_ = pos; - } - void move_back(size_t n) { - if (pos_ < n) { - throw std::runtime_error("Can't move back that far!"); - } - pos_ -= n; - } - - // Get the substring of the input at the given range - std::string str(const common_string_range & rng) const; - - // Appends to the result.content field - void add_content(const std::string & content); - - // Appends to the result.reasoning_content field - void add_reasoning_content(const std::string & reasoning_content); - - // Adds a tool call to the 
result. If the tool call is too incomplete (e.g. name empty), it won't add anything. - bool add_tool_call(const std::string & name, const std::string & id, const std::string & arguments); - - // Adds a tool call using the "name", "id" and "arguments" fields of the json object - bool add_tool_call(const nlohmann::ordered_json & tool_call); - - // Adds an array of tool calls using their "name", "id" and "arguments" fields. - bool add_tool_calls(const nlohmann::ordered_json & arr); - - // Adds a tool call using the short form: { "tool_name": { "arg1": val, "arg2": val } } - bool add_tool_call_short_form(const nlohmann::ordered_json & tool_call); - - void finish(); - - bool consume_spaces(); - - void consume_literal(const std::string & literal); - - bool try_parse_reasoning(const std::string & start_think, const std::string & end_think); - - std::string consume_rest(); - - struct find_regex_result { - std::string prelude; - std::vector groups; - }; - - std::optional try_find_regex(const common_regex & regex, size_t from = std::string::npos, bool add_prelude_to_content = true); - - bool try_consume_literal(const std::string & literal); - - std::optional try_find_literal(const std::string & literal); - - find_regex_result consume_regex(const common_regex & regex); - - std::optional try_consume_regex(const common_regex & regex); - - std::optional try_consume_json(); - common_json consume_json(); - - struct consume_json_result { - nlohmann::ordered_json value; - bool is_partial; - }; - - /* - Consume (possibly partial) json and converts specific subtrees to (possibly truncated) JSON strings. - - By default, object keys can't be truncated, nor can string values (their corresponding key is removed, - e.g. 
`{"foo": "bar", "baz": "b` -> `{"foo": "bar"}` - - But one can allow subpaths to be kept truncated, and possibly json-dumped to truncated json strings - - with `content_paths={{"foo"}}` -> `{"foo": "b` -> {"foo": "b"}` - - with `args_paths={{"foo"}}` -> `{"foo": {"b` -> `{"foo": "{b"}` - */ - consume_json_result consume_json_with_dumped_args( - const std::vector> & args_paths = {}, - const std::vector> & content_paths = {} - ); - std::optional try_consume_json_with_dumped_args( - const std::vector> & args_paths = {}, - const std::vector> & content_paths = {} - ); - - /** - * Parse XML-Style tool call for given xml_tool_call_format. Return false for invalid syntax and get the position untouched. - * form.scope_start, form.tool_sep and form.scope_end can be empty. - */ - bool try_consume_xml_tool_calls(const struct xml_tool_call_format & form); - - // Parse content uses reasoning and XML-Style tool call - void consume_reasoning_with_xml_tool_calls(const struct xml_tool_call_format & form, const std::string & start_think = "", const std::string & end_think = ""); - - void clear_tools(); -}; diff --git a/common/chat-peg-parser.cpp b/common/chat-peg-parser.cpp index 1bcba9cd86..ba49ecf29b 100644 --- a/common/chat-peg-parser.cpp +++ b/common/chat-peg-parser.cpp @@ -1,13 +1,16 @@ #include "chat-peg-parser.h" +#include "chat-auto-parser.h" +#include "ggml.h" + #include -using json = nlohmann::json; +using json = nlohmann::ordered_json; static std::string_view trim_trailing_space(std::string_view sv, int max = -1) { int count = 0; while (!sv.empty() && std::isspace(static_cast(sv.back()))) { - if (max != -1 && count <= max) { + if (max != -1 && count >= max) { break; } sv.remove_suffix(1); @@ -16,109 +19,966 @@ static std::string_view trim_trailing_space(std::string_view sv, int max = -1) { return sv; } +static std::string_view trim_leading_space(std::string_view sv, int max = -1) { + int count = 0; + while (!sv.empty() && std::isspace(static_cast(sv.front()))) { + if (max 
!= -1 && count >= max) { + break; + } + sv.remove_prefix(1); + count++; + } + return sv; +} + +static std::string_view trim(std::string_view sv) { + return trim_trailing_space(trim_leading_space(sv, 1)); +} + +// Convert Python-style single-quoted strings to JSON double-quoted strings +// Only converts outer string delimiters, properly handling escape sequences: +// - {'key': 'value'} -> {"key": "value"} +// - {'code': 'print(\'hello\')'} -> {"code": "print('hello')"} +// - {'msg': 'He said "hi"'} -> {"msg": "He said \"hi\""} +static std::string normalize_quotes_to_json(const std::string & input) { + std::string result; + result.reserve(input.size() + 16); // May need extra space for escaping + + bool in_single_quoted = false; + bool in_double_quoted = false; + + for (size_t i = 0; i < input.size(); ++i) { + char c = input[i]; + + // Handle escape sequences + if (c == '\\' && i + 1 < input.size()) { + char next = input[i + 1]; + + if (in_single_quoted) { + // Inside a single-quoted string being converted to double quotes + if (next == '\'') { + // \' -> ' (escaped single quote becomes unescaped in double-quoted string) + result += '\''; + ++i; + continue; + } + if (next == '"') { + // \" stays as \" (already escaped, works in double-quoted string) + result += "\\\""; + ++i; + continue; + } + // Other escapes (\n, \\, etc.): pass through both characters + result += c; + result += next; + ++i; + continue; + } + + if (in_double_quoted) { + // Inside a double-quoted string - pass through escape sequences as-is + result += c; + result += next; + ++i; + continue; + } + + // Outside any string - just pass through the backslash + result += c; + continue; + } + + // Handle quote characters + if (c == '"') { + if (in_single_quoted) { + // Unescaped double quote inside single-quoted string -> must escape for JSON + result += "\\\""; + } else { + // Double quote as string delimiter or outside strings + in_double_quoted = !in_double_quoted; + result += c; + } + } else if (c == 
'\'') { + if (in_double_quoted) { + // Single quote inside double-quoted string -> pass through + result += c; + } else if (in_single_quoted) { + // Closing single quote -> convert to double quote + in_single_quoted = false; + result += '"'; + } else { + // Opening single quote -> convert to double quote + in_single_quoted = true; + result += '"'; + } + } else { + result += c; + } + } + + return result; +} + void common_chat_peg_mapper::from_ast(const common_peg_ast_arena & arena, const common_peg_parse_result & result) { - arena.visit(result, [this](const common_peg_ast_node & node) { - map(node); - }); + arena.visit(result, [this](const common_peg_ast_node & node) { map(node); }); } void common_chat_peg_mapper::map(const common_peg_ast_node & node) { bool is_reasoning = node.tag == common_chat_peg_builder::REASONING; - bool is_content = node.tag == common_chat_peg_builder::CONTENT; + bool is_content = node.tag == common_chat_peg_builder::CONTENT; - if (is_reasoning) { - result.reasoning_content = std::string(trim_trailing_space(node.text)); + if (is_reasoning) { // GPT OSS can have more than 1 reasoning block, so concatenate here + result.reasoning_content += std::string(trim_trailing_space(node.text)); } if (is_content) { - result.content = std::string(trim_trailing_space(node.text)); + // Concatenate content from multiple content nodes (e.g., when reasoning markers + // are preserved before content markers in reasoning_format=NONE mode) + result.content += std::string(trim_trailing_space(node.text)); } } -void common_chat_peg_native_mapper::map(const common_peg_ast_node & node) { +common_peg_parser common_chat_peg_builder::tag_with_safe_content(const std::string & tag_name, + const std::string & marker, + const common_peg_parser & p) { + if (marker.empty()) { + return zero_or_more(choice({ p, rule(tag_name, content(any())) })); + } + auto content_chunk = rule(tag_name, content(negate(literal(marker)) + any() + until(marker))); + return zero_or_more(choice({ p, 
content_chunk })); +} + +common_peg_parser common_chat_peg_unified_builder::build_reasoning_block(const content_structure & cs, + common_reasoning_format reasoning_format, + bool thinking_forced_open) { + // If reasoning is explicitly disabled, return empty + if (reasoning_format == COMMON_REASONING_FORMAT_NONE) { + return eps(); + } + + // Get reasoning markers - use from content_structure or fallback for DEEPSEEK format + std::string reason_start = cs.reasoning_start; + std::string reason_end = cs.reasoning_end; + + // If DEEPSEEK format is specified but markers weren't detected, use fallback markers + if ((reasoning_format == COMMON_REASONING_FORMAT_DEEPSEEK || + reasoning_format == COMMON_REASONING_FORMAT_DEEPSEEK_LEGACY) && + (reason_start.empty() || reason_end.empty())) { + // Try standard DeepSeek markers + if (reason_start.empty()) { + reason_start = ""; + } + if (reason_end.empty()) { + reason_end = ""; + } + } + + // If still no markers, return empty + // But allow empty start marker if thinking is forced open (implicit start) + if ((reason_start.empty() && !thinking_forced_open) || reason_end.empty()) { + return eps(); + } + + if (thinking_forced_open) { + // Mandatory reasoning: parse from current position to end marker + auto parser = reasoning(until(reason_end)) + literal(reason_end); + return rule("reasoning", reasoning_block(parser)); + } + // Optional reasoning: may or may not appear + // Also try <|START_THINKING|> style markers if standard markers don't match + auto standard_reasoning = + reasoning_block(literal(reason_start) + reasoning(until(reason_end)) + literal(reason_end)); + + // For templates that use <|START_THINKING|> style markers + if (reason_start == "" && reason_end == "") { + auto alt_reasoning = reasoning_block(literal("<|START_THINKING|>") + reasoning(until("<|END_THINKING|>")) + + literal("<|END_THINKING|>")); + return optional(rule("reasoning", choice({ standard_reasoning, alt_reasoning }))); + } + + return 
optional(rule("reasoning", standard_reasoning)); +} + +common_peg_parser common_chat_peg_unified_builder::build_content_block(const content_structure & cs, + common_reasoning_format reasoning_format, + const std::string & tool_section_start) { + GGML_UNUSED(tool_section_start); // leaving for now just in case + std::string content_start = cs.content_start; + std::string content_end = cs.content_end; + + // Add fallback content markers for DEEPSEEK format if not detected + // Some templates use tags for content when reasoning is enabled + if ((reasoning_format == COMMON_REASONING_FORMAT_DEEPSEEK || + reasoning_format == COMMON_REASONING_FORMAT_DEEPSEEK_LEGACY) && + (content_start.empty() || content_end.empty())) { + content_start = ""; + content_end = ""; + } + + // Handle content markers with both start and end + if (cs.content_mode != content_structure::CONTENT_PLAIN && !cs.content_start.empty() && !cs.content_end.empty()) { + // Content is wrapped in markers + if (reasoning_format == COMMON_REASONING_FORMAT_NONE) { + // When reasoning_format=NONE, preserve any content before the content start marker + // (this may include reasoning/thinking markers that the model generates). + // This applies even if reasoning markers weren't detected by the analyzer. 
+ auto with_markers = content(until(cs.content_start)) + literal(cs.content_start) + + content(until(cs.content_end)) + literal(cs.content_end); + // Fallback: content wrapped in end marker only (start marker might be in prompt) + auto implicit_markers = content(until(cs.content_end)) + literal(cs.content_end); + auto without_markers = content(rest()); + return choice({ with_markers, implicit_markers, without_markers }); + } // When reasoning is parsed separately, content starts directly after reasoning block + auto with_markers = literal(cs.content_start) + content(until(cs.content_end)) + literal(cs.content_end); + auto implicit_markers = content(until(cs.content_end)) + literal(cs.content_end); + auto without_markers = content(rest()); + return choice({ with_markers, implicit_markers, without_markers }); + } + + // Handle content with only start marker (no end marker) + // This is for formats like recipient-based (Functionary v3.2) where content is prefixed with + // a marker but has no explicit closing marker - content ends at end of message or before tool calls + if (cs.content_mode != content_structure::CONTENT_PLAIN && !cs.content_start.empty() && cs.content_end.empty()) { + if (reasoning_format == COMMON_REASONING_FORMAT_NONE) { + // Preserve any content before the start marker, then consume the marker and capture rest + auto with_start_marker = content(until(cs.content_start)) + literal(cs.content_start) + content(rest()); + auto without_markers = content(rest()); + return choice({ with_start_marker, without_markers }); + } // Content starts directly after reasoning block + auto with_start_marker = literal(cs.content_start) + content(rest()); + auto without_markers = content(rest()); + return choice({ with_start_marker, without_markers }); + } + + // For DEEPSEEK format, try fallback content markers even if not detected + if (!content_start.empty() && !content_end.empty()) { + auto with_markers = literal(content_start) + content(until(content_end)) + 
literal(content_end); + auto without_markers = content(rest()); + return choice({ with_markers, without_markers }); + } + + // Plain content - capture rest + return content(rest()); +} + +common_peg_parser common_chat_peg_unified_builder::build_tool_section(const tool_call_structure & ts, + const nlohmann::json & tools, + bool parallel_tool_calls, + bool force_tool_calls) { + if (!ts.supports_tools || !tools.is_array() || tools.empty()) { + return eps(); + } + + // Build tool choices based on function format + auto tool_choices = choice(); + + for (const auto & tool_def : tools) { + if (!tool_def.contains("function")) { + continue; + } + const auto & function = tool_def.at("function"); + std::string name = function.at("name"); + nlohmann::json params = function.contains("parameters") ? function.at("parameters") : nlohmann::json::object(); + + tool_choices |= rule("tool-" + name, build_function(ts, name, params)); + } + + // Build the section with or without markers + auto build_section = [&]() -> common_peg_parser { + // Markdown code block format (Cohere Command-R Plus): + // Action:\n```json\n[{...}]\n``` + if (ts.function_format == tool_call_structure::FUNC_MARKDOWN_CODE_BLOCK) { + // Build the opening: "Action:\n```json" + std::string code_fence_open = "```"; + if (!ts.code_block_language.empty()) { + code_fence_open += ts.code_block_language; + } + + auto opening = literal(ts.code_block_marker) + literal("\n") + literal(code_fence_open) + literal("\n"); + auto closing = literal("\n") + literal(ts.tool_section_end); // "\n```" + + // Build the JSON array of tool calls + // Don't use trigger_rule here since we're nested inside a sequence + auto tools_array = literal("[") + space(); + if (parallel_tool_calls) { + tools_array = tools_array + tool_choices; + tools_array = tools_array + zero_or_more(space() + literal(",") + space() + tool_choices); + } else { + tools_array = tools_array + optional(tool_choices); + } + tools_array = tools_array + space() + 
literal("]"); + + // Full section: Action:\n```json\n[{...}]\n``` + return trigger_rule("tool-call", opening + tools_array + closing); + } + + // Recipient-based format (Functionary v3.2): >>>function_name\n{arguments} + // Uses tool_section_start as delimiter, but no array wrapper or section markers + if (ts.function_format == tool_call_structure::FUNC_RECIPIENT_BASED) { + auto tool_call = trigger_rule("tool-call", tool_choices); + if (parallel_tool_calls) { + // Multiple tool calls: each starts with >>> + return one_or_more(tool_call + space()); + } + return tool_call; + } + + if (!ts.tool_section_start.empty() && !ts.tool_section_end.empty()) { + // Check if this format has SEPARATE section markers and per-call markers. + // This happens when: + // - Section markers wrap the ENTIRE section (e.g., ...) + // - Function prefix contains its own per-call marker (e.g., ...) + // Example: DeepSeek R1 with section and call markers, Kimi-K2 with prefixed-indexed format + // We detect this by checking if function_prefix contains a per-call START marker + // (indicated by words like "call_begin", "call_start", or similar patterns) + bool has_separate_section_and_call_markers = false; + + // FUNC_PREFIXED_INDEXED and FUNC_BRACKET_TAG always have separate section and per-call markers + if (ts.function_format == tool_call_structure::FUNC_PREFIXED_INDEXED || + ts.function_format == tool_call_structure::FUNC_BRACKET_TAG) { + has_separate_section_and_call_markers = true; + } else if (ts.function_format == tool_call_structure::FUNC_NAME_AS_KEY) { + // FUNC_NAME_AS_KEY uses comma-separated JSON objects in an array + // Format: [{"func1": args}, {"func2": args}] + // The brackets are included in section markers + auto tool_call = trigger_rule("tool-call", tool_choices); + auto tool_calls = tool_call; + if (parallel_tool_calls) { + tool_calls = tool_call + zero_or_more(space() + literal(",") + space() + tool_call); + } + return literal(ts.tool_section_start) + space() + tool_calls + 
space() + literal(ts.tool_section_end); + } else if (ts.function_format == tool_call_structure::FUNC_TAG_WITH_NAME && !ts.function_prefix.empty()) { + // Check if function_prefix contains a per-call marker like "" + // This differentiates DeepSeek R1 (where function_prefix has its own call marker) + // from Nemotron (where function_prefix is just " ... + auto tool_call = trigger_rule("tool-call", tool_choices); + auto tool_calls = parallel_tool_calls ? one_or_more(tool_call + space()) : tool_call; + return literal(ts.tool_section_start) + space() + tool_calls + space() + literal(ts.tool_section_end); + } // Each tool call has its own wrapper: tool + auto single_tool_section = + trigger_rule("tool-call", literal(ts.tool_section_start) + space() + tool_choices + space() + + literal(ts.tool_section_end)); + if (parallel_tool_calls) { + // Multiple wrapped tool calls + return one_or_more(single_tool_section + space()); + } + return single_tool_section; + } + if (!ts.tool_section_start.empty()) { + // Start marker only (no end marker) - e.g., <|tool_call|>[...] 
+ // Wrap all tool calls in an array after the start marker + auto tools_array = literal("[") + space(); + if (parallel_tool_calls) { + tools_array = tools_array + tool_choices; + tools_array = tools_array + zero_or_more(space() + literal(",") + space() + tool_choices); + } else { + tools_array = tools_array + optional(tool_choices); + } + tools_array = tools_array + space() + literal("]"); + + return trigger_rule("tool-call", literal(ts.tool_section_start) + tools_array); + } // No section markers (raw JSON format, e.g., Llama 3.1) + // Use trigger rule since tool calls are identified by regex trigger on the grammar + if (parallel_tool_calls) { + return trigger_rule("tool-call", one_or_more(tool_choices + space())); + } + return trigger_rule("tool-call", tool_choices); + }; + + auto section = build_section(); + if (!force_tool_calls) { + section = optional(section); + } + + return section; +} + +common_peg_parser common_chat_peg_unified_builder::build_function(const tool_call_structure & ts, + const std::string & name, + const nlohmann::json & schema) { + auto args = build_arguments(ts, schema); + + switch (ts.function_format) { + case tool_call_structure::FUNC_JSON_OBJECT: + { + // Build JSON object parser that accepts id field in either position: + // - Before name: {"id": "...", "name": "X", "arguments": {...}} (R7B style) + // - After args: {"name": "X", "arguments": {...}, "id": "..."} (Mistral style) + auto tool_name_ = json_member(ts.name_field, "\"" + tool_name(literal(name)) + "\""); + auto tool_args_ = json_member(ts.args_field, tool_args(args)); + + // id can appear before name or after args + auto id_member = json_member(ts.id_field, tool_id(json_string())); + auto id_before = ts.id_field.empty() ? eps() : optional(id_member << space() << "," << space()); + auto id_after = ts.id_field.empty() ? 
eps() : optional(space() << "," << space() << id_member); + + return tool(tool_open(literal("{")) << space() << id_before // optional id before name (R7B style) + << tool_name_ << space() << "," << space() << tool_args_ + << id_after // optional id after args (Mistral style) + << zero_or_more(space() << "," << space() << json_string() + << space() << ":" << space() << json()) + << space() << "}"); + } + + case tool_call_structure::FUNC_TAG_WITH_NAME: + { + // Build tag parser: {...} + // Combine prefix + name + suffix into tool_open to ensure the tool is only created + // when the FULL opening tag is confirmed. This prevents partial name matches during + // incremental parsing (e.g., matching "special_function" when input is "special_function_") + auto opening = literal(ts.function_prefix) + tool_name(literal(name)) + literal(ts.function_suffix); + // Note: No space() before tool_close because function_close may start with newline + // (e.g., "\n```") and space() would consume it, preventing the literal match + return tool(tool_open(opening) + space() + tool_args(args) + tool_close(literal(ts.function_close))); + } + + case tool_call_structure::FUNC_TAG_NAME_ONLY: + { + // Build tag parser: ... 
+ // Combine < + name + > into tool_open to prevent partial matches + auto opening = literal("<") + tool_name(literal(name)) + literal(">"); + return tool(tool_open(opening) + space() + tool_args(args) + space() + + tool_close(literal(""))); + } + + case tool_call_structure::FUNC_PREFIXED_INDEXED: + { + // Build prefixed-indexed parser (e.g., Kimi-K2): + // <|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>{...}<|tool_call_end|> + // The index number after : is ignored (we use zero_or_more(digit) to skip it) + auto opening = literal(ts.per_call_start) + literal(ts.function_namespace) + tool_name(literal(name)) + + literal(":") + zero_or_more(chars("0-9", 1, 1)) + // Skip the index + literal(ts.args_marker); + return tool(tool_open(opening) + space() + tool_args(args) + space() + + tool_close(literal(ts.per_call_end))); + } + + case tool_call_structure::FUNC_NAME_AS_KEY: + { + // Build name-as-key parser (e.g., Apertus): + // {"function_name": {...arguments...}} + // The function name IS the JSON key, and arguments are the value directly + auto opening = literal("{\"") + tool_name(literal(name)) + literal("\":"); + return tool(tool_open(opening) + space() + tool_args(args) + space() + literal("}")); + } + + case tool_call_structure::FUNC_BRACKET_TAG: + { + // Build bracket-tag parser (e.g., Mistral Small 3.2): + // [TOOL_CALLS]function_name[CALL_ID]call_id[ARGS]{...} + // per_call_start = "[TOOL_CALLS]" + // id_marker = "[CALL_ID]" + // args_marker = "[ARGS]" + auto opening = literal(ts.per_call_start) + tool_name(literal(name)); + if (!ts.id_marker.empty()) { + // Add id_marker + id value (captured as tool_id) + opening = opening + literal(ts.id_marker) + tool_id(until(ts.args_marker)); + } + if (!ts.args_marker.empty()) { + opening = opening + literal(ts.args_marker); + } + // No explicit closer for this format (EOS terminates) + return tool(tool_open(opening) + space() + tool_args(args)); + } + + case 
tool_call_structure::FUNC_RECIPIENT_BASED: + { + // Build recipient-based parser (e.g., Functionary v3.2): + // >>>function_name + // {'param1': 'value1', 'param2': 'value2'} + // tool_section_start = ">>>" + // Function name directly follows ">>>" with newline, arguments are Python dict (parse as JSON) + auto opening = literal(ts.tool_section_start) + tool_name(literal(name)); + // No explicit closer (newline + arguments, then EOS or next >>>) + return tool(tool_open(opening) + space() + tool_args(args)); + } + + case tool_call_structure::FUNC_MARKDOWN_CODE_BLOCK: + { + // Build markdown code block parser (e.g., Cohere Command-R Plus): + // Action: + // ```json + // [ + // { + // "tool_name": "function_name", + // "parameters": {...} + // } + // ] + // ``` + // The individual function is a JSON object within the array + auto tool_name_ = json_member(ts.name_field, "\"" + tool_name(literal(name)) + "\""); + auto tool_args_ = json_member(ts.args_field, tool_args(args)); + + // Build the JSON object: {"tool_name": "...", "parameters": {...}} + // Use same pattern as FUNC_JSON_OBJECT: tool_open with atomic wrapper + return tool(tool_open(literal("{")) << space() << tool_name_ << space() << "," << space() << tool_args_ + << zero_or_more(space() << "," << space() << json_string() + << space() << ":" << space() << json()) + << space() << "}"); + } + } + + return eps(); +} + +common_peg_parser common_chat_peg_unified_builder::build_arguments(const tool_call_structure & ts, + const nlohmann::json & params) { + switch (ts.argument_format) { + case tool_call_structure::ARGS_JSON: + { + // Standard JSON object arguments + if (params.is_object()) { + return schema(json(), "args", params); + } + return json(); + } + + case tool_call_structure::ARGS_TAGGED: + { + // Tagged arguments: value + if (!params.contains("properties") || params.at("properties").empty()) { + return eps(); + } + + auto arg_choice = choice(); + for (const auto & el : params.at("properties").items()) { + 
const std::string & prop_name = el.key(); + const auto & prop_schema = el.value(); + + // Check if the schema declares this as a string type + bool is_string_type = prop_schema.contains("type") && prop_schema.at("type") == "string"; + + auto arg_name_parser = choice( + { literal(prop_name), literal("\"" + prop_name + "\""), literal("'" + prop_name + "'") }); + + // Use tool_arg_string_value for string types to prevent treating "[..." as JSON array + auto value_parser = is_string_type ? tool_arg_string_value(until(ts.arg_close)) + : tool_arg_value(until(ts.arg_close)); + + auto arg_rule = tool_arg(tool_arg_open(literal(ts.arg_prefix)) + tool_arg_name(arg_name_parser) + + literal(ts.arg_suffix) + value_parser + + tool_arg_close(literal(ts.arg_close)) + + (ts.arg_separator.empty() ? eps() : optional(literal(ts.arg_separator)))); + arg_choice |= arg_rule; + } + return zero_or_more(arg_choice + space()); + } + + case tool_call_structure::ARGS_KEY_VALUE_TAGS: + { + // Key-value tag arguments (GLM-4.6 style): + // key + // value + if (!params.contains("properties") || params.at("properties").empty()) { + return eps(); + } + + auto arg_choice = choice(); + for (const auto & el : params.at("properties").items()) { + const std::string & prop_name = el.key(); + const auto & prop_schema = el.value(); + + // Check if the schema declares this as a string type + bool is_string_type = prop_schema.contains("type") && prop_schema.at("type") == "string"; + + // Parse: key\nvalue + // ts.arg_prefix = "", ts.arg_suffix = "", ts.arg_close = "" + // Use tool_arg_string_value for string types to prevent treating "[..." as JSON array + auto value_parser = is_string_type ? 
tool_arg_string_value(until(ts.arg_close)) + : tool_arg_value(until(ts.arg_close)); + + auto arg_rule = tool_arg(tool_arg_open(literal(ts.arg_prefix)) + tool_arg_name(literal(prop_name)) + + literal(ts.arg_suffix) + // + space() + literal("") + value_parser + + tool_arg_close(literal(ts.arg_close))); + arg_choice |= arg_rule; + } + return zero_or_more(arg_choice + space()); + } + } + + return eps(); +} + +common_peg_parser common_chat_peg_unified_builder::standard_json_tools(const std::string & section_start, + const std::string & section_end, + const nlohmann::json & tools, + bool parallel_tool_calls, + bool force_tool_calls) { + if (!tools.is_array() || tools.empty()) { + return eps(); + } + + // Build tool choices for JSON format + auto tool_choices = choice(); + + for (const auto & tool_def : tools) { + if (!tool_def.contains("function")) { + continue; + } + const auto & function = tool_def.at("function"); + std::string name = function.at("name"); + nlohmann::json params = function.contains("parameters") ? function.at("parameters") : nlohmann::json::object(); + + // Build JSON object parser: {"name": "X", "arguments": {...}} + auto tool_name_ = json_member("name", "\"" + tool_name(literal(name)) + "\""); + auto tool_args_ = json_member("arguments", tool_args(schema(json(), "tool-" + name + "-schema", params))); + + auto tool_parser = + tool(tool_open(literal("{")) << space() << tool_name_ << space() << "," << space() << tool_args_ + << zero_or_more(space() << "," << space() << json_string() << space() << ":" + << space() << json()) + << space() << "}"); + + tool_choices |= rule("tool-" + name, tool_parser); + } + + // Build the section with markers + auto tool_calls = tool_choices; + if (parallel_tool_calls) { + tool_calls = tool_calls + zero_or_more(space() + literal(",") + space() + tool_choices); + } + + auto section = + trigger_rule("tool-call", literal(section_start) + space() + tool_calls + space() + literal(section_end)); + + return force_tool_calls ? 
section : optional(section); +} + +common_peg_parser common_chat_peg_unified_builder::standard_constructed_tools( + const std::map & markers, + const nlohmann::json & tools, + bool parallel_tool_calls, + bool force_tool_calls) { + if (!tools.is_array() || tools.empty()) { + return eps(); + } + + // Extract markers with defaults + auto get_marker = [&markers](const std::string & key, const std::string & default_val = "") -> std::string { + auto it = markers.find(key); + return it != markers.end() ? it->second : default_val; + }; + + std::string section_start = get_marker("tool_call_start_marker", ""); + std::string section_end = get_marker("tool_call_end_marker", ""); + std::string func_opener = get_marker("function_opener", ""); + std::string func_closer = get_marker("function_closer", ""); + std::string param_key_prefix = get_marker("parameter_key_prefix", ""); + std::string param_closer = get_marker("parameter_closer", ""); + + // Build tool choices for tagged format + auto tool_choices = choice(); + + for (const auto & tool_def : tools) { + if (!tool_def.contains("function")) { + continue; + } + const auto & function = tool_def.at("function"); + std::string name = function.at("name"); + nlohmann::json params = function.contains("parameters") ? 
function.at("parameters") : nlohmann::json::object(); + + // Build argument parsers + auto args = eps(); + if (params.contains("properties") && !params["properties"].empty()) { + auto arg_choice = choice(); + for (const auto & el : params["properties"].items()) { + const std::string & prop_name = el.key(); + + auto arg_name_parser = + choice({ literal(prop_name), literal("\"" + prop_name + "\""), literal("'" + prop_name + "'") }); + + auto arg_rule = tool_arg(tool_arg_open(literal(param_key_prefix)) + tool_arg_name(arg_name_parser) + + literal(get_marker("parameter_key_suffix")) + tool_arg_value(until(param_closer)) + + tool_arg_close(literal(param_closer))); + arg_choice |= arg_rule; + } + args = zero_or_more(arg_choice + space()); + } + + // Build function parser: args + auto tool_parser = tool(tool_open(literal(func_opener) + tool_name(literal(name)) + literal(get_marker("function_name_suffix"))) + + space() + tool_args(args) + space() + tool_close(literal(func_closer))); + + tool_choices |= rule("tool-" + name, tool_parser); + } + + // Build the section with markers + auto section = + parallel_tool_calls ? + trigger_rule("tool-call", literal(section_start) + space() + one_or_more(tool_choices + space()) + + literal(section_end)) : + trigger_rule("tool-call", literal(section_start) + space() + tool_choices + space() + literal(section_end)); + + return force_tool_calls ?
section : optional(section); +} + +void common_chat_peg_unified_mapper::from_ast(const common_peg_ast_arena & arena, + const common_peg_parse_result & parse_result_arg) { + // Call base class to visit all nodes + common_chat_peg_mapper::from_ast(arena, parse_result_arg); + + // Flush any pending tool call that was started but never got a name + // This happens during partial parsing when the tool call is incomplete + if (pending_tool_call.has_value()) { + // Transfer any buffered arguments + if (!args_buffer.empty()) { + pending_tool_call->arguments = args_buffer; + } + // Close any open quotes in buffered args + if (buffer_needs_closing_quote && !pending_tool_call->arguments.empty()) { + pending_tool_call->arguments += "\""; + } + // Add the incomplete tool call to results + result.tool_calls.push_back(pending_tool_call.value()); + pending_tool_call.reset(); + } +} + +void common_chat_peg_unified_mapper::map(const common_peg_ast_node & node) { + // First call base class for reasoning/content handling common_chat_peg_mapper::map(node); - bool is_tool_open = node.tag == common_chat_peg_native_builder::TOOL_OPEN; - bool is_tool_name = node.tag == common_chat_peg_native_builder::TOOL_NAME; - bool is_tool_id = node.tag == common_chat_peg_native_builder::TOOL_ID; - bool is_tool_args = node.tag == common_chat_peg_native_builder::TOOL_ARGS; + // Handle tool-related tags (unified version supporting both JSON and tagged formats) + bool is_tool_open = node.tag == common_chat_peg_unified_builder::TOOL_OPEN; + bool is_tool_close = node.tag == common_chat_peg_unified_builder::TOOL_CLOSE; + bool is_tool_name = node.tag == common_chat_peg_unified_builder::TOOL_NAME; + bool is_tool_id = node.tag == common_chat_peg_unified_builder::TOOL_ID; + bool is_tool_args = node.tag == common_chat_peg_unified_builder::TOOL_ARGS; + bool is_arg_open = node.tag == common_chat_peg_unified_builder::TOOL_ARG_OPEN; + bool is_arg_close = node.tag == common_chat_peg_unified_builder::TOOL_ARG_CLOSE; + 
bool is_arg_name = node.tag == common_chat_peg_unified_builder::TOOL_ARG_NAME; + bool is_arg_value = node.tag == common_chat_peg_unified_builder::TOOL_ARG_VALUE; + bool is_arg_string_value = node.tag == common_chat_peg_unified_builder::TOOL_ARG_STRING_VALUE; if (is_tool_open) { - result.tool_calls.emplace_back(); - current_tool = &result.tool_calls.back(); + // Don't create tool call yet - wait for name to be known + // This prevents sending incomplete tool calls in streaming mode + pending_tool_call = common_chat_tool_call(); + current_tool = &pending_tool_call.value(); + arg_count = 0; + // Clear the arguments buffer for the new tool + args_buffer.clear(); + needs_closing_quote = false; + buffer_needs_closing_quote = false; } if (is_tool_id && current_tool) { - current_tool->id = std::string(trim_trailing_space(node.text)); + auto text = trim_trailing_space(node.text); + if (text.size() >= 2 && text.front() == '"' && text.back() == '"') { + text = text.substr(1, text.size() - 2); + } + current_tool->id = std::string(text); } if (is_tool_name && current_tool) { current_tool->name = std::string(trim_trailing_space(node.text)); + // Now that we have the name, we can populate the arguments from the buffer + if (!args_buffer.empty()) { + current_tool->arguments = args_buffer; + args_buffer.clear(); + } else if (current_tool->arguments.empty()) { + // Initialize arguments if we're using tagged format and no buffered args + current_tool->arguments = "{"; + } + // Now that we have the name, add the tool call to the result + if (pending_tool_call.has_value()) { + result.tool_calls.push_back(pending_tool_call.value()); + pending_tool_call.reset(); + current_tool = &result.tool_calls.back(); + } } if (is_tool_args && current_tool) { - current_tool->arguments = std::string(trim_trailing_space(node.text)); - } -} - -void common_chat_peg_constructed_mapper::map(const common_peg_ast_node & node) { - common_chat_peg_mapper::map(node); - - bool is_tool_open = node.tag == 
common_chat_peg_constructed_builder::TOOL_OPEN; - bool is_tool_name = node.tag == common_chat_peg_constructed_builder::TOOL_NAME; - bool is_tool_close = node.tag == common_chat_peg_constructed_builder::TOOL_CLOSE; - bool is_arg_open = node.tag == common_chat_peg_constructed_builder::TOOL_ARG_OPEN; - bool is_arg_close = node.tag == common_chat_peg_constructed_builder::TOOL_ARG_CLOSE; - bool is_arg_name = node.tag == common_chat_peg_constructed_builder::TOOL_ARG_NAME; - bool is_arg_string = node.tag == common_chat_peg_constructed_builder::TOOL_ARG_STRING_VALUE; - bool is_arg_json = node.tag == common_chat_peg_constructed_builder::TOOL_ARG_JSON_VALUE; - - if (is_tool_open) { - result.tool_calls.emplace_back(); - current_tool = &result.tool_calls.back(); - arg_count = 0; - } - - if (is_tool_name) { - current_tool->name = std::string(node.text); - current_tool->arguments = "{"; + // For JSON format, the arguments come as a complete JSON object + // For tagged format, we build up arguments from individual arg_name/arg_value nodes + // Check if this looks like JSON (starts with {) vs tagged format (starts with <) + auto text = trim_trailing_space(node.text); + if (!text.empty() && text.front() == '{') { + // If we have the tool name, populate directly; otherwise buffer + if (!current_tool->name.empty()) { + current_tool->arguments = std::string(text); + } else { + args_buffer = std::string(text); + } + } + // If it's tagged format, we ignore this and let arg_name/arg_value build up the JSON } if (is_arg_open) { - needs_closing_quote = false; + // Reset for new argument + if (!current_tool->name.empty()) { + needs_closing_quote = false; + } else { + buffer_needs_closing_quote = false; + } } if (is_arg_name && current_tool) { + std::string arg_entry; if (arg_count > 0) { - current_tool->arguments += ","; + arg_entry = ","; } - current_tool->arguments += json(trim_trailing_space(node.text)).dump() + ":"; + arg_entry += json(trim(node.text)).dump() + ":"; ++arg_count; + + // 
If we have the tool name, add directly; otherwise buffer + if (!current_tool->name.empty()) { + current_tool->arguments += arg_entry; + } else { + if (args_buffer.empty()) { + args_buffer = "{"; + } + args_buffer += arg_entry; + } } - if (is_arg_string && current_tool) { - // Serialize to JSON, but exclude the end quote - std::string dumped = json(trim_trailing_space(node.text)).dump(); - current_tool->arguments += dumped.substr(0, dumped.size() - 1); - needs_closing_quote = true; + if ((is_arg_value || is_arg_string_value) && current_tool) { + std::string value_content = std::string(trim_trailing_space(trim_leading_space(node.text, 1), 1)); + + std::string value_to_add; + if (!value_content.empty()) { + // For potential containers, normalize Python-style single quotes to JSON double quotes first + // This ensures consistent output during both partial and final parsing + // Note: is_arg_string_value means the schema explicitly declares this as a string type, + // so we should NOT treat it as a potential container even if it starts with [ or { + bool is_potential_container = !is_arg_string_value && + (value_content[0] == '[' || value_content[0] == '{'); + if (is_potential_container) { + value_content = normalize_quotes_to_json(value_content); + } + + // Try to parse as JSON value (number, bool, null, object, array) + // For strings, we need special handling to support incremental parsing + try { + json parsed = json::parse(value_content); + if (parsed.is_string()) { + // For string values, don't add closing quote yet (added by arg_close) + // This ensures incremental parsing produces monotonic arguments + std::string escaped = parsed.dump(); + // Remove the trailing quote + if (!escaped.empty() && escaped.back() == '"') { + escaped.pop_back(); + } + value_to_add = escaped; + if (!current_tool->name.empty()) { + needs_closing_quote = true; + } else { + buffer_needs_closing_quote = true; + } + } else { + // For non-string values (number, bool, null, object, array), 
add raw value content + // Using raw content instead of dump() ensures monotonicity for streaming + // (prevents issues with spaces being removed by dump()) + value_to_add = value_content; + } + } catch (...) { + // JSON parsing failed - content is either incomplete (partial) or not valid JSON + // Note: potential containers were already normalized above, so value_content + // already has double quotes if it started with [ or { + + if (node.is_partial && is_potential_container) { + // During incremental parsing, if it looks like a JSON container, don't wrap in quotes yet + // and don't escape. Just pass through the (already normalized) content. + value_to_add = value_content; + } else { + // Not valid JSON and NOT a potential partial container - treat as string value + // Add opening quote if not already in a string + if (!current_tool->name.empty()) { + if (!needs_closing_quote) { + value_to_add = "\""; + needs_closing_quote = true; + } + } else { + if (!buffer_needs_closing_quote) { + value_to_add = "\""; + buffer_needs_closing_quote = true; + } + } + // Escape special characters in the string content + std::string escaped = json(value_content).dump(); + // Remove the surrounding quotes from the escaped string + if (escaped.size() >= 2 && escaped.front() == '"' && escaped.back() == '"') { + escaped = escaped.substr(1, escaped.size() - 2); + } + value_to_add += escaped; + } + } + } + + // If we have the tool name, add directly; otherwise buffer + if (!current_tool->name.empty()) { + current_tool->arguments += value_to_add; + } else { + if (args_buffer.empty()) { + args_buffer = "{"; + } + args_buffer += value_to_add; + } } if (is_arg_close && current_tool) { - if (needs_closing_quote) { - current_tool->arguments += "\""; - needs_closing_quote = false; + if (!current_tool->name.empty()) { + if (needs_closing_quote) { + current_tool->arguments += "\""; + needs_closing_quote = false; + } + } else { + if (buffer_needs_closing_quote) { + if (args_buffer.empty()) { + 
args_buffer = "{"; + } + args_buffer += "\""; + buffer_needs_closing_quote = false; + } } } - if (is_arg_json && current_tool) { - current_tool->arguments += std::string(trim_trailing_space(node.text)); - } - if (is_tool_close && current_tool) { - if (needs_closing_quote) { - current_tool->arguments += "\""; - needs_closing_quote = false; + if (!current_tool->name.empty()) { + if (needs_closing_quote) { + current_tool->arguments += "\""; + needs_closing_quote = false; + } + // Close the arguments object if using tagged format + if (!current_tool->arguments.empty() && current_tool->arguments.back() != '}') { + current_tool->arguments += "}"; + } + // If we have a pending tool call that wasn't added yet, add it now + if (pending_tool_call.has_value()) { + result.tool_calls.push_back(pending_tool_call.value()); + pending_tool_call.reset(); + } + } else { + // We're closing a tool without a name - flush the buffer + if (!args_buffer.empty()) { + current_tool->arguments = args_buffer; + args_buffer.clear(); + } + if (buffer_needs_closing_quote) { + current_tool->arguments += "\""; + buffer_needs_closing_quote = false; + } + // Close the arguments object if using tagged format + if (!current_tool->arguments.empty() && current_tool->arguments.back() != '}') { + current_tool->arguments += "}"; + } + // Don't add to result if no name - this prevents incomplete tool calls + pending_tool_call.reset(); } - current_tool->arguments += "}"; } } diff --git a/common/chat-peg-parser.h b/common/chat-peg-parser.h index b84cbed206..920d5cffd4 100644 --- a/common/chat-peg-parser.h +++ b/common/chat-peg-parser.h @@ -3,18 +3,28 @@ #include "chat.h" #include "peg-parser.h" +#include +#include + class common_chat_peg_builder : public common_peg_parser_builder { public: static constexpr const char * REASONING_BLOCK = "reasoning-block"; - static constexpr const char * REASONING = "reasoning"; - static constexpr const char * CONTENT = "content"; + static constexpr const char * REASONING = 
"reasoning"; + static constexpr const char * CONTENT = "content"; common_peg_parser reasoning_block(const common_peg_parser & p) { return tag(REASONING_BLOCK, p); } + common_peg_parser reasoning(const common_peg_parser & p) { return tag(REASONING, p); } + common_peg_parser content(const common_peg_parser & p) { return tag(CONTENT, p); } + + common_peg_parser tag_with_safe_content(const std::string & tag_name, + const std::string & marker, + const common_peg_parser & p); }; -inline common_peg_arena build_chat_peg_parser(const std::function & fn) { +inline common_peg_arena build_chat_peg_parser( + const std::function & fn) { common_chat_peg_builder builder; builder.set_root(fn(builder)); return builder.build(); @@ -26,80 +36,119 @@ class common_chat_peg_mapper { common_chat_peg_mapper(common_chat_msg & msg) : result(msg) {} + virtual ~common_chat_peg_mapper() = default; + virtual void from_ast(const common_peg_ast_arena & arena, const common_peg_parse_result & result); virtual void map(const common_peg_ast_node & node); }; -class common_chat_peg_native_builder : public common_chat_peg_builder { +struct content_structure; +struct tool_call_structure; + +class common_chat_peg_unified_builder : public common_chat_peg_builder { public: - static constexpr const char * TOOL = "tool"; - static constexpr const char * TOOL_OPEN = "tool-open"; - static constexpr const char * TOOL_CLOSE = "tool-close"; - static constexpr const char * TOOL_ID = "tool-id"; - static constexpr const char * TOOL_NAME = "tool-name"; - static constexpr const char * TOOL_ARGS = "tool-args"; - - common_peg_parser tool(const common_peg_parser & p) { return tag(TOOL, p); } - common_peg_parser tool_open(const common_peg_parser & p) { return atomic(tag(TOOL_OPEN, p)); } - common_peg_parser tool_close(const common_peg_parser & p) { return atomic(tag(TOOL_CLOSE, p)); } - common_peg_parser tool_id(const common_peg_parser & p) { return atomic(tag(TOOL_ID, p)); } - common_peg_parser tool_name(const 
common_peg_parser & p) { return atomic(tag(TOOL_NAME, p)); } - common_peg_parser tool_args(const common_peg_parser & p) { return tag(TOOL_ARGS, p); } -}; - -class common_chat_peg_native_mapper : public common_chat_peg_mapper { - common_chat_tool_call * current_tool; - - public: - common_chat_peg_native_mapper(common_chat_msg & msg) : common_chat_peg_mapper(msg) {} - - void map(const common_peg_ast_node & node) override; -}; - -inline common_peg_arena build_chat_peg_native_parser(const std::function & fn) { - common_chat_peg_native_builder builder; - builder.set_root(fn(builder)); - return builder.build(); -} - -class common_chat_peg_constructed_builder : public common_chat_peg_builder { - public: - static constexpr const char * TOOL = "tool"; - static constexpr const char * TOOL_OPEN = "tool-open"; - static constexpr const char * TOOL_CLOSE = "tool-close"; - static constexpr const char * TOOL_NAME = "tool-name"; - static constexpr const char * TOOL_ARG = "tool-arg"; - static constexpr const char * TOOL_ARG_OPEN = "tool-arg-open"; + // Tag constants + static constexpr const char * TOOL = "tool"; + static constexpr const char * TOOL_OPEN = "tool-open"; + static constexpr const char * TOOL_CLOSE = "tool-close"; + static constexpr const char * TOOL_ID = "tool-id"; + static constexpr const char * TOOL_NAME = "tool-name"; + static constexpr const char * TOOL_ARGS = "tool-args"; + static constexpr const char * TOOL_ARG = "tool-arg"; + static constexpr const char * TOOL_ARG_OPEN = "tool-arg-open"; static constexpr const char * TOOL_ARG_CLOSE = "tool-arg-close"; - static constexpr const char * TOOL_ARG_NAME = "tool-arg-name"; - static constexpr const char * TOOL_ARG_STRING_VALUE = "tool-arg-string-value"; - static constexpr const char * TOOL_ARG_JSON_VALUE = "tool-arg-json-value"; + static constexpr const char * TOOL_ARG_NAME = "tool-arg-name"; + static constexpr const char * TOOL_ARG_VALUE = "tool-arg-value"; + static constexpr const char * TOOL_ARG_STRING_VALUE = 
"tool-arg-string-value"; // For schema-declared string types + // Low-level tag methods common_peg_parser tool(const common_peg_parser & p) { return tag(TOOL, p); } + common_peg_parser tool_open(const common_peg_parser & p) { return atomic(tag(TOOL_OPEN, p)); } + common_peg_parser tool_close(const common_peg_parser & p) { return atomic(tag(TOOL_CLOSE, p)); } + + common_peg_parser tool_id(const common_peg_parser & p) { return atomic(tag(TOOL_ID, p)); } + common_peg_parser tool_name(const common_peg_parser & p) { return atomic(tag(TOOL_NAME, p)); } + + common_peg_parser tool_args(const common_peg_parser & p) { return tag(TOOL_ARGS, p); } + common_peg_parser tool_arg(const common_peg_parser & p) { return tag(TOOL_ARG, p); } + common_peg_parser tool_arg_open(const common_peg_parser & p) { return atomic(tag(TOOL_ARG_OPEN, p)); } + common_peg_parser tool_arg_close(const common_peg_parser & p) { return atomic(tag(TOOL_ARG_CLOSE, p)); } + common_peg_parser tool_arg_name(const common_peg_parser & p) { return atomic(tag(TOOL_ARG_NAME, p)); } + + common_peg_parser tool_arg_value(const common_peg_parser & p) { return tag(TOOL_ARG_VALUE, p); } + + // Use for schema-declared string types - won't be treated as potential JSON container common_peg_parser tool_arg_string_value(const common_peg_parser & p) { return tag(TOOL_ARG_STRING_VALUE, p); } - common_peg_parser tool_arg_json_value(const common_peg_parser & p) { return tag(TOOL_ARG_JSON_VALUE, p); } + + common_peg_parser tool_arg_json_value(const common_peg_parser & p) { return tag(TOOL_ARG_VALUE, p); } + + // High-level building methods + + // Build reasoning block based on ContentStructure + common_peg_parser build_reasoning_block(const content_structure & cs, + common_reasoning_format reasoning_format, + bool thinking_forced_open); + + // Build content block based on ContentStructure + common_peg_parser build_content_block(const content_structure & cs, + common_reasoning_format reasoning_format, + const std::string & 
tool_section_start = ""); + + // Build complete tool section based on ToolCallStructure + common_peg_parser build_tool_section(const tool_call_structure & ts, + const nlohmann::json & tools, + bool parallel_tool_calls, + bool force_tool_calls); + + // Build single function parser based on ToolCallStructure + common_peg_parser build_function(const tool_call_structure & ts, + const std::string & name, + const nlohmann::json & schema); + + // Build arguments parser based on ToolCallStructure + common_peg_parser build_arguments(const tool_call_structure & ts, const nlohmann::json & params); + + // Legacy-compatible helper for building standard JSON tool calls + // Used by tests and manual parsers + common_peg_parser standard_json_tools(const std::string & section_start, + const std::string & section_end, + const nlohmann::json & tools, + bool parallel_tool_calls, + bool force_tool_calls); + + // Legacy-compatible helper for building XML/tagged style tool calls + // Used by tests and manual parsers + common_peg_parser standard_constructed_tools(const std::map & markers, + const nlohmann::json & tools, + bool parallel_tool_calls, + bool force_tool_calls); }; -class common_chat_peg_constructed_mapper : public common_chat_peg_mapper { - common_chat_tool_call * current_tool; - int arg_count = 0; - bool needs_closing_quote = false; - - public: - common_chat_peg_constructed_mapper(common_chat_msg & msg) : common_chat_peg_mapper(msg) {} - - void map(const common_peg_ast_node & node) override; -}; - -inline common_peg_arena build_chat_peg_constructed_parser(const std::function & fn) { - common_chat_peg_constructed_builder builder; +inline common_peg_arena build_chat_peg_unified_parser( + const std::function & fn) { + common_chat_peg_unified_builder builder; builder.set_root(fn(builder)); return builder.build(); } + +class common_chat_peg_unified_mapper : public common_chat_peg_mapper { + std::optional pending_tool_call; // Tool call waiting for name + common_chat_tool_call * 
current_tool = nullptr; + int arg_count = 0; + bool needs_closing_quote = false; + std::string args_buffer; // Buffer to delay arguments until tool name is known + bool buffer_needs_closing_quote = false; // Track quote state for buffered args + + public: + common_chat_peg_unified_mapper(common_chat_msg & msg) : common_chat_peg_mapper(msg) {} + + void from_ast(const common_peg_ast_arena & arena, const common_peg_parse_result & parse_result_arg) override; + void map(const common_peg_ast_node & node) override; +}; diff --git a/common/chat.cpp b/common/chat.cpp index 07114acf33..0662e61732 100644 --- a/common/chat.cpp +++ b/common/chat.cpp @@ -1,8 +1,10 @@ #include "chat.h" -#include "chat-parser.h" + +#include "chat-auto-parser-helpers.h" +#include "chat-auto-parser.h" #include "chat-peg-parser.h" #include "common.h" -#include "json-partial.h" +#include "ggml.h" #include "json-schema-to-grammar.h" #include "log.h" #include "regex-partial.h" @@ -12,13 +14,13 @@ #include "jinja/runtime.h" #include "jinja/caps.h" -#include #include -#include #include #include -#include + #include +#include #include #include #include @@ -26,14 +28,26 @@ using json = nlohmann::ordered_json; static std::string format_time(const std::chrono::system_clock::time_point & now, const std::string & format) { - auto time = std::chrono::system_clock::to_time_t(now); - auto local_time = *std::localtime(&time); + auto time = std::chrono::system_clock::to_time_t(now); + auto local_time = *std::localtime(&time); std::ostringstream ss; ss << std::put_time(&local_time, format.c_str()); auto res = ss.str(); return res; } +static json safe_args_parse(const std::string & to_parse) { + std::string stripped = to_parse; + if (to_parse.length() >= 2 && to_parse.at(0) == '"' && to_parse.at(to_parse.length() - 1) == '"') { + stripped = to_parse.substr(1, to_parse.length() - 2); + } + try { + return json::parse(stripped); + } catch (const json::exception &) { + return stripped; + } +} + static std::string string_diff(const
std::string & last, const std::string & current) { if (last.empty()) { return current; @@ -122,7 +136,8 @@ json common_chat_msg::to_json_oaicompat(bool concat_typed_text) const { return jmsg; } -std::vector common_chat_msg_diff::compute_diffs(const common_chat_msg & msg_prv, const common_chat_msg & msg_new) { +std::vector common_chat_msg_diff::compute_diffs(const common_chat_msg & msg_prv, + const common_chat_msg & msg_new) { std::vector diffs; if (msg_new.tool_calls.size() > msg_prv.tool_calls.size()) { diffs.reserve(msg_new.tool_calls.size() - msg_prv.tool_calls.size() + 3); @@ -132,38 +147,56 @@ std::vector common_chat_msg_diff::compute_diffs(const comm // TODO: these can become expensive for long messages - how to optimize? if (msg_prv.reasoning_content != msg_new.reasoning_content) { - auto & diff = diffs.emplace_back(); + auto & diff = diffs.emplace_back(); diff.reasoning_content_delta = string_diff(msg_prv.reasoning_content, msg_new.reasoning_content); } if (msg_prv.content != msg_new.content) { - auto & diff = diffs.emplace_back(); + auto & diff = diffs.emplace_back(); diff.content_delta = string_diff(msg_prv.content, msg_new.content); } if (msg_new.tool_calls.size() < msg_prv.tool_calls.size()) { - throw std::runtime_error("Invalid diff: now finding less tool calls!"); + std::string err = "Invalid diff: now finding less tool calls!\n"; + err += " Previous (" + std::to_string(msg_prv.tool_calls.size()) + "):\n"; + for (const auto & tc : msg_prv.tool_calls) { + err += " - name: '" + tc.name + "', args: '" + tc.arguments + "'\n"; + } + err += " Current (" + std::to_string(msg_new.tool_calls.size()) + "):\n"; + for (const auto & tc : msg_new.tool_calls) { + err += " - name: '" + tc.name + "', args: '" + tc.arguments + "'\n"; + } + err += " Current msg text content:\n" + msg_new.content + "\n"; + throw std::runtime_error(err); } if (!msg_prv.tool_calls.empty()) { - const auto idx = msg_prv.tool_calls.size() - 1; + const auto idx = msg_prv.tool_calls.size() - 1; 
const auto & pref = msg_prv.tool_calls[idx]; const auto & newf = msg_new.tool_calls[idx]; - if (pref.name != newf.name) { - throw std::runtime_error("Invalid diff: tool call mismatch!"); + // Allow tool name to change during incremental parsing: + // - empty -> non-empty (initial discovery) + // - prefix -> longer string (name grows as more input is parsed) + if (pref.name != newf.name && !pref.name.empty() && !newf.name.empty()) { + // Check if one is a prefix of the other (for incremental parsing where names grow or shrink) + bool is_prefix = (newf.name.rfind(pref.name, 0) == 0); + if (!is_prefix) { + LOG_ERR("Tool call mismatch: prev='%s' new='%s'\n", pref.name.c_str(), newf.name.c_str()); + throw std::runtime_error("Invalid diff: tool call mismatch!"); + } } const auto args_diff = string_diff(pref.arguments, newf.arguments); - if (!args_diff.empty() || pref.id != newf.id) { - auto & diff = diffs.emplace_back(); + if (!args_diff.empty() || pref.id != newf.id || pref.name != newf.name) { + auto & diff = diffs.emplace_back(); diff.tool_call_index = idx; - if (pref.id != newf.id) { - diff.tool_call_delta.id = newf.id; + if (pref.id != newf.id || pref.name != newf.name) { + diff.tool_call_delta.id = newf.id; diff.tool_call_delta.name = newf.name; } diff.tool_call_delta.arguments = args_diff; } } for (size_t idx = msg_prv.tool_calls.size(); idx < msg_new.tool_calls.size(); ++idx) { - auto & diff = diffs.emplace_back(); + auto & diff = diffs.emplace_back(); diff.tool_call_index = idx; diff.tool_call_delta = msg_new.tool_calls[idx]; } @@ -173,94 +206,14 @@ std::vector common_chat_msg_diff::compute_diffs(const comm using chat_template_caps = jinja::caps; -struct common_chat_template { - jinja::program prog; - std::string bos_tok; - std::string eos_tok; - std::string src; - chat_template_caps caps; - - common_chat_template(const std::string & src, const std::string & bos_token, const std::string & eos_token) { - jinja::lexer lexer; - auto lexer_res = lexer.tokenize(src); 
- this->prog = jinja::parse_from_tokens(lexer_res); - - this->src = lexer_res.source; - this->bos_tok = bos_token; - this->eos_tok = eos_token; - - this->caps = jinja::caps_get(prog); - // LOG_INF("%s: caps:\n%s\n", __func__, this->caps.to_string().c_str()); - } - - const std::string & source() const { return src; } - const std::string & bos_token() const { return bos_tok; } - const std::string & eos_token() const { return eos_tok; } - - // TODO: this is ugly, refactor it somehow - json add_system(const json & messages, const std::string & system_prompt) const { - GGML_ASSERT(messages.is_array()); - auto msgs_copy = messages; - if (!caps.supports_system_role) { - if (msgs_copy.empty()) { - msgs_copy.insert(msgs_copy.begin(), json{ - {"role", "user"}, - {"content", system_prompt} - }); - } else { - auto & first_msg = msgs_copy[0]; - if (!first_msg.contains("content")) { - first_msg["content"] = ""; - } - first_msg["content"] = system_prompt + "\n\n" - + first_msg["content"].get(); - } - } else { - if (msgs_copy.empty() || msgs_copy[0].at("role") != "system") { - msgs_copy.insert(msgs_copy.begin(), json{ - {"role", "system"}, - {"content", system_prompt} - }); - } else if (msgs_copy[0].at("role") == "system") { - msgs_copy[0]["content"] = system_prompt; - } - } - return msgs_copy; - } - - chat_template_caps original_caps() const { - return caps; - } - -}; - struct common_chat_templates { bool add_bos; bool add_eos; - bool has_explicit_template; // Model had builtin template or template overridde was specified. - std::unique_ptr template_default; // always set (defaults to chatml) + bool has_explicit_template; // Model had builtin template or template overridde was specified. 
+ std::unique_ptr template_default; // always set (defaults to chatml) std::unique_ptr template_tool_use; }; -struct templates_params { - json messages; - json tools; - common_chat_tool_choice tool_choice; - json json_schema; - bool parallel_tool_calls; - common_reasoning_format reasoning_format; - bool stream; - std::string grammar; - bool add_generation_prompt = true; - bool enable_thinking = true; - std::chrono::system_clock::time_point now = std::chrono::system_clock::now(); - json extra_context; - bool add_bos; - bool add_eos; - bool is_inference = true; - bool mark_input = true; // whether to mark input strings in the jinja context -}; - common_chat_tool_choice common_chat_tool_choice_parse_oaicompat(const std::string & tool_choice) { if (tool_choice == "auto") { return COMMON_CHAT_TOOL_CHOICE_AUTO; @@ -276,13 +229,13 @@ common_chat_tool_choice common_chat_tool_choice_parse_oaicompat(const std::strin bool common_chat_templates_support_enable_thinking(const common_chat_templates * chat_templates) { common_chat_templates_inputs dummy_inputs; - common_chat_msg msg; - msg.role = "user"; - msg.content = "test"; - dummy_inputs.messages = {msg}; - dummy_inputs.enable_thinking = false; - const auto rendered_no_thinking = common_chat_templates_apply(chat_templates, dummy_inputs); - dummy_inputs.enable_thinking = true; + common_chat_msg msg; + msg.role = "user"; + msg.content = "test"; + dummy_inputs.messages = { msg }; + dummy_inputs.enable_thinking = false; + const auto rendered_no_thinking = common_chat_templates_apply(chat_templates, dummy_inputs); + dummy_inputs.enable_thinking = true; const auto rendered_with_thinking = common_chat_templates_apply(chat_templates, dummy_inputs); return rendered_no_thinking.prompt != rendered_with_thinking.prompt; } @@ -291,7 +244,6 @@ std::vector common_chat_msgs_parse_oaicompat(const json & messa std::vector msgs; try { - if (!messages.is_array()) { throw std::invalid_argument("Expected 'messages' to be an array, got " + 
messages.dump()); } @@ -307,7 +259,7 @@ std::vector common_chat_msgs_parse_oaicompat(const json & messa } msg.role = message.at("role"); - auto has_content = message.contains("content"); + auto has_content = message.contains("content"); auto has_tool_calls = message.contains("tool_calls"); if (has_content) { const auto & content = message.at("content"); @@ -328,7 +280,9 @@ std::vector common_chat_msgs_parse_oaicompat(const json & messa msg.content_parts.push_back(msg_part); } } else if (!content.is_null()) { - throw std::invalid_argument("Invalid 'content' type: expected string or array, got " + content.dump() + " (ref: https://github.com/ggml-org/llama.cpp/issues/8367)"); + throw std::invalid_argument("Invalid 'content' type: expected string or array, got " + + content.dump() + + " (ref: https://github.com/ggml-org/llama.cpp/issues/8367)"); } } if (has_tool_calls) { @@ -348,8 +302,13 @@ std::vector common_chat_msgs_parse_oaicompat(const json & messa if (!fc.contains("name")) { throw std::invalid_argument("Missing tool call name: " + tool_call.dump()); } - tc.name = fc.at("name"); - tc.arguments = fc.at("arguments"); + tc.name = fc.at("name"); + const auto & args = fc.at("arguments"); + if (args.is_string()) { + tc.arguments = args; + } else { + tc.arguments = args.dump(); + } if (tool_call.contains("id")) { tc.id = tool_call.at("id"); } @@ -357,7 +316,9 @@ std::vector common_chat_msgs_parse_oaicompat(const json & messa } } if (!has_content && !has_tool_calls) { - throw std::invalid_argument("Expected 'content' or 'tool_calls' (ref: https://github.com/ggml-org/llama.cpp/issues/8367 & https://github.com/ggml-org/llama.cpp/issues/12279)"); + throw std::invalid_argument( + "Expected 'content' or 'tool_calls' (ref: https://github.com/ggml-org/llama.cpp/issues/8367 & " + "https://github.com/ggml-org/llama.cpp/issues/12279)"); } if (message.contains("reasoning_content")) { msg.reasoning_content = message.at("reasoning_content"); @@ -463,12 +424,13 @@ json 
common_chat_tools_to_json_oaicompat(const std::vector & t auto result = json::array(); for (const auto & tool : tools) { result.push_back({ - {"type", "function"}, - {"function", { - {"name", tool.name}, - {"description", tool.description}, - {"parameters", json::parse(tool.parameters)}, - }}, + { "type", "function" }, + { "function", + { + { "name", tool.name }, + { "description", tool.description }, + { "parameters", json::parse(tool.parameters) }, + } }, }); } return result; @@ -486,16 +448,20 @@ json common_chat_msg_diff_to_json_oaicompat(const common_chat_msg_diff & diff) { json tool_call; tool_call["index"] = diff.tool_call_index; if (!diff.tool_call_delta.id.empty()) { - tool_call["id"] = diff.tool_call_delta.id; + tool_call["id"] = diff.tool_call_delta.id; tool_call["type"] = "function"; } - json function = json::object(); - if (!diff.tool_call_delta.name.empty()) { - function["name"] = diff.tool_call_delta.name; + if (!diff.tool_call_delta.name.empty() || !diff.tool_call_delta.arguments.empty()) { + json function = json::object(); + if (!diff.tool_call_delta.name.empty()) { + function["name"] = diff.tool_call_delta.name; + } + if (!diff.tool_call_delta.arguments.empty()) { + function["arguments"] = diff.tool_call_delta.arguments; + } + tool_call["function"] = function; } - function["arguments"] = diff.tool_call_delta.arguments; - tool_call["function"] = function; - delta["tool_calls"] = json::array({tool_call}); + delta["tool_calls"] = json::array({ tool_call }); } return delta; } @@ -504,13 +470,13 @@ bool common_chat_verify_template(const std::string & tmpl, bool use_jinja) { if (use_jinja) { try { common_chat_msg msg; - msg.role = "user"; + msg.role = "user"; msg.content = "test"; auto tmpls = common_chat_templates_init(/* model= */ nullptr, tmpl); common_chat_templates_inputs inputs; - inputs.messages = {msg}; + inputs.messages = { msg }; common_chat_templates_apply(tmpls.get(), inputs); return true; @@ -519,28 +485,28 @@ bool 
common_chat_verify_template(const std::string & tmpl, bool use_jinja) { return false; } } - llama_chat_message chat[] = {{"user", "test"}}; + llama_chat_message chat[] = { + { "user", "test" } + }; const int res = llama_chat_apply_template(tmpl.c_str(), chat, 1, true, nullptr, 0); return res >= 0; } -std::string common_chat_format_single( - const struct common_chat_templates * tmpls, - const std::vector & past_msg, - const common_chat_msg & new_msg, - bool add_ass, - bool use_jinja) { - +std::string common_chat_format_single(const struct common_chat_templates * tmpls, + const std::vector & past_msg, + const common_chat_msg & new_msg, + bool add_ass, + bool use_jinja) { common_chat_templates_inputs inputs; inputs.use_jinja = use_jinja; - inputs.add_bos = tmpls->add_bos; - inputs.add_eos = tmpls->add_eos; + inputs.add_bos = tmpls->add_bos; + inputs.add_eos = tmpls->add_eos; std::string fmt_past_msg; if (!past_msg.empty()) { - inputs.messages = past_msg; + inputs.messages = past_msg; inputs.add_generation_prompt = false; - fmt_past_msg = common_chat_templates_apply(tmpls, inputs).prompt; + fmt_past_msg = common_chat_templates_apply(tmpls, inputs).prompt; } std::ostringstream ss; // if the past_msg ends with a newline, we must preserve it in the formatted version @@ -550,37 +516,39 @@ std::string common_chat_format_single( // format chat with new_msg inputs.messages.push_back(new_msg); inputs.add_generation_prompt = add_ass; - auto fmt_new_msg = common_chat_templates_apply(tmpls, inputs).prompt; + auto fmt_new_msg = common_chat_templates_apply(tmpls, inputs).prompt; // get the diff part ss << fmt_new_msg.substr(fmt_past_msg.size(), fmt_new_msg.size() - fmt_past_msg.size()); return ss.str(); } -std::string common_chat_format_example(const struct common_chat_templates * tmpls, bool use_jinja, const std::map & chat_template_kwargs) { +std::string common_chat_format_example(const struct common_chat_templates * tmpls, + bool use_jinja, + const std::map & 
chat_template_kwargs) { common_chat_templates_inputs inputs; - inputs.use_jinja = use_jinja; - inputs.add_bos = tmpls->add_bos; - inputs.add_eos = tmpls->add_eos; + inputs.use_jinja = use_jinja; + inputs.add_bos = tmpls->add_bos; + inputs.add_eos = tmpls->add_eos; inputs.chat_template_kwargs = chat_template_kwargs; - auto add_simple_msg = [&](auto role, auto content) { + auto add_simple_msg = [&](auto role, auto content) { common_chat_msg msg; - msg.role = role; + msg.role = role; msg.content = content; inputs.messages.push_back(msg); }; - add_simple_msg("system", "You are a helpful assistant"); - add_simple_msg("user", "Hello"); + add_simple_msg("system", "You are a helpful assistant"); + add_simple_msg("user", "Hello"); add_simple_msg("assistant", "Hi there"); - add_simple_msg("user", "How are you?"); + add_simple_msg("user", "How are you?"); return common_chat_templates_apply(tmpls, inputs).prompt; } -#define CHATML_TEMPLATE_SRC \ - "{%- for message in messages -%}\n" \ +#define CHATML_TEMPLATE_SRC \ + "{%- for message in messages -%}\n" \ " {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>\n' -}}\n" \ - "{%- endfor -%}\n" \ - "{%- if add_generation_prompt -%}\n" \ - " {{- '<|im_start|>assistant\n' -}}\n" \ + "{%- endfor -%}\n" \ + "{%- if add_generation_prompt -%}\n" \ + " {{- '<|im_start|>assistant\n' -}}\n" \ "{%- endif -%}" void common_chat_templates_free(struct common_chat_templates * tmpls) { @@ -598,19 +566,16 @@ std::string common_chat_templates_source(const struct common_chat_templates * tm return tmpls->template_tool_use->source(); } return ""; - } else { - LOG_DBG("%s: unknown template variant: %s\n", __func__, variant.c_str()); } + LOG_DBG("%s: unknown template variant: %s\n", __func__, variant.c_str()); } return tmpls->template_default->source(); } -common_chat_templates_ptr common_chat_templates_init( - const struct llama_model * model, - const std::string & chat_template_override, - const std::string & bos_token_override, - 
const std::string & eos_token_override) -{ +common_chat_templates_ptr common_chat_templates_init(const struct llama_model * model, + const std::string & chat_template_override, + const std::string & bos_token_override, + const std::string & eos_token_override) { std::string default_template_src; std::string template_tool_use_src; @@ -619,7 +584,7 @@ common_chat_templates_ptr common_chat_templates_init( GGML_ASSERT(model != nullptr); const auto * str = llama_model_chat_template(model, /* name */ nullptr); if (str) { - default_template_src = str; + default_template_src = str; has_explicit_template = true; } str = llama_model_chat_template(model, /* name */ "tool_use"); @@ -641,34 +606,40 @@ common_chat_templates_ptr common_chat_templates_init( // TODO @ngxson : this is a temporary hack to prevent chat template from throwing an error // Ref: https://github.com/ggml-org/llama.cpp/pull/15230#issuecomment-3173959633 if (default_template_src.find("<|channel|>") != std::string::npos - // search for the error message and patch it - && default_template_src.find("in message.content or") != std::string::npos) { + // search for the error message and patch it + && default_template_src.find("in message.content or") != std::string::npos) { string_replace_all(default_template_src, - "{%- if \"<|channel|>analysis<|message|>\" in message.content or \"<|channel|>final<|message|>\" in message.content %}", - "{%- if false %}"); + "{%- if \"<|channel|>analysis<|message|>\" in message.content or " + "\"<|channel|>final<|message|>\" in message.content %}", + "{%- if false %}"); } // TODO @aldehir : this is a temporary fix, pending Minja changes // Ref: https://github.com/ggml-org/llama.cpp/pull/17713#issuecomment-3631342664 if (default_template_src.find("[TOOL_CALLS]") != std::string::npos - // search for the error message and patch it - && default_template_src.find("if (message['content'] is none or") != std::string::npos) { + // search for the error message and patch it + && 
default_template_src.find("if (message['content'] is none or") != std::string::npos) { string_replace_all(default_template_src, - "{%- if (message['content'] is none or message['content'] == '' or message['content']|length == 0) and (message['tool_calls'] is not defined or message['tool_calls'] is none or message['tool_calls']|length == 0) %}", - "{%- if false %}"); + "{%- if (message['content'] is none or message['content'] == '' or " + "message['content']|length == 0) and (message['tool_calls'] is not defined or " + "message['tool_calls'] is none or message['tool_calls']|length == 0) %}", + "{%- if false %}"); } std::string token_bos = bos_token_override; std::string token_eos = eos_token_override; - bool add_bos = false; - bool add_eos = false; + bool add_bos = false; + bool add_eos = false; if (model) { - const auto * vocab = llama_model_get_vocab(model); - const auto get_token = [&](llama_token token, const char * name, const char * jinja_variable_name) { + const auto * vocab = llama_model_get_vocab(model); + const auto get_token = [&](llama_token token, const char * name, const char * jinja_variable_name) { if (token == LLAMA_TOKEN_NULL) { - if (default_template_src.find(jinja_variable_name) != std::string::npos - || template_tool_use_src.find(jinja_variable_name) != std::string::npos) { - LOG_WRN("common_chat_templates_init: warning: vocab does not have a %s token, jinja template won't work as intended.\n", name); + if (default_template_src.find(jinja_variable_name) != std::string::npos || + template_tool_use_src.find(jinja_variable_name) != std::string::npos) { + LOG_WRN( + "common_chat_templates_init: warning: vocab does not have a %s token, jinja template won't " + "work as intended.\n", + name); } return std::string(); } @@ -676,13 +647,13 @@ common_chat_templates_ptr common_chat_templates_init( }; token_bos = get_token(llama_vocab_bos(vocab), "BOS", "bos_token"); token_eos = get_token(llama_vocab_eos(vocab), "EOS", "eos_token"); - add_bos = 
llama_vocab_get_add_bos(vocab); - add_eos = llama_vocab_get_add_eos(vocab); + add_bos = llama_vocab_get_add_bos(vocab); + add_eos = llama_vocab_get_add_eos(vocab); } common_chat_templates_ptr tmpls(new common_chat_templates()); tmpls->has_explicit_template = has_explicit_template; - tmpls->add_bos = add_bos; - tmpls->add_eos = add_eos; + tmpls->add_bos = add_bos; + tmpls->add_eos = add_eos; try { tmpls->template_default = std::make_unique(default_template_src, token_bos, token_eos); } catch (const std::exception & e) { @@ -703,36 +674,12 @@ common_chat_templates_ptr common_chat_templates_init( const char * common_chat_format_name(common_chat_format format) { switch (format) { - case COMMON_CHAT_FORMAT_CONTENT_ONLY: return "Content-only"; - case COMMON_CHAT_FORMAT_GENERIC: return "Generic"; - case COMMON_CHAT_FORMAT_MISTRAL_NEMO: return "Mistral Nemo"; - case COMMON_CHAT_FORMAT_MAGISTRAL: return "Magistral"; - case COMMON_CHAT_FORMAT_LLAMA_3_X: return "Llama 3.x"; - case COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS: return "Llama 3.x with builtin tools"; - case COMMON_CHAT_FORMAT_DEEPSEEK_R1: return "DeepSeek R1"; - case COMMON_CHAT_FORMAT_FIREFUNCTION_V2: return "FireFunction v2"; - case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2: return "Functionary v3.2"; - case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1: return "Functionary v3.1 Llama 3.1"; - case COMMON_CHAT_FORMAT_DEEPSEEK_V3_1: return "DeepSeek V3.1"; - case COMMON_CHAT_FORMAT_HERMES_2_PRO: return "Hermes 2 Pro"; - case COMMON_CHAT_FORMAT_COMMAND_R7B: return "Command R7B"; - case COMMON_CHAT_FORMAT_GRANITE: return "Granite"; - case COMMON_CHAT_FORMAT_GPT_OSS: return "GPT-OSS"; - case COMMON_CHAT_FORMAT_SEED_OSS: return "Seed-OSS"; - case COMMON_CHAT_FORMAT_NEMOTRON_V2: return "Nemotron V2"; - case COMMON_CHAT_FORMAT_APERTUS: return "Apertus"; - case COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS: return "LFM2 with JSON tools"; - case COMMON_CHAT_FORMAT_MINIMAX_M2: return "MiniMax-M2"; - case 
COMMON_CHAT_FORMAT_GLM_4_5: return "GLM 4.5"; - case COMMON_CHAT_FORMAT_KIMI_K2: return "Kimi K2"; - case COMMON_CHAT_FORMAT_QWEN3_CODER_XML: return "Qwen3 Coder"; - case COMMON_CHAT_FORMAT_APRIEL_1_5: return "Apriel 1.5"; - case COMMON_CHAT_FORMAT_XIAOMI_MIMO: return "Xiaomi MiMo"; - case COMMON_CHAT_FORMAT_SOLAR_OPEN: return "Solar Open"; - case COMMON_CHAT_FORMAT_EXAONE_MOE: return "EXAONE MoE"; - case COMMON_CHAT_FORMAT_PEG_SIMPLE: return "peg-simple"; - case COMMON_CHAT_FORMAT_PEG_NATIVE: return "peg-native"; - case COMMON_CHAT_FORMAT_PEG_CONSTRUCTED: return "peg-constructed"; + case COMMON_CHAT_FORMAT_CONTENT_ONLY: + return "Content-only"; + case COMMON_CHAT_FORMAT_PEG_SIMPLE: + return "peg-simple"; + case COMMON_CHAT_FORMAT_PEG_NATIVE: + return "peg-native"; default: throw std::runtime_error("Unknown chat format"); } @@ -740,10 +687,14 @@ const char * common_chat_format_name(common_chat_format format) { const char * common_reasoning_format_name(common_reasoning_format format) { switch (format) { - case COMMON_REASONING_FORMAT_NONE: return "none"; - case COMMON_REASONING_FORMAT_AUTO: return "auto"; - case COMMON_REASONING_FORMAT_DEEPSEEK: return "deepseek"; - case COMMON_REASONING_FORMAT_DEEPSEEK_LEGACY: return "deepseek-legacy"; + case COMMON_REASONING_FORMAT_NONE: + return "none"; + case COMMON_REASONING_FORMAT_AUTO: + return "auto"; + case COMMON_REASONING_FORMAT_DEEPSEEK: + return "deepseek"; + case COMMON_REASONING_FORMAT_DEEPSEEK_LEGACY: + return "deepseek-legacy"; default: throw std::runtime_error("Unknown reasoning format"); } @@ -752,11 +703,14 @@ const char * common_reasoning_format_name(common_reasoning_format format) { common_reasoning_format common_reasoning_format_from_name(const std::string & format) { if (format == "none") { return COMMON_REASONING_FORMAT_NONE; - } else if (format == "auto") { + } + if (format == "auto") { return COMMON_REASONING_FORMAT_AUTO; - } else if (format == "deepseek") { + } + if (format == "deepseek") { return 
COMMON_REASONING_FORMAT_DEEPSEEK; - } else if (format == "deepseek-legacy") { + } + if (format == "deepseek-legacy") { return COMMON_REASONING_FORMAT_DEEPSEEK_LEGACY; } throw std::runtime_error("Unknown reasoning format: " + format); @@ -772,7 +726,8 @@ static void foreach_function(const json & tools, const std::function & fn) { +static void foreach_parameter(const json & function, + const std::function & fn) { if (!function.contains("parameters") || !function.at("parameters").is_object()) { return; } @@ -780,7 +735,7 @@ static void foreach_parameter(const json & function, const std::function required; if (params.contains("required") && params.at("required").is_array()) { params.at("required").get_to(required); @@ -791,19 +746,19 @@ static void foreach_parameter(const json & function, const std::function & messages_override = std::nullopt, - const std::optional & tools_override = std::nullopt, - const std::optional & additional_context = std::nullopt) -{ + const std::optional & messages_override, + const std::optional & tools_override, + const std::optional & additional_context) { jinja::context ctx(tmpl.source()); nlohmann::ordered_json inp = nlohmann::ordered_json{ {"messages", messages_override.has_value() ? *messages_override : inputs.messages}, {"bos_token", tmpl.bos_token()}, {"eos_token", tmpl.eos_token()}, + {"enable_thinking", inputs.enable_thinking}, }; if (tools_override.has_value() || !inputs.tools.empty()) { inp["tools"] = tools_override.has_value() ? 
*tools_override : inputs.tools; @@ -829,7 +784,7 @@ static std::string apply( // render jinja::runtime runtime(ctx); const jinja::value results = runtime.execute(tmpl.prog); - auto parts = runtime.gather_string_parts(results); + auto parts = jinja::runtime::gather_string_parts(results); std::string result = parts->as_string().str(); @@ -843,265 +798,8 @@ static std::string apply( return result; } -static common_chat_params common_chat_params_init_generic(const common_chat_template & tmpl, const struct templates_params & inputs) { - common_chat_params data; - - auto tool_call_schemas = json::array(); - foreach_function(inputs.tools, [&](const json & tool) { - const auto & function = tool.at("function"); - auto tool_schema = json { - {"type", "object"}, - {"properties", { - {"name", { - {"type", "string"}, - {"const", function.at("name")}, - }}, - {"arguments", function.at("parameters")}, - }}, - {"required", json::array({"name", "arguments"})}, - }; - if (function.contains("description")) { - tool_schema["description"] = function.at("description"); - } - if (inputs.parallel_tool_calls) { - tool_schema.at("properties")["id"] = { - {"type", "string"}, - {"minLength", 4}, - }; - tool_schema.at("required").push_back("id"); - } - tool_call_schemas.emplace_back(tool_schema); - }); - const auto tool_call = - inputs.parallel_tool_calls - ? json { - {"type", "object"}, - {"properties", { - {"tool_calls", { - {"type", "array"}, - {"items", tool_call_schemas.size() == 1 ? tool_call_schemas[0] : json { - {"anyOf", tool_call_schemas}, - }}, - {"minItems", 1}, - }}, - }}, - {"required", json::array({"tool_calls"})}, - } - : json { - {"type", "object"}, - {"properties", { - {"tool_call", tool_call_schemas.size() == 1 ? tool_call_schemas[0] : json { - {"anyOf", tool_call_schemas}, - }}, - }}, - {"required", json::array({"tool_call"})}, - }; - const auto schema = - inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED - ? 
json { - {"anyOf", json::array({ - tool_call, - { - {"type", "object"}, - {"properties", { - {"response", inputs.json_schema.is_null() - ? json {{"type", "string"}} - : inputs.json_schema - }, - }}, - {"required", json::array({"response"})}, - }, - })} - } - : tool_call; - - data.grammar_lazy = false; - data.grammar = build_grammar([&](const common_grammar_builder & builder) { - builder.add_schema("root", schema); - }); - - auto tweaked_messages = tmpl.add_system( - inputs.messages, - "Respond in JSON format, either with `tool_call` (a request to call tools) or with `response` reply to the user's request"); - - // ensure all messages has "content" field - for (auto & message : tweaked_messages) { - if (!message.contains("content") || message["content"].is_null()) { - message["content"] = ""; - } - } - - data.prompt = apply(tmpl, inputs, /* messages_override= */ tweaked_messages); - data.format = COMMON_CHAT_FORMAT_GENERIC; - return data; -} - -static common_chat_params common_chat_params_init_mistral_nemo(const common_chat_template & tmpl, const struct templates_params & inputs) { - common_chat_params data; - data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED; - data.grammar = build_grammar([&](const common_grammar_builder & builder) { - auto schemas = json::array(); - foreach_function(inputs.tools, [&](const json & tool) { - const auto & function = tool.at("function"); - schemas.push_back({ - {"type", "object"}, - {"properties", { - // Important note: the model is probably trained to take a JSON stringified arguments value. - // It's hard to constrain that for now (while reusing the JSON schema conversion), so we're just expecting a plain object. - {"name", { - {"type", "string"}, - {"const", function.at("name")}, - }}, - {"arguments", function.at("parameters")}, - {"id", { - {"type", "string"}, - // Nemo's template expects a 9-character alphanumeric ID. 
- {"pattern", "^[a-zA-Z0-9]{9}$"}, - }}, - }}, - {"required", json::array({"name", "arguments", "id"})}, - }); - }); - auto schema = json { - {"type", "array"}, - {"items", schemas.size() == 1 ? schemas[0] : json {{"anyOf", schemas}}}, - {"minItems", 1}, - }; - if (!inputs.parallel_tool_calls) { - schema["maxItems"] = 1; - } - builder.add_rule("root", "\"[TOOL_CALLS]\" " + builder.add_schema("tool_calls", schema)); - }); - data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "[TOOL_CALLS]"}); - data.preserved_tokens = { - "[TOOL_CALLS]", - }; - data.prompt = apply(tmpl, inputs); - data.format = COMMON_CHAT_FORMAT_MISTRAL_NEMO; - return data; -} - - -// Case-insensitive find -static size_t ifind_string(const std::string & haystack, const std::string & needle, size_t pos = 0) { - auto it = std::search( - haystack.begin() + pos, haystack.end(), - needle.begin(), needle.end(), - [](char a, char b) { return std::tolower(a) == std::tolower(b); } - ); - return (it == haystack.end()) ? 
std::string::npos : std::distance(haystack.begin(), it); -} - -static common_chat_params common_chat_params_init_lfm2(const common_chat_template & tmpl, const struct templates_params & inputs) { - common_chat_params data; - const auto is_json_schema_provided = !inputs.json_schema.is_null(); - const auto is_grammar_provided = !inputs.grammar.empty(); - const auto are_tools_provided = inputs.tools.is_array() && !inputs.tools.empty(); - - // the logic requires potentially modifying the messages - auto tweaked_messages = inputs.messages; - - auto replace_json_schema_marker = [](json & messages) -> bool { - static std::string marker1 = "force json schema.\n"; - static std::string marker2 = "force json schema."; - - if (messages.empty() || messages.at(0).at("role") != "system") { - return false; - } - - std::string content = messages.at(0).at("content"); - - for (const auto & marker : {marker1, marker2}) { - const auto pos = ifind_string(content, marker); - if (pos != std::string::npos) { - content.replace(pos, marker.length(), ""); - // inject modified content back into the messages - messages.at(0).at("content") = content; - return true; - } - } - - return false; - }; - - // Lfm2 model does not natively work with json, but can generally understand the tools structure - // - // Example of the pytorch dialog structure: - // <|startoftext|><|im_start|>system - // List of tools: <|tool_list_start|>[{"name": "get_candidate_status", "description": "Retrieves the current status of a candidate in the recruitment process", "parameters": {"type": "object", "properties": {"candidate_id": {"type": "string", "description": "Unique identifier for the candidate"}}, "required": ["candidate_id"]}}]<|tool_list_end|><|im_end|> - // <|im_start|>user - // What is the current status of candidate ID 12345?<|im_end|> - // <|im_start|>assistant - // <|tool_call_start|>[get_candidate_status(candidate_id="12345")]<|tool_call_end|>Checking the current status of candidate ID 12345.<|im_end|> - // 
<|im_start|>tool - // <|tool_response_start|>{"candidate_id": "12345", "status": "Interview Scheduled", "position": "Clinical Research Associate", "date": "2023-11-20"}<|tool_response_end|><|im_end|> - // <|im_start|>assistant - // The candidate with ID 12345 is currently in the "Interview Scheduled" stage for the position of Clinical Research Associate, with an interview date set for 2023-11-20.<|im_end|> - // - // For the llama server compatibility with json tools semantic, - // the client can add "Follow json schema." line into the system message prompt to force the json output. - // - if (are_tools_provided && (is_json_schema_provided || is_grammar_provided)) { - // server/utils.hpp prohibits that branch for the custom grammar anyways - throw std::runtime_error("Tools call must not use \"json_schema\" or \"grammar\", use non-tool invocation if you want to use custom grammar"); - } else if (are_tools_provided && replace_json_schema_marker(tweaked_messages)) { - LOG_INF("%s: Using tools to build a grammar\n", __func__); - - data.grammar = build_grammar([&](const common_grammar_builder & builder) { - auto schemas = json::array(); - foreach_function(inputs.tools, [&](const json & tool) { - const auto & function = tool.at("function"); - schemas.push_back({ - {"type", "object"}, - {"properties", { - {"name", { - {"type", "string"}, - {"const", function.at("name")}, - }}, - {"arguments", function.at("parameters")}, - }}, - {"required", json::array({"name", "arguments", "id"})}, - }); - }); - auto schema = json { - {"type", "array"}, - {"items", schemas.size() == 1 ? 
schemas[0] : json {{"anyOf", schemas}}}, - {"minItems", 1}, - }; - if (!inputs.parallel_tool_calls) { - schema["maxItems"] = 1; - } - - builder.add_rule("root", "\"<|tool_call_start|>\"" + builder.add_schema("tool_calls", schema) + "\"<|tool_call_end|>\""); - }); - // model has no concept of tool selection mode choice, - // if the system prompt rendered correctly it will produce a tool call - // the grammar goes inside the tool call body - data.grammar_lazy = true; - data.grammar_triggers = {{COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL, "\\s*<\\|tool_call_start\\|>\\s*\\["}}; - data.preserved_tokens = {"<|tool_call_start|>", "<|tool_call_end|>"}; - data.format = COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS; - } else if (are_tools_provided && (!is_json_schema_provided && !is_grammar_provided)) { - LOG_INF("%s: Using tools without json schema or grammar\n", __func__); - // output those tokens - data.preserved_tokens = {"<|tool_call_start|>", "<|tool_call_end|>"}; - } else if (is_json_schema_provided) { - LOG_INF("%s: Using provided json schema to build a grammar\n", __func__); - data.grammar = json_schema_to_grammar(inputs.json_schema); - } else if (is_grammar_provided) { - LOG_INF("%s: Using provided grammar\n", __func__); - data.grammar = inputs.grammar; - } else { - LOG_INF("%s: Using content relying on the template\n", __func__); - } - - data.prompt = apply(tmpl, inputs, /* messages_override= */ tweaked_messages); - LOG_DBG("%s: Prompt: %s\n", __func__, data.prompt.c_str()); - - return data; -} - -static common_chat_params common_chat_params_init_ministral_3(const common_chat_template & tmpl, const struct templates_params & inputs) { +static common_chat_params common_chat_params_init_ministral_3(const common_chat_template & tmpl, + const struct templates_params & inputs) { common_chat_params data; // Build up messages to follow the format: https://huggingface.co/mistralai/Ministral-3-14B-Reasoning-2512/blob/main/chat_template.jinja @@ -1119,8 +817,8 @@ static 
common_chat_params common_chat_params_init_ministral_3(const common_chat_ // If message contains `reasoning_content`, add it as a block of type `thinking` if (msg.contains("reasoning_content") && msg.at("reasoning_content").is_string()) { content.push_back({ - {"type", "thinking"}, - {"thinking", msg.at("reasoning_content").get()}, + { "type", "thinking" }, + { "thinking", msg.at("reasoning_content").get() }, }); } @@ -1128,8 +826,8 @@ static common_chat_params common_chat_params_init_ministral_3(const common_chat_ if (msg.contains("content")) { if (msg.at("content").is_string()) { content.push_back({ - {"type", "text"}, - {"text", msg.at("content").get()}, + { "type", "text" }, + { "text", msg.at("content").get() }, }); } else if (msg.at("content").is_array()) { auto blocks = msg.at("content"); @@ -1137,18 +835,18 @@ static common_chat_params common_chat_params_init_ministral_3(const common_chat_ } } - auto adjusted = msg; + auto adjusted = msg; adjusted["content"] = content; adjusted.erase("reasoning_content"); adjusted_messages.push_back(adjusted); } - auto has_tools = inputs.tools.is_array() && !inputs.tools.empty(); + auto has_tools = inputs.tools.is_array() && !inputs.tools.empty(); auto extract_reasoning = inputs.reasoning_format != COMMON_REASONING_FORMAT_NONE; - auto include_grammar = true; + auto include_grammar = true; - data.prompt = apply(tmpl, inputs, /* messages_override = */ adjusted_messages); - data.format = COMMON_CHAT_FORMAT_PEG_NATIVE; + data.prompt = common_chat_template_direct_apply(tmpl, inputs, /* messages_override = */ adjusted_messages); + data.format = COMMON_CHAT_FORMAT_PEG_NATIVE; data.preserved_tokens = { "[THINK]", "[/THINK]", @@ -1156,13 +854,15 @@ static common_chat_params common_chat_params_init_ministral_3(const common_chat_ "[ARGS]", }; - auto parser = build_chat_peg_native_parser([&](common_chat_peg_native_builder & p) { - auto reasoning = extract_reasoning ? 
p.optional("[THINK]" + p.reasoning(p.until("[/THINK]")) + "[/THINK]") : p.eps(); + auto parser = build_chat_peg_unified_parser([&](common_chat_peg_unified_builder & p) { + auto reasoning = + extract_reasoning ? p.optional("[THINK]" + p.reasoning(p.until("[/THINK]")) + "[/THINK]") : p.eps(); // Response format parser if (inputs.json_schema.is_object() && !inputs.json_schema.empty()) { // Ministral wants to emit json surrounded by code fences - return reasoning << "```json" << p.content(p.schema(p.json(), "response-format", inputs.json_schema)) << "```"; + return reasoning << "```json" << p.content(p.schema(p.json(), "response-format", inputs.json_schema)) + << "```"; } // Tool call parser @@ -1170,17 +870,16 @@ static common_chat_params common_chat_params_init_ministral_3(const common_chat_ auto tool_choice = p.choice(); foreach_function(inputs.tools, [&](const json & tool) { const auto & function = tool.at("function"); - std::string name = function.at("name"); - const auto & schema = function.at("parameters"); + std::string name = function.at("name"); + const auto & schema = function.at("parameters"); - tool_choice |= p.rule("tool-" + name, - p.tool_open(p.tool_name(p.literal(name)) + "[ARGS]") - + p.tool_args(p.schema(p.json(), "tool-" + name + "-schema", schema)) - ); + tool_choice |= + p.rule("tool-" + name, p.tool_open(p.tool_name(p.literal(name)) + "[ARGS]") + + p.tool_args(p.schema(p.json(), "tool-" + name + "-schema", schema))); }); - auto min_calls = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED ? 1 : 0; - auto max_calls = inputs.parallel_tool_calls ? -1 : 1; + auto min_calls = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED ? 1 : 0; + auto max_calls = inputs.parallel_tool_calls ? 
-1 : 1; auto tool_calls = p.trigger_rule("tool-call", p.repeat("[TOOL_CALLS]" + tool_choice, min_calls, max_calls)); return reasoning << p.content(p.until("[TOOL_CALLS]")) << tool_calls; @@ -1199,838 +898,32 @@ static common_chat_params common_chat_params_init_ministral_3(const common_chat_ data.grammar = build_grammar([&](const common_grammar_builder & builder) { foreach_function(inputs.tools, [&](const json & tool) { const auto & function = tool.at("function"); - auto schema = function.at("parameters"); + auto schema = function.at("parameters"); builder.resolve_refs(schema); }); parser.build_grammar(builder, data.grammar_lazy); }); data.grammar_triggers = { - {COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "[TOOL_CALLS]"} + { COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "[TOOL_CALLS]" } }; } return data; } -static common_chat_params common_chat_params_init_magistral(const common_chat_template & tmpl, const struct templates_params & inputs) { - common_chat_params data; - data.prompt = apply(tmpl, inputs); - data.format = COMMON_CHAT_FORMAT_MAGISTRAL; - data.preserved_tokens = { - "[THINK]", - "[/THINK]", - }; - - if (inputs.tools.is_array() && !inputs.tools.empty()) { - data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED; - data.grammar = build_grammar([&](const common_grammar_builder & builder) { - auto schemas = json::array(); - foreach_function(inputs.tools, [&](const json & tool) { - const auto & function = tool.at("function"); - schemas.push_back({ - {"type", "object"}, - {"properties", { - {"name", { - {"type", "string"}, - {"const", function.at("name")}, - }}, - {"arguments", function.at("parameters")}, - {"id", { - {"type", "string"}, - {"pattern", "^[a-zA-Z0-9]{9}$"}, - }}, - }}, - {"required", json::array({"name", "arguments", "id"})}, - }); - }); - auto schema = json { - {"type", "array"}, - {"items", schemas.size() == 1 ? 
schemas[0] : json {{"anyOf", schemas}}}, - {"minItems", 1}, - }; - if (!inputs.parallel_tool_calls) { - schema["maxItems"] = 1; - } - builder.add_rule("root", "\"[TOOL_CALLS]\" " + builder.add_schema("tool_calls", schema)); - }); - data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "[TOOL_CALLS]"}); - data.preserved_tokens.push_back("[TOOL_CALLS]"); - } else { - data.grammar_lazy = false; - if (!inputs.json_schema.is_null()) { - if (!inputs.grammar.empty()) { - throw std::runtime_error("Either \"json_schema\" or \"grammar\" can be specified, but not both"); - } - data.grammar = json_schema_to_grammar(inputs.json_schema); - } else { - data.grammar = inputs.grammar; - } - } - - return data; -} - -static common_chat_params common_chat_params_init_command_r7b(const common_chat_template & tmpl, const struct templates_params & inputs) { - common_chat_params data; - - auto adjusted_messages = json::array(); - for (const auto & msg : inputs.messages) { - auto has_reasoning_content = msg.contains("reasoning_content") && msg.at("reasoning_content").is_string(); - auto has_tool_calls = msg.contains("tool_calls") && msg.at("tool_calls").is_array(); - if (has_reasoning_content && has_tool_calls) { - auto adjusted_message = msg; - adjusted_message["tool_plan"] = msg.at("reasoning_content"); - adjusted_message.erase("reasoning_content"); - adjusted_messages.push_back(adjusted_message); - } else { - adjusted_messages.push_back(msg); - } - } - data.prompt = apply(tmpl, inputs, /* messages_override= */ adjusted_messages); - data.format = COMMON_CHAT_FORMAT_COMMAND_R7B; - if (string_ends_with(data.prompt, "<|START_THINKING|>")) { - if (!inputs.enable_thinking) { - data.prompt += "<|END_THINKING|>"; - } else { - data.thinking_forced_open = true; - } - } else if (!inputs.enable_thinking && string_ends_with(data.prompt, "<|CHATBOT_TOKEN|>")) { - data.prompt += "<|START_THINKING|><|END_THINKING|>"; - } - - data.grammar_lazy = inputs.tool_choice != 
COMMON_CHAT_TOOL_CHOICE_REQUIRED; - data.grammar = build_grammar([&](const common_grammar_builder & builder) { - auto schemas = json::array(); - foreach_function(inputs.tools, [&](const json & tool) { - const auto & function = tool.at("function"); - schemas.push_back({ - {"type", "object"}, - {"properties", { - {"tool_call_id", { - {"type", "string"}, - // Command-R's template expects an integer string. - {"pattern", "^[0-9]{1,10}$"}, - }}, - {"tool_name", { - {"type", "string"}, - {"const", function.at("name")}, - }}, - {"parameters", function.at("parameters")}, - }}, - {"required", json::array({"tool_call_id", "tool_name", "parameters"})}, - }); - }); - auto schema = json { - {"type", "array"}, - {"items", schemas.size() == 1 ? schemas[0] : json {{"anyOf", schemas}}}, - {"minItems", 1}, - }; - if (!inputs.parallel_tool_calls) { - schema["maxItems"] = 1; - } - builder.add_rule("root", - std::string(data.thinking_forced_open ? "( \"<|END_THINKING|>\" space )? " : "") + - "\"<|START_ACTION|>\" " + builder.add_schema("tool_calls", schema) + " \"<|END_ACTION|>\""); - }); - data.grammar_triggers.push_back({ - COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL, - // If thinking_forced_open, then we capture the tag in the grammar, - // (important for required tool choice) and in the trigger's first capture (decides what is sent to the grammar) - std::string(data.thinking_forced_open ? 
"[\\s\\S]*?(<\\|END_THINKING\\|>\\s*)" : "(?:<\\|START_THINKING\\|>[\\s\\S]*?<\\|END_THINKING\\|>\\s*)?") + - "(<\\|START_ACTION\\|>)[\\s\\S]*" - }); - data.preserved_tokens = { - "<|START_ACTION|>", - "<|END_ACTION|>", - "<|START_RESPONSE|>", - "<|END_RESPONSE|>", - "<|START_THINKING|>", - "<|END_THINKING|>", - }; - return data; -} - -static void expect_tool_parameters(const std::string & name, const json & parameters, const std::vector & expected_properties) { - if (!parameters.is_object() || !parameters.contains("type") || parameters.at("type") != "object" || !parameters.contains("properties") || !parameters.contains("required")) { - throw std::runtime_error("Parameters of tool " + name + " must be an object w/ required properties"); - } - const auto & parameters_properties = parameters.at("properties"); - const auto & parameters_required = parameters.at("required"); - for (const auto & prop : expected_properties) { - if (!parameters_properties.contains(prop)) { - throw std::runtime_error("Parameters of tool " + name + " is missing property: " + prop); // NOLINT - } - if (std::find(parameters_required.begin(), parameters_required.end(), json(prop)) == parameters_required.end()) { - throw std::runtime_error("Parameters of tool " + name + " must have property marked as required: " + prop); // NOLINT - } - } - if (parameters_properties.size() != expected_properties.size()) { - throw std::runtime_error("Parameters of tool " + name + " must only have these properties:" + string_join(expected_properties, ", ")); - } -} - -static common_chat_params common_chat_params_init_llama_3_x(const common_chat_template & tmpl, const struct templates_params & inputs, bool allow_python_tag_builtin_tools) { - auto builtin_tools = json::array(); - common_chat_params data; - if (!inputs.tools.is_null()) { - data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED; - data.grammar = build_grammar([&](const common_grammar_builder & builder) { - std::vector tool_rules; - 
- auto handle_builtin_tool = [&](const std::string & name, const json & parameters) { - if (name == "wolfram_alpha" || name == "web_search" || name == "brave_search") { - // https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py - // https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py - expect_tool_parameters(name, parameters, {"query"}); - } else if (name == "python" || name == "code_interpreter") { - // https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/inline/tool_runtime/code_interpreter/code_interpreter.py - expect_tool_parameters(name, parameters, {"code"}); - } else { - return false; - } - - std::vector kvs; - for (const auto & [key, value] : parameters.at("properties").items()) { - kvs.push_back("\"" + key + "=\" " + builder.add_schema(name + "-args-" + key, value)); // NOLINT - } - - tool_rules.push_back( - builder.add_rule( - name + "-call", - "\"<|python_tag|>" + name + ".call(\" " + string_join(kvs, " \", \" ") + " \")\"")); - builtin_tools.push_back(name); - - return true; - }; - - foreach_function(inputs.tools, [&](const json & tool) { - const auto & function = tool.at("function"); - std::string name = function.at("name"); - auto parameters = function.at("parameters"); - builder.resolve_refs(parameters); - - // https://github.com/meta-llama/llama-stack/tree/main/llama_stack/providers/remote/tool_runtime - if (allow_python_tag_builtin_tools) { - handle_builtin_tool(name, parameters); - } - tool_rules.push_back( - builder.add_rule( - name + "-call", - "\"{\" space " - "( \"\\\"type\\\"\" space \":\" space \"\\\"function\\\"\" space \",\" space )? 
" - " \"\\\"name\\\"\" space \":\" space \"\\\"" + name + "\\\"\" space \",\" space " - " \"\\\"parameters\\\"\" space \":\" space " + builder.add_schema(name + "-args", parameters) + " " - "\"}\" space")); - }); - // Small models may hallucinate function names so we match anything (*at the start*) that looks like the JSON of a function call, regardless of the name. - data.grammar_triggers.push_back({ - COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL, - "(\\{\\s*(?:\"type\"\\s*:\\s*\"function\"\\s*,\\s*)?\"name\"\\s*:\\s*\")[\\s\\S]*", // + name + "\"[\\s\\S]*", - }); - if (!builtin_tools.empty()) { - data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<|python_tag|>"}); - data.preserved_tokens.push_back("<|python_tag|>"); - } - // Allow a few empty lines on top of the usual constrained json schema space rule. - builder.add_rule("root", string_join(tool_rules, " | ")); - data.additional_stops.push_back("<|eom_id|>"); - }); - data.format = allow_python_tag_builtin_tools && !builtin_tools.empty() - ? 
COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS - : COMMON_CHAT_FORMAT_LLAMA_3_X; - } else { - data.format = COMMON_CHAT_FORMAT_CONTENT_ONLY; - } - data.prompt = apply(tmpl, inputs, /* messages_override =*/ std::nullopt, /* tools_override= */ std::nullopt, json { - {"date_string", format_time(inputs.now, "%d %b %Y")}, - {"tools_in_user_message", false}, - {"builtin_tools", builtin_tools}, - }); - return data; -} - -static common_chat_params common_chat_params_init_nemotron_v2(const common_chat_template & tmpl, const struct templates_params & inputs) { - common_chat_params data; - - // Generate the prompt using the apply() function with the template - data.prompt = apply(tmpl, inputs); - data.format = COMMON_CHAT_FORMAT_NEMOTRON_V2; - - // Handle thinking tags appropriately based on inputs.enable_thinking - if (string_ends_with(data.prompt, "\n")) { - if (!inputs.enable_thinking) { - data.prompt += ""; - } else { - data.thinking_forced_open = true; - } - } - - // When tools are present, build grammar for the format, similar to CommandR, but without tool call ID - if (!inputs.tools.is_null() && inputs.tools.is_array() && !inputs.tools.empty()) { - data.grammar_lazy = true; - data.grammar = build_grammar([&](const common_grammar_builder & builder) { - auto schemas = json::array(); - foreach_function(inputs.tools, [&](const json & tool) { - const auto & function = tool.at("function"); - schemas.push_back({ - { "type", "object" }, - { "properties", - { - { "name", - { - { "type", "string" }, - { "const", function.at("name") }, - } }, - { "arguments", function.at("parameters") }, - } }, - { "required", json::array({ "name", "arguments" }) }, - }); - }); - auto schema = json{ - { "type", "array" }, - { "items", schemas.size() == 1 ? schemas[0] : json{ { "anyOf", schemas } } }, - { "minItems", 1 }, - }; - if (!inputs.parallel_tool_calls) { - schema["maxItems"] = 1; - } - builder.add_rule("root", - std::string(data.thinking_forced_open ? "( \"\" space )? 
" : "") + - "\"\" " + builder.add_schema("tool_calls", schema) + - " \"\""); - }); - data.grammar_triggers.push_back({ COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL, - // If thinking_forced_open, then we capture the tag in the grammar, - // (important for required tool choice) and in the trigger's first capture (decides what is sent to the grammar) - std::string(data.thinking_forced_open ? - "[\\s\\S]*?(\\s*)" : - "(?:[\\s\\S]*?\\s*)?") + - "()[\\s\\S]*" }); - } - return data; -} - -static common_chat_params common_chat_params_init_nemotron_v3(const common_chat_template & tmpl, const struct templates_params & inputs) { - common_chat_params data; - - data.prompt = apply(tmpl, inputs); - data.format = COMMON_CHAT_FORMAT_PEG_CONSTRUCTED; - - // Handle thinking tags appropriately based on inputs.enable_thinking - if (string_ends_with(data.prompt, "\n")) { - if (!inputs.enable_thinking) { - data.prompt += ""; - } else { - data.thinking_forced_open = true; - } - } - - data.preserved_tokens = { - "", - "", - "", - "", - }; - - auto has_tools = inputs.tools.is_array() && !inputs.tools.empty(); - auto extract_reasoning = inputs.reasoning_format != COMMON_REASONING_FORMAT_NONE; - auto include_grammar = true; - - auto parser = build_chat_peg_constructed_parser([&](auto & p) { - auto reasoning = p.eps(); - if (inputs.enable_thinking && extract_reasoning) { - auto reasoning_content = p.reasoning(p.until("")) + ("" | p.end()); - if (data.thinking_forced_open) { - reasoning = reasoning_content; - } - } - - // Response format parser - if (inputs.json_schema.is_object() && !inputs.json_schema.empty()) { - return reasoning << p.content(p.schema(p.json(), "response-format", inputs.json_schema)); - } - - // Tool call parser - if (has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE) { - auto tool_choice = p.choice(); - foreach_function(inputs.tools, [&](const json & tool) { - const auto & function = tool.at("function"); - std::string name = function.at("name"); - auto parameters 
= function.at("parameters"); - - auto schema_info = common_schema_info(); - schema_info.resolve_refs(parameters); - - auto tool_open = "\n"; - auto tool_close = p.literal("\n"); - auto args = p.sequence(); - auto arg_string = p.rule("xml-arg-string", p.until_one_of({ - "\n", - "\n" - })); - - foreach_parameter(function, [&](const auto & param_name, const json & param_schema, bool is_required) { - auto rule_name = "tool-" + name + "-arg-" + param_name; - - auto arg_open = "\n"; - auto arg_close = p.literal("\n"); - auto arg_value = p.eps(); - - if (schema_info.resolves_to_string(param_schema)) { - arg_value = p.tool_arg_string_value(arg_string) + "\n"; - } else { - arg_value = p.tool_arg_json_value(p.schema(p.json(), rule_name + "-schema", param_schema)); - } - - // Model may or my not close with - auto arg_rule = p.rule(rule_name, p.tool_arg_open(arg_open) + arg_value + p.optional(p.tool_arg_close(arg_close))); - args += p.repeat(arg_rule, /* min = */ is_required ? 1 : 0, /* max = */ 1); - }); - - tool_choice |= p.rule("tool-" + name, p.tool_open(tool_open) + args + p.tool_close(tool_close)); - }); - - auto min_calls = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED ? 1 : 0; - auto max_calls = inputs.parallel_tool_calls ? 
-1 : 1; - auto tool_call = p.rule("tool-call", "\n" + tool_choice + "" + p.space()); - auto tool_calls = p.trigger_rule("tool-call-root", p.repeat(tool_call, /* min = */ min_calls, /* max = */ max_calls)); - - return reasoning << p.content(p.until("")) << tool_calls; - } - - // Content only parser - include_grammar = false; - return reasoning << p.content(p.rest()); - }); - - data.parser = parser.save(); - - if (include_grammar) { - data.grammar_lazy = has_tools && inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_AUTO; - - data.grammar = build_grammar([&](const common_grammar_builder & builder) { - foreach_function(inputs.tools, [&](const json & tool) { - const auto & function = tool.at("function"); - auto schema = function.at("parameters"); - builder.resolve_refs(schema); - }); - parser.build_grammar(builder, data.grammar_lazy); - }); - - data.grammar_triggers = { - {COMMON_GRAMMAR_TRIGGER_TYPE_WORD, ""} - }; - } - - return data; -} - - -static common_chat_params common_chat_params_init_apertus(const common_chat_template & tmpl, const struct templates_params & inputs) { - common_chat_params data; - - // Generate the prompt using the apply() function with the template - data.prompt = apply(tmpl, inputs); - data.format = COMMON_CHAT_FORMAT_APERTUS; - - // Handle thinking tags appropriately based on inputs.enable_thinking - if (string_ends_with(data.prompt, "<|inner_prefix|>")) { - if (!inputs.enable_thinking) { - data.prompt += "<|inner_suffix|>"; - } else { - data.thinking_forced_open = true; - } - } - - // When tools are present, build grammar for the <|tools_prefix|> format - if (!inputs.tools.is_null() && inputs.tools.is_array() && !inputs.tools.empty()) { - data.grammar_lazy = true; - data.grammar = build_grammar([&](const common_grammar_builder & builder) { - auto schemas = json::array(); - foreach_function(inputs.tools, [&](const json & tool) { - const auto & function = tool.at("function"); - schemas.push_back({ - { "type", "object" }, - { "properties", - { - { 
function.at("name"), function.at("parameters") } - } }, - { "required", json::array({ function.at("name") }) }, - }); - }); - auto schema = json{ - { "type", "array" }, - { "items", schemas.size() == 1 ? schemas[0] : json{ { "anyOf", schemas } } }, - { "minItems", 1 }, - }; - if (!inputs.parallel_tool_calls) { - schema["maxItems"] = 1; - } - builder.add_rule("root", - std::string(data.thinking_forced_open ? "( \"<|inner_suffix|>\" space )? " : "") + - "\"<|tools_prefix|>\"" + builder.add_schema("tool_calls", schema) + "\"<|tools_suffix|>\""); - }); - data.grammar_triggers.push_back({ COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL, - // If thinking_forced_open, then we capture the <|inner_suffix|> tag in the grammar, - // (important for required tool choice) and in the trigger's first capture (decides what is sent to the grammar) - std::string(data.thinking_forced_open ? - "[\\s\\S]*?(<\\|inner_suffix\\|>\\s*)" : - "(?:<\\|inner_prefix\\|>[\\s\\S]*?<\\|inner_suffix\\|>\\s*)?") + - "(<\\|tools_prefix\\|>)[\\s\\S]*" }); - data.preserved_tokens = { - "<|system_start|>", - "<|system_end|>", - "<|developer_start|>", - "<|developer_end|>", - "<|user_start|>", - "<|user_end|>", - "<|assistant_start|>", - "<|assistant_end|>", - "<|inner_prefix|>", - "<|inner_suffix|>", - "<|tools_prefix|>", - "<|tools_suffix|>", - }; - } - return data; -} - -static common_chat_params common_chat_params_init_deepseek_r1(const common_chat_template & tmpl, const struct templates_params & inputs) { - common_chat_params data; - auto prompt = apply(tmpl, inputs); - - // Hacks to fix the official (broken) prompt. - // It is advisable to use --chat-template-file models/templates/llama-cpp-deepseek-r1.jinja instead, - // until the official template is fixed. 
- if (tmpl.source().find("{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}") != std::string::npos) { - // Don't leave the chat dangling after tool results - if (string_ends_with(prompt, "<|tool▁outputs▁end|>")) { - prompt += "<|end▁of▁sentence|>"; - if (inputs.add_generation_prompt) { - prompt += "<|Assistant|>"; - } - } - // Fix up tool call delta example added by Minja - prompt = std::regex_replace( - prompt, - std::regex("(<|tool▁call▁end|>)[\\s\\r\\n]*(<|tool▁outputs▁begin|>|<|User|>)"), - "$1<|tool▁calls▁end|><|end▁of▁sentence|>$2"); - } - data.prompt = prompt; - data.format = COMMON_CHAT_FORMAT_DEEPSEEK_R1; - if (string_ends_with(data.prompt, "\n")) { - if (!inputs.enable_thinking) { - data.prompt += ""; - } else { - data.thinking_forced_open = true; - } - } - - if (inputs.tools.is_array() && !inputs.tools.empty()) { - data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED && inputs.json_schema.is_null(); - data.grammar = build_grammar([&](const common_grammar_builder & builder) { - std::vector tool_rules; - foreach_function(inputs.tools, [&](const json & tool) { - const auto & function = tool.at("function"); - std::string name = function.at("name"); - auto parameters = function.at("parameters"); - builder.resolve_refs(parameters); - tool_rules.push_back(builder.add_rule(name + "-call", - "( \"<|tool▁call▁begin|>\" )? \"function<|tool▁sep|>" + name + "\\n" - "```json\\n\" " + builder.add_schema(name + "-args", parameters) + " " - "\"```<|tool▁call▁end|>\"")); - }); - // Distill Qwen 7B & 32B models seem confused re/ syntax of their tool call opening tag, - // so we accept common variants (then it's all constrained) - builder.add_rule("root", - std::string(data.thinking_forced_open ? "( \"\" space )? " : "") + - "( \"<|tool▁calls▁begin|>\" | \"<|tool_calls_begin|>\" | \"<|tool calls begin|>\" | \"<|tool\\\\_calls\\\\_begin|>\" | \"<|tool▁calls|>\" ) " - "(" + string_join(tool_rules, " | ") + ")" + (inputs.parallel_tool_calls ? 
"*" : "") + " " - "\"<|tool▁calls▁end|>\"" - " space"); - data.grammar_triggers.push_back({ - COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL, - // If thinking_forced_open, then we capture the tag in the grammar, - // (important for required tool choice) and in the trigger's first capture (decides what is sent to the grammar) - std::string(data.thinking_forced_open ? "[\\s\\S]*?(\\s*)" : "(?:[\\s\\S]*?\\s*)?") + - "(<|tool▁calls▁begin|>|<|tool_calls_begin|>|<|tool calls begin|>|<|tool\\\\_calls\\\\_begin|>|<|tool▁calls|>)[\\s\\S]*" - }); - data.preserved_tokens = { - "", - "", - "<|tool▁calls▁begin|>", - "<|tool▁call▁begin|>", - "<|tool▁sep|>", - "<|tool▁call▁end|>", - "<|tool▁calls▁end|", - }; - }); - } - return data; -} - -static common_chat_params common_chat_params_init_deepseek_v3_1(const common_chat_template & tmpl, const struct templates_params & inputs) { - common_chat_params data; - - // Pass thinking context for DeepSeek V3.1 template - json additional_context = { - {"thinking", inputs.enable_thinking}, - }; - - auto prompt = apply(tmpl, inputs, - /* messages_override= */ inputs.messages, - /* tools_override= */ std::nullopt, - additional_context); - data.prompt = prompt; - data.format = COMMON_CHAT_FORMAT_DEEPSEEK_V3_1; - if (string_ends_with(data.prompt, "")) { - if (!inputs.enable_thinking) { - data.prompt += ""; - } else { - data.thinking_forced_open = true; - } - } - if (inputs.tools.is_array() && !inputs.tools.empty()) { - data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED && inputs.json_schema.is_null(); - data.grammar = build_grammar([&](const common_grammar_builder & builder) { - std::vector tool_rules; - foreach_function(inputs.tools, [&](const json & tool) { - const auto & function = tool.at("function"); - std::string name = function.at("name"); - auto parameters = function.at("parameters"); - builder.resolve_refs(parameters); - tool_rules.push_back(builder.add_rule(name + "-call", - "( \"<|tool▁call▁begin|>\" )? 
\"" + name + "<|tool▁sep|>" - "\" " + builder.add_schema(name + "-args", parameters) + " " - "\"<|tool▁call▁end|>\"")); - }); - // Distill Qwen 7B & 32B models seem confused re/ syntax of their tool call opening tag, - // so we accept common variants (then it's all constrained) - builder.add_rule("root", - std::string(data.thinking_forced_open ? "( \"\" space )? " : "") + - "( \"<|tool▁calls▁begin|>\" | \"<|tool_calls_begin|>\" | \"<|tool calls begin|>\" | \"<|tool\\\\_calls\\\\_begin|>\" | \"<|tool▁calls|>\" ) " - "(" + string_join(tool_rules, " | ") + ")" + (inputs.parallel_tool_calls ? "*" : "") + " " - "\"<|tool▁calls▁end|>\"" - " space"); - data.grammar_triggers.push_back({ - COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL, - // If thinking_forced_open, then we capture the tag in the grammar, - // (important for required tool choice) and in the trigger's first capture (decides what is sent to the grammar) - std::string(data.thinking_forced_open ? "[\\s\\S]*?(\\s*)" : "(?:[\\s\\S]*?\\s*)?") + - "(<|tool▁calls▁begin|>|<|tool_calls_begin|>|<|tool calls begin|>|<|tool\\\\_calls\\\\_begin|>|<|tool▁calls|>)[\\s\\S]*" - }); - data.preserved_tokens = { - "", - "", - "<|tool▁calls▁begin|>", - "<|tool▁call▁begin|>", - "<|tool▁sep|>", - "<|tool▁call▁end|>", - "<|tool▁calls▁end|>", - }; - }); - } - return data; -} - -static common_chat_params common_chat_params_init_minimax_m2(const common_chat_template & tmpl, const struct templates_params & params) { - common_chat_params data; - data.grammar_lazy = params.tools.is_array() && !params.tools.empty() && params.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED; - - data.prompt = apply(tmpl, params); - data.format = COMMON_CHAT_FORMAT_MINIMAX_M2; - - // Handle thinking tags based on prompt ending - if (string_ends_with(data.prompt, "\n")) { - if (!params.enable_thinking) { - // Close the thinking tag immediately if thinking is disabled - data.prompt += "\n\n"; - } else { - // Mark thinking as forced open (template started with ) - 
data.thinking_forced_open = true; - } - } - - // Preserve MiniMax-M2 special tokens - data.preserved_tokens = { - "", - "", - "", - "", - }; - - // build grammar for tool call - static const xml_tool_call_format form { - /* form.scope_start = */ "\n", - /* form.tool_start = */ "\n", - /* form.key_start = */ "", - /* form.val_end = */ "\n", - /* form.tool_end = */ "\n", - /* form.scope_end = */ "", - }; - build_grammar_xml_tool_call(data, params.tools, form); - - return data; -} - -static common_chat_params common_chat_params_init_qwen3_coder_xml(const common_chat_template & tmpl, const struct templates_params & params) { - common_chat_params data; - data.grammar_lazy = params.tools.is_array() && !params.tools.empty() && params.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED; - - data.prompt = apply(tmpl, params); - data.format = COMMON_CHAT_FORMAT_QWEN3_CODER_XML; - - data.preserved_tokens = { - "", - "", - "", - "", - }; - - // build grammar for tool call - static const xml_tool_call_format form { - /* form.scope_start = */ "\n", - /* form.tool_start = */ "\n", - /* form.key_start = */ "\n", - /* form.val_end = */ "\n\n", - /* form.tool_end = */ "\n", - /* form.scope_end = */ "", - }; - build_grammar_xml_tool_call(data, params.tools, form); - - return data; -} - -static common_chat_params common_chat_params_init_kimi_k2(const common_chat_template & tmpl, const struct templates_params & params) { - common_chat_params data; - data.grammar_lazy = params.tools.is_array() && !params.tools.empty() && params.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED; - - data.prompt = apply(tmpl, params); - data.format = COMMON_CHAT_FORMAT_KIMI_K2; - - data.preserved_tokens = { - "", - "", - "<|tool_calls_section_begin|>", - "<|tool_call_begin|>", - "<|tool_call_argument_begin|>", - "<|tool_call_end|>", - "<|tool_calls_section_end|>", - "<|im_end|>", - "<|im_system|>", - "<|im_middle|>", - }; - - data.additional_stops.insert(data.additional_stops.end(), { - "<|im_end|>", - 
"<|im_middle|>" - }); - // build grammar for tool call - static const xml_tool_call_format form = ([]() { - xml_tool_call_format form {}; - form.scope_start = "<|tool_calls_section_begin|>"; - form.tool_start = "<|tool_call_begin|>"; - form.tool_sep = "<|tool_call_argument_begin|>{"; - form.key_start = "\""; - form.key_val_sep = "\": "; - form.val_end = ", "; - form.tool_end = "}<|tool_call_end|>"; - form.scope_end = "<|tool_calls_section_end|>"; - form.raw_argval = false; - form.last_val_end = ""; - return form; - })(); - build_grammar_xml_tool_call(data, params.tools, form); - - return data; -} - -static common_chat_params common_chat_params_init_apriel_1_5(const common_chat_template & tmpl, const struct templates_params & params) { - common_chat_params data; - data.grammar_lazy = params.tools.is_array() && !params.tools.empty() && params.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED; - - data.prompt = apply(tmpl, params); - data.format = COMMON_CHAT_FORMAT_APRIEL_1_5; - - data.preserved_tokens = { - "", - "", - "", - "", - }; - - // build grammar for tool call - static const xml_tool_call_format form = ([]() { - xml_tool_call_format form {}; - form.scope_start = "["; - form.tool_start = "{\"name\": \""; - form.tool_sep = "\", \"arguments\": {"; - form.key_start = "\""; - form.key_val_sep = "\": "; - form.val_end = ", "; - form.tool_end = "}, "; - form.scope_end = "]"; - form.raw_argval = false; - form.last_val_end = ""; - form.last_tool_end = "}"; - return form; - })(); - build_grammar_xml_tool_call(data, params.tools, form); - - return data; -} - -static common_chat_params common_chat_params_init_xiaomi_mimo(const common_chat_template & tmpl, const struct templates_params & params) { - common_chat_params data; - data.grammar_lazy = params.tools.is_array() && !params.tools.empty() && params.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED; - - data.prompt = apply(tmpl, params); - data.format = COMMON_CHAT_FORMAT_XIAOMI_MIMO; - - data.preserved_tokens = { - "", 
- "", - }; - - // build grammar for tool call - static const xml_tool_call_format form = ([]() { - xml_tool_call_format form {}; - form.scope_start = "\n"; - form.tool_start = "\n{\"name\": \""; - form.tool_sep = "\", \"arguments\": {"; - form.key_start = "\""; - form.key_val_sep = "\": "; - form.val_end = ", "; - form.tool_end = "}\n"; - form.scope_end = ""; - form.raw_argval = false; - form.last_val_end = ""; - return form; - })(); - build_grammar_xml_tool_call(data, params.tools, form); - - return data; -} - -static common_chat_params common_chat_params_init_gpt_oss(const common_chat_template & tmpl, const struct templates_params & inputs) { +static common_chat_params common_chat_params_init_gpt_oss(const common_chat_template & tmpl, + const struct templates_params & inputs) { common_chat_params data; // Copy reasoning to the "thinking" field as expected by the gpt-oss template auto adjusted_messages = json::array(); for (const auto & msg : inputs.messages) { auto has_reasoning_content = msg.contains("reasoning_content") && msg.at("reasoning_content").is_string(); - auto has_tool_calls = msg.contains("tool_calls") && msg.at("tool_calls").is_array(); + auto has_tool_calls = msg.contains("tool_calls") && msg.at("tool_calls").is_array(); if (has_reasoning_content && has_tool_calls) { - auto adjusted_message = msg; + auto adjusted_message = msg; adjusted_message["thinking"] = msg.at("reasoning_content"); adjusted_messages.push_back(adjusted_message); } else { @@ -2038,7 +931,7 @@ static common_chat_params common_chat_params_init_gpt_oss(const common_chat_temp } } - auto prompt = apply(tmpl, inputs, /* messages_override= */ adjusted_messages); + auto prompt = common_chat_template_direct_apply(tmpl, inputs, /* messages_override= */ adjusted_messages); // Check if we need to replace the return token with end token during // inference and without generation prompt. 
For more details see: @@ -2052,895 +945,118 @@ static common_chat_params common_chat_params_init_gpt_oss(const common_chat_temp } data.prompt = prompt; - data.format = COMMON_CHAT_FORMAT_GPT_OSS; + data.format = COMMON_CHAT_FORMAT_PEG_NATIVE; // These special tokens are required to parse properly, so we include them // even if parse_tool_calls is false. data.preserved_tokens = { - "<|channel|>", - "<|constrain|>", - "<|message|>", - "<|start|>", - "<|end|>", + "<|channel|>", "<|constrain|>", "<|message|>", "<|start|>", "<|end|>", }; - if (!inputs.json_schema.is_null()) { - data.grammar_lazy = false; - data.grammar = build_grammar([&](const common_grammar_builder & builder) { - auto schema = inputs.json_schema; - builder.resolve_refs(schema); + auto has_tools = inputs.tools.is_array() && !inputs.tools.empty(); + auto extract_reasoning = inputs.reasoning_format != COMMON_REASONING_FORMAT_NONE; + auto include_grammar = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE && has_tools; - auto not_end = builder.add_rule("not-end", - "[^<] | \"<\" [^|] | \"<|\" [^e] | \"<|e\" [^n] | \"<|en\" [^d] | \"<|end\" [^|] | \"<|end|\" [^>]"); - auto analysis = builder.add_rule("analysis", - "\"<|channel|>analysis<|message|>\" ( " + not_end + " )* \"<|end|>\""); - auto constraint = builder.add_rule("constraint", "\"<|constrain|>\"? [a-zA-Z0-9_-]+"); - auto final = builder.add_rule("final", - "\"<|channel|>final\" ( \" \" " + constraint + " )? 
\"<|message|>\" " + - builder.add_schema("response", schema) - ); + auto parser = build_chat_peg_unified_parser([&](common_chat_peg_unified_builder & p) { + const std::string END = "<|end|>"; + const std::string START = "<|start|>"; + const std::string MESSAGE = "<|message|>"; + const std::string CHANNEL = "<|channel|>"; + const std::string CONSTRAIN = "<|constrain|>"; + const std::string START_ASSISTANT = START + "assistant"; + const std::string CHANNEL_ANALYSIS = CHANNEL + "analysis"; + const std::string CHANNEL_COMMENTARY = CHANNEL + "commentary"; + const std::string CHANNEL_FINAL = CHANNEL + "final"; - builder.add_rule("root", "( " + analysis + " \"<|start|>assistant\" )? " + final); - }); - } + auto the_end = END | p.end(); - if (inputs.tools.is_array() && !inputs.tools.empty()) { - data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED; - data.grammar = build_grammar([&](const common_grammar_builder & builder) { - // tool calls can appear in commentary or analysis channels - auto channel = builder.add_rule("channel", "\"<|channel|>\" ( \"commentary\" | \"analysis\" )"); + const std::string analysis_header = CHANNEL_ANALYSIS + MESSAGE; + auto segment_content = p.until(END); + auto analysis_segment = extract_reasoning ? 
+ p.literal(analysis_header) + p.reasoning(segment_content) + p.until(END) + the_end : + p.content(analysis_header + p.until(END) + the_end); - std::vector tool_rules_recipient_in_role; - std::vector tool_rules_recipient_in_channel; - foreach_function(inputs.tools, [&](const json & tool) { - const auto & function = tool.at("function"); - std::string name = function.at("name"); - auto parameters = function.at("parameters"); - builder.resolve_refs(parameters); + auto channel_header_content = p.until_one_of({ " to=functions.", MESSAGE }); + auto content_header = p.choice({ p.literal(CHANNEL_COMMENTARY), p.literal(CHANNEL_FINAL) }); + auto content_segment = p.rule("content-segment", content_header + channel_header_content + MESSAGE + + p.content(segment_content) + the_end); - tool_rules_recipient_in_role.push_back( - builder.add_rule(name + "-call", - "\"" + name + "\"" + channel + " \" <|constrain|>json\"? \"<|message|>\" " + - builder.add_schema(name + "-args", parameters) - ) - ); - - tool_rules_recipient_in_channel.push_back( - builder.add_rule(name + "-call", - "\"" + name + "\"" + " \" <|constrain|>json\"? \"<|message|>\" " + - builder.add_schema(name + "-args", parameters) - ) - ); - }); - - auto recipient_in_channel = builder.add_rule("recipient_in_channel", - channel + " \" to=functions.\" ( " + - string_join(tool_rules_recipient_in_channel, " | ") + " )" - ); - - if (data.grammar_lazy) { - auto recipient_in_role = builder.add_rule("recipient_in_role", - "\"<|start|>assistant\"? 
\" to=functions.\" ( " + - string_join(tool_rules_recipient_in_role, " | ") + " )" - ); - - builder.add_rule("root", recipient_in_role + " | " + recipient_in_channel); - } else { - auto not_end = builder.add_rule("not-end", - "[^<] | \"<\" [^|] | \"<|\" [^e] | \"<|e\" [^n] | \"<|en\" [^d] | \"<|end\" [^|] | \"<|end|\" [^>]"); - auto analysis = builder.add_rule("analysis", - "\"<|channel|>analysis<|message|>\" ( " + not_end + " )* \"<|end|>\""); - auto commentary = builder.add_rule("commentary", - "\"<|channel|>commentary<|message|>\" ( " + not_end + " )* \"<|end|>\""); - - auto recipient_in_role = builder.add_rule("recipient_in_role", - "\" to=functions.\" ( " + string_join(tool_rules_recipient_in_role, " | ") + " )" - ); - - builder.add_rule("root", - "( " + analysis + " \"<|start|>assistant\" )? " + - "( " + commentary + " \"<|start|>assistant\" )? " + - "( " + recipient_in_role + " | " + recipient_in_channel + " )" - ); - } - - // Trigger on tool calls that appear in the commentary channel - data.grammar_triggers.push_back({ - COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN, - "<\\|channel\\|>(?:commentary|analysis) to" - }); - - // Trigger tool calls that appear in the role section, either at the - // start or in the middle. 
- data.grammar_triggers.push_back({ - COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL, - "^ to" - }); - - data.grammar_triggers.push_back({ - COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN, - "<\\|start\\|>assistant to" - }); - }); - } - - return data; -} - -static common_chat_params common_chat_params_init_glm_4_5(const common_chat_template & tmpl, const struct templates_params & inputs) { - common_chat_params data; - data.grammar_lazy = inputs.tools.is_array() && !inputs.tools.empty() && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED; - - std::string prompt = apply(tmpl, inputs); - - // match the existing trimming behavior - if (inputs.add_bos && string_starts_with(prompt, tmpl.bos_token())) { - prompt.erase(0, tmpl.bos_token().size()); - } - if (inputs.add_eos && string_ends_with(prompt, tmpl.eos_token())) { - prompt.erase(prompt.size() - tmpl.eos_token().size()); - } - if (string_ends_with(prompt, "")) { - if (!inputs.enable_thinking) { - prompt += ""; - } else { - data.thinking_forced_open = true; - } - } - - // add GLM preserved tokens - data.preserved_tokens = { - "<|endoftext|>", - "[MASK]", - "[gMASK]", - "[sMASK]", - "", - "", - "<|system|>", - "<|user|>", - "<|assistant|>", - "<|observation|>", - "<|begin_of_image|>", - "<|end_of_image|>", - "<|begin_of_video|>", - "<|end_of_video|>", - "<|begin_of_audio|>", - "<|end_of_audio|>", - "<|begin_of_transcription|>", - "<|end_of_transcription|>", - "<|code_prefix|>", - "<|code_middle|>", - "<|code_suffix|>", - "/nothink", - "", - "", - "", - "", - "", - "", - "", - "" - }; - - // extra GLM 4.5 stop word - data.additional_stops.insert(data.additional_stops.end(), { - "<|user|>", - "<|observation|>" - }); - - // build grammar for tool call - static const xml_tool_call_format form { - /* form.scope_start = */ "", - /* form.tool_start = */ "\n", - /* form.tool_sep = */ "\n", - /* form.key_start = */ "", - /* form.key_val_sep = */ "\n", - /* form.val_end = */ "\n", - /* form.tool_end = */ "\n", - /* form.scope_end = */ "", - 
}; - build_grammar_xml_tool_call(data, inputs.tools, form); - - data.prompt = prompt; - data.format = COMMON_CHAT_FORMAT_GLM_4_5; - return data; -} - -static common_chat_params common_chat_params_init_firefunction_v2(const common_chat_template & tmpl, const struct templates_params & inputs) { - LOG_DBG("%s\n", __func__); - common_chat_params data; - const std::optional additional_context = json { - {"datetime", format_time(inputs.now, "%b %d %Y %H:%M:%S GMT")}, - {"functions", json(inputs.tools.empty() ? "" : inputs.tools.dump(2))}, - }; - data.prompt = apply(tmpl, inputs, /* messages_override =*/ std::nullopt, /* tools_override =*/ std::nullopt, additional_context); - if (inputs.tools.is_array() && !inputs.tools.empty()) { - data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED; - data.grammar = build_grammar([&](const common_grammar_builder & builder) { - auto schemas = json::array(); - foreach_function(inputs.tools, [&](const json & tool) { - const auto & function = tool.at("function"); - schemas.push_back({ - {"type", "object"}, - {"properties", { - {"name", { - {"type", "string"}, - {"const", function.at("name")}, - }}, - {"arguments", function.at("parameters")}, - }}, - {"required", json::array({"name", "arguments", "id"})}, - }); - }); - auto schema = json { - {"type", "array"}, - {"items", schemas.size() == 1 ? schemas[0] : json {{"anyOf", schemas}}}, - {"minItems", 1}, - }; - if (!inputs.parallel_tool_calls) { - schema["maxItems"] = 1; - } - builder.add_rule("root", "\" functools\"? 
" + builder.add_schema("tool_calls", schema)); - }); - data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, " functools["}); - data.preserved_tokens = { - " functools[", - }; - data.format = COMMON_CHAT_FORMAT_FIREFUNCTION_V2; - } else { - data.format = COMMON_CHAT_FORMAT_CONTENT_ONLY; - } - return data; -} - -static common_chat_params common_chat_params_init_functionary_v3_2(const common_chat_template & tmpl, const struct templates_params & inputs) { - // >>>all\nlet's call functions>>>fn1\n{"arg1": 1...}\n>>>fn2\n{"arg1": 1...}... - // Using ">>>f1\n", ">>>f2\n"... as trigger words for the grammar - // If the function is python, we also allow raw python code (if the line after `python\n` doesn't start w/ opening `{`), which the model seems to prefer for multiline code. - common_chat_params data; - data.prompt = apply(tmpl, inputs); - data.format = COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2; - if (inputs.tools.is_array() && !inputs.tools.empty()) { - data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED; - data.grammar = build_grammar([&](const common_grammar_builder & builder) { - std::vector first_tool_rules; - std::vector subsequent_tool_rules; - foreach_function(inputs.tools, [&](const json & tool) { - const auto & function = tool.at("function"); - std::string name = function.at("name"); - auto parameters = function.at("parameters"); - builder.resolve_refs(parameters); - std::string args_pattern = "[\\s\\S]*"; - auto args_rule = builder.add_schema(name + "-args", parameters); - if (name == "python") { - args_rule = builder.add_rule(name + "-maybe-raw-args", args_rule + " | [^{] .*"); - } else { - args_pattern = "\\{" + args_pattern; - } - auto call_rule = builder.add_rule(name + "-call", "\"" + name + "\\n\" " + args_rule); - first_tool_rules.push_back(call_rule); - if (inputs.parallel_tool_calls) { - subsequent_tool_rules.push_back(builder.add_rule(name + "-call2", "\">>>\" " + call_rule)); - } - data.grammar_triggers.push_back({ - 
COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL, - "((?:[\\s\\S]+?>>>)?" + regex_escape(name) + "\n)" + args_pattern, - }); - }); - data.preserved_tokens = { - "<|end_header_id|>", - }; - auto first_rule = first_tool_rules.empty() ? "" : builder.add_rule("first_tool_call", string_join(first_tool_rules, " | ")) + " space"; - if (inputs.parallel_tool_calls) { - auto subsequent_rule = builder.add_rule("subsequent_tool_call", string_join(subsequent_tool_rules, " | ")) + " space"; - builder.add_rule("root", first_rule + " (" + subsequent_rule + ")*"); - } else { - builder.add_rule("root", first_rule); - } - - }); - } - return data; -} - -static common_chat_params common_chat_params_init_functionary_v3_1_llama_3_1(const common_chat_template & tmpl, const struct templates_params & inputs) { - // https://github.com/MeetKai/functionary/blob/main/tests/prompt_test_v3-llama3.1.txt - common_chat_params data; - - if (!inputs.tools.is_null()) { - std::string python_code_argument_name; - auto has_raw_python = false; - - data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED; - data.grammar = build_grammar([&](const common_grammar_builder & builder) { - std::vector tool_rules; - foreach_function(inputs.tools, [&](const json & tool) { - const auto & function = tool.at("function"); - const auto & parameters = function.at("parameters"); - std::string name = function.at("name"); - if (name == "python" || name == "ipython") { - if (!parameters.contains("type")) { - throw std::runtime_error("Missing type in python tool"); - } - has_raw_python = true; - const auto & type = parameters.at("type"); - if (type == "object") { - auto properties = parameters.at("properties"); - for (auto it = properties.begin(); it != properties.end(); ++it) { - if (it.value().at("type") == "string") { - if (!python_code_argument_name.empty()) { - throw std::runtime_error("Multiple string arguments found in python tool"); - } - python_code_argument_name = it.key(); - } - } - if 
(python_code_argument_name.empty()) { - throw std::runtime_error("No string argument found in python tool"); - } - } else if (type != "string") { - throw std::runtime_error("Invalid type in python tool: " + type.dump()); - } - } - tool_rules.push_back(builder.add_rule(name + "-call", "\"\" " + builder.add_schema(name + "-args", parameters) + " \"\" space")); - }); - if (has_raw_python) { - tool_rules.push_back(builder.add_rule("python-call", "\"<|python_tag|>\" .*")); - data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<|python_tag|>"}); - data.preserved_tokens.push_back("<|python_tag|>"); - } - auto tool_call = builder.add_rule("tool_call", string_join(tool_rules, " | ")) + " space"; - builder.add_rule("root", inputs.parallel_tool_calls ? "(" + tool_call + ")+" : tool_call); - data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "\n")) { - if (!extra_context["enable_thinking"]) { - data.prompt += ""; - } else { - data.thinking_forced_open = true; - } - } - - if (!inputs.tools.is_null()) { - // (content)?({"name": "foo", "arguments": {"a": 1}})* - data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED; - data.grammar = build_grammar([&](const common_grammar_builder & builder) { - std::vector tool_rules; - std::vector tool_call_alts; - std::vector escaped_names; - foreach_function(inputs.tools, [&](const json & tool) { - const auto & function = tool.at("function"); - std::string name = function.at("name"); - auto parameters = function.at("parameters"); - builder.resolve_refs(parameters); - tool_rules.push_back(builder.add_schema(name + "-call", { - {"type", "object"}, - {"properties", json { - {"name", json {{"const", name}}}, - {"arguments", parameters}, - }}, - {"required", json::array({"name", "arguments"})}, - })); - tool_call_alts.push_back(builder.add_rule( - name + "-function-tag", - "\"\" space " + - builder.add_schema(name + "-args", parameters) + " " - "\"\" space")); - - data.grammar_triggers.push_back({ 
- COMMON_GRAMMAR_TRIGGER_TYPE_WORD, - "", - }); - auto escaped_name = regex_escape(name); - data.grammar_triggers.push_back({ - COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN, - " alt_tags { - any_tool_call, - "\"\" space " + any_tool_call + " \"\"", - // The rest is just to accommodate common "good bad" outputs. - "\"\" space " + any_tool_call + " \"\"", - "\"\" space " + any_tool_call + " \"\"", - "\"\" space " + any_tool_call + " \"\"", - "\"\" space " + any_tool_call + " \"\"", - "\"\" space " + any_tool_call + " \"\"", - "\"\" space " + any_tool_call + " \"\"", - }; - auto wrappable_tool_call = builder.add_rule("wrappable_tool_call", "( " + string_join(alt_tags, " | ") + " ) space"); - tool_call_alts.push_back(wrappable_tool_call); - tool_call_alts.push_back( - "( \"```\\n\" | \"```json\\n\" | \"```xml\\n\" ) space " + wrappable_tool_call + " space \"```\" space "); - auto tool_call = builder.add_rule("tool_call", string_join(tool_call_alts, " | ")); - builder.add_rule("root", - std::string(data.thinking_forced_open ? "( \"\" space )? " : "") + - (inputs.parallel_tool_calls ? "(" + tool_call + ")+" : tool_call)); - // Trigger on some common known "good bad" outputs (only from the start and with a json that's about a specific argument name to avoid false positives) - data.grammar_triggers.push_back({ - COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN, - // If thinking_forced_open, then we capture the tag in the grammar, - // (important for required tool choice) and in the trigger's first capture (decides what is sent to the grammar) - std::string(data.thinking_forced_open ? "(\\s*)" : "") + ( - "\\s*(" - "(?:" - "||||)?" 
- "\\s*\\{\\s*\"name\"\\s*:\\s*\"(?:" + string_join(escaped_names, "|") + ")\"" - ")" - ")" - ), - }); - data.preserved_tokens = { - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "```", - "```json", - "```xml", - }; - }); - } - - return data; -} - -static common_chat_params common_chat_params_init_granite(const common_chat_template & tmpl, const struct templates_params & inputs) { - common_chat_params data; - - // Pass thinking context for Granite template - json additional_context = { - {"thinking", inputs.enable_thinking}, - }; - - data.prompt = apply(tmpl, inputs, /* messages_override= */ std::nullopt, /* tools_override= */ std::nullopt, additional_context); - data.format = COMMON_CHAT_FORMAT_GRANITE; - - if (string_ends_with(data.prompt, "\n") || string_ends_with(data.prompt, "")) { - if (!inputs.enable_thinking) { - data.prompt += ""; - } else { - data.thinking_forced_open = true; - } - } - - if (!inputs.tools.is_null()) { - // Granite uses <|tool_call|> followed by JSON list - data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED; - data.grammar = build_grammar([&](const common_grammar_builder & builder) { - std::vector tool_rules; - foreach_function(inputs.tools, [&](const json & tool) { - const auto & function = tool.at("function"); - std::string name = function.at("name"); - auto parameters = function.at("parameters"); - builder.resolve_refs(parameters); - tool_rules.push_back(builder.add_rule(name + "-call", builder.add_schema(name + -"-args", { - {"type", "object"}, - {"properties", { - {"name", {{"const", name}}}, - {"arguments", parameters}, - }}, - {"required", json::array({"name", "arguments"})}, - }))); - }); - - auto tool_call = builder.add_rule("tool_call", string_join(tool_rules, " | ")); - auto tool_list = builder.add_rule("tool_list", "\"[\" space " + tool_call + " (\",\" space " + tool_call + ")* space \"]\""); - - if (data.thinking_forced_open) { - builder.add_rule("root", "\"\" space 
\"\" space [^<]* \"\" space \"<|tool_call|>\" space " + tool_list); - } else { - builder.add_rule("root", "\"<|tool_call|>\" space " + tool_list); - } - - data.grammar_triggers.push_back({ - COMMON_GRAMMAR_TRIGGER_TYPE_WORD, - "<|tool_call|>" - }); - - data.preserved_tokens = { - "", - "", - "", - "", - "<|tool_call|>", - }; - }); - } else { - // Handle thinking tags for non-tool responses - if (data.thinking_forced_open && inputs.enable_thinking) { - data.grammar_lazy = false; - data.grammar = build_grammar([&](const common_grammar_builder & builder) { - builder.add_rule("root", "\"\" space \"\" space .* \"\" space"); - }); - data.preserved_tokens = { - "", - "", - "", - "", - }; - } - } - - return data; -} - -static common_chat_params common_chat_params_init_solar_open(const common_chat_template & tmpl, const struct templates_params & inputs) { - common_chat_params data; - - // Copy `reasoning_content` to `reasoning` - auto adjusted_messages = json::array(); - for (const auto & msg : inputs.messages) { - if (msg.contains("reasoning_content") && msg.at("reasoning_content").is_string()) { - auto adjusted_message = msg; - adjusted_message["reasoning"] = msg.at("reasoning_content"); - adjusted_message.erase("reasoning_content"); - adjusted_messages.push_back(adjusted_message); - } else { - adjusted_messages.push_back(msg); - } - } - - auto has_tools = inputs.tools.is_array() && !inputs.tools.empty(); - auto include_grammar = true; - - auto prompt = apply(tmpl, inputs, /* messages_override= */ adjusted_messages); - - // Check if we need to replace the flush token with end token during inference and without generation prompt. 
- if (inputs.is_inference && !inputs.add_generation_prompt) { - static constexpr std::string_view return_token = "<|flush|>"; - static constexpr std::string_view end_token = "<|end|>"; - if (size_t pos = prompt.rfind(return_token); pos != std::string::npos) { - prompt.replace(pos, return_token.length(), end_token); - } - } - - data.prompt = prompt; - data.format = COMMON_CHAT_FORMAT_PEG_NATIVE; - data.preserved_tokens = { - "<|think|>", - "<|content|>", - "<|begin|>", - "<|end|>", - "<|tool_calls|>", - "<|tool_call:begin|>", - "<|tool_call:end|>", - "<|tool_call:name|>", - "<|tool_call:args|>", - }; - - auto parser = build_chat_peg_native_parser([&](common_chat_peg_native_builder & p) { - auto lit_think = p.atomic(p.literal("<|think|>")); - auto lit_assistant_begin = p.atomic(p.literal("<|begin|>assistant")); - auto lit_content = p.atomic(p.literal("<|content|>")); - auto lit_end = p.atomic(p.literal("<|end|>")); - auto parser_until_end = p.until("<|end|>"); - - // reasoning <- "<|think|>" (!"<|end|>" .)* - auto parser_reasoning = p.rule("reasoning", lit_think + p.reasoning(parser_until_end)); - - // content <- "<|content|>" (!"<|end|>" .)* - auto parser_content = p.rule("content", lit_content + p.content(parser_until_end)); - - // wrap_choice(items) <- item-choice wrapped* - // item-choice <- items[0] / ... / items[n] - // wrapped <- "<|end|><|begin|>assistant" item-choice - auto wrap_choice = [&](const std::vector & items) { - auto choice = p.choice(items); - return choice + p.zero_or_more(lit_end + lit_assistant_begin + choice); - }; - - // wrap_seq(items) <- item[0] "<|end|><|begin|>assistant" item[1] ... 
- auto wrap_seq = [&](const std::vector & items) { - auto seq = p.sequence(); - for (auto i = 0u; i < items.size(); i++) { - if (i == 0) { - seq += items[i]; - continue; - } - seq += lit_end + lit_assistant_begin + items[i]; - } - return seq; - }; - - // Response format parser - if (inputs.json_schema.is_object() && !inputs.json_schema.empty()) { - auto parser_response_format = lit_content + p.content(p.schema(p.json(), "response-format", inputs.json_schema)); - return p.choice({ - wrap_seq({parser_reasoning, parser_response_format}), - wrap_seq({parser_response_format}) - }); + if (!inputs.json_schema.is_null()) { + auto final_header = p.literal(CHANNEL_FINAL); + auto constraint = p.optional(p.space() + p.literal(CONSTRAIN) + channel_header_content); + return p.optional(analysis_segment) + final_header + constraint + MESSAGE + + p.content(p.schema(p.json(), "response-format", inputs.json_schema)); } - auto lit_tool_call_begin = p.literal("<|tool_call:begin|>"); - auto lit_tool_call_name = p.literal("<|tool_call:name|>"); - auto lit_tool_call_args = p.literal("<|tool_call:args|>"); - auto lit_tool_call_end = p.literal("<|tool_call:end|>"); + auto segment = p.optional(START_ASSISTANT + p.space()) + p.choice({ content_segment, analysis_segment }); + auto contents = p.optional(segment + p.repeat(p.optional(p.space()) + segment, 0, -1)) + p.end(); // Tool call parser if (has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE) { - auto parser_tool_call = p.choice(); + auto tool_choice = p.choice(); + foreach_function(inputs.tools, [&](const json & tool) { const auto & function = tool.at("function"); - std::string name = function.at("name"); - const auto & schema = function.at("parameters"); + std::string name = function.at("name"); + const auto & params = function.at("parameters"); - // tool(name, schema) <- name "<|tool_call:args|>" schema - parser_tool_call |= p.rule("tool-" + name, - p.atomic(p.tool_name(p.literal(name)) + lit_tool_call_args) - + 
p.tool_args(p.schema(p.json(), "tool-" + name + "-schema", schema))); + // Tool call can appear as: + // 1. In role header: " to=functions.NAME<|channel|>..." + // 2. In channel: "<|channel|>(analysis|commentary) to=functions.NAME..." + auto func_name = p.literal(" to=functions.") + p.tool_name(p.literal(name)); + + auto channel = p.literal(CHANNEL_COMMENTARY) | p.literal(CHANNEL_ANALYSIS); + auto constraint = p.space() + p.optional(p.literal(CONSTRAIN) + channel_header_content); + auto args = p.tool_args(p.schema(p.json(), "tool-" + name + "-schema", params)); + + // Pattern 1: recipient in role header + // " to=functions.NAME<|channel|>(analysis|commentary)[constraint]<|message|>ARGS" + auto tool_in_role = p.tool(p.tool_open(func_name + channel) + constraint + MESSAGE + args); + + // Pattern 2: recipient in channel header + // "<|channel|>(analysis|commentary) to=functions.NAME[constraint]<|message|>ARGS" + + auto tool_in_channel = p.tool(channel + p.tool_open(func_name + constraint + MESSAGE) + args); + + tool_choice |= p.trigger_rule("tool-" + name, tool_in_role | tool_in_channel); }); auto min_calls = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED ? 1 : 0; auto max_calls = inputs.parallel_tool_calls ? -1 : 1; - // tool-calls <- "<|tool_calls|>" tool-call+ - // tool-call <- "<|tool_call:begin|> call-id "<|tool_call:name|>" &([^<]+ "<|tool_call:args|>") tool-choice "<|tool_call:end|>" - // call-id <- [a-zA-Z0-9_-]+ - // tool-choice <- tool(t[0].name, t[0].schema) / ... 
/ tool(t[n].name, t[n].schema) - auto parser_tool_calls = p.trigger_rule("tool-calls", - p.atomic(p.literal("<|tool_calls|>")) - + p.repeat( - p.tool_open( - lit_tool_call_begin - + p.tool_id(p.chars("[a-zA-Z0-9_-]", 1, -1)) - + lit_tool_call_name - + p.peek(p.chars("[^<]", 1, -1) + lit_tool_call_args)) - + parser_tool_call - + p.tool_close(lit_tool_call_end), - /* min = */ 1, - /* max = */ max_calls)); + auto role_start = p.optional(p.space() + p.literal(START_ASSISTANT)); + auto tool_call = p.rule("tool-call", p.repeat(role_start + tool_choice, min_calls, max_calls) + p.end()); - if (min_calls == 1) { - // If required, then try any combination of the reasoning, content, and tool call - return p.choice({ - wrap_seq({parser_reasoning, parser_content, parser_tool_calls}), - wrap_seq({parser_reasoning, parser_tool_calls}), - wrap_seq({parser_content, parser_tool_calls}), - wrap_seq({parser_tool_calls}) - }); - } - - return wrap_choice({parser_reasoning, parser_content, parser_tool_calls}); + return p.choice({ tool_call, p.one_or_more(segment) + tool_call }); } - // Content only parser - include_grammar = false; - return wrap_choice({parser_reasoning, parser_content}); + return contents; }); data.parser = parser.save(); if (include_grammar) { data.grammar_lazy = has_tools && inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_AUTO; - - data.grammar = build_grammar([&](const common_grammar_builder & builder) { + data.grammar = build_grammar([&](const common_grammar_builder & builder) { foreach_function(inputs.tools, [&](const json & tool) { const auto & function = tool.at("function"); - auto schema = function.at("parameters"); + auto schema = function.at("parameters"); builder.resolve_refs(schema); }); parser.build_grammar(builder, data.grammar_lazy); }); data.grammar_triggers = { - {COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<|tool_calls|>"} + { COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN, "^(?:<\\|start\\|>assistant\\s*)?(\\s+to=functions)" }, + { COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN, 
"(?:<\\|end\\|>)(?:<\\|start\\|>assistant\\s*)?(\\s+to=functions)" }, + { COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN, + "(?:<\\|start\\|>assistant\\s*)?(<\\|channel\\|>(?:commentary|analysis)\\s+to=functions)" } }; } return data; } -static common_chat_params common_chat_params_init_exaone_moe(const common_chat_template & tmpl, const struct templates_params & inputs) { - common_chat_params data; - - data.prompt = apply(tmpl, inputs); - data.format = COMMON_CHAT_FORMAT_EXAONE_MOE; - if (string_ends_with(data.prompt, "\n")) { - if (!inputs.enable_thinking) { - data.prompt += "\n\n"; - } else { - data.thinking_forced_open = true; - } - } - - if (inputs.tools.is_array() && !inputs.tools.empty()) { - data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED && inputs.json_schema.is_null(); - data.grammar = build_grammar([&](const common_grammar_builder & builder) { - std::vector tool_rules; - foreach_function(inputs.tools, [&](const json & tool) { - const auto & function = tool.at("function"); - std::string name = function.at("name"); - auto parameters = function.at("parameters"); - builder.resolve_refs(parameters); - // Expect: {"name": "", "arguments": {...}} - tool_rules.push_back(builder.add_rule( - name + "-call", - "\"\" space " + - builder.add_schema(name + "-obj", json{ - {"type", "object"}, - {"properties", { - {"name", json{{"const", name}}}, - {"arguments", parameters}, - }}, - {"required", json::array({"name", "arguments"})}, - }) + - " space \"\" space")); - }); - - auto tool_call = builder.add_rule("tool_call", string_join(tool_rules, " | ")); - builder.add_rule("root", - std::string(data.thinking_forced_open ? "( \"\" space )? " : "") + - (inputs.parallel_tool_calls ? "(" + tool_call + ")+" : tool_call)); - - data.grammar_triggers.push_back({ - COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL, - std::string(data.thinking_forced_open ? "[\\s\\S]*?(\\s*)?" 
: "") + - "()[\\s\\S]*" - }); - data.preserved_tokens = { - "", - "", - "", - "", - }; - }); - } - - return data; -} - -static common_chat_params common_chat_params_init_translate_gemma(const common_chat_template & tmpl, const struct templates_params & inputs) { - common_chat_params data; - - // This template does not support tools or reasoning - // we just need to transform the messages into the correct schema - - templates_params inputs_new = inputs; - json & messages = inputs_new.messages; - - // default to chat_template_kwargs, or en-GB if not specified - std::string default_src_lang = inputs.extra_context.value("source_lang_code", "en-GB"); - std::string default_tgt_lang = inputs.extra_context.value("target_lang_code", "en-GB"); - - GGML_ASSERT(messages.is_array()); - for (auto & message : messages) { - if (message.contains("role") && message["role"].get() != "user") { - continue; - } - if (!message.contains("content")) { - message["content"] = json::array(); - } - if (message.contains("content") && !message["content"].is_array()) { - auto content_str = message["content"].get(); - // default to en-GB if not specified (to make common_chat_format_example works) - auto src_lang = message.contains("source_lang_code") - ? message["source_lang_code"].get() : default_src_lang; - auto tgt_lang = message.contains("target_lang_code") - ? 
message["target_lang_code"].get() : default_tgt_lang; - message["content"] = json::array({ - json{ - {"type", "text"}, - {"text", content_str}, - {"source_lang_code", src_lang}, - {"target_lang_code", tgt_lang}, - } - }); - } - } - - data.prompt = apply(tmpl, inputs_new, std::nullopt, std::nullopt); - data.format = COMMON_CHAT_FORMAT_GENERIC; - - return data; -} - -static common_chat_params common_chat_params_init_without_tools(const common_chat_template & tmpl, const struct templates_params & inputs) { - common_chat_params data; - data.prompt = apply(tmpl, inputs); - data.format = COMMON_CHAT_FORMAT_CONTENT_ONLY; - data.grammar_lazy = false; - if (!inputs.json_schema.is_null()) { - if (!inputs.grammar.empty()) { - throw std::runtime_error("Either \"json_schema\" or \"grammar\" can be specified, but not both"); - } - data.grammar = json_schema_to_grammar(inputs.json_schema); - } else { - data.grammar = inputs.grammar; - } - return data; -} - -static common_chat_params common_chat_params_init_seed_oss( - const common_chat_template & tmpl, - templates_params & params, - const common_chat_templates_inputs & inputs) -{ - common_chat_params data; - data.prompt = apply(tmpl, params); - data.format = COMMON_CHAT_FORMAT_SEED_OSS; - if (string_ends_with(data.prompt, "")) { - if (!inputs.enable_thinking) { - data.prompt += ""; - } else { - data.thinking_forced_open = true; - } - } - - if (params.tools.is_array() && !params.tools.empty()) { - data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED; - data.grammar = build_grammar([&](const common_grammar_builder & builder) { - std::vector tool_rules; - foreach_function(params.tools, [&](const json & tool) { - const auto & function = tool.at("function"); - std::string name = function.at("name"); - auto parameters = function.at("parameters"); - builder.resolve_refs(parameters); - - // Create rule for Seed-OSS function call format - std::string param_rules; - if (parameters.contains("properties")) { - for 
(const auto & [key, value] : parameters.at("properties").items()) { - param_rules += "\"\"" + builder.add_schema(name + "-arg-" + key, value) + - "\"\""; - } - } - - tool_rules.push_back(builder.add_rule(name + "-call", - "\"\" space \"\" space " + - param_rules + - " \"\" space \"\"")); - }); - - data.grammar_triggers.push_back({ COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "" }); - - data.preserved_tokens = { - "", "", "", "", - "", "", - }; - - builder.add_rule("root", string_join(tool_rules, " | ")); - }); - } - return data; -} - -// various workarounds for known issues with certain templates or model behaviors -// TODO @ngxson : improve this (how?) namespace workaround { // if first message is system and template does not support it, merge it with next message @@ -2989,70 +1105,8 @@ static void func_args_not_string(json & messages) { } } -static void move_tool_calls_to_content(json & messages, int indent_spaces = 2) { - GGML_ASSERT(messages.is_array()); - for (auto & message : messages) { - if (message.contains("tool_calls")) { - auto tool_calls_new = json{ - {"tool_calls", message.at("tool_calls")} - }; - message.erase("tool_calls"); - auto content = message.at("content"); - std::string content_new = content.is_null() ? 
"" : content.get(); - message["content"] = content_new + tool_calls_new.dump(indent_spaces, ' ', false, json::error_handler_t::replace); - } - } -} - -// TODO @ngxson : we may remove support for generic schema in the future -static void use_generic_schema(json & messages) { - GGML_ASSERT(messages.is_array()); - for (auto & message : messages) { - if (message.contains("tool_calls") && message.at("tool_calls").is_array()) { - auto & tool_calls = message.at("tool_calls"); - for (auto & tool_call : tool_calls) { - if (tool_call.contains("type") && tool_call.at("type") == "function" && - tool_call.contains("function") && tool_call.at("function").is_object()) { - // Copy values before erasing to avoid use-after-free - json name_value; - json arguments_value; - json id_value; - const auto & function = tool_call.at("function"); - if (function.contains("name")) { - name_value = function.at("name"); - } - if (function.contains("arguments")) { - arguments_value = function.at("arguments"); - } - if (tool_call.contains("id")) { - id_value = tool_call.at("id"); - } - // Now safely erase and assign in the correct order - tool_call.erase("type"); - tool_call.erase("function"); - tool_call.erase("id"); - // Reassign in desired order: name, arguments, id - if (!name_value.is_null()) { - tool_call["name"] = name_value; - } - if (!arguments_value.is_null()) { - tool_call["arguments"] = arguments_value; - } - if (!id_value.is_null()) { - tool_call["id"] = id_value; - } - } - } - } - } -} - -} // namespace workaround - -static common_chat_params common_chat_templates_apply_jinja( - const struct common_chat_templates * tmpls, - const struct common_chat_templates_inputs & inputs) -{ +static common_chat_params common_chat_templates_apply_jinja(const struct common_chat_templates * tmpls, + const struct common_chat_templates_inputs & inputs) { templates_params params; params.tools = common_chat_tools_to_json_oaicompat(inputs.tools); const auto & tmpl = params.tools.is_array() && 
tmpls->template_tool_use @@ -3087,235 +1141,56 @@ static common_chat_params common_chat_templates_apply_jinja( params.json_schema = json::parse(inputs.json_schema); } - if (inputs.parallel_tool_calls && !tmpl.original_caps().supports_parallel_tool_calls) { - LOG_DBG("Disabling parallel_tool_calls because the template does not support it\n"); - params.parallel_tool_calls = false; - } else { - params.parallel_tool_calls = inputs.parallel_tool_calls; - } + // if (inputs.parallel_tool_calls && !tmpl.original_caps().supports_parallel_tool_calls) { + // LOG_DBG("Disabling parallel_tool_calls because the template does not support it\n"); + // params.parallel_tool_calls = false; + // } else { + params.parallel_tool_calls = inputs.parallel_tool_calls; + //} if (params.tools.is_array()) { if (params.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE && !params.grammar.empty()) { throw std::runtime_error("Cannot specify grammar with tools"); } if (caps.supports_tool_calls && !caps.supports_tools) { - LOG_WRN("Template supports tool calls but does not natively describe tools. The fallback behaviour used may produce bad results, inspect prompt w/ --verbose & consider overriding the template.\n"); + LOG_WRN( + "Template supports tool calls but does not natively describe tools. The fallback behaviour used may " + "produce bad results, inspect prompt w/ --verbose & consider overriding the template.\n"); } } - // DeepSeek V3.1: detect based on specific patterns in the template - if (src.find("message['prefix'] is defined and message['prefix'] and thinking") != std::string::npos && - params.json_schema.is_null()) { - return common_chat_params_init_deepseek_v3_1(tmpl, params); - } - - // DeepSeek R1: use handler in all cases except json schema (thinking / tools). 
- if (src.find("<|tool▁calls▁begin|>") != std::string::npos && params.json_schema.is_null()) { - return common_chat_params_init_deepseek_r1(tmpl, params); - } - - // Command R7B: : use handler in all cases except json schema (thinking / tools). - if (src.find("<|END_THINKING|><|START_ACTION|>") != std::string::npos && params.json_schema.is_null()) { - workaround::func_args_not_string(params.messages); - return common_chat_params_init_command_r7b(tmpl, params); - } - - // Granite (IBM) - detects thinking / tools support - if (src.find("elif thinking") != std::string::npos && src.find("<|tool_call|>") != std::string::npos) { - workaround::func_args_not_string(params.messages); - workaround::use_generic_schema(params.messages); - workaround::move_tool_calls_to_content(params.messages); - return common_chat_params_init_granite(tmpl, params); - } - - // GLM 4.5: detect by and tags (check before Hermes since both use ) - if (src.find("[gMASK]") != std::string::npos && - src.find("") != std::string::npos && - src.find("") != std::string::npos && - params.json_schema.is_null()) { - workaround::func_args_not_string(params.messages); - if (!params.extra_context.contains("clear_thinking")) { - // by default, do not clear reasoning_content (added since GLM-4.7) - params.extra_context["clear_thinking"] = false; - } - return common_chat_params_init_glm_4_5(tmpl, params); - } - - // Qwen3-Coder XML format detection (must come before Hermes 2 Pro) - // Detect via explicit XML markers unique to Qwen3-Coder to avoid false positives in other templates. - // Require presence of , , and blocks. 
- if (src.find("") != std::string::npos && - src.find("") != std::string::npos && - src.find("") != std::string::npos && - src.find("") != std::string::npos) { - return common_chat_params_init_nemotron_v3(tmpl, params); - } - return common_chat_params_init_qwen3_coder_xml(tmpl, params); - } - - // Xiaomi MiMo format detection (must come before Hermes 2 Pro) - if (src.find("") != std::string::npos && - src.find("# Tools") != std::string::npos && - src.find("") != std::string::npos && - src.find("") != std::string::npos && - src.find("") != std::string::npos && - src.find("") != std::string::npos) { - return common_chat_params_init_xiaomi_mimo(tmpl, params); - } - - // EXAONE MoE format detection - if (src.find("") != std::string::npos && - src.find("") != std::string::npos && - src.find("<|tool_declare|>") != std::string::npos) { - return common_chat_params_init_exaone_moe(tmpl, params); - } - - // Hermes 2/3 Pro, Qwen 2.5 Instruct (w/ tools) - if (src.find("") != std::string::npos && params.json_schema.is_null()) { - return common_chat_params_init_hermes_2_pro(tmpl, params); - } - - // GPT-OSS - if (src.find("<|channel|>") != std::string::npos) { - return common_chat_params_init_gpt_oss(tmpl, params); - } - - // Seed-OSS - if (src.find("") != std::string::npos) { - workaround::func_args_not_string(params.messages); - return common_chat_params_init_seed_oss(tmpl, params, inputs); - } - - // Nemotron v2 - if (src.find("") != std::string::npos) { - return common_chat_params_init_nemotron_v2(tmpl, params); - } - - // Apertus format detection - if (src.find("<|system_start|>") != std::string::npos && src.find("<|tools_prefix|>") != std::string::npos) { - return common_chat_params_init_apertus(tmpl, params); - } - - // LFM2 (w/ tools) - if (src.find("List of tools: <|tool_list_start|>[") != std::string::npos && - src.find("]<|tool_list_end|>") != std::string::npos) { - return common_chat_params_init_lfm2(tmpl, params); - } - - // MiniMax-M2 format detection - if 
(src.find("]~!b[") != std::string::npos && src.find("]~b]") != std::string::npos) { - workaround::func_args_not_string(params.messages); - return common_chat_params_init_minimax_m2(tmpl, params); - } - - // Kimi K2 format detection - if (src.find("<|im_system|>tool_declare<|im_middle|>") != std::string::npos && - src.find("<|tool_calls_section_begin|>") != std::string::npos && - src.find("## Return of") != std::string::npos) { - return common_chat_params_init_kimi_k2(tmpl, params); - } - - // Apriel 1.5 format detection - if (src.find("") != std::string::npos && - src.find("") != std::string::npos && - src.find("") != std::string::npos && - src.find("<|assistant|>") != std::string::npos && - src.find("<|tool_result|>") != std::string::npos && - src.find("[") != std::string::npos && - src.find("]") != std::string::npos) { - return common_chat_params_init_apriel_1_5(tmpl, params); - } - - // Solar Open - if (src.find("<|tool_response:begin|>") != std::string::npos && - src.find("<|tool_response:name|>") != std::string::npos && - src.find("<|tool_response:result|>") != std::string::npos) { - return common_chat_params_init_solar_open(tmpl, params); - } - - // Use generic handler when mixing tools + JSON schema. - // TODO: support that mix in handlers below. - if ((params.tools.is_array() && params.json_schema.is_object())) { - return common_chat_params_init_generic(tmpl, params); - } - - // Functionary prepends "all\n" to plain content outputs, so we use its handler in all cases. - if (src.find(">>>all") != std::string::npos) { - return common_chat_params_init_functionary_v3_2(tmpl, params); - } - - // Firefunction v2 requires datetime and functions in the context even w/o tools, so we also use its handler in all cases. 
- if (src.find(" functools[") != std::string::npos) { - return common_chat_params_init_firefunction_v2(tmpl, params); - } - - // Functionary v3.1 (w/ tools) - if (src.find("<|start_header_id|>") != std::string::npos - && src.find("ipython<|end_header_id|>") != std::string::npos) { - auto allow_python_tag_builtin_tools = src.find("<|python_tag|>") != std::string::npos; - workaround::func_args_not_string(params.messages); - return common_chat_params_init_llama_3_x(tmpl, params, allow_python_tag_builtin_tools); - } - - // Ministral/Mistral Large 3 - if (src.find("[SYSTEM_PROMPT]") != std::string::npos && - src.find("[TOOL_CALLS]") != std::string::npos && - src.find("[ARGS]") != std::string::npos) { + // Ministral/Mistral Large 3 - uses special reasoning structure fixes, can't use autoparser + // Note: Mistral Small 3.2 uses [CALL_ID] which Ministral doesn't have, so we can distinguish them + if (src.find("[SYSTEM_PROMPT]") != std::string::npos && src.find("[TOOL_CALLS]") != std::string::npos && + src.find("[ARGS]") != std::string::npos && src.find("[CALL_ID]") == std::string::npos) { + LOG_INF("Using specialized template: Ministral/Magistral Large 3\n"); return common_chat_params_init_ministral_3(tmpl, params); } - if (src.find("[THINK]") != std::string::npos && src.find("[/THINK]") != std::string::npos) { - return common_chat_params_init_magistral(tmpl, params); + // GPT-OSS - has unique channel-based structure that needs dedicated handler + if (src.find("<|channel|>") != std::string::npos) { + LOG_INF("Using specialized template: GPT-OSS\n"); + return common_chat_params_init_gpt_oss(tmpl, params); } - // Solar Open - if (src.find("<|tool_response:begin|>") != std::string::npos && - src.find("<|tool_response:name|>") != std::string::npos && - src.find("<|tool_response:result|>") != std::string::npos) { - return common_chat_params_init_solar_open(tmpl, params); + try { + LOG_INF("Using autoparser for template analysis\n"); + template_analysis_result analysis = 
template_analyzer::analyze_template(tmpl); + auto auto_params = universal_peg_generator::generate_parser(analysis, tmpl, params); + return auto_params; + } catch (const std::exception & e) { + LOG_WRN("Automatic parser generation failed: %s\n", e.what()); } - // TranslateGemma - if (src.find("[source_lang_code]") != std::string::npos && - src.find("[target_lang_code]") != std::string::npos) { - return common_chat_params_init_translate_gemma(tmpl, params); - } - - // Plain handler (no tools) - if (params.tools.is_null() || inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_NONE) { - return common_chat_params_init_without_tools(tmpl, params); - } - - // Mistral Nemo (w/ tools) - if (src.find("[TOOL_CALLS]") != std::string::npos) { - workaround::func_args_not_string(params.messages); - return common_chat_params_init_mistral_nemo(tmpl, params); - } - - // Generic fallback - workaround::func_args_not_string(params.messages); - workaround::use_generic_schema(params.messages); - workaround::move_tool_calls_to_content(params.messages); - return common_chat_params_init_generic(tmpl, params); + GGML_ABORT("Unable to generate parser for this template."); } // Legacy template route (adhoc C++ implementation of known templates), forward to llama_chat_apply_template. 
-static common_chat_params common_chat_templates_apply_legacy( - const struct common_chat_templates * tmpls, - const struct common_chat_templates_inputs & inputs) -{ - size_t alloc_size = 0; +static common_chat_params common_chat_templates_apply_legacy(const struct common_chat_templates * tmpls, + const struct common_chat_templates_inputs & inputs) { + size_t alloc_size = 0; std::vector chat; - std::vector contents; + std::vector contents; for (const auto & msg : inputs.messages) { auto content = msg.content; @@ -3325,25 +1200,27 @@ static common_chat_params common_chat_templates_apply_legacy( continue; } if (!content.empty()) { - content += "\n";; + content += "\n"; + ; } content += part.text; } contents.emplace_back(std::move(content)); } for (size_t i = 0; i < contents.size(); ++i) { - const auto & msg = inputs.messages[i]; + const auto & msg = inputs.messages[i]; const auto & content = contents[i]; - chat.push_back({msg.role.c_str(), content.c_str()}); + chat.push_back({ msg.role.c_str(), content.c_str() }); size_t msg_size = msg.role.size() + content.size(); - alloc_size += msg_size + (msg_size / 4); // == msg_size * 1.25 but avoiding float ops + alloc_size += msg_size + (msg_size / 4); // == msg_size * 1.25 but avoiding float ops } std::vector buf(alloc_size); // run the first time to get the total output length const auto & src = tmpls->template_default->source(); - int32_t res = llama_chat_apply_template(src.c_str(), chat.data(), chat.size(), inputs.add_generation_prompt, buf.data(), buf.size()); + int32_t res = llama_chat_apply_template(src.c_str(), chat.data(), chat.size(), inputs.add_generation_prompt, + buf.data(), buf.size()); // error: chat template is not supported if (res < 0) { @@ -3355,7 +1232,8 @@ static common_chat_params common_chat_templates_apply_legacy( // if it turns out that our buffer is too small, we resize it if ((size_t) res > buf.size()) { buf.resize(res); - res = llama_chat_apply_template(src.c_str(), chat.data(), chat.size(), 
inputs.add_generation_prompt, buf.data(), buf.size()); + res = llama_chat_apply_template(src.c_str(), chat.data(), chat.size(), inputs.add_generation_prompt, buf.data(), + buf.size()); } // for safety, we check the result again @@ -3373,14 +1251,75 @@ static common_chat_params common_chat_templates_apply_legacy( return params; } -common_chat_params common_chat_templates_apply( - const struct common_chat_templates * tmpls, - const struct common_chat_templates_inputs & inputs) -{ +common_chat_params common_chat_templates_apply(const struct common_chat_templates * tmpls, + const struct common_chat_templates_inputs & inputs) { GGML_ASSERT(tmpls != nullptr); - return inputs.use_jinja - ? common_chat_templates_apply_jinja(tmpls, inputs) - : common_chat_templates_apply_legacy(tmpls, inputs); + return inputs.use_jinja ? common_chat_templates_apply_jinja(tmpls, inputs) : + common_chat_templates_apply_legacy(tmpls, inputs); +} + +common_chat_msg common_chat_parse(const std::string & input, bool is_partial, const common_chat_syntax & syntax) { + return common_chat_peg_parse(syntax.parser, input, is_partial, syntax); +} + +common_chat_msg common_chat_peg_parse(const common_peg_arena & parser, + const std::string & input, + bool is_partial, + const common_chat_syntax & syntax) { + if (parser.empty()) { + throw std::runtime_error("Failed to parse due to missing parser definition."); + } + + LOG_DBG("Parsing PEG input with format %s: %s\n", common_chat_format_name(syntax.format), input.c_str()); + + common_peg_parse_context ctx(input, is_partial); + ctx.debug = syntax.debug; + auto result = parser.parse(ctx); + + if (result.fail()) { + // During partial parsing, return partial results if any AST nodes were captured + // This allows streaming to work correctly for formats like FUNC_MARKDOWN_CODE_BLOCK + if (is_partial && result.end > 0) { + // Try to extract any partial results from what was successfully parsed + common_chat_msg msg; + msg.role = "assistant"; + if (syntax.format 
== COMMON_CHAT_FORMAT_PEG_NATIVE) { + auto mapper = common_chat_peg_unified_mapper(msg); + mapper.from_ast(ctx.ast, result); + } else { + auto mapper = common_chat_peg_mapper(msg); + mapper.from_ast(ctx.ast, result); + } + if (ctx.debug) { + fprintf(stderr, "\nAST for partial parse (fail):\n%s\n", ctx.ast.dump().c_str()); + fflush(stderr); + } + return msg; + } + throw std::runtime_error(std::string("Failed to parse input at pos ") + std::to_string(result.end) + ": " + + input.substr(result.end)); + } + + common_chat_msg msg; + msg.role = "assistant"; + + if (syntax.format == COMMON_CHAT_FORMAT_PEG_NATIVE) { + auto mapper = common_chat_peg_unified_mapper(msg); + mapper.from_ast(ctx.ast, result); + } else { + // Generic mapper + auto mapper = common_chat_peg_mapper(msg); + mapper.from_ast(ctx.ast, result); + } + if (ctx.debug) { + fprintf(stderr, "\nAST for %s parse:\n%s\n", is_partial ? "partial" : "full", ctx.ast.dump().c_str()); + fflush(stderr); + } + + if (!is_partial) { + LOG_DBG("Parsed message: %s\n", common_chat_msgs_to_json_oaicompat({ msg }).at(0).dump().c_str()); + } + return msg; } std::map common_chat_templates_get_caps(const common_chat_templates * chat_templates) { diff --git a/common/chat.h b/common/chat.h index 1bf43f7261..4fec39c74f 100644 --- a/common/chat.h +++ b/common/chat.h @@ -3,12 +3,21 @@ #pragma once #include "common.h" +#include "jinja/parser.h" +#include "nlohmann/json_fwd.hpp" #include "peg-parser.h" -#include +#include "jinja/runtime.h" +#include "jinja/caps.h" +#include "nlohmann/json.hpp" + #include +#include +#include #include #include -#include + +using chat_template_caps = jinja::caps; +using json = nlohmann::ordered_json; #include @@ -38,21 +47,85 @@ struct common_chat_msg_content_part { } }; +struct common_chat_template { + jinja::program prog; + std::string bos_tok; + std::string eos_tok; + std::string src; + chat_template_caps caps; + + common_chat_template(const std::string & src, const std::string & bos_token, const 
std::string & eos_token) { + jinja::lexer lexer; + auto lexer_res = lexer.tokenize(src); + this->prog = jinja::parse_from_tokens(lexer_res); + + this->src = lexer_res.source; + this->bos_tok = bos_token; + this->eos_tok = eos_token; + + this->caps = jinja::caps_get(prog); + // LOG_INF("%s: caps:\n%s\n", __func__, this->caps.to_string().c_str()); + } + + const std::string & source() const { return src; } + const std::string & bos_token() const { return bos_tok; } + const std::string & eos_token() const { return eos_tok; } + + // TODO: this is ugly, refactor it somehow + json add_system(const json & messages, const std::string & system_prompt) const { + GGML_ASSERT(messages.is_array()); + auto msgs_copy = messages; + if (!caps.supports_system_role) { + if (msgs_copy.empty()) { + msgs_copy.insert(msgs_copy.begin(), json{ + {"role", "user"}, + {"content", system_prompt} + }); + } else { + auto & first_msg = msgs_copy[0]; + if (!first_msg.contains("content")) { + first_msg["content"] = ""; + } + first_msg["content"] = system_prompt + "\n\n" + + first_msg["content"].get(); + } + } else { + if (msgs_copy.empty() || msgs_copy[0].at("role") != "system") { + msgs_copy.insert(msgs_copy.begin(), json{ + {"role", "system"}, + {"content", system_prompt} + }); + } else if (msgs_copy[0].at("role") == "system") { + msgs_copy[0]["content"] = system_prompt; + } + } + return msgs_copy; + } + + chat_template_caps original_caps() const { + return caps; + } + +}; + struct common_chat_msg { - std::string role; - std::string content; + std::string role; + std::string content; std::vector content_parts; - std::vector tool_calls; - std::string reasoning_content; - std::string tool_name; - std::string tool_call_id; + std::vector tool_calls; + std::string reasoning_content; + std::string tool_name; + std::string tool_call_id; nlohmann::ordered_json to_json_oaicompat(bool concat_typed_text = false) const; bool empty() const { - return content.empty() && content_parts.empty() && 
tool_calls.empty() && reasoning_content.empty() && tool_name.empty() && tool_call_id.empty(); + return content.empty() && content_parts.empty() && tool_calls.empty() && reasoning_content.empty() && + tool_name.empty() && tool_call_id.empty(); } - void set_tool_call_ids(std::vector & ids_cache, const std::function & gen_tool_call_id) { + + void set_tool_call_ids(std::vector & ids_cache, + const std::function & gen_tool_call_id) { for (auto i = 0u; i < tool_calls.size(); i++) { if (ids_cache.size() <= i) { auto id = tool_calls[i].id; @@ -64,32 +137,28 @@ struct common_chat_msg { tool_calls[i].id = ids_cache[i]; } } + bool operator==(const common_chat_msg & other) const { - return role == other.role - && content == other.content - && content_parts == other.content_parts - && tool_calls == other.tool_calls - && reasoning_content == other.reasoning_content - && tool_name == other.tool_name - && tool_call_id == other.tool_call_id; - } - bool operator!=(const common_chat_msg & other) const { - return !(*this == other); + return role == other.role && content == other.content && content_parts == other.content_parts && + tool_calls == other.tool_calls && reasoning_content == other.reasoning_content && + tool_name == other.tool_name && tool_call_id == other.tool_call_id; } + + bool operator!=(const common_chat_msg & other) const { return !(*this == other); } }; struct common_chat_msg_diff { - std::string reasoning_content_delta; - std::string content_delta; - size_t tool_call_index = std::string::npos; + std::string reasoning_content_delta; + std::string content_delta; + size_t tool_call_index = std::string::npos; common_chat_tool_call tool_call_delta; - static std::vector compute_diffs(const common_chat_msg & msg_prv, const common_chat_msg & msg_new); + static std::vector compute_diffs(const common_chat_msg & msg_prv, + const common_chat_msg & msg_new); bool operator==(const common_chat_msg_diff & other) const { - return content_delta == other.content_delta - && 
tool_call_index == other.tool_call_index - && tool_call_delta == other.tool_call_delta; + return content_delta == other.content_delta && tool_call_index == other.tool_call_index && + tool_call_delta == other.tool_call_delta; } }; @@ -107,64 +176,37 @@ enum common_chat_tool_choice { enum common_chat_format { COMMON_CHAT_FORMAT_CONTENT_ONLY, - COMMON_CHAT_FORMAT_GENERIC, - COMMON_CHAT_FORMAT_MISTRAL_NEMO, - COMMON_CHAT_FORMAT_MAGISTRAL, - COMMON_CHAT_FORMAT_LLAMA_3_X, - COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS, - COMMON_CHAT_FORMAT_DEEPSEEK_R1, - COMMON_CHAT_FORMAT_FIREFUNCTION_V2, - COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2, - COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1, - COMMON_CHAT_FORMAT_DEEPSEEK_V3_1, - COMMON_CHAT_FORMAT_HERMES_2_PRO, - COMMON_CHAT_FORMAT_COMMAND_R7B, - COMMON_CHAT_FORMAT_GRANITE, - COMMON_CHAT_FORMAT_GPT_OSS, - COMMON_CHAT_FORMAT_SEED_OSS, - COMMON_CHAT_FORMAT_NEMOTRON_V2, - COMMON_CHAT_FORMAT_APERTUS, - COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS, - COMMON_CHAT_FORMAT_GLM_4_5, - COMMON_CHAT_FORMAT_MINIMAX_M2, - COMMON_CHAT_FORMAT_KIMI_K2, - COMMON_CHAT_FORMAT_QWEN3_CODER_XML, - COMMON_CHAT_FORMAT_APRIEL_1_5, - COMMON_CHAT_FORMAT_XIAOMI_MIMO, - COMMON_CHAT_FORMAT_SOLAR_OPEN, - COMMON_CHAT_FORMAT_EXAONE_MOE, // These are intended to be parsed by the PEG parser COMMON_CHAT_FORMAT_PEG_SIMPLE, COMMON_CHAT_FORMAT_PEG_NATIVE, - COMMON_CHAT_FORMAT_PEG_CONSTRUCTED, - COMMON_CHAT_FORMAT_COUNT, // Not a format, just the # formats + COMMON_CHAT_FORMAT_COUNT, // Not a format, just the # formats }; struct common_chat_templates_inputs { - std::vector messages; - std::string grammar; - std::string json_schema; - bool add_generation_prompt = true; - bool use_jinja = true; + std::vector messages; + std::string grammar; + std::string json_schema; + bool add_generation_prompt = true; + bool use_jinja = true; // Parameters below only supported when use_jinja is true - std::vector tools; - common_chat_tool_choice tool_choice = COMMON_CHAT_TOOL_CHOICE_AUTO; - bool 
parallel_tool_calls = false; + std::vector tools; + common_chat_tool_choice tool_choice = COMMON_CHAT_TOOL_CHOICE_AUTO; + bool parallel_tool_calls = false; common_reasoning_format reasoning_format = COMMON_REASONING_FORMAT_NONE; // TODO: refactor this to "bool enable_thinking" - bool enable_thinking = true; - std::chrono::system_clock::time_point now = std::chrono::system_clock::now(); - std::map chat_template_kwargs; - bool add_bos = false; - bool add_eos = false; + bool enable_thinking = true; + std::chrono::system_clock::time_point now = std::chrono::system_clock::now(); + std::map chat_template_kwargs; + bool add_bos = false; + bool add_eos = false; }; struct common_chat_params { common_chat_format format = COMMON_CHAT_FORMAT_CONTENT_ONLY; std::string prompt; std::string grammar; - bool grammar_lazy = false; + bool grammar_lazy = false; bool thinking_forced_open = false; std::vector grammar_triggers; std::vector preserved_tokens; @@ -175,13 +217,14 @@ struct common_chat_params { // per-message parsing syntax // should be derived from common_chat_params struct common_chat_parser_params { - common_chat_format format = COMMON_CHAT_FORMAT_CONTENT_ONLY; + common_chat_format format = COMMON_CHAT_FORMAT_CONTENT_ONLY; common_reasoning_format reasoning_format = COMMON_REASONING_FORMAT_NONE; // TODO: refactor this to "bool parse_reasoning" // Whether reasoning_content should be inlined in the content (e.g. 
for reasoning_format=deepseek in stream mode) - bool reasoning_in_content = false; - bool thinking_forced_open = false; - bool parse_tool_calls = true; - common_peg_arena parser = {}; + bool reasoning_in_content = false; + bool thinking_forced_open = false; + bool parse_tool_calls = true; + bool debug = false; // Enable debug output for PEG parser + common_peg_arena parser = {}; common_chat_parser_params() = default; common_chat_parser_params(const common_chat_params & chat_params) { format = chat_params.format; @@ -194,45 +237,47 @@ bool common_chat_verify_template(const std::string & tmpl, bool use_jinja); void common_chat_templates_free(struct common_chat_templates * tmpls); -struct common_chat_templates_deleter { void operator()(common_chat_templates * tmpls) { common_chat_templates_free(tmpls); } }; +struct common_chat_templates_deleter { + void operator()(common_chat_templates * tmpls) { common_chat_templates_free(tmpls); } +}; typedef std::unique_ptr common_chat_templates_ptr; -common_chat_templates_ptr common_chat_templates_init( - const struct llama_model * model, - const std::string & chat_template_override, - const std::string & bos_token_override = "", - const std::string & eos_token_override = ""); +common_chat_templates_ptr common_chat_templates_init(const struct llama_model * model, + const std::string & chat_template_override, + const std::string & bos_token_override = "", + const std::string & eos_token_override = ""); bool common_chat_templates_was_explicit(const struct common_chat_templates * tmpls); std::string common_chat_templates_source(const struct common_chat_templates * tmpls, const std::string & variant = ""); - -struct common_chat_params common_chat_templates_apply( - const struct common_chat_templates * tmpls, - const struct common_chat_templates_inputs & inputs); +struct common_chat_params common_chat_templates_apply(const struct common_chat_templates * tmpls, + const struct common_chat_templates_inputs & inputs); // Format single 
message, while taking into account the position of that message in chat history -std::string common_chat_format_single( - const struct common_chat_templates * tmpls, - const std::vector & past_msg, - const common_chat_msg & new_msg, - bool add_ass, - bool use_jinja); +std::string common_chat_format_single(const struct common_chat_templates * tmpls, + const std::vector & past_msg, + const common_chat_msg & new_msg, + bool add_ass, + bool use_jinja); // Returns an example of formatted chat -std::string common_chat_format_example( - const struct common_chat_templates * tmpls, - bool use_jinja, - const std::map & chat_template_kwargs); +std::string common_chat_format_example(const struct common_chat_templates * tmpls, + bool use_jinja, + const std::map & chat_template_kwargs); -const char* common_chat_format_name(common_chat_format format); +const char * common_chat_format_name(common_chat_format format); common_chat_msg common_chat_parse(const std::string & input, bool is_partial, const common_chat_parser_params & syntax); common_chat_msg common_chat_peg_parse(const common_peg_arena & parser, const std::string & input, bool is_partial, const common_chat_parser_params & syntax); // used by arg and server -const char * common_reasoning_format_name(common_reasoning_format format); -common_reasoning_format common_reasoning_format_from_name(const std::string & format); +const char * common_reasoning_format_name(common_reasoning_format format); +common_reasoning_format common_reasoning_format_from_name(const std::string & format); +common_chat_msg common_chat_parse(const std::string & input, bool is_partial, const common_chat_syntax & syntax); +common_chat_msg common_chat_peg_parse(const common_peg_arena & parser, + const std::string & input, + bool is_partial, + const common_chat_syntax & syntax); common_chat_tool_choice common_chat_tool_choice_parse_oaicompat(const std::string & tool_choice); @@ -251,3 +296,10 @@ nlohmann::ordered_json 
common_chat_msg_diff_to_json_oaicompat(const common_chat_ // get template caps, useful for reporting to server /props endpoint std::map common_chat_templates_get_caps(const common_chat_templates * chat_templates); + +std::string common_chat_template_direct_apply( + const common_chat_template & tmpl, + const struct templates_params & inputs, + const std::optional & messages_override = std::nullopt, + const std::optional & tools_override = std::nullopt, + const std::optional & additional_context = std::nullopt); diff --git a/common/jinja/value.cpp b/common/jinja/value.cpp index 2aa156b177..17d7eae764 100644 --- a/common/jinja/value.cpp +++ b/common/jinja/value.cpp @@ -428,6 +428,22 @@ const func_builtins & global_builtins() { bool res = it != builtins.end(); return mk_val(res); }}, + {"test_is_in", [](const func_args & args) -> value { + args.ensure_count(2, 2); + value val_needle = args.get_pos(0); + value val_haystack = args.get_pos(1); + const auto & haystack = is_val(val_haystack) ? val_haystack->as_array() : std::vector(1, val_haystack); + for (auto it = haystack.cbegin(); it != haystack.cend(); it++) { + if ((*it)->type() == val_needle->type()) { + if (is_val(val_haystack) ? 
+ (*it)->as_string().str().find(val_needle->as_string().str()) != std::string::npos : + value_compare(*it, val_needle, value_compare_op::eq)) { + return mk_val(true); + } + } + } + return mk_val(false); + }}, {"test_is_sameas", [](const func_args & args) -> value { // Check if an object points to the same memory address as another object (void)args; @@ -715,8 +731,26 @@ const func_builtins & value_string_t::get_builtins() const { return args.get_pos(0); }}, {"tojson", tojson}, - {"indent", [](const func_args &) -> value { - throw not_implemented_exception("String indent builtin not implemented"); + {"indent", [](const func_args &args) -> value { + // no support for "first" as that would require us to somehow access generation context + args.ensure_count(2, 4); + args.ensure_vals(true, true, false, false); + + auto input = args.get_pos(0); + auto arg0 = args.get_pos(1); + + int count = arg0->as_int(); + if (count <= 0) { + throw raised_exception("indent must be a positive number"); + } + std::string indented; + for (int i = 0; i < count; i++) { + indented.append(" "); + } + indented.append(input->as_string().str()); + auto res = mk_val(indented); + res->val_str.mark_input_based_on(input->as_string()); + return res; }}, {"join", [](const func_args &) -> value { throw not_implemented_exception("String join builtin not implemented"); diff --git a/common/jinja/value.h b/common/jinja/value.h index 1c04760a08..0425bda5e3 100644 --- a/common/jinja/value.h +++ b/common/jinja/value.h @@ -617,6 +617,8 @@ struct value_undefined_t : public value_t { value_undefined_t(const std::string & h = "") : hint(h) {} virtual std::string type() const override { return hint.empty() ? 
"Undefined" : "Undefined (hint: '" + hint + "')"; } virtual bool is_undefined() const override { return true; } + // note: some templates use "is none" as equivalent to "is undefined" + virtual bool is_none() const override { return true; } virtual bool as_bool() const override { return false; } virtual std::string as_repr() const override { return type(); } virtual const func_builtins & get_builtins() const override; diff --git a/common/peg-parser.cpp b/common/peg-parser.cpp index f2fc84500f..80dd105246 100644 --- a/common/peg-parser.cpp +++ b/common/peg-parser.cpp @@ -1,28 +1,32 @@ -#include "common.h" #include "peg-parser.h" -#include "json-schema-to-grammar.h" -#include "unicode.h" -#include +#include "common.h" +#include "json-schema-to-grammar.h" +#include "log.h" +#include "unicode.h" #include #include #include #include +#include #include #include #include // Trick to catch missing branches -template -inline constexpr bool is_always_false_v = false; +template inline constexpr bool is_always_false_v = false; const char * common_peg_parse_result_type_name(common_peg_parse_result_type type) { switch (type) { - case COMMON_PEG_PARSE_RESULT_FAIL: return "fail"; - case COMMON_PEG_PARSE_RESULT_SUCCESS: return "success"; - case COMMON_PEG_PARSE_RESULT_NEED_MORE_INPUT: return "need_more_input"; - default: return "unknown"; + case COMMON_PEG_PARSE_RESULT_FAIL: + return "fail"; + case COMMON_PEG_PARSE_RESULT_SUCCESS: + return "success"; + case COMMON_PEG_PARSE_RESULT_NEED_MORE_INPUT: + return "need_more_input"; + default: + return "unknown"; } } @@ -34,81 +38,88 @@ static bool is_hex_digit(const char c) { // This is used in common_peg_until_parser and to build a GBNF exclusion grammar struct trie { struct node { - size_t depth = 0; - std::map children; - bool is_word; + std::map children; + bool is_word = false; }; std::vector nodes; trie(const std::vector & words) { - create_node(); // root node - for (const auto & w : words) { - insert(w); - } + create_node(); // 
root node + for (const auto & w : words) { + insert(w); + } } enum match_result { NO_MATCH, PARTIAL_MATCH, COMPLETE_MATCH }; // Check if a delimiter starts at the given position match_result check_at(std::string_view sv, size_t start_pos) const { - size_t current = 0; // Start at root - size_t pos = start_pos; + size_t current = 0; // Start at root + size_t pos = start_pos; + + // LOG_DBG("%s: checking at pos %zu, sv='%s'\n", __func__, start_pos, std::string(sv).c_str()); while (pos < sv.size()) { - auto it = nodes[current].children.find(sv[pos]); + auto result = parse_utf8_codepoint(sv, pos); + if (result.status != utf8_parse_result::SUCCESS) { + break; + } + + auto it = nodes[current].children.find(result.codepoint); if (it == nodes[current].children.end()) { // Can't continue matching - return match_result{match_result::NO_MATCH}; + return match_result{ match_result::NO_MATCH }; } current = it->second; - pos++; + pos += result.bytes_consumed; // Check if we've matched a complete word if (nodes[current].is_word) { - return match_result{match_result::COMPLETE_MATCH}; + // LOG_DBG("%s: complete match found at pos %zu\n", __func__, pos); + return match_result{ match_result::COMPLETE_MATCH }; } } // Reached end of input while still in the trie (not at root) if (current != 0) { // We're in the middle of a potential match - return match_result{match_result::PARTIAL_MATCH}; + return match_result{ match_result::PARTIAL_MATCH }; } // Reached end at root (no match) - return match_result{match_result::NO_MATCH}; + return match_result{ match_result::NO_MATCH }; } struct prefix_and_next { - std::string prefix; - std::string next_chars; + std::vector prefix; + std::vector next_chars; }; std::vector collect_prefix_and_next() { - std::string prefix; + std::vector prefix; std::vector result; collect_prefix_and_next(0, prefix, result); return result; } private: - void collect_prefix_and_next(size_t index, std::string & prefix, std::vector & out) { + void 
collect_prefix_and_next(size_t index, std::vector & prefix, std::vector & out) { if (!nodes[index].is_word) { if (!nodes[index].children.empty()) { - std::string chars; + std::vector chars; chars.reserve(nodes[index].children.size()); for (const auto & p : nodes[index].children) { chars.push_back(p.first); } - out.emplace_back(prefix_and_next{prefix, chars}); + out.emplace_back(prefix_and_next{ prefix, chars }); } } for (const auto & p : nodes[index].children) { - unsigned char ch = p.first; - auto child = p.second; + uint32_t ch = p.first; + auto child = p.second; prefix.push_back(ch); collect_prefix_and_next(child, prefix, out); prefix.pop_back(); @@ -123,13 +134,21 @@ struct trie { void insert(const std::string & word) { size_t current = 0; - for (unsigned char ch : word) { + size_t pos = 0; + while (pos < word.length()) { + auto result = parse_utf8_codepoint(word, pos); + if (result.status != utf8_parse_result::SUCCESS) { + break; + } + + uint32_t ch = result.codepoint; + pos += result.bytes_consumed; + auto it = nodes[current].children.find(ch); if (it == nodes[current].children.end()) { - size_t child = create_node(); - nodes[child].depth = nodes[current].depth + 1; + size_t child = create_node(); nodes[current].children[ch] = child; - current = child; + current = child; } else { current = it->second; } @@ -140,14 +159,14 @@ struct trie { static std::pair parse_hex_escape(const std::string & str, size_t pos, int hex_count) { if (pos + hex_count > str.length()) { - return {0, 0}; + return { 0, 0 }; } uint32_t value = 0; for (int i = 0; i < hex_count; i++) { char c = str[pos + i]; if (!is_hex_digit(c)) { - return {0, 0}; + return { 0, 0 }; } value <<= 4; if ('a' <= c && c <= 'f') { @@ -160,53 +179,64 @@ static std::pair parse_hex_escape(const std::string & str, siz break; } } - return {value, static_cast(hex_count)}; + return { value, static_cast(hex_count) }; } static std::pair parse_char_class_char(const std::string & content, size_t pos) { if (content[pos] 
== '\\' && pos + 1 < content.length()) { switch (content[pos + 1]) { - case 'x': { - auto result = parse_hex_escape(content, pos + 2, 2); - if (result.second > 0) { - return {result.first, 2 + result.second}; + case 'x': + { + auto result = parse_hex_escape(content, pos + 2, 2); + if (result.second > 0) { + return { result.first, 2 + result.second }; + } + // Invalid escape, treat as literal 'x' + return { static_cast('x'), 2 }; } - // Invalid escape, treat as literal 'x' - return {static_cast('x'), 2}; - } - case 'u': { - auto result = parse_hex_escape(content, pos + 2, 4); - if (result.second > 0) { - return {result.first, 2 + result.second}; + case 'u': + { + auto result = parse_hex_escape(content, pos + 2, 4); + if (result.second > 0) { + return { result.first, 2 + result.second }; + } + // Invalid escape, treat as literal 'u' + return { static_cast('u'), 2 }; } - // Invalid escape, treat as literal 'u' - return {static_cast('u'), 2}; - } - case 'U': { - auto result = parse_hex_escape(content, pos + 2, 8); - if (result.second > 0) { - return {result.first, 2 + result.second}; + case 'U': + { + auto result = parse_hex_escape(content, pos + 2, 8); + if (result.second > 0) { + return { result.first, 2 + result.second }; + } + // Invalid escape, treat as literal 'U' + return { static_cast('U'), 2 }; } - // Invalid escape, treat as literal 'U' - return {static_cast('U'), 2}; - } - case 'n': return {'\n', 2}; - case 't': return {'\t', 2}; - case 'r': return {'\r', 2}; - case '\\': return {'\\', 2}; - case ']': return {']', 2}; - case '[': return {'[', 2}; - default: return {static_cast(content[pos + 1]), 2}; + case 'n': + return { '\n', 2 }; + case 't': + return { '\t', 2 }; + case 'r': + return { '\r', 2 }; + case '\\': + return { '\\', 2 }; + case ']': + return { ']', 2 }; + case '[': + return { '[', 2 }; + default: + return { static_cast(content[pos + 1]), 2 }; } } // Regular character - return as codepoint - return {static_cast(static_cast(content[pos])), 1}; + 
return { static_cast(static_cast(content[pos])), 1 }; } -static std::pair, bool> parse_char_classes(const std::string & classes) { +static std::pair, bool> parse_char_classes( + const std::string & classes) { std::vector ranges; - bool negated = false; + bool negated = false; std::string content = classes; if (content.front() == '[') { @@ -231,14 +261,14 @@ static std::pair, bool> parse_c if (i + 1 < content.length() && content[i] == '-') { // Range detected auto [end, end_len] = parse_char_class_char(content, i + 1); - ranges.push_back(common_peg_chars_parser::char_range{start, end}); + ranges.push_back(common_peg_chars_parser::char_range{ start, end }); i += 1 + end_len; } else { - ranges.push_back(common_peg_chars_parser::char_range{start, start}); + ranges.push_back(common_peg_chars_parser::char_range{ start, start }); } } - return {ranges, negated}; + return { ranges, negated }; } void common_peg_ast_arena::visit(common_peg_ast_id id, const common_peg_ast_visitor & visitor) const { @@ -279,29 +309,53 @@ common_peg_parser_id common_peg_arena::get_rule(const std::string & name) const } struct parser_executor { - const common_peg_arena & arena; + const common_peg_arena & arena; common_peg_parse_context & ctx; - size_t start_pos; + size_t start_pos; - parser_executor(const common_peg_arena & arena, common_peg_parse_context & ctx, size_t start) - : arena(arena), ctx(ctx), start_pos(start) {} + parser_executor(const common_peg_arena & arena, common_peg_parse_context & ctx, size_t start) : + arena(arena), + ctx(ctx), + start_pos(start) {} + + std::string debug_indent() const { return std::string(ctx.parse_depth * 2, ' '); } + + std::string debug_input_snippet(size_t pos, size_t len = 60) const { + if (pos >= ctx.input.size()) { + return ""; + } + auto snippet = ctx.input.substr(pos, len); + // Escape newlines for display + std::string result; + for (char c : snippet) { + if (c == '\n') { + result += "\\n"; + } else if (c == '\r') { + result += "\\r"; + } else if (c 
== '\t') { + result += "\\t"; + } else { + result += c; + } + } + if (pos + len < ctx.input.size()) { + result += "..."; + } + return result; + } common_peg_parse_result operator()(const common_peg_epsilon_parser & /* p */) const { return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_SUCCESS, start_pos); } common_peg_parse_result operator()(const common_peg_start_parser & /* p */) const { - return common_peg_parse_result( - start_pos == 0 ? COMMON_PEG_PARSE_RESULT_SUCCESS : COMMON_PEG_PARSE_RESULT_FAIL, - start_pos - ); + return common_peg_parse_result(start_pos == 0 ? COMMON_PEG_PARSE_RESULT_SUCCESS : COMMON_PEG_PARSE_RESULT_FAIL, + start_pos); } common_peg_parse_result operator()(const common_peg_end_parser & /* p */) const { return common_peg_parse_result( - start_pos >= ctx.input.size() ? COMMON_PEG_PARSE_RESULT_SUCCESS : COMMON_PEG_PARSE_RESULT_FAIL, - start_pos - ); + start_pos >= ctx.input.size() ? COMMON_PEG_PARSE_RESULT_SUCCESS : COMMON_PEG_PARSE_RESULT_FAIL, start_pos); } common_peg_parse_result operator()(const common_peg_literal_parser & p) { @@ -323,12 +377,39 @@ struct parser_executor { } common_peg_parse_result operator()(const common_peg_sequence_parser & p) { - auto pos = start_pos; + if (ctx.debug) { + LOG_DBG("%sSEQ start at %zu '%s' (%zu children)\n", debug_indent().c_str(), start_pos, + debug_input_snippet(start_pos).c_str(), p.children.size()); + } + ctx.parse_depth++; + + auto pos = start_pos; std::vector nodes; - for (const auto & child_id : p.children) { + for (size_t i = 0; i < p.children.size(); i++) { + const auto & child_id = p.children[i]; + if (ctx.debug) { + fprintf(stderr, "%sSEQ child %zu: %s\n", debug_indent().c_str(), i, arena.dump(child_id).c_str()); + } auto result = arena.parse(child_id, ctx, pos); + + if (ctx.debug) { + fprintf(stderr, "%sSEQ child %zu: %s at %zu->%zu\n", debug_indent().c_str(), i, + common_peg_parse_result_type_name(result.type), result.start, result.end); + } + if (result.fail()) { + ctx.parse_depth--; + 
if (ctx.is_partial && result.end >= ctx.input.size()) { + if (ctx.debug) { + fprintf(stderr, "%sSEQ -> NEED_MORE (child failed at end)\n", debug_indent().c_str()); + } + return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_NEED_MORE_INPUT, start_pos, result.end, + std::move(nodes)); + } + if (ctx.debug) { + fprintf(stderr, "%sSEQ -> FAIL\n", debug_indent().c_str()); + } return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_FAIL, start_pos, result.end); } @@ -337,43 +418,93 @@ struct parser_executor { } if (result.need_more_input()) { - return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_NEED_MORE_INPUT, start_pos, result.end, std::move(nodes)); + ctx.parse_depth--; + if (ctx.debug) { + fprintf(stderr, "%sSEQ -> NEED_MORE\n", debug_indent().c_str()); + } + return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_NEED_MORE_INPUT, start_pos, result.end, + std::move(nodes)); } pos = result.end; } + ctx.parse_depth--; + if (ctx.debug) { + fprintf(stderr, "%sSEQ -> SUCCESS at %zu->%zu\n", debug_indent().c_str(), start_pos, pos); + } return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_SUCCESS, start_pos, pos, std::move(nodes)); } common_peg_parse_result operator()(const common_peg_choice_parser & p) { + if (ctx.debug) { + fprintf(stderr, "%sCHOICE start at %zu '%s' (%zu options)\n", debug_indent().c_str(), start_pos, + debug_input_snippet(start_pos).c_str(), p.children.size()); + } + ctx.parse_depth++; + auto pos = start_pos; - for (const auto & child_id : p.children) { + for (size_t i = 0; i < p.children.size(); i++) { + const auto & child_id = p.children[i]; + if (ctx.debug) { + fprintf(stderr, "%sCHOICE option %zu: %s\n", debug_indent().c_str(), i, arena.dump(child_id).c_str()); + } auto result = arena.parse(child_id, ctx, pos); + if (ctx.debug) { + fprintf(stderr, "%sCHOICE option %zu: %s\n", debug_indent().c_str(), i, + common_peg_parse_result_type_name(result.type)); + } if (!result.fail()) { + ctx.parse_depth--; + if (ctx.debug) { + fprintf(stderr, "%sCHOICE 
-> %s (option %zu)\n", debug_indent().c_str(), + common_peg_parse_result_type_name(result.type), i); + } return result; } } + ctx.parse_depth--; + if (ctx.debug) { + fprintf(stderr, "%sCHOICE -> FAIL (no options matched)\n", debug_indent().c_str()); + } return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_FAIL, start_pos); } common_peg_parse_result operator()(const common_peg_repetition_parser & p) { - auto pos = start_pos; - int match_count = 0; + if (ctx.debug) { + fprintf(stderr, "%sREPEAT start at %zu '%s' (min=%d, max=%d)\n", debug_indent().c_str(), start_pos, + debug_input_snippet(start_pos).c_str(), p.min_count, p.max_count); + } + ctx.parse_depth++; + + auto pos = start_pos; + int match_count = 0; std::vector nodes; // Try to match up to max_count times (or unlimited if max_count is -1) while (p.max_count == -1 || match_count < p.max_count) { if (pos >= ctx.input.size()) { + if (ctx.debug) { + fprintf(stderr, "%sREPEAT: at end of input, count=%d\n", debug_indent().c_str(), match_count); + } break; } auto result = arena.parse(p.child, ctx, pos); + if (ctx.debug) { + fprintf(stderr, "%sREPEAT iter %d: %s at %zu->%zu, nodes=%zu\n", debug_indent().c_str(), match_count, + common_peg_parse_result_type_name(result.type), result.start, result.end, result.nodes.size()); + fprintf(stderr, "%sREPEAT CHILD: %s\n", debug_indent().c_str(), arena.dump(p.child).c_str()); + } + if (result.success()) { // Prevent infinite loop on empty matches if (result.end == pos) { + if (ctx.debug) { + fprintf(stderr, "%s REPEAT: empty match, stopping\n", debug_indent().c_str()); + } break; } @@ -391,21 +522,45 @@ struct parser_executor { nodes.insert(nodes.end(), result.nodes.begin(), result.nodes.end()); } - return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_NEED_MORE_INPUT, start_pos, result.end, std::move(nodes)); + ctx.parse_depth--; + if (ctx.debug) { + fprintf(stderr, "%sREPEAT -> NEED_MORE (count=%d, nodes=%zu)\n", debug_indent().c_str(), + match_count, nodes.size()); + } + 
return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_NEED_MORE_INPUT, start_pos, result.end, + std::move(nodes)); } // Child failed - stop trying + if (ctx.debug) { + fprintf(stderr, "%sREPEAT: child failed, stopping\n", debug_indent().c_str()); + } break; } // Check if we got enough matches if (p.min_count > 0 && match_count < p.min_count) { + ctx.parse_depth--; if (pos >= ctx.input.size() && ctx.is_partial) { - return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_NEED_MORE_INPUT, start_pos, pos, std::move(nodes)); + if (ctx.debug) { + fprintf(stderr, "%sREPEAT -> NEED_MORE (not enough matches: %d < %d)\n", debug_indent().c_str(), + match_count, p.min_count); + } + return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_NEED_MORE_INPUT, start_pos, pos, + std::move(nodes)); + } + if (ctx.debug) { + fprintf(stderr, "%sREPEAT -> FAIL (not enough matches: %d < %d)\n", debug_indent().c_str(), match_count, + p.min_count); } return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_FAIL, start_pos, pos); } + ctx.parse_depth--; + if (ctx.debug) { + fprintf(stderr, "%sREPEAT -> SUCCESS (count=%d, nodes=%zu)\n", debug_indent().c_str(), match_count, + nodes.size()); + } return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_SUCCESS, start_pos, pos, std::move(nodes)); } @@ -463,8 +618,8 @@ struct parser_executor { } common_peg_parse_result operator()(const common_peg_chars_parser & p) const { - auto pos = start_pos; - int match_count = 0; + auto pos = start_pos; + int match_count = 0; // Try to match up to max_count times (or unlimited if max_count is -1) while (p.max_count == -1 || match_count < p.max_count) { @@ -527,7 +682,7 @@ struct parser_executor { } static common_peg_parse_result handle_escape_sequence(common_peg_parse_context & ctx, size_t start, size_t & pos) { - ++pos; // consume '\' + ++pos; // consume '\' if (pos >= ctx.input.size()) { if (!ctx.is_partial) { return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_FAIL, start); @@ -555,7 +710,7 @@ struct 
parser_executor { } static common_peg_parse_result handle_unicode_escape(common_peg_parse_context & ctx, size_t start, size_t & pos) { - ++pos; // consume 'u' + ++pos; // consume 'u' for (int i = 0; i < 4; ++i) { if (pos >= ctx.input.size()) { if (!ctx.is_partial) { @@ -617,7 +772,7 @@ struct parser_executor { trie matcher(p.delimiters); // Scan input and check for delimiters - size_t pos = start_pos; + size_t pos = start_pos; size_t last_valid_pos = start_pos; while (pos < ctx.input.size()) { @@ -638,16 +793,12 @@ struct parser_executor { return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_FAIL, start_pos); } - // Check if a delimiter starts at this position auto match = matcher.check_at(ctx.input, pos); - if (match == trie::COMPLETE_MATCH) { - // Found a complete delimiter, return everything before it return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_SUCCESS, start_pos, pos); } if (match == trie::PARTIAL_MATCH) { - // Found a partial match extending to end of input, return everything before it return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_SUCCESS, start_pos, pos); } @@ -673,18 +824,12 @@ struct parser_executor { if (!result.fail()) { std::string_view text; if (result.start < ctx.input.size()) { - text = std::string_view(ctx.input).substr(result.start, result.end - result.start); + text = std::string_view(ctx.input).substr( + result.start, std::min(result.end - result.start, ctx.input.size() - result.start)); } - auto node_id = ctx.ast.add_node( - p.name, - "", - result.start, - result.end, - text, - std::move(result.nodes), - result.need_more_input() - ); + auto node_id = ctx.ast.add_node(p.name, "", result.start, result.end, text, std::move(result.nodes), + result.need_more_input()); return common_peg_parse_result(result.type, result.start, result.end, { node_id }); } @@ -694,6 +839,9 @@ struct parser_executor { common_peg_parse_result operator()(const common_peg_tag_parser & p) { // Parse the child + if (ctx.debug) { + fprintf(stderr, "%sTAG: 
%s\n", debug_indent().c_str(), p.tag.c_str()); + } auto result = arena.parse(p.child, ctx, start_pos); if (!result.fail()) { @@ -702,15 +850,8 @@ struct parser_executor { text = std::string_view(ctx.input).substr(result.start, result.end - result.start); } - auto node_id = ctx.ast.add_node( - "", - p.tag, - result.start, - result.end, - text, - std::move(result.nodes), - result.need_more_input() - ); + auto node_id = ctx.ast.add_node("", p.tag, result.start, result.end, text, std::move(result.nodes), + result.need_more_input()); return common_peg_parse_result(result.type, result.start, result.end, { node_id }); } @@ -740,60 +881,89 @@ common_peg_parse_result common_peg_arena::parse(common_peg_parse_context & ctx, return parse(root_, ctx, start); } -common_peg_parse_result common_peg_arena::parse(common_peg_parser_id id, common_peg_parse_context & ctx, size_t start) const { +common_peg_parse_result common_peg_arena::parse(common_peg_parser_id id, + common_peg_parse_context & ctx, + size_t start) const { // Execute parser - const auto & parser = parsers_.at(id); + const auto & parser = parsers_.at(id); parser_executor exec(*this, ctx, start); return std::visit(exec, parser); } common_peg_parser_id common_peg_arena::resolve_ref(common_peg_parser_id id) { const auto & parser = parsers_.at(id); - if (auto ref = std::get_if(&parser)) { + if (const auto *ref = std::get_if(&parser)) { return get_rule(ref->name); } return id; } +static void bfs_node(common_peg_ast_arena &arena, std::ostringstream & oss, const common_peg_ast_node & node, int indent) { + for (int i = 0; i < indent; i++) { + oss << " "; + } + oss << "NODE " << node.id; + if (!node.rule.empty()) { + oss << " (rule " << node.rule << ")"; + } + if (!node.tag.empty()) { + oss << " (tag " << node.tag << ")"; + } + oss << " ['" << node.text << "']\n"; + for (const auto child : node.children) { + bfs_node(arena, oss, arena.get(child), indent + 1); + } +} + +std::string common_peg_ast_arena::dump() { + 
std::ostringstream oss; + for (auto & node : nodes_) { + bfs_node(*this, oss, node, 0); + } + return oss.str(); +} + void common_peg_arena::resolve_refs() { // Walk through all parsers and replace refs with their corresponding rule IDs for (auto & parser : parsers_) { - std::visit([this](auto & p) { - using T = std::decay_t; + std::visit( + [this](auto & p) { + using T = std::decay_t; - if constexpr (std::is_same_v) { - for (auto & child : p.children) { - child = resolve_ref(child); + if constexpr (std::is_same_v) { + for (auto & child : p.children) { + child = resolve_ref(child); + } + } else if constexpr (std::is_same_v) { + for (auto & child : p.children) { + child = resolve_ref(child); + } + } else if constexpr (std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v) { + p.child = resolve_ref(p.child); + } else if constexpr (std::is_same_v) { + p.child = resolve_ref(p.child); + } else if constexpr (std::is_same_v) { + p.child = resolve_ref(p.child); + } else if constexpr (std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v) { + // These rules do not have children + } else { + static_assert(is_always_false_v); } - } else if constexpr (std::is_same_v) { - for (auto & child : p.children) { - child = resolve_ref(child); - } - } else if constexpr (std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v) { - p.child = resolve_ref(p.child); - } else if constexpr (std::is_same_v) { - p.child = resolve_ref(p.child); - } else if constexpr (std::is_same_v) { - p.child = resolve_ref(p.child); - } else if constexpr (std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v) { - // These rules do not have children - } else 
{ - static_assert(is_always_false_v); - } - }, parser); + }, + parser); } // Also flatten root if it's a ref @@ -803,63 +973,86 @@ void common_peg_arena::resolve_refs() { } std::string common_peg_arena::dump(common_peg_parser_id id) const { + std::unordered_set visited; + return dump_impl(id, visited); +} + +std::string common_peg_arena::dump_impl(common_peg_parser_id id, + std::unordered_set & visited) const { + // Check for cycles + if (visited.count(id)) { + return "[cycle]"; + } + visited.insert(id); + const auto & parser = parsers_.at(id); - return std::visit([this](const auto & p) -> std::string { - using T = std::decay_t; + return std::visit( + [this, &visited](const auto & p) -> std::string { + using T = std::decay_t; - if constexpr (std::is_same_v) { - return "Epsilon"; - } else if constexpr (std::is_same_v) { - return "Start"; - } else if constexpr (std::is_same_v) { - return "End"; - } else if constexpr (std::is_same_v) { - return "Literal(" + p.literal + ")"; - } else if constexpr (std::is_same_v) { - std::vector parts; - for (const auto & child : p.children) { - parts.push_back(dump(child)); + if constexpr (std::is_same_v) { + return "Epsilon"; + } else if constexpr (std::is_same_v) { + return "Start"; + } else if constexpr (std::is_same_v) { + return "End"; + } else if constexpr (std::is_same_v) { + return "Literal(" + p.literal + ")"; + } else if constexpr (std::is_same_v) { + std::vector parts; + for (const auto & child : p.children) { + parts.push_back(dump_impl(child, visited)); + } + return "Sequence(" + string_join(parts, ", ") + ")"; + } else if constexpr (std::is_same_v) { + std::vector parts; + for (const auto & child : p.children) { + parts.push_back(dump_impl(child, visited)); + } + return "Choice(" + string_join(parts, ", ") + ")"; + } else if constexpr (std::is_same_v) { + if (p.max_count == -1) { + return "Repetition(" + dump_impl(p.child, visited) + ", " + std::to_string(p.min_count) + + ", unbounded)"; + } + return "Repetition(" + 
dump_impl(p.child, visited) + ", " + std::to_string(p.min_count) + ", " + + std::to_string(p.max_count) + ")"; + } else if constexpr (std::is_same_v) { + return "And(" + dump_impl(p.child, visited) + ")"; + } else if constexpr (std::is_same_v) { + return "Not(" + dump_impl(p.child, visited) + ")"; + } else if constexpr (std::is_same_v) { + return "Atomic(" + dump_impl(p.child, visited) + ")"; + } else if constexpr (std::is_same_v) { + return "Any"; + } else if constexpr (std::is_same_v) { + return "Space"; + } else if constexpr (std::is_same_v) { + if (p.max_count == -1) { + return "CharRepeat(" + p.pattern + ", " + std::to_string(p.min_count) + ", unbounded)"; + } + return "CharRepeat(" + p.pattern + ", " + std::to_string(p.min_count) + ", " + + std::to_string(p.max_count) + ")"; + } else if constexpr (std::is_same_v) { + return "JsonString()"; + } else if constexpr (std::is_same_v) { + return "Until(" + string_join(p.delimiters, " | ") + ")"; + } else if constexpr (std::is_same_v) { + return "Schema(" + dump_impl(p.child, visited) + ", " + (p.schema ? 
p.schema->dump() : "null") + ")"; + } else if constexpr (std::is_same_v) { + return "Rule(" + p.name + ", " + dump_impl(p.child, visited) + ")"; + } else if constexpr (std::is_same_v) { + return "Ref(" + p.name + ")"; + } else if constexpr (std::is_same_v) { + return "Tag(" + p.tag + ", " + dump(p.child) + ")"; + } else if constexpr (std::is_same_v) { + return "Atomic(" + dump(p.child) + ")"; + } else { + return "Unknown"; } - return "Sequence(" + string_join(parts, ", ") + ")"; - } else if constexpr (std::is_same_v) { - std::vector parts; - for (const auto & child : p.children) { - parts.push_back(dump(child)); - } - return "Choice(" + string_join(parts, ", ") + ")"; - } else if constexpr (std::is_same_v) { - if (p.max_count == -1) { - return "Repetition(" + dump(p.child) + ", " + std::to_string(p.min_count) + ", unbounded)"; - } - return "Repetition(" + dump(p.child) + ", " + std::to_string(p.min_count) + ", " + std::to_string(p.max_count) + ")"; - } else if constexpr (std::is_same_v) { - return "And(" + dump(p.child) + ")"; - } else if constexpr (std::is_same_v) { - return "Not(" + dump(p.child) + ")"; - } else if constexpr (std::is_same_v) { - return "Any"; - } else if constexpr (std::is_same_v) { - return "Space"; - } else if constexpr (std::is_same_v) { - if (p.max_count == -1) { - return "CharRepeat(" + p.pattern + ", " + std::to_string(p.min_count) + ", unbounded)"; - } - return "CharRepeat(" + p.pattern + ", " + std::to_string(p.min_count) + ", " + std::to_string(p.max_count) + ")"; - } else if constexpr (std::is_same_v) { - return "JsonString()"; - } else if constexpr (std::is_same_v) { - return "Until(" + string_join(p.delimiters, " | ") + ")"; - } else if constexpr (std::is_same_v) { - return "Schema(" + dump(p.child) + ", " + (p.schema ? 
p.schema->dump() : "null") + ")"; - } else if constexpr (std::is_same_v) { - return "Rule(" + p.name + ", " + dump(p.child) + ")"; - } else if constexpr (std::is_same_v) { - return "Ref(" + p.name + ")"; - } else { - return "Unknown"; - } - }, parser); + }, + parser); } common_peg_parser & common_peg_parser::operator=(const common_peg_parser & other) { @@ -868,25 +1061,25 @@ common_peg_parser & common_peg_parser::operator=(const common_peg_parser & other } common_peg_parser & common_peg_parser::operator+=(const common_peg_parser & other) { - id_ = builder_.sequence({id_, other.id_}); + id_ = builder_.sequence({ id_, other.id_ }); return *this; } common_peg_parser & common_peg_parser::operator|=(const common_peg_parser & other) { - id_ = builder_.choice({id_, other.id_}); + id_ = builder_.choice({ id_, other.id_ }); return *this; } common_peg_parser common_peg_parser::operator+(const common_peg_parser & other) const { - return builder_.sequence({id_, other.id_}); + return builder_.sequence({ id_, other.id_ }); } common_peg_parser common_peg_parser::operator|(const common_peg_parser & other) const { - return builder_.choice({id_, other.id_}); + return builder_.choice({ id_, other.id_ }); } common_peg_parser common_peg_parser::operator<<(const common_peg_parser & other) const { - return builder_.sequence({id_, builder_.space(), other.id_}); + return builder_.sequence({ id_, builder_.space(), other.id_ }); } common_peg_parser common_peg_parser::operator+(const char * str) const { @@ -955,7 +1148,7 @@ common_peg_parser common_peg_parser_builder::sequence(const std::vector & parsers) { @@ -987,7 +1180,7 @@ common_peg_parser common_peg_parser_builder::choice(const std::vector & parsers) { @@ -1010,36 +1203,42 @@ common_peg_parser common_peg_parser_builder::choice(std::initializer_list(schema), raw})); +common_peg_parser common_peg_parser_builder::schema(const common_peg_parser & p, + const std::string & name, + const nlohmann::ordered_json & schema, + bool raw) { + return 
wrap(arena_.add_parser( + common_peg_schema_parser{ p.id(), name, std::make_shared(schema), raw })); } common_peg_parser common_peg_parser_builder::rule(const std::string & name, const common_peg_parser & p, bool trigger) { auto clean_name = rule_name(name); - auto rule_id = arena_.add_parser(common_peg_rule_parser{clean_name, p.id(), trigger}); + auto rule_id = arena_.add_parser(common_peg_rule_parser{ clean_name, p.id(), trigger }); arena_.add_rule(clean_name, rule_id); return ref(clean_name); } -common_peg_parser common_peg_parser_builder::rule(const std::string & name, const std::function & builder_fn, bool trigger) { +common_peg_parser common_peg_parser_builder::rule(const std::string & name, + const std::function & builder_fn, + bool trigger) { auto clean_name = rule_name(name); if (arena_.has_rule(clean_name)) { return ref(clean_name); } // Create placeholder rule to allow recursive references - auto placeholder = any(); // Temporary placeholder - auto placeholder_rule_id = arena_.add_parser(common_peg_rule_parser{clean_name, placeholder.id(), trigger}); + auto placeholder = any(); // Temporary placeholder + auto placeholder_rule_id = arena_.add_parser(common_peg_rule_parser{ clean_name, placeholder.id(), trigger }); arena_.add_rule(clean_name, placeholder_rule_id); // Build the actual parser auto parser = builder_fn(); // Replace placeholder with actual rule - auto rule_id = arena_.add_parser(common_peg_rule_parser{clean_name, parser.id(), trigger}); + auto rule_id = arena_.add_parser(common_peg_rule_parser{ clean_name, parser.id(), trigger }); arena_.rules_[clean_name] = rule_id; return ref(clean_name); @@ -1056,77 +1255,49 @@ common_peg_arena common_peg_parser_builder::build() { // JSON parsers common_peg_parser common_peg_parser_builder::json_number() { - return rule("json-number", [this]() { + return rule("json-number", [this]() { auto digit1_9 = chars("[1-9]", 1, 1); - auto digits = chars("[0-9]"); - auto int_part = choice({literal("0"), 
sequence({digit1_9, chars("[0-9]", 0, -1)})}); - auto frac = sequence({literal("."), digits}); - auto exp = sequence({choice({literal("e"), literal("E")}), optional(chars("[+-]", 1, 1)), digits}); - return sequence({optional(literal("-")), int_part, optional(frac), optional(exp), space()}); + auto digits = chars("[0-9]"); + auto int_part = choice({ literal("0"), sequence({ digit1_9, chars("[0-9]", 0, -1) }) }); + auto frac = sequence({ literal("."), digits }); + auto exp = sequence({ choice({ literal("e"), literal("E") }), optional(chars("[+-]", 1, 1)), digits }); + return sequence({ optional(literal("-")), int_part, optional(frac), optional(exp), space() }); }); } common_peg_parser common_peg_parser_builder::json_string() { - return rule("json-string", [this]() { - return sequence({literal("\""), json_string_content(), literal("\""), space()}); - }); + return rule("json-string", + [this]() { return sequence({ literal("\""), json_string_content(), literal("\""), space() }); }); } common_peg_parser common_peg_parser_builder::json_bool() { - return rule("json-bool", [this]() { - return sequence({choice({literal("true"), literal("false")}), space()}); - }); + return rule("json-bool", [this]() { return sequence({ choice({ literal("true"), literal("false") }), space() }); }); } common_peg_parser common_peg_parser_builder::json_null() { - return rule("json-null", [this]() { - return sequence({literal("null"), space()}); - }); + return rule("json-null", [this]() { return sequence({ literal("null"), space() }); }); } common_peg_parser common_peg_parser_builder::json_object() { return rule("json-object", [this]() { - auto ws = space(); - auto member = sequence({json_string(), ws, literal(":"), ws, json()}); - auto members = sequence({member, zero_or_more(sequence({ws, literal(","), ws, member}))}); - return sequence({ - literal("{"), - ws, - choice({ - literal("}"), - sequence({members, ws, literal("}")}) - }), - ws - }); + auto ws = space(); + auto member = sequence({ 
json_string(), ws, literal(":"), ws, json() }); + auto members = sequence({ member, zero_or_more(sequence({ ws, literal(","), ws, member })) }); + return sequence({ literal("{"), ws, choice({ literal("}"), sequence({ members, ws, literal("}") }) }) }); }); } common_peg_parser common_peg_parser_builder::json_array() { return rule("json-array", [this]() { - auto ws = space(); - auto elements = sequence({json(), zero_or_more(sequence({literal(","), ws, json()}))}); - return sequence({ - literal("["), - ws, - choice({ - literal("]"), - sequence({elements, ws, literal("]")}) - }), - ws - }); + auto ws = space(); + auto elements = sequence({ json(), zero_or_more(sequence({ literal(","), ws, json() })) }); + return sequence({ literal("["), ws, choice({ literal("]"), sequence({ elements, ws, literal("]") }) }) }); }); } common_peg_parser common_peg_parser_builder::json() { return rule("json-value", [this]() { - return choice({ - json_object(), - json_array(), - json_string(), - json_number(), - json_bool(), - json_null() - }); + return choice({ json_object(), json_array(), json_string(), json_number(), json_bool(), json_null() }); }); } @@ -1145,17 +1316,76 @@ common_peg_parser common_peg_parser_builder::json_member(const std::string & key }); } - -static std::string gbnf_escape_char_class(char c) { - switch (c) { - case '\n': return "\\n"; - case '\t': return "\\t"; - case '\r': return "\\r"; - case '\\': return "\\\\"; - case ']': return "\\]"; - case '[': return "\\["; - default: return std::string(1, c); +static std::string gbnf_escape_char_class(uint32_t c) { + if (c == '-' || c == ']' || c == '[' || c == '\\') { + return "\\" + std::string(1, (char) c); } + // Escape whitespace control characters + if (c == '\n') { + return "\\n"; + } + if (c == '\t') { + return "\\t"; + } + if (c == '\r') { + return "\\r"; + } + + // Printable ASCII + if (c >= 0x20 && c <= 0x7E) { + return std::string(1, (char) c); + } + + // Hex escape + char buf[16]; + const char * hex = 
"0123456789ABCDEF"; + + if (c <= 0xFF) { + buf[0] = '\\'; + buf[1] = 'x'; + buf[2] = hex[(c >> 4) & 0xF]; + buf[3] = hex[c & 0xF]; + buf[4] = '\0'; + } else if (c <= 0xFFFF) { + buf[0] = '\\'; + buf[1] = 'u'; + buf[2] = hex[(c >> 12) & 0xF]; + buf[3] = hex[(c >> 8) & 0xF]; + buf[4] = hex[(c >> 4) & 0xF]; + buf[5] = hex[c & 0xF]; + buf[6] = '\0'; + } else { + buf[0] = '\\'; + buf[1] = 'U'; + for (int i = 0; i < 8; i++) { + buf[2 + i] = hex[(c >> ((7 - i) * 4)) & 0xF]; + } + buf[10] = '\0'; + } + + return std::string(buf); +} + +static std::string codepoints_to_utf8(const std::vector & cps) { + std::string s; + for (uint32_t cp : cps) { + if (cp < 0x80) { + s += (char) cp; + } else if (cp < 0x800) { + s += (char) (0xC0 | (cp >> 6)); + s += (char) (0x80 | (cp & 0x3F)); + } else if (cp < 0x10000) { + s += (char) (0xE0 | (cp >> 12)); + s += (char) (0x80 | ((cp >> 6) & 0x3F)); + s += (char) (0x80 | (cp & 0x3F)); + } else { + s += (char) (0xF0 | (cp >> 18)); + s += (char) (0x80 | ((cp >> 12) & 0x3F)); + s += (char) (0x80 | ((cp >> 6) & 0x3F)); + s += (char) (0x80 | (cp & 0x3F)); + } + } + return s; } static std::string gbnf_excluding_pattern(const std::vector & strings) { @@ -1168,17 +1398,17 @@ static std::string gbnf_excluding_pattern(const std::vector & strin pattern += " | "; } - const auto & pre = pieces[i].prefix; + const auto & pre = pieces[i].prefix; const auto & chars = pieces[i].next_chars; std::string cls; - cls.reserve(chars.size()); - for (const auto & ch : chars) { + cls.reserve(chars.size() * 4); + for (uint32_t ch : chars) { cls += gbnf_escape_char_class(ch); } if (!pre.empty()) { - pattern += gbnf_format_literal(pre) + " [^" + cls + "]"; + pattern += gbnf_format_literal(codepoints_to_utf8(pre)) + " [^" + cls + "]"; } else { pattern += "[^" + cls + "]"; } @@ -1187,58 +1417,56 @@ static std::string gbnf_excluding_pattern(const std::vector & strin return "(" + pattern + ")*"; } -static std::unordered_set collect_reachable_rules( - const common_peg_arena & 
arena, - const common_peg_parser_id & rule -) { +static std::unordered_set collect_reachable_rules(const common_peg_arena & arena, + const common_peg_parser_id & rule) { std::unordered_set reachable; std::unordered_set visited; std::function visit = [&](common_peg_parser_id id) { const auto & parser = arena.get(id); - std::visit([&](const auto & p) { - using T = std::decay_t; + std::visit( + [&](const auto & p) { + using T = std::decay_t; - if constexpr (std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v) { - // These parsers do not have any children - } else if constexpr (std::is_same_v) { - for (auto child : p.children) { - visit(child); - } - } else if constexpr (std::is_same_v) { - for (auto child : p.children) { - visit(child); - } - } else if constexpr (std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v) { - visit(p.child); - } else if constexpr (std::is_same_v) { - if (visited.find(p.name) == visited.end()) { - visited.insert(p.name); - reachable.insert(p.name); + if constexpr (std::is_same_v || + std::is_same_v || std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v || std::is_same_v || + std::is_same_v) { + // These parsers do not have any children + } else if constexpr (std::is_same_v) { + for (auto child : p.children) { + visit(child); + } + } else if constexpr (std::is_same_v) { + for (auto child : p.children) { + visit(child); + } + } else if constexpr (std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v) { visit(p.child); + } else if constexpr (std::is_same_v) { + if (visited.find(p.name) == visited.end()) { + visited.insert(p.name); + reachable.insert(p.name); + visit(p.child); + } + } else if constexpr (std::is_same_v) { + // Traverse rules so we pick up 
everything + auto referenced_rule = arena.get_rule(p.name); + visit(referenced_rule); + } else { + static_assert(is_always_false_v); } - } else if constexpr (std::is_same_v) { - // Traverse rules so we pick up everything - auto referenced_rule = arena.get_rule(p.name); - visit(referenced_rule); - } else { - static_assert(is_always_false_v); - } - }, parser); + }, + parser); }; visit(rule); @@ -1251,129 +1479,136 @@ void common_peg_arena::build_grammar(const common_grammar_builder & builder, boo std::function to_gbnf = [&](common_peg_parser_id id) -> std::string { const auto & parser = parsers_.at(id); - return std::visit([&](const auto & p) -> std::string { - using T = std::decay_t; + return std::visit( + [&](const auto & p) -> std::string { + using T = std::decay_t; - if constexpr (std::is_same_v || - std::is_same_v || - std::is_same_v) { - return ""; - } else if constexpr (std::is_same_v) { - return gbnf_format_literal(p.literal); - } else if constexpr (std::is_same_v) { - std::string s; - for (const auto & child : p.children) { - if (!s.empty()) { - s += " "; + if constexpr (std::is_same_v || + std::is_same_v || std::is_same_v) { + return ""; + } else if constexpr (std::is_same_v) { + return gbnf_format_literal(p.literal); + } else if constexpr (std::is_same_v) { + std::string s; + for (const auto & child : p.children) { + if (!s.empty()) { + s += " "; + } + auto child_gbnf = to_gbnf(child); + const auto & child_parser = parsers_.at(child); + if (std::holds_alternative(child_parser) || + std::holds_alternative(child_parser) || + std::holds_alternative(child_parser) || + std::holds_alternative(child_parser)) { + s += "(" + child_gbnf + ")"; + } else { + s += child_gbnf; + } } - auto child_gbnf = to_gbnf(child); - const auto & child_parser = parsers_.at(child); + return s; + } else if constexpr (std::is_same_v) { + std::string s; + for (const auto & child : p.children) { + if (!s.empty()) { + s += " | "; + } + auto child_gbnf = to_gbnf(child); + const auto & 
child_parser = parsers_.at(child); + if (std::holds_alternative(child_parser)) { + s += "(" + child_gbnf + ")"; + } else { + s += child_gbnf; + } + } + return s; + } else if constexpr (std::is_same_v) { + auto child_gbnf = to_gbnf(p.child); + const auto & child_parser = parsers_.at(p.child); if (std::holds_alternative(child_parser) || - std::holds_alternative(child_parser)) { - s += "(" + child_gbnf + ")"; - } else { - s += child_gbnf; + std::holds_alternative(child_parser) || + std::holds_alternative(child_parser) || + std::holds_alternative(child_parser)) { + child_gbnf = "(" + child_gbnf + ")"; } - } - return s; - } else if constexpr (std::is_same_v) { - std::string s; - for (const auto & child : p.children) { - if (!s.empty()) { - s += " | "; + if (p.min_count == 0 && p.max_count == 1) { + return child_gbnf + "?"; } - auto child_gbnf = to_gbnf(child); - const auto & child_parser = parsers_.at(child); - if (std::holds_alternative(child_parser)) { - s += "(" + child_gbnf + ")"; - } else { - s += child_gbnf; + if (p.min_count == 0 && p.max_count == -1) { + return child_gbnf + "*"; } - } - return s; - } else if constexpr (std::is_same_v) { - auto child_gbnf = to_gbnf(p.child); - const auto & child_parser = parsers_.at(p.child); - if (std::holds_alternative(child_parser) || - std::holds_alternative(child_parser)) { - child_gbnf = "(" + child_gbnf + ")"; - } - if (p.min_count == 0 && p.max_count == 1) { - return child_gbnf + "?"; - } - if (p.min_count == 0 && p.max_count == -1) { - return child_gbnf + "*"; - } - if (p.min_count == 1 && p.max_count == -1) { - return child_gbnf + "+"; - } - if (p.max_count == -1) { - return child_gbnf + "{" + std::to_string(p.min_count) + ",}"; - } - if (p.min_count == p.max_count) { - if (p.min_count == 1) { - return child_gbnf; + if (p.min_count == 1 && p.max_count == -1) { + return child_gbnf + "+"; } - return child_gbnf + "{" + std::to_string(p.min_count) + "}"; - } - return child_gbnf + "{" + std::to_string(p.min_count) + "," + 
std::to_string(p.max_count) + "}"; - } else if constexpr (std::is_same_v || std::is_same_v) { - return ""; // Lookahead not supported in GBNF - } else if constexpr (std::is_same_v) { - return "."; - } else if constexpr (std::is_same_v) { - return "space"; - } else if constexpr (std::is_same_v) { - std::string result = p.pattern; - if (p.min_count == 0 && p.max_count == 1) { - return result + "?"; - } - if (p.min_count == 0 && p.max_count == -1) { - return result + "*"; - } - if (p.min_count == 1 && p.max_count == -1) { - return result + "+"; - } - if (p.max_count == -1) { - return result + "{" + std::to_string(p.min_count) + ",}"; - } - if (p.min_count == p.max_count) { - if (p.min_count == 1) { - return result; + if (p.max_count == -1) { + return child_gbnf + "{" + std::to_string(p.min_count) + ",}"; } - return result + "{" + std::to_string(p.min_count) + "}"; - } - return result + "{" + std::to_string(p.min_count) + "," + std::to_string(p.max_count) + "}"; - } else if constexpr (std::is_same_v) { - return R"(( [^"\\] | "\\" ( ["\\/ bfnrt] | "u" [0-9a-fA-F]{4} ) )*)"; - } else if constexpr (std::is_same_v) { - if (p.delimiters.empty()) { - return ".*"; - } - return gbnf_excluding_pattern(p.delimiters); - } else if constexpr (std::is_same_v) { - if (p.schema) { - if (p.raw && p.schema->contains("type") && p.schema->at("type").is_string() && p.schema->at("type") == "string") { - // TODO: Implement more comprehensive grammar generation for raw strings. - // For now, use the grammar emitted from the underlying parser. 
- return to_gbnf(p.child); + if (p.min_count == p.max_count) { + if (p.min_count == 1) { + return child_gbnf; + } + return child_gbnf + "{" + std::to_string(p.min_count) + "}"; } - return builder.add_schema(p.name, *p.schema); + return child_gbnf + "{" + std::to_string(p.min_count) + "," + std::to_string(p.max_count) + "}"; + } else if constexpr (std::is_same_v || + std::is_same_v) { + return ""; // Lookahead not supported in GBNF + } else if constexpr (std::is_same_v) { + return "."; + } else if constexpr (std::is_same_v) { + return "space"; + } else if constexpr (std::is_same_v) { + std::string result = p.pattern; + if (p.min_count == 0 && p.max_count == 1) { + return result + "?"; + } + if (p.min_count == 0 && p.max_count == -1) { + return result + "*"; + } + if (p.min_count == 1 && p.max_count == -1) { + return result + "+"; + } + if (p.max_count == -1) { + return result + "{" + std::to_string(p.min_count) + ",}"; + } + if (p.min_count == p.max_count) { + if (p.min_count == 1) { + return result; + } + return result + "{" + std::to_string(p.min_count) + "}"; + } + return result + "{" + std::to_string(p.min_count) + "," + std::to_string(p.max_count) + "}"; + } else if constexpr (std::is_same_v) { + return R"(( [^"\\] | "\\" ( ["\\/ bfnrt] | "u" [0-9a-fA-F]{4} ) )*)"; + } else if constexpr (std::is_same_v) { + if (p.delimiters.empty()) { + return ".*"; + } + return gbnf_excluding_pattern(p.delimiters); + } else if constexpr (std::is_same_v) { + if (p.schema) { + if (p.raw && p.schema->contains("type") && p.schema->at("type").is_string() && + p.schema->at("type") == "string") { + // TODO: Implement more comprehensive grammar generation for raw strings. + // For now, use the grammar emitted from the underlying parser. 
+ return to_gbnf(p.child); + } + return builder.add_schema(p.name, *p.schema); + } + return to_gbnf(p.child); + } else if constexpr (std::is_same_v) { + return p.name; + } else if constexpr (std::is_same_v) { + // Refs should not exist after flattening, but kept just in case + return p.name; + } else if constexpr (std::is_same_v) { + return to_gbnf(p.child); + } else if constexpr (std::is_same_v) { + return to_gbnf(p.child); + } else { + static_assert(is_always_false_v); } - return to_gbnf(p.child); - } else if constexpr (std::is_same_v) { - return p.name; - } else if constexpr (std::is_same_v) { - // Refs should not exist after flattening, but kept just in case - return p.name; - } else if constexpr (std::is_same_v) { - return to_gbnf(p.child); - } else if constexpr (std::is_same_v) { - return to_gbnf(p.child); - } else { - static_assert(is_always_false_v); - } - }, parser); + }, + parser); }; // Collect reachable rules @@ -1432,80 +1667,121 @@ void common_peg_arena::build_grammar(const common_grammar_builder & builder, boo static nlohmann::json serialize_parser_variant(const common_peg_parser_variant & variant) { using json = nlohmann::json; - return std::visit([](const auto & p) -> json { - using T = std::decay_t; + return std::visit( + [](const auto & p) -> json { + using T = std::decay_t; - if constexpr (std::is_same_v) { - return json{{"type", "epsilon"}}; - } else if constexpr (std::is_same_v) { - return json{{"type", "start"}}; - } else if constexpr (std::is_same_v) { - return json{{"type", "end"}}; - } else if constexpr (std::is_same_v) { - return json{{"type", "literal"}, {"literal", p.literal}}; - } else if constexpr (std::is_same_v) { - return json{{"type", "sequence"}, {"children", p.children}}; - } else if constexpr (std::is_same_v) { - return json{{"type", "choice"}, {"children", p.children}}; - } else if constexpr (std::is_same_v) { - return json{ - {"type", "repetition"}, - {"child", p.child}, - {"min_count", p.min_count}, - {"max_count", 
p.max_count} - }; - } else if constexpr (std::is_same_v) { - return json{{"type", "and"}, {"child", p.child}}; - } else if constexpr (std::is_same_v) { - return json{{"type", "not"}, {"child", p.child}}; - } else if constexpr (std::is_same_v) { - return json{{"type", "any"}}; - } else if constexpr (std::is_same_v) { - return json{{"type", "space"}}; - } else if constexpr (std::is_same_v) { - json ranges = json::array(); - for (const auto & range : p.ranges) { - ranges.push_back({{"start", range.start}, {"end", range.end}}); + if constexpr (std::is_same_v) { + return json{ + { "type", "epsilon" } + }; + } else if constexpr (std::is_same_v) { + return json{ + { "type", "start" } + }; + } else if constexpr (std::is_same_v) { + return json{ + { "type", "end" } + }; + } else if constexpr (std::is_same_v) { + return json{ + { "type", "literal" }, + { "literal", p.literal } + }; + } else if constexpr (std::is_same_v) { + return json{ + { "type", "sequence" }, + { "children", p.children } + }; + } else if constexpr (std::is_same_v) { + return json{ + { "type", "choice" }, + { "children", p.children } + }; + } else if constexpr (std::is_same_v) { + return json{ + { "type", "repetition" }, + { "child", p.child }, + { "min_count", p.min_count }, + { "max_count", p.max_count } + }; + } else if constexpr (std::is_same_v) { + return json{ + { "type", "and" }, + { "child", p.child } + }; + } else if constexpr (std::is_same_v) { + return json{ + { "type", "not" }, + { "child", p.child } + }; + } else if constexpr (std::is_same_v) { + return json{ + { "type", "any" } + }; + } else if constexpr (std::is_same_v) { + return json{ + { "type", "space" } + }; + } else if constexpr (std::is_same_v) { + json ranges = json::array(); + for (const auto & range : p.ranges) { + ranges.push_back({ + { "start", range.start }, + { "end", range.end } + }); + } + return json{ + { "type", "chars" }, + { "pattern", p.pattern }, + { "ranges", ranges }, + { "negated", p.negated }, + { "min_count", 
p.min_count }, + { "max_count", p.max_count } + }; + } else if constexpr (std::is_same_v) { + return json{ + { "type", "json_string" } + }; + } else if constexpr (std::is_same_v) { + return json{ + { "type", "until" }, + { "delimiters", p.delimiters } + }; + } else if constexpr (std::is_same_v) { + return json{ + { "type", "schema" }, + { "child", p.child }, + { "name", p.name }, + { "schema", p.schema ? *p.schema : nullptr }, + { "raw", p.raw } + }; + } else if constexpr (std::is_same_v) { + return json{ + { "type", "rule" }, + { "name", p.name }, + { "child", p.child }, + { "trigger", p.trigger } + }; + } else if constexpr (std::is_same_v) { + return json{ + { "type", "ref" }, + { "name", p.name } + }; + } else if constexpr (std::is_same_v) { + return json{ + { "type", "atomic" }, + { "child", p.child } + }; + } else if constexpr (std::is_same_v) { + return json{ + { "type", "tag" }, + { "child", p.child }, + { "tag", p.tag } + }; } - return json{ - {"type", "chars"}, - {"pattern", p.pattern}, - {"ranges", ranges}, - {"negated", p.negated}, - {"min_count", p.min_count}, - {"max_count", p.max_count} - }; - } else if constexpr (std::is_same_v) { - return json{{"type", "json_string"}}; - } else if constexpr (std::is_same_v) { - return json{{"type", "until"}, {"delimiters", p.delimiters}}; - } else if constexpr (std::is_same_v) { - return json{ - {"type", "schema"}, - {"child", p.child}, - {"name", p.name}, - {"schema", p.schema ? 
*p.schema : nullptr}, - {"raw", p.raw} - }; - } else if constexpr (std::is_same_v) { - return json{ - {"type", "rule"}, - {"name", p.name}, - {"child", p.child}, - {"trigger", p.trigger} - }; - } else if constexpr (std::is_same_v) { - return json{{"type", "ref"}, {"name", p.name}}; - } else if constexpr (std::is_same_v) { - return json{{"type", "atomic"}, {"child", p.child}}; - } else if constexpr (std::is_same_v) { - return json{ - {"type", "tag"}, - {"child", p.child}, - {"tag", p.tag} - }; - } - }, variant); + }, + variant); } nlohmann::json common_peg_arena::to_json() const { @@ -1514,9 +1790,9 @@ nlohmann::json common_peg_arena::to_json() const { parsers.push_back(serialize_parser_variant(parser)); } return nlohmann::json{ - {"parsers", parsers}, - {"rules", rules_}, - {"root", root_} + { "parsers", parsers }, + { "rules", rules_ }, + { "root", root_ } }; } @@ -1540,41 +1816,38 @@ static common_peg_parser_variant deserialize_parser_variant(const nlohmann::json if (!j.contains("literal") || !j["literal"].is_string()) { throw std::runtime_error("literal parser missing or invalid 'literal' field"); } - return common_peg_literal_parser{j["literal"]}; + return common_peg_literal_parser{ j["literal"] }; } if (type == "sequence") { if (!j.contains("children") || !j["children"].is_array()) { throw std::runtime_error("sequence parser missing or invalid 'children' field"); } - return common_peg_sequence_parser{j["children"].get>()}; + return common_peg_sequence_parser{ j["children"].get>() }; } if (type == "choice") { if (!j.contains("children") || !j["children"].is_array()) { throw std::runtime_error("choice parser missing or invalid 'children' field"); } - return common_peg_choice_parser{j["children"].get>()}; + return common_peg_choice_parser{ j["children"].get>() }; } if (type == "repetition") { if (!j.contains("child") || !j.contains("min_count") || !j.contains("max_count")) { throw std::runtime_error("repetition parser missing required fields"); } - return 
common_peg_repetition_parser{ - j["child"].get(), - j["min_count"].get(), - j["max_count"].get() - }; + return common_peg_repetition_parser{ j["child"].get(), j["min_count"].get(), + j["max_count"].get() }; } if (type == "and") { if (!j.contains("child")) { throw std::runtime_error("and parser missing 'child' field"); } - return common_peg_and_parser{j["child"].get()}; + return common_peg_and_parser{ j["child"].get() }; } if (type == "not") { if (!j.contains("child")) { throw std::runtime_error("not parser missing 'child' field"); } - return common_peg_not_parser{j["child"].get()}; + return common_peg_not_parser{ j["child"].get() }; } if (type == "any") { return common_peg_any_parser{}; @@ -1583,23 +1856,20 @@ static common_peg_parser_variant deserialize_parser_variant(const nlohmann::json return common_peg_space_parser{}; } if (type == "chars") { - if (!j.contains("pattern") || !j.contains("ranges") || !j.contains("negated") || - !j.contains("min_count") || !j.contains("max_count")) { + if (!j.contains("pattern") || !j.contains("ranges") || !j.contains("negated") || !j.contains("min_count") || + !j.contains("max_count")) { throw std::runtime_error("chars parser missing required fields"); } common_peg_chars_parser parser; - parser.pattern = j["pattern"]; - parser.negated = j["negated"]; + parser.pattern = j["pattern"]; + parser.negated = j["negated"]; parser.min_count = j["min_count"]; parser.max_count = j["max_count"]; for (const auto & range_json : j["ranges"]) { if (!range_json.contains("start") || !range_json.contains("end")) { throw std::runtime_error("char_range missing 'start' or 'end' field"); } - parser.ranges.push_back({ - range_json["start"].get(), - range_json["end"].get() - }); + parser.ranges.push_back({ range_json["start"].get(), range_json["end"].get() }); } return parser; } @@ -1610,7 +1880,7 @@ static common_peg_parser_variant deserialize_parser_variant(const nlohmann::json if (!j.contains("delimiters") || !j["delimiters"].is_array()) { throw 
std::runtime_error("until parser missing or invalid 'delimiters' field"); } - return common_peg_until_parser{j["delimiters"].get>()}; + return common_peg_until_parser{ j["delimiters"].get>() }; } if (type == "schema") { if (!j.contains("child") || !j.contains("name") || !j.contains("schema") || !j.contains("raw")) { @@ -1618,7 +1888,7 @@ static common_peg_parser_variant deserialize_parser_variant(const nlohmann::json } common_peg_schema_parser parser; parser.child = j["child"].get(); - parser.name = j["name"]; + parser.name = j["name"]; if (!j["schema"].is_null()) { parser.schema = std::make_shared(j["schema"]); } @@ -1629,17 +1899,14 @@ static common_peg_parser_variant deserialize_parser_variant(const nlohmann::json if (!j.contains("name") || !j.contains("child") || !j.contains("trigger")) { throw std::runtime_error("rule parser missing required fields"); } - return common_peg_rule_parser{ - j["name"].get(), - j["child"].get(), - j["trigger"].get() - }; + return common_peg_rule_parser{ j["name"].get(), j["child"].get(), + j["trigger"].get() }; } if (type == "ref") { if (!j.contains("name") || !j["name"].is_string()) { throw std::runtime_error("ref parser missing or invalid 'name' field"); } - return common_peg_ref_parser{j["name"]}; + return common_peg_ref_parser{ j["name"] }; } if (type == "atomic") { if (!j.contains("child")) { diff --git a/common/peg-parser.h b/common/peg-parser.h index 1cd640365f..5d08cf6d47 100644 --- a/common/peg-parser.h +++ b/common/peg-parser.h @@ -4,6 +4,7 @@ #include #include +#include #include #include #include @@ -111,6 +112,8 @@ class common_peg_ast_arena { void visit(common_peg_ast_id id, const common_peg_ast_visitor & visitor) const; void visit(const common_peg_parse_result & result, const common_peg_ast_visitor & visitor) const; + + std::string dump(); }; struct common_peg_parse_result { @@ -139,6 +142,7 @@ struct common_peg_parse_result { struct common_peg_parse_context { std::string input; bool is_partial; + bool debug = false; 
// Enable debug output for parser tracing common_peg_ast_arena ast; int parse_depth; @@ -299,6 +303,8 @@ class common_peg_arena { friend class common_peg_parser_builder; private: + std::string dump_impl(common_peg_parser_id id, std::unordered_set & visited) const; + common_peg_parser_id add_parser(common_peg_parser_variant parser); void add_rule(const std::string & name, common_peg_parser_id id); diff --git a/docs/autoparser.md b/docs/autoparser.md new file mode 100644 index 0000000000..3c77c4d304 --- /dev/null +++ b/docs/autoparser.md @@ -0,0 +1,513 @@ +# Unified Auto-Parser Architecture + +The auto-parser automatically analyzes chat templates to determine how to parse model outputs, including content, reasoning, and tool calls. + +## Overview + +The unified auto-parser uses a two-phase incremental analysis approach: + +1. **Phase 1: Content & Reasoning Analysis** - Analyzes how the template handles basic content and reasoning, without considering tools +2. **Phase 2: Tool Call Analysis** - Analyzes tool calling patterns, layered on top of Phase 1 + +## Data Structures + +### content_structure (Phase 1 Result) + +Describes how the template handles content and reasoning: + +```cpp +struct content_structure { + enum reasoning_mode_type { + REASONING_NONE, // No reasoning markers detected + REASONING_OPTIONAL, // ... may appear before content + REASONING_FORCED_OPEN, // Template ends with open reasoning tag OR starts implicitly (empty start, present end) + }; + + reasoning_mode_type reasoning_mode = REASONING_NONE; + std::string reasoning_start; // e.g., "", "<|START_THINKING|>" + std::string reasoning_end; // e.g., "", "<|END_THINKING|>" + + // Content wrapping mode + enum content_mode_type { + CONTENT_PLAIN, // No content markers + CONTENT_ALWAYS_WRAPPED, // ... 
always present + CONTENT_WRAPPED_WITH_REASONING, // Content wrapped only when reasoning present + }; + + content_mode_type content_mode = CONTENT_PLAIN; + std::string content_start; // e.g., "", "<|START_RESPONSE|>" + std::string content_end; // e.g., "", "<|END_RESPONSE|>" +}; +``` + +### tool_call_structure (Phase 2 Result) + +Describes how the template formats tool calls: + +```cpp +struct tool_call_structure { + bool supports_tools = false; + + // Container markers (what wraps all tool calls) + std::string tool_section_start; // e.g., "", "[TOOL_CALLS]", "", "" + std::string tool_section_end; // e.g., "", "]", "", "" + + // Function format (how individual functions are structured) + enum function_format { + FUNC_JSON_OBJECT, // {"name": "X", "arguments": {...}} + FUNC_TAG_WITH_NAME, // {...} + FUNC_TAG_NAME_ONLY, // ... where X is function name (rare) + FUNC_PREFIXED_INDEXED, // <|tool_call_begin|>functions.X:0<|tool_call_argument_begin|>{...}<|tool_call_end|> + FUNC_NAME_AS_KEY, // [{"function_name": {...arguments...}}] (Apertus-style) + FUNC_BRACKET_TAG, // [TOOL_CALLS]X[CALL_ID]id[ARGS]{...} (Mistral Small 3.2 style) + FUNC_RECIPIENT_BASED, // >>>recipient\n{content} where recipient is "all" (content) or function name (tools) + FUNC_MARKDOWN_CODE_BLOCK, // Action:\n```json\n[{"tool_name": "X", ...}]\n``` (Cohere Command-R Plus) + }; + function_format function_format = FUNC_JSON_OBJECT; + + // For FUNC_JSON_OBJECT format - field names (may vary between templates) + std::string name_field = "name"; // Could be "tool_name", "function" + std::string args_field = "arguments"; // Could be "parameters", "params", "input" + std::string id_field; // Optional: "id", "tool_call_id", "" + + // For FUNC_TAG_WITH_NAME format + std::string function_prefix; // e.g., "" + std::string function_close; // e.g., "" + + // For FUNC_PREFIXED_INDEXED format (e.g., Kimi-K2) + std::string per_call_start; // e.g., "<|tool_call_begin|>" + std::string function_namespace; // e.g., 
"functions." (prefix before function name) + std::string args_marker; // e.g., "<|tool_call_argument_begin|>" + std::string per_call_end; // e.g., "<|tool_call_end|>" + + // For FUNC_BRACKET_TAG format (e.g., Mistral Small 3.2) + std::string id_marker; // e.g., "[CALL_ID]" - marker before tool call ID + + // For FUNC_MARKDOWN_CODE_BLOCK format (Cohere Command-R Plus) + std::string code_block_marker; // e.g., "Action:" - text marker before code block + std::string code_block_language; // e.g., "json" - language identifier in code fence + + // Argument format (how arguments are structured within a function) + enum argument_format { + ARGS_JSON, // Standard JSON object: {"key": "value", ...} + ARGS_TAGGED, // XML-style: value + ARGS_KEY_VALUE_TAGS, // keyvalue (GLM-4.6) + }; + argument_format argument_format = ARGS_JSON; + + // For ARGS_TAGGED format + std::string arg_prefix; // e.g., "" + std::string arg_close; // e.g., "", "" + std::string arg_separator; // e.g., "", "\n" + + // Flag: template renders null content as "None" string, requires empty string instead + bool requires_nonnull_content = false; +}; +``` + +## Analysis Flow + +```console +Template + | + v +Phase 1: analyze_content_structure() + |-- detect_reasoning_markers() - compare outputs with reasoning_content vs without + |-- detect_content_markers() - render with content and detect wrapping + |-- detect_reasoning_mode() - check if prompt ends with open tag + | + v +content_structure + | + v +Phase 2: analyze_tool_structure() + |-- Check minja.supports_tool_calls + |-- Differential analysis for tool patterns + |-- Classify function format (JSON vs tagged) + |-- Classify argument format (JSON vs tagged) + | + v +tool_call_structure + | + v +generate_parser(content_structure, tool_call_structure) + |-- build_reasoning_block(content_structure) + |-- build_content_block(content_structure) + |-- build_tool_section(tool_call_structure, tools) + |-- Compose into final parser + | + v +common_chat_params (parser, 
grammar, triggers, preserved_tokens) +``` + +## Entry Point + +The mechanism starts in `common/chat.cpp`, in `common_chat_templates_apply_jinja`: + +```cpp +// 1. Analyze the template (two-phase) +template_analysis_result analysis = template_analyzer::analyze_template(tmpl); + +// 2. Generate the parser and grammar +auto auto_params = universal_peg_generator::generate_parser(analysis, tmpl, params); + +// 3. Use if it provides more than basic content handling +if (auto_params.format != COMMON_CHAT_FORMAT_CONTENT_ONLY || + auto_params.thinking_forced_open || + !auto_params.parser.empty()) { + return auto_params; +} +``` + +## Builder Methods + +The unified builder (`common_chat_peg_unified_builder`) provides high-level methods: + +- `build_reasoning_block(cs, reasoning_format, thinking_forced_open)` - Build reasoning parser +- `build_content_block(cs, reasoning_format)` - Build content parser +- `build_tool_section(ts, tools, parallel_tool_calls, force_tool_calls)` - Build tool section +- `build_function(ts, name, schema)` - Build single function parser +- `build_arguments(ts, schema)` - Build arguments parser + +## Key Templates Supported + +- **Granite** - `` + `` with tool calls +- **Nemotron** - JSON tools with `` wrapper +- **Qwen/Hermes** - XML-style `` format +- **Command-R7B** - `<|START_THINKING|>`/`<|START_RESPONSE|>` + `<|START_ACTION|>` tools +- **DeepSeek R1** - Forced thinking + complex tools +- **Mistral Nemo** - `[TOOL_CALLS]` wrapper +- **MiniMax** - `` wrapper with XML tools +- **GLM-4.6** - `` + `name\n......` format +- **Kimi-K2** - `FUNC_PREFIXED_INDEXED` format with namespace and indices +- **Mistral Small 3.2** - `FUNC_BRACKET_TAG` format with `[TOOL_CALLS]` markers +- **Functionary v3.2** - `FUNC_RECIPIENT_BASED` format with `>>>` routing + +## Files + +| File | Purpose | +|------|---------| +| `common/chat-auto-parser.h` | Data structures and API declarations | +| `common/chat-auto-parser-analyzer.cpp` | Phase 1 and Phase 2 analysis 
implementation | +| `common/chat-auto-parser-generator.cpp` | PEG parser generator | +| `common/chat-auto-parser-helpers.h/cpp` | Shared helper functions | +| `common/chat-peg-parser.h/cpp` | Unified builder and mapper classes | +| `common/chat.cpp` | Main entry point and wire-up | + +## Algorithm Details + +### Phase 1: Content & Reasoning Analysis + +#### Reasoning Detection (4 Methods) + +**Method 1: Differential Reasoning Content Analysis** + +- Render template with `reasoning_content` field present vs absent +- Compare outputs to find markers between `THOUGHT_MARKER` and `CONTENT_MARKER` +- If only closing tag found, derive opening tag using patterns: + - XML: `` → `` + - Special tokens: `<|END_X|>` → `<|START_X|>`, `<|/X|>` → `<|X|>` +- Handles various tag formats including XML and special token formats + +**Method 2: Enable-Thinking Toggle Analysis** + +- Toggle `enable_thinking` context variable between true/false +- Detects differences in generated prompts +- Handles two scenarios: + - **Normal case**: enable_thinking=true adds reasoning markers + - **Reverse case**: enable_thinking=false adds empty thinking block (GLM-4.6 style) +- Uses string difference analysis to extract markers +- Validates extracted tags against blacklist of role markers + +**Method 3: Prompt Ending Analysis** + +- Checks if prompt ends with unclosed reasoning tag +- Looks for trailing tags in prompt with `enable_thinking=true` +- Differentiates between open tags (``) and close tags (``) +- Handles blacklisted tags (role markers, system tokens) +- Validates reasoning-like patterns (contains "think", "reason", "thought") + +**Method 4: Adjacent Tag Pair Detection** + +- Looks for patterns like ``, `<|START_THINKING|><|END_THINKING|>`, `[think][/think]` +- Searches for predefined tag patterns in prompt +- Validates tags are adjacent with only whitespace between +- Supports both simple and complex token formats + +#### Content Detection Algorithm + +1. 
**Dual-Mode Rendering**: Render template with content marker in both thinking-enabled and thinking-disabled modes +2. **Pattern Matching**: Search for known content wrapper patterns: + - `<|START_RESPONSE|>` / `<|END_RESPONSE|>` + - `` / `` + - `` / `` + - `` / `` + - `<|CHATBOT_TOKEN|>` / `<|END_OF_TURN_TOKEN|>` +3. **Mode Classification**: + - `CONTENT_ALWAYS_WRAPPED`: Found in both thinking modes + - `CONTENT_WRAPPED_WITH_REASONING`: Found only with thinking enabled + - `CONTENT_PLAIN`: No wrapping detected + +#### Reasoning Mode Detection + +- **REASONING_FORCED_OPEN**: + - **Explicit**: Prompt ends with reasoning start marker (e.g., ``). + - **Implicit**: reasoning end marker is present but start marker is empty (e.g., `[BEGIN FINAL RESPONSE]`). +- **REASONING_OPTIONAL**: Markers present but not forced. +- **REASONING_NONE**: No markers detected. + +### Phase 2: Tool Call Structure Analysis + +#### Differential Analysis Algorithm + +**Test Payload Strategy**: + +1. **Base**: User + Assistant with content only (no tools) +2. **Tool 1**: User + Assistant with tool_calls (empty args) +3. **Tool 2**: User + Assistant with tool_calls (with args) +4. **Tool 3**: User + Assistant with multiple tool calls + +**Pattern Extraction Process**: + +1. Compute string differences between base and tool outputs +2. Use `test_function_name` as reliable search anchor (using `rfind` for last occurrence) +3. 
Extract structural elements: + - `tool_call_opener`: Common prefix before function name + - `tool_call_closer`: Common suffix after function calls + - `function_opener`: Tag immediately before function name + - `function_closer`: Tag after function content + - `parameter_key_prefix/suffix`: Argument wrapping patterns + +#### Format Classification Logic + +**FORMAT_JSON_NATIVE**: + +- Detected by `{"name":` pattern in `tool_call_opener` +- Or XML markers with JSON structure + +**FORMAT_XML_CONSTRUCTED**: + +- `function_opener` starts with `<` +- No substantial parameter markers + +**FORMAT_RECIPIENT_BASED**: + +- `tool_call_start_marker == function_opener` +- No parameter markers +- Opener doesn't start with structural chars + +**FORMAT_BRACKET_TAG**: + +- `function_name_suffix` contains bracket tags like `[CALL_ID]...[ARGS]` +- `tool_call_start_marker` matches `[TOOL_CALLS]` pattern + +**FORMAT_PREFIXED_INDEXED**: + +- `function_opener` ends with `.` (namespace separator) +- `function_name_suffix` starts with `:` followed by digit +- Example: `functions.name:0<|tool_call_argument_begin|>` + +#### Specialized Format Handling + +**FUNC_PREFIXED_INDEXED (Kimi-K2)**: + +- Splits `function_opener` at last `>` to get `per_call_start` + `function_namespace` +- Extracts `args_marker` from `function_name_suffix` +- Derives `per_call_end` by matching structural patterns in `tool_call_closer` + +**FUNC_TAG_WITH_NAME (Functionary/Nemotron)**: + +- Detects nested vs non-nested formats +- Uses overlap detection between `tool_section_start` and `function_prefix` +- Handles double-wrapping prevention + +**ARGS_KEY_VALUE_TAGS (GLM-4.6)**: + +- Detects `keyvalue` pattern +- Cleans up suffix to extract just the key closer + +**FUNC_RECIPIENT_BASED (Functionary v3.2)**: + +- Detects `>>>` recipient delimiter format +- Routes to "all" for content, function name for tools +- Uses same delimiter for both content and tool routing + +**FUNC_BRACKET_TAG (Mistral Small 3.2/Devstral)**: + +- 
Detects `[TOOL_CALLS]function_name[ARGS]{...}` pattern +- Optional `[CALL_ID]id` marker for tool call identification +- No section wrapper - each call starts independently + +### Generator Algorithms + +#### Unified Parser Building + +**Composition Strategy**: + +```cpp +// Standard format +sequence({ reasoning, space(), content, space(), tools, space(), content, end() }) + +// With section markers +sequence({ reasoning, space(), content_until(section_start), space(), tools, space(), content, end() }) + +// Forced thinking handling +optional(reasoning) when thinking_forced_open && tools present +``` + +**Trigger Word Detection**: + +- Uses `tool_section_start` as primary trigger +- Falls back to `function_prefix` or `per_call_start` +- Raw JSON uses regex pattern trigger + +**Lazy Grammar Optimization**: + +- Enabled by default for performance +- Disabled when thinking forced open +- Disabled when no clear trigger word exists + +## Testing & Debugging + +### Comprehensive Test Coverage + +The test suite covers: + +**Reasoning Models**: + +- Qwen-QwQ-32B (forced-open thinking) +- DeepSeek R1 variants (reasoning only) +- IBM Granite (reasoning + tools) +- ByteDance Seed-OSS (custom reasoning tags) +- Ministral-3-14B-Reasoning +- llama-cpp-deepseek-r1 + +**Tool Call Formats**: + +- JSON: Llama 3.x, Mistral Nemo, Hermes, MiMo-VL +- XML: Nemotron, Qwen3-Coder, MiniMax +- Tagged: GLM-4.6 (key-value tags) +- Bracket-tag: Mistral Small 3.2, Devstral +- Prefixed-indexed: Kimi-K2 variants +- Name-as-key: Apertus-8B +- Recipient-based: Functionary v3.2 + +**Edge Cases**: + +- Streaming/partial parsing +- Empty content with tools +- Parallel tool calls +- Forced thinking mode +- Multi-byte Unicode markers +- Null content handling +- Multi-line code in tool arguments +- Custom reasoning tags (ByteDance Seed-OSS) + +### Debug Tools + +**Template Debugger**: `tests/debug-template-parser.cpp` + +- Usage: `./bin/debug-template-parser path/to/template.jinja` +- Shows detected 
format, markers, generated parser, and GBNF grammar + +**Debug Logging**: Enable with `LLAMA_LOG_VERBOSITY=2` + +- Shows detailed analysis steps +- Displays pattern extraction results +- Lists generated parser structure + +**PEG Test Builder**: Fluent API for creating test cases + +```cpp +auto tst = peg_tester("template.jinja"); +tst.test("input") + .reasoning_format(COMMON_REASONING_FORMAT_AUTO) + .tools({tool}) + .expect(expected_message) + .run(); +``` + +## Adding Support for New Templates + +To support a new template format: + +1. **If it follows standard patterns** - The auto-parser should detect it automatically +2. **If it has unique markers** - Add the markers to the detection patterns in: + - `detect_reasoning_markers()` for reasoning tags + - `detect_content_markers()` for content wrappers + - `extract_patterns_from_differences()` for tool call patterns +3. **If it needs special handling** - Add a dedicated handler in `chat.cpp` before the auto-parser block + +## Edge Cases and Quirks + +1. **Forced Thinking**: If `enable_thinking` is true but the model has already started a thought block (e.g., ended the prompt with ``), the parser enters "forced thinking" mode where it immediately expects reasoning content. +2. **Ambiguous Content**: Templates that mix content and tool calls without clear delimiters can be tricky. The analyzer tries to find "common" start/end patterns across multiple examples to be robust. +3. 
**Double Wrapping**: Some templates (e.g., Functionary) use the same string for both the tool section start and the function prefix (e.g., `name\n......` format | +| Kimi-K2 / Kimi-K2-Instruct / Kimi-K2-Thinking | `FUNC_PREFIXED_INDEXED` | `functions.name:0` with special markers | +| Apertus-8B-Instruct | `FUNC_NAME_AS_KEY` | `{"function_name": {...}}` format | +| MiniMax-M2 | `FUNC_TAG_WITH_NAME` | XML invoke with parameter tags | +| NVIDIA-Nemotron-Nano-v2 | `FUNC_JSON_OBJECT` | `` wrapper (nested) | +| Mistral-Nemo-Instruct-2407 | `FUNC_JSON_OBJECT` | `[TOOL_CALLS]` wrapper with id field | +| Functionary v3.1 | `FUNC_TAG_WITH_NAME` | `` non-nested format | +| Functionary v3.2 | `FUNC_RECIPIENT_BASED` | `>>>` recipient delimiter format | +| MiMo-VL / Hermes 3 / Qwen 2.5 | `FUNC_JSON_OBJECT` | `` wrapper | +| Apriel 1.5 | `FUNC_JSON_OBJECT` | `` wrapper with JSON array | +| Apriel 1.6 Thinker | Reasoning only | Implicit reasoning start | +| Cohere Command-R7B | `FUNC_JSON_OBJECT` | `START_RESPONSE/ACTION/THINKING` markers | +| Mistral Small 3.2 | `FUNC_BRACKET_TAG` | `[TOOL_CALLS]func[ARGS]{...}` with ID | +| Devstral | `FUNC_BRACKET_TAG` | `[TOOL_CALLS]func[ARGS]{...}` without ID | +| Ministral-3-14B-Reasoning | Custom reasoning | `[THINK]...[/THINK]` tags | +| IBM Granite | `FUNC_JSON_OBJECT` | `` + `` | +| ByteDance Seed-OSS | `FUNC_TAG_WITH_NAME` | Custom `` and `` tags | +| Qwen3-Coder | `FUNC_TAG_WITH_NAME` | XML-style tool format | +| Cohere Command-R Plus | `FUNC_MARKDOWN_CODE_BLOCK` | `Action:\n\`\`\`json\n[...]\n\`\`\`` format | + +### Currently Unsupported Templates + +| Template Family | Model / Variant | Issue Description | +|-----------------|-----------------|-------------------| +| **OpenAI** | `GPT-OSS` | Complex channel markers need new format | + +### Templates Without Tool Support + +Some templates genuinely don't support tool calls (this is not a detection bug): + +- **Phi 3.5 Mini** - The official template has no tool handling. 
Use Phi-4-mini-instruct for function calling, or community fine-tuned versions. +- **Google Gemma 2 2B** - Pure instruction-following model without tool capabilities. + +### TODO / Roadmap + +- [ ] **Fix OpenAI GPT-OSS**: Add `FUNC_CHANNEL_BASED` format for channel marker structure. +- [x] **~~Fix Cohere Command-R Plus~~**: Added `FUNC_MARKDOWN_CODE_BLOCK` format for `Action:\n\`\`\`json` structure. + +### Recent Additions (Dec 2025 - Jan 2026) + +- **FUNC_RECIPIENT_BASED**: Support for Functionary v3.2's `>>>` recipient delimiter format +- **FUNC_BRACKET_TAG**: Support for Mistral Small 3.2 and Devstral's `[TOOL_CALLS]...` format +- **Enhanced Content Detection**: Better handling of custom reasoning tags and content wrappers +- **Improved Streaming Support**: Better handling of partial parsing for all supported formats +- **Custom Tag Support**: Support for non-standard reasoning tags like `` (ByteDance) +- **Multi-line Tool Arguments**: Better parsing of complex tool arguments with code blocks +- **FUNC_MARKDOWN_CODE_BLOCK**: Support for Cohere Command-R Plus markdown code block format +- **Implicit Reasoning Support**: Support for templates where reasoning starts implicitly without a start marker. + +The auto-parser now successfully handles 25+ different template formats across reasoning-only, tool-calling, and hybrid models, with comprehensive test coverage ensuring robust parsing across streaming and non-streaming scenarios. diff --git a/docs/development/parsing.md b/docs/development/parsing.md index dbb989bf08..e627ea6502 100644 --- a/docs/development/parsing.md +++ b/docs/development/parsing.md @@ -22,7 +22,7 @@ Below is a contrived example demonstrating how to use the PEG parser to parse output from a model that emits arguments as JSON. 
```cpp -auto parser = build_chat_peg_native_parser([&](common_chat_peg_native_builder & p) { +auto parser = build_chat_peg_unified_parser([&](common_chat_peg_unified_builder & p) { // Build a choice of all available tools auto tool_choice = p.choice(); for (const auto & tool : tools) { @@ -212,7 +212,7 @@ mapper.from_ast(ctx.ast, result); ### Native -The `common_chat_peg_native_builder` builds a `native` parser suitable for +The `common_chat_peg_unified_builder` builds a `native` parser suitable for models that emit tool arguments as a direct JSON object. - **`reasoning(p)`** - Tag node for `reasoning_content` @@ -225,7 +225,7 @@ models that emit tool arguments as a direct JSON object. - **`tool_args(p)`** - Tag the tool arguments ```cpp -build_chat_peg_native_parser([&](common_chat_peg_native_parser & p) { +build_chat_peg_unified_parser([&](common_chat_peg_unified_builder & p) { auto get_weather_tool = p.tool(p.sequence({ p.tool_open(p.literal("{")), p.json_member("name", "\"" + p.tool_name(p.literal("get_weather")) + "\""), @@ -246,7 +246,7 @@ build_chat_peg_native_parser([&](common_chat_peg_native_parser & p) { ### Constructed -The `common_chat_peg_constructed_builder` builds a `constructed` parser +The `common_chat_peg_unified_builder` builds a `constructed` parser suitable for models that emit tool arguments as separate entities, such as XML tags. @@ -264,7 +264,7 @@ tags. 
- **`tool_arg_json_value(p)`** - Tag JSON value for the argument
 
 ```cpp
-build_chat_peg_constructed_parser([&](common_chat_peg_constructed_builder & p) {
+build_chat_peg_unified_parser([&](common_chat_peg_unified_builder & p) {
     auto location_arg = p.tool_arg(
         p.tool_arg_open(""),
         p.tool_arg_string_value(p.until("")),
diff --git a/models/templates/Apriel-1.6-15b-Thinker-fixed.jinja b/models/templates/Apriel-1.6-15b-Thinker-fixed.jinja
new file mode 100755
index 0000000000..9df29255b7
--- /dev/null
+++ b/models/templates/Apriel-1.6-15b-Thinker-fixed.jinja
@@ -0,0 +1,173 @@
+{# ---------------------------------------------------------------------- #}
+{# Default setup and flags #}
+{# ---------------------------------------------------------------------- #}
+{# FIX: Use "is defined" check BEFORE accessing the variable #}
+{%- set messages = messages if (messages is defined and messages) else [] -%}
+{%- set tools = tools if (tools is defined and tools) else [] -%}
+{%- set add_generation_prompt = add_generation_prompt if (add_generation_prompt is defined) else false -%}
+{%- set available_tool_string = '' -%}
+{%- set add_tool_id = true -%}
+{%- set add_thoughts = true -%} {# whether to include reasoning blocks #}
+{%- set add_generation_prompt = true -%} {# whether to emit reasoning starter before assistant response #}
+{# Optional token placeholders (safe defaults) #}
+{%- set bos_token = bos_token if (bos_token is defined) else '' -%}
+{%- set eos_token = eos_token if (eos_token is defined) else '' -%}
+{# ---------------------------------------------------------------------- #}
+{# Core reasoning prompt and assistant reasoning prefix #}
+{# ---------------------------------------------------------------------- #}
+{%- set reasoning_prompt -%}
+ You are a thoughtful, systematic AI assistant from ServiceNow Language Models (SLAM) lab.
+ Analyze each question carefully, present your reasoning step-by-step, then provide the final + response after the marker [BEGIN FINAL RESPONSE]. +{%- endset -%} +{%- set reasoning_asst_turn_start = 'Here are my reasoning steps:\n' -%} +{# ---------------------------------------------------------------------- #} +{# Tool list and tool call output format #} +{# ---------------------------------------------------------------------- #} +{%- if tools|length > 0 -%} + {%- set available_tool_string -%} + You are provided with function signatures within XML tags. + You may call one or more functions to assist with the user query. + Don't make assumptions about the arguments. You should infer the argument values from previous + user responses and the system message. + Here are the available tools: + + {% for tool in tools %}{{ tool|string }}{% endfor %} + + . + + Return all function calls as a list of JSON objects within XML tags. + Each JSON object should contain a function name and arguments as follows: + [ + {"name": , "arguments": }, + {"name": , "arguments": }, + ... 
+ ] + {%- endset -%} +{%- endif -%} +{# ---------------------------------------------------------------------- #} +{# Start system block if first message is not system #} +{# ---------------------------------------------------------------------- #} +{%- if messages|length > 0 and messages[0]['role'] != 'system' -%} + {%- if tools|length > 0 -%} + {{ bos_token + '<|begin_system|>\n' + reasoning_prompt + '\n' + available_tool_string + '\n' }} + {%- else -%} + {{ bos_token + '<|begin_system|>\n' + reasoning_prompt + '\n' }} + {%- endif -%} +{%- endif -%} +{# ---------------------------------------------------------------------- #} +{# Iterate through messages #} +{# ---------------------------------------------------------------------- #} +{%- for message in messages -%} + + {# ---------------- USER MESSAGE ---------------- #} + {%- if message['role'] == 'user' -%} + {{ '<|begin_user|>\n' }} + {%- if message['content'] is not string -%} + {%- for chunk in message['content'] -%} + {%- if chunk['type'] == 'text' -%} + {{ chunk['text'] }} + {%- elif chunk['type'] in ['image', 'image_url'] -%} + {{ '[IMG]' }} + {%- else -%} + {{ raise_exception('Unrecognized content type!') }} + {%- endif -%} + {%- endfor -%} + {%- else -%} + {{ message['content'] }} + {%- endif -%} + + {# ---------------- SYSTEM MESSAGE ---------------- #} + {%- elif message['role'] == 'system' -%} + {%- set sys_content = message.get('content', '') -%} + {%- if sys_content and sys_content|length > 0 -%} + {%- if sys_content is string -%} + {%- set system_message = sys_content -%} + {%- else -%} + {%- set system_message = sys_content[0]['text'] -%} + {%- endif -%} + {%- else -%} + {%- set system_message = '' -%} + {%- endif -%} + + {%- if tools|length > 0 -%} + {{ bos_token + '<|begin_system|>\n' + reasoning_prompt + '\n' + system_message + '\n' + available_tool_string + '\n' }} + {%- else -%} + {{ bos_token + '<|begin_system|>\n' + reasoning_prompt + '\n' + system_message + '\n' }} + {%- endif -%} + + {# 
---------------- ASSISTANT MESSAGE ---------------- #} + {%- elif message['role'] == 'assistant' -%} + {%- if loop.last -%} + {%- set add_tool_id = false -%} + {%- endif -%} + + {{ '\n<|begin_assistant|>\n' }} + + {%- if add_thoughts and message.get('reasoning_content') and loop.last -%} + {{ message['reasoning_content'] + '\n[BEGIN FINAL RESPONSE]\n' }} + {%- endif -%} + + {%- set asst_content = message.get('content', '') -%} + {%- if asst_content and asst_content|length > 0 -%} + {%- if asst_content is not string -%} + {%- set asst_text = asst_content[0]['text'] -%} + {%- else -%} + {%- set asst_text = asst_content -%} + {%- endif -%} + {# For historical turns (not the last), strip reasoning and keep only final response #} + {%- if not loop.last and '[BEGIN FINAL RESPONSE]' in asst_text -%} + {{- asst_text.split('[BEGIN FINAL RESPONSE]')[-1] | trim -}} + {%- else -%} + {{- asst_text -}} + {%- endif -%} + {%- elif message.get('chosen') and message['chosen']|length > 0 -%} + {{ message['chosen'][0] }} + {%- endif -%} + + {# Tool call output #} + {%- set tool_calls = message.get('tool_calls', []) -%} + {%- if tool_calls and tool_calls|length > 0 -%} + {{ '\n[' }} + {%- for tool_call in tool_calls -%} + {{ '{"name": "' + tool_call['function']['name'] + '", "arguments": ' + tool_call['function']['arguments']|string }} + {%- if add_tool_id == true and 'id' in tool_call -%} + {{ ', "id": "' + tool_call['id'] + '"' }} + {%- endif -%} + {{ '}' }} + {%- if not loop.last -%}{{ ', ' }}{%- endif -%} + {%- endfor -%} + {{ ']' }} + {%- endif -%} + + {%- set training_prompt = training_prompt if (training_prompt is defined) else false -%} + {%- if not loop.last or training_prompt -%} + {{ '\n<|end|>\n' }} + {%- endif -%} + + {# ---------------- TOOL RESULT MESSAGE ---------------- #} + {%- elif message['role'] == 'tool' -%} + {%- set tool_content = message.get('content', '') -%} + {%- if tool_content is string -%} + {%- set tool_message = tool_content -%} + {%- else -%} + {%- set 
tool_message = tool_content[0]['text'] if tool_content else '' -%} + {%- endif -%} + {{ '<|begin_tool_result|>\n' + tool_message|string + '\n' }} + + {# ---------------- CONTENT MESSAGE ---------------- #} + {%- elif message['role'] == 'content' -%} + {%- set msg_content = message.get('content', '') -%} + {%- if msg_content is not string -%} + {{ '<|begin_content|>\n' + msg_content[0]['text'] + '\n' }} + {%- else -%} + {{ '<|begin_content|>\n' + msg_content + '\n' }} + {%- endif -%} + {%- endif -%} + + {# ---------------- REASONING PROMPT BEFORE NEXT ASSISTANT ---------------- #} + {%- if loop.last and add_generation_prompt and message['role'] != 'assistant' -%} + {{ '\n<|begin_assistant|>\n' + reasoning_asst_turn_start }} + {%- endif -%} + +{%- endfor -%} diff --git a/models/templates/Bielik-11B-v3.0-Instruct.jinja b/models/templates/Bielik-11B-v3.0-Instruct.jinja new file mode 100644 index 0000000000..40ef50076e --- /dev/null +++ b/models/templates/Bielik-11B-v3.0-Instruct.jinja @@ -0,0 +1,77 @@ +{{ bos_token }} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content'] %} + {%- set loop_start_index = 1 %} +{%- else %} + {%- set system_message = "" %} + {%- set loop_start_index = 0 %} +{%- endif %} + +{%- if system_message or tools %} + {{- '<|im_start|>system\n' }} + {%- if system_message %} + {{- system_message }} + {%- endif %} + {%- if tools %} + {{- '\n\nMasz dostęp do następujących narzędzi. Definicje narzędzi znajdują się poniżej wewnątrz znaczników <|function_list|>:\n<|function_list|>\n' }} + {{- '[' }} + {%- for tool in tools %} + {{- tool | tojson }} + {%- if not loop.last %} + {{- ',\n' }} + {%- endif %} + {%- endfor %} + {{- ']\n<|function_list|>\n\nAby wywołać narzędzie, użyj formatu {"name": "nazwa_narzędzia", "arguments": {"argument": "wartość"}}. 
Wyniki działania narzędzi zostaną przekazane z markerem <|function_output|>.\n' }} + {%- endif %} + {%- if enable_thinking %} + {{- '\n\nZanim odpowiesz na pytanie, najpierw przemyśl swoje kroki i umieść swoje myśli wewnątrz tagów .... Musisz najpierw pomyśleć, zanim udzielisz odpowiedzi. ' -}} + {{- 'WAŻNE: Powinieneś myśleć w tym samym języku, co pytanie użytkownika. Jeśli pytanie jest zadane po polsku, powinieneś również myśleć po polsku. Jeśli pytanie jest po angielsku, myślisz również po angielsku itd. ' }} + {{- '** PAMIĘTAJ! ** Pytanie po polsku -> myślenie po polsku -> odpowiedź po polsku!' -}} + {{- '*** BARDZO WAŻNE!!! *** Jesteś Bielikiem, polskim modelem językowym. Twoją główną cechą jest umiejętność pisania po polsku. Jeśli użytkownik zadaje Ci pytania po polsku, ZAWSZE odpowiadaj po polsku. ' -}} + {{- 'Nawet, jeśli korzystasz z narzędzia, którego większość instrukcji jest po angielsku, powinieneś przede wszystkim odpowiadać po polsku, jeśli użytkownik zadaje pytanie w tym języku. 
' -}} + {%- endif %} + {{- '<|im_end|>\n' }} +{%- endif %} + +{%- for message in messages[loop_start_index:] %} + {%- if message['role'] == 'user' %} + {{- '<|im_start|>user\n' + message['content'] + '<|im_end|>\n' }} + {%- elif message['role'] == 'assistant' %} + {{- '<|im_start|>assistant\n' }} + {%- set content = message.content | default('') %} + {%- set reasoning_content = message.reasoning_content | default('') %} + {%- if not reasoning_content and '' in content and '' in content %} + {%- set reasoning_parts = content.split('') %} + {%- set reasoning_content = reasoning_parts[0].split('')[-1] %} + {%- set content = reasoning_parts[1:] | join('') %} + {%- endif %} + {%- if reasoning_content %} + {{- '\n' + reasoning_content.strip() + '\n\n' }} + {%- endif %} + {{- content.lstrip() }} + {%- if message.tool_calls %} + {%- for tool_call in message.tool_calls %} + {%- if tool_call.function %} + {%- set tool_call = tool_call.function %} + {%- endif %} + {{- '\n\n{"name": "' + tool_call.name + '", "arguments": ' + (tool_call.arguments if tool_call.arguments is string else tool_call.arguments | tojson) + '}\n' }} + {%- endfor %} + {%- endif %} + {{- '<|im_end|>\n' }} + {%- elif message['role'] == 'tool' %} + {%- if loop.index0 == 0 or messages[loop.index0 - 1]['role'] != 'tool' %} + {{- '<|im_start|>user\n' }} + {%- endif %} + {{- '<|function_output|>' + message['content'] }} + {%- if loop.last or messages[loop.index0 + 1]['role'] != 'tool' %} + {{- '<|im_end|>\n' }} + {%- endif %} + {%- endif %} +{%- endfor %} + +{%- if add_generation_prompt %} + {{- '<|im_start|>assistant\n' }} + {%- if enable_thinking %} + {{- '\n' }} + {%- endif %} +{%- endif %} \ No newline at end of file diff --git a/models/templates/GLM-4.7-Flash.jinja b/models/templates/GLM-4.7-Flash.jinja new file mode 100644 index 0000000000..2ab98ef068 --- /dev/null +++ b/models/templates/GLM-4.7-Flash.jinja @@ -0,0 +1,86 @@ +[gMASK] +{%- if tools -%} +<|system|> +# Tools + +You may call one or more 
functions to assist with the user query. + +You are provided with function signatures within XML tags: + +{% for tool in tools %} +{{ tool | tojson(ensure_ascii=False) }} +{% endfor %} + + +For each function call, output the function name and arguments within the following XML format: +{function-name}{arg-key-1}{arg-value-1}{arg-key-2}{arg-value-2}...{%- endif -%} +{%- macro visible_text(content) -%} + {%- if content is string -%} + {{- content }} + {%- elif content is iterable and content is not mapping -%} + {%- for item in content -%} + {%- if item is mapping and item.type == 'text' -%} + {{- item.text }} + {%- elif item is string -%} + {{- item }} + {%- endif -%} + {%- endfor -%} + {%- else -%} + {{- content }} + {%- endif -%} +{%- endmacro -%} +{%- set ns = namespace(last_user_index=-1) %} +{%- for m in messages %} + {%- if m.role == 'user' %} + {% set ns.last_user_index = loop.index0 -%} + {%- endif %} +{%- endfor %} +{% for m in messages %} +{%- if m.role == 'user' -%}<|user|>{{ visible_text(m.content) }} +{%- elif m.role == 'assistant' -%} +<|assistant|> +{%- set reasoning_content = '' %} +{%- set content = visible_text(m.content) %} +{%- if m.reasoning_content is string %} + {%- set reasoning_content = m.reasoning_content %} +{%- else %} + {%- if '' in content %} + {%- set reasoning_content = content.split('')[0].rstrip('\n').split('')[-1].lstrip('\n') %} + {%- set content = content.split('')[-1].lstrip('\n') %} + {%- endif %} +{%- endif %} +{%- if ((clear_thinking is defined and not clear_thinking) or loop.index0 > ns.last_user_index) and reasoning_content -%} +{{ '' + reasoning_content.strip() + ''}} +{%- else -%} +{{ '' }} +{%- endif -%} +{%- if content.strip() -%} +{{ content.strip() }} +{%- endif -%} +{% if m.tool_calls %} +{% for tc in m.tool_calls %} +{%- if tc.function %} + {%- set tc = tc.function %} +{%- endif %} +{{- '' + tc.name -}} +{% set _args = tc.arguments %}{% for k, v in _args.items() %}{{ k }}{{ v | tojson(ensure_ascii=False) if v is 
not string else v }}{% endfor %}{% endfor %} +{% endif %} +{%- elif m.role == 'tool' -%} +{%- if m.content is string -%} +{%- if loop.first or (messages[loop.index0 - 1].role != "tool") %} + {{- '<|observation|>' }} +{%- endif %} +{{- '' }} +{{- m.content }} +{{- '' }} +{%- else -%} +<|observation|>{% for tr in m.content %} +{{ tr.output if tr.output is defined else tr }}{% endfor -%} +{% endif -%} +{%- elif m.role == 'system' -%} +<|system|>{{ visible_text(m.content) }} +{%- endif -%} +{%- endfor -%} +{%- if add_generation_prompt -%} + <|assistant|>{{- '' if (enable_thinking is defined and not enable_thinking) else '' -}} +{%- endif -%} \ No newline at end of file diff --git a/models/templates/LFM2-8B-A1B.jinja b/models/templates/LFM2-8B-A1B.jinja new file mode 100644 index 0000000000..3738b3d145 --- /dev/null +++ b/models/templates/LFM2-8B-A1B.jinja @@ -0,0 +1,47 @@ +{{- bos_token -}} +{%- set system_prompt = "" -%} +{%- set ns = namespace(system_prompt="") -%} +{%- if messages[0]["role"] == "system" -%} + {%- set ns.system_prompt = messages[0]["content"] -%} + {%- set messages = messages[1:] -%} +{%- endif -%} +{%- if tools -%} + {%- set ns.system_prompt = ns.system_prompt + ("\n" if ns.system_prompt else "") + "You can use the following tools: <|tool_list_start|>[" -%} + {%- for tool in tools -%} + {%- if tool is not string -%} + {%- set tool = tool | tojson -%} + {%- endif -%} + {%- set ns.system_prompt = ns.system_prompt + tool -%} + {%- if not loop.last -%} + {%- set ns.system_prompt = ns.system_prompt + ", " -%} + {%- endif -%} + {%- endfor -%} + {%- set ns.system_prompt = ns.system_prompt + "]<|tool_list_end|>" -%} + {{- '**IMPORTANT**: The syntax for calling the tools is: <|tool_call_start|>JSON tool call goes here<|tool_call_end|>. Please only call tools in the specified manner.' 
-}} +{%- endif -%} +{%- if ns.system_prompt -%} + {{- "<|im_start|>system\n" + ns.system_prompt + "<|im_end|>\n" -}} +{%- endif -%} +{%- for message in messages -%} + {{- "<|im_start|>" + message["role"] + "\n" -}} + {%- set content = message["content"] -%} + {%- if content is not string -%} + {%- set content = content | tojson -%} + {%- endif -%} + {%- if message["role"] == "tool" -%} + {%- set content = "<|tool_response_start|>" + content + "<|tool_response_end|>" -%} + {%- elif message["role"] == "assistant" -%} + {%- if message.tool_calls %} + {%- for tool_call in message.tool_calls %} + {%- if tool_call.function %} + {%- set tool_call = tool_call.function %} + {%- endif %} + {{- '\n<|tool_call_start|>\n{"name": "' + tool_call.name + '", "arguments": ' + (tool_call.arguments if tool_call.arguments is string else tool_call.arguments | tojson) + '}\n<|tool_call_end|>\n' }} + {%- endfor %} + {%- endif %} + {%- endif -%} + {{- content + "<|im_end|>\n" -}} +{%- endfor -%} +{%- if add_generation_prompt -%} + {{- "<|im_start|>assistant\n" -}} +{%- endif -%} diff --git a/models/templates/Qwen3-Coder.jinja b/models/templates/Qwen3-Coder.jinja index 49b0e8d0ee..cde8c0e43d 100644 --- a/models/templates/Qwen3-Coder.jinja +++ b/models/templates/Qwen3-Coder.jinja @@ -29,7 +29,7 @@ {%- endif %} {%- endif %} {%- if tools is iterable and tools | length > 0 %} - {{- "\n\n# Tools\n\nYou have access to the following functions:\n\n" }} + {{- "\n\n# Tools\n\nYou have access to the following tools:\n\n" }} {{- "" }} {%- for tool in tools %} {%- if tool.function is defined %} @@ -63,7 +63,7 @@ {{- '\n' }} {%- endfor %} {{- "\n" }} - {{- '\n\nIf you choose to call a function ONLY reply in the following format with NO suffix:\n\n\n\n\nvalue_1\n\n\nThis is the value for the second parameter\nthat can span\nmultiple lines\n\n\n\n\n\nReminder:\n- Function calls MUST follow the specified format: an inner block must be nested within XML tags\n- Required parameters MUST be specified\n- You 
may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after\n- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls\n' }} + {{- '\n\nIf you choose to call a tool ONLY reply in the following format with NO suffix:\n\n\n\n\nvalue_1\n\n\nvalue_2\n\n\n\n\n\nReminder:\n- Function calls MUST follow the specified format: the tool calling block MUST begin with an opening tag and end with a closing tag.\n- Required parameters MUST be specified\n- You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after\n- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls\n' }} {%- endif %} {%- if system_message is defined %} {{- '<|im_end|>\n' }} diff --git a/models/templates/deepseek-ai-DeepSeek-R1-Distill-Llama-8B.jinja b/models/templates/deepseek-ai-DeepSeek-R1-Distill-Llama-8B.jinja index c2066bd739..299f7a7ff1 100644 --- a/models/templates/deepseek-ai-DeepSeek-R1-Distill-Llama-8B.jinja +++ b/models/templates/deepseek-ai-DeepSeek-R1-Distill-Llama-8B.jinja @@ -1 +1,44 @@ -{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + 
tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '' in content %}{% set content = content.split('')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|>\n'}}{% endif %} \ No newline at end of file +{% if not add_generation_prompt is defined -%} + {%- set add_generation_prompt = false -%} +{%- endif -%} +{%- set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') -%} +{%- for message in messages -%} + {%- if message['role'] == 'system' -%} + {%- set ns.system_prompt = message['content'] -%} + {%- endif -%} +{%- endfor -%}{{bos_token}}{{ns.system_prompt}} +{%- for message in messages -%} + {%- if message['role'] == 'user' -%} + {%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}} + {%- endif -%} + {%- if message['role'] == 'assistant' and message['content'] is none -%} + 
{%- set ns.is_tool = false -%} + {%- for tool in message['tool_calls']-%} + {%- if not ns.is_first -%}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}} + {%- set ns.is_first = true -%} + {%- else -%}{{'\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}} + {%- endif -%} + {%- endfor -%} + {%- endif -%} + {%- if message['role'] == 'assistant' and message['content'] is not none -%} + {%- if ns.is_tool -%}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}} + {%- set ns.is_tool = false -%} + {%- else -%} + {%- set content = message['content'] -%} + {%- if '' in content -%} + {%- set content = content.split('')[-1] -%} + {%- endif -%}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}} + {%- endif -%} + {%- endif -%} + {%- if message['role'] == 'tool' -%} + {%- set ns.is_tool = true -%} + {%- if ns.is_output_first -%}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}} + {%- set ns.is_output_first = false -%} + {%- else -%}{{'\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}} + {%- endif -%} + {%- endif -%} +{%- endfor -%} +{%- if ns.is_tool -%}{{'<|tool▁outputs▁end|>'}} +{%- endif -%} +{%- if add_generation_prompt and not ns.is_tool -%}{{'<|Assistant|>\n'}} +{%- endif %} \ No newline at end of file diff --git a/models/templates/deepseek-ai-DeepSeek-R1-Distill-Qwen-32B.jinja b/models/templates/deepseek-ai-DeepSeek-R1-Distill-Qwen-32B.jinja index c2066bd739..fff2b755e2 100644 --- a/models/templates/deepseek-ai-DeepSeek-R1-Distill-Qwen-32B.jinja +++ b/models/templates/deepseek-ai-DeepSeek-R1-Distill-Qwen-32B.jinja @@ -1 +1,47 @@ -{% if not 
add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '' in content %}{% set content = content.split('')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if 
add_generation_prompt and not ns.is_tool %}{{'<|Assistant|>\n'}}{% endif %} \ No newline at end of file +{% if not add_generation_prompt is defined -%} + {%- set add_generation_prompt = false -%} +{%- endif -%} +{%- set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') -%} +{%- for message in messages -%} + {%- if message['role'] == 'system' -%} + {%- set ns.system_prompt = message['content'] -%} + {%- endif -%} +{%- endfor -%}{{bos_token}}{{ns.system_prompt}} +{%- for message in messages -%} + {%- if message['role'] == 'user' -%} + {%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}} + {%- endif -%} + {%- if message['role'] == 'assistant' and message['content'] is none -%} + {%- set ns.is_tool = false -%} + {%- for tool in message['tool_calls']-%} + {%- if not ns.is_first -%} + {{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}} + {%- set ns.is_first = true -%} + {%- else -%} + {{'\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}} + {%- endif -%} + {%- endfor -%} + {{'<|tool▁calls▁end|><|end▁of▁sentence|>'}} + {%- endif -%} + {%- if message['role'] == 'assistant' and message['content'] is not none -%} + {%- if ns.is_tool -%}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}} + {%- set ns.is_tool = false -%} + {%- else -%} + {%- set content = message['content'] -%} + {%- if '' in content -%} + {%- set content = content.split('')[-1] -%} + {%- endif -%}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}} + {%- endif -%} + {%- endif -%} + {%- if message['role'] == 'tool' -%} + {%- set ns.is_tool = true -%} + {%- if ns.is_output_first -%}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + 
'<|tool▁output▁end|>'}} + {%- set ns.is_output_first = false -%} + {%- else -%}{{'\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}} + {%- endif -%} + {%- endif -%} +{%- endfor -%} +{%- if ns.is_tool -%}{{'<|tool▁outputs▁end|>'}} +{%- endif -%} +{%- if add_generation_prompt and not ns.is_tool -%}{{'<|Assistant|>\n'}} +{%- endif %} \ No newline at end of file diff --git a/models/templates/deepseek-ai-DeepSeek-V3.1.jinja b/models/templates/deepseek-ai-DeepSeek-V3.1.jinja index e5656196a3..6ef7fb123c 100644 --- a/models/templates/deepseek-ai-DeepSeek-V3.1.jinja +++ b/models/templates/deepseek-ai-DeepSeek-V3.1.jinja @@ -1,3 +1,71 @@ -{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% if not thinking is defined %}{% set thinking = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, system_prompt='', is_first_sp=true, is_last_user=false) %}{%- for message in messages %}{%- if message['role'] == 'system' %}{%- if ns.is_first_sp %}{% set ns.system_prompt = ns.system_prompt + message['content'] %}{% set ns.is_first_sp = false %}{%- else %}{% set ns.system_prompt = ns.system_prompt + ' +{% if not add_generation_prompt is defined -%} + {%- set add_generation_prompt = false -%} +{%- endif -%} +{%- if not thinking is defined -%} + {%- if enable_thinking is defined -%} + {%- set thinking = enable_thinking -%} + {%- else -%} + {%- set thinking = false -%} + {%- endif -%} +{%- endif -%} +{%- set ns = namespace(is_first=false, is_tool=false, system_prompt='', is_first_sp=true, is_last_user=false) -%} +{%- for message in messages -%} + {%- if message['role'] == 'system' -%} + {%- if ns.is_first_sp -%} + {%- set ns.system_prompt = ns.system_prompt + message['content'] -%} + {%- set ns.is_first_sp = false -%} + {%- else -%} + {%- set ns.system_prompt = ns.system_prompt + ' -' + message['content'] %}{%- endif %}{%- endif %}{%- endfor %}{{ bos_token }}{{ ns.system_prompt }}{%- for message in messages 
%}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{%- set ns.is_first = false -%}{%- set ns.is_last_user = true -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['tool_calls'] is defined and message['tool_calls'] is not none %}{%- if ns.is_last_user %}{{'<|Assistant|>'}}{%- endif %}{%- set ns.is_last_user = false -%}{%- set ns.is_first = false %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls'] %}{%- if not ns.is_first %}{%- if message['content'] is none %}{{'<|tool▁calls▁begin|><|tool▁call▁begin|>'+ tool['function']['name'] + '<|tool▁sep|>' + tool['function']['arguments'] + '<|tool▁call▁end|>'}}{%- else %}{{message['content'] + '<|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['function']['name'] + '<|tool▁sep|>' + tool['function']['arguments'] + '<|tool▁call▁end|>'}}{%- endif %}{%- set ns.is_first = true -%}{%- else %}{{'<|tool▁call▁begin|>'+ tool['function']['name'] + '<|tool▁sep|>' + tool['function']['arguments'] + '<|tool▁call▁end|>'}}{%- endif %}{%- endfor %}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- if message['role'] == 'assistant' and (message['tool_calls'] is not defined or message['tool_calls'] is none) %}{%- if ns.is_last_user %}{{'<|Assistant|>'}}{%- if message['prefix'] is defined and message['prefix'] and thinking %}{{''}} {%- else %}{{''}}{%- endif %}{%- endif %}{%- set ns.is_last_user = false -%}{%- if ns.is_tool %}{{message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{%- set content = message['content'] -%}{%- if '' in content %}{%- set content = content.split('', 1)[1] -%}{%- endif %}{{content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_last_user = false -%}{%- set ns.is_tool = true -%}{{'<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endfor -%}{%- if add_generation_prompt and ns.is_last_user and not ns.is_tool 
%}{{'<|Assistant|>'}}{%- if not thinking %}{{''}}{%- else %}{{''}}{%- endif %}{% endif %} \ No newline at end of file +' + message['content'] -%} + {%- endif -%} + {%- endif -%} +{%- endfor -%}{{ bos_token }}{{ ns.system_prompt }} +{%- for message in messages -%} + {%- if message['role'] == 'user' -%} + {%- set ns.is_tool = false -%} + {%- set ns.is_first = false -%} + {%- set ns.is_last_user = true -%}{{'<|User|>' + message['content']}} + {%- endif -%} + {%- if message['role'] == 'assistant' and message['tool_calls'] is defined and message['tool_calls'] is not none -%} + {%- if ns.is_last_user -%}{{'<|Assistant|>'}} + {%- endif -%} + {%- set ns.is_last_user = false -%} + {%- set ns.is_first = false -%} + {%- set ns.is_tool = false -%} + {%- for tool in message['tool_calls'] -%} + {%- if not ns.is_first -%} + {%- if message['content'] is none -%}{{'<|tool▁calls▁begin|><|tool▁call▁begin|>'+ tool['function']['name'] + '<|tool▁sep|>' + tool['function']['arguments'] + '<|tool▁call▁end|>'}} + {%- else -%}{{message['content'] + '<|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['function']['name'] + '<|tool▁sep|>' + tool['function']['arguments'] + '<|tool▁call▁end|>'}} + {%- endif -%} + {%- set ns.is_first = true -%} + {%- else -%}{{'<|tool▁call▁begin|>'+ tool['function']['name'] + '<|tool▁sep|>' + tool['function']['arguments'] + '<|tool▁call▁end|>'}} + {%- endif -%} + {%- endfor -%}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}} + {%- endif -%} + {%- if message['role'] == 'assistant' and (message['tool_calls'] is not defined or message['tool_calls'] is none) -%} + {%- if ns.is_last_user -%}{{'<|Assistant|>'}} + {%- if message['prefix'] is defined and message['prefix'] and thinking -%}{{''}} + {%- else -%}{{''}} + {%- endif -%} + {%- endif -%} + {%- set ns.is_last_user = false -%} + {%- if ns.is_tool -%}{{message['content'] + '<|end▁of▁sentence|>'}} + {%- set ns.is_tool = false -%} + {%- else -%} + {%- set content = message['content'] -%} + {%- if '' in content -%} + {%- set 
content = content.split('', 1)[1] -%} + {%- endif -%}{{content + '<|end▁of▁sentence|>'}} + {%- endif -%} + {%- endif -%} + {%- if message['role'] == 'tool' -%} + {%- set ns.is_last_user = false -%} + {%- set ns.is_tool = true -%}{{'<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}} + {%- endif -%} +{%- endfor -%} +{%- if add_generation_prompt and ns.is_last_user and not ns.is_tool -%}{{'<|Assistant|>'}} + {%- if not thinking -%}{{''}} + {%- else -%}{{''}} + {%- endif -%} +{%- endif %} \ No newline at end of file diff --git a/models/templates/moonshotai-Kimi-K2.jinja b/models/templates/moonshotai-Kimi-K2.jinja index ecb49a2108..e286d8a7b5 100644 --- a/models/templates/moonshotai-Kimi-K2.jinja +++ b/models/templates/moonshotai-Kimi-K2.jinja @@ -1,43 +1,43 @@ -{%- if tools -%} - <|im_system|>tool_declare<|im_middle|>{{ tools | tojson }}<|im_end|> -{%- endif -%} -{%- for message in messages -%} - {%- if loop.first and messages[0]['role'] != 'system' -%} - <|im_system|>system<|im_middle|>You are a helpful assistant<|im_end|> - {%- endif -%} - {%- if message['role'] == 'system' -%} - <|im_system|>system<|im_middle|> - {%- elif message['role'] == 'user' -%} - <|im_user|>user<|im_middle|> - {%- elif message['role'] == 'assistant' -%} - <|im_assistant|>assistant<|im_middle|> - {%- elif message['role'] == 'tool' -%} - <|im_system|>tool<|im_middle|> - {%- endif -%} - {%- if message['role'] == 'assistant' and message.get('tool_calls') -%} - {%- if message['content'] -%}{{ message['content'] }}{%- endif -%} - <|tool_calls_section_begin|> - {%- for tool_call in message['tool_calls'] -%} - {%- set func_name = tool_call['function']['name'] -%} - {%- set formatted_id = 'functions.' 
+ func_name + ':' + loop.index0|string -%} - <|tool_call_begin|>{{ formatted_id }}<|tool_call_argument_begin|>{{ tool_call['function']['arguments'] | tojson}}<|tool_call_end|> - {%- endfor -%} - <|tool_calls_section_end|> - {%- elif message['role'] == 'tool' -%} - ## Return of {{ message.tool_call_id }}\n{{ message['content'] }} - {%- elif message['content'] is string -%} - {{ message['content'] }} - {%- elif message['content'] is not none -%} - {% for content in message['content'] -%} - {% if content['type'] == 'image' or 'image' in content or 'image_url' in content -%} - <|media_start|>image<|media_content|><|media_pad|><|media_end|> - {% else -%} - {{ content['text'] }} - {%- endif -%} - {%- endfor -%} - {%- endif -%} - <|im_end|> -{%- endfor -%} -{%- if add_generation_prompt -%} - <|im_assistant|>assistant<|im_middle|> -{%- endif -%} +{%- if tools -%} + <|im_system|>tool_declare<|im_middle|>{{ tools | tojson }}<|im_end|> +{%- endif -%} +{%- for message in messages -%} + {%- if loop.first and messages[0]['role'] != 'system' -%} + <|im_system|>system<|im_middle|>You are a helpful assistant<|im_end|> + {%- endif -%} + {%- if message['role'] == 'system' -%} + <|im_system|>system<|im_middle|> + {%- elif message['role'] == 'user' -%} + <|im_user|>user<|im_middle|> + {%- elif message['role'] == 'assistant' -%} + <|im_assistant|>assistant<|im_middle|> + {%- elif message['role'] == 'tool' -%} + <|im_system|>tool<|im_middle|> + {%- endif -%} + {%- if message['role'] == 'assistant' and message.get('tool_calls') -%} + {%- if message['content'] -%}{{ message['content'] }}{%- endif -%} + <|tool_calls_section_begin|> + {%- for tool_call in message['tool_calls'] -%} + {%- set func_name = tool_call['function']['name'] -%} + {%- set formatted_id = 'functions.' 
+ func_name + ':' + loop.index0|string -%} + <|tool_call_begin|>{{ formatted_id }}<|tool_call_argument_begin|>{{ tool_call['function']['arguments'] | tojson}}<|tool_call_end|> + {%- endfor -%} + <|tool_calls_section_end|> + {%- elif message['role'] == 'tool' -%} + ## Return of {{ message.tool_call_id }}\n{{ message['content'] }} + {%- elif message['content'] is string -%} + {{ message['content'] }} + {%- elif message['content'] is not none -%} + {% for content in message['content'] -%} + {% if content['type'] == 'image' or 'image' in content or 'image_url' in content -%} + <|media_start|>image<|media_content|><|media_pad|><|media_end|> + {% else -%} + {{ content['text'] }} + {%- endif -%} + {%- endfor -%} + {%- endif -%} + <|im_end|> +{%- endfor -%} +{%- if add_generation_prompt -%} + <|im_assistant|>assistant<|im_middle|> +{%- endif -%} diff --git a/models/templates/unsloth-Apriel-1.5.jinja b/models/templates/unsloth-Apriel-1.5.jinja index 29e582fbf6..8e59d2f1d4 100644 --- a/models/templates/unsloth-Apriel-1.5.jinja +++ b/models/templates/unsloth-Apriel-1.5.jinja @@ -86,19 +86,19 @@ Prior to generating the function calls, you should generate the reasoning for wh {%- set add_tool_id = false -%} {%- endif -%} {{- '<|assistant|>\n' -}} - {%- if message['content'] is not none and message['content']|length > 0 -%} + {%- if message['content'] is defined and message['content'] is not none and message['content']|length > 0 -%} {%- if message['content'] is not string and message['content'][0]['text'] is not none %} {{- message['content'][0]['text'] }} {%- else %} {{- message['content'] -}} {%- endif -%} - {%- elif message['chosen'] is not none and message['chosen']|length > 0 -%} + {%- elif message['chosen'] is defined and message['chosen'] is not none and message['chosen']|length > 0 -%} {{- message['chosen'][0] -}} {%- endif -%} {%- if add_thoughts and 'thought' in message and message['thought'] is not none -%} {{- '' + message['thought'] + '' -}} {%- endif -%} - {%- if 
message['tool_calls'] is not none and message['tool_calls']|length > 0 -%} + {%- if message['tool_calls'] is defined and message['tool_calls'] is not none and message['tool_calls']|length > 0 -%} {{- '\n[' -}} {%- for tool_call in message["tool_calls"] -%} {{- '{"name": "' + tool_call['function']['name'] + '", "arguments": ' + tool_call['function']['arguments']|string -}} diff --git a/scripts/server-bench.py b/scripts/server-bench.py index dbbb0939ff..2ef7258712 100755 --- a/scripts/server-bench.py +++ b/scripts/server-bench.py @@ -230,7 +230,7 @@ def benchmark( logger.info("") logger.info(f"Benchmark duration: {token_t_last:.2f} s") - logger.info(f"Request throughput: {n_prompts / token_t_last:.2f} requests/s = {n_prompts / (token_t_last/60):.2f} requests/min") + logger.info(f"Request throughput: {n_prompts / token_t_last:.2f} requests/s = {n_prompts / (token_t_last / 60):.2f} requests/min") logger.info(f"Total prompt length: {np.sum(prompt_n)} tokens") logger.info(f"Average prompt length: {np.mean(prompt_n):.2f} tokens") logger.info(f"Average prompt latency: {1e3 * np.mean(prompt_t):.2f} ms") diff --git a/scripts/server-test-model.py b/scripts/server-test-model.py new file mode 100644 index 0000000000..9049d80279 --- /dev/null +++ b/scripts/server-test-model.py @@ -0,0 +1,202 @@ +import argparse +import json +import requests +import logging +import sys + +handler = logging.StreamHandler(sys.stdout) +handler.terminator = "" # ← no newline +logging.basicConfig(level=logging.INFO, format='%(message)s', handlers=[handler]) +logger = logging.getLogger("server-test-model") + + +def run_query(url, messages, tools=None, stream=False, tool_choice=None): + payload = { + "messages": messages, + "stream": stream, + "max_tokens": 5000, + } + if tools: + payload["tools"] = tools + if tool_choice: + payload["tool_choice"] = tool_choice + + try: + response = requests.post(url, json=payload, stream=stream) + response.raise_for_status() + except 
requests.exceptions.RequestException as e: + if e.response is not None: + logger.info(f"Response error: {e} for {e.response.content}\n") + else: + logger.info(f"Error connecting to server: {e}\n") + return None + + full_content = "" + reasoning_content = "" + tool_calls = [] + + if stream: + logger.info(f"--- Streaming response (Tools: {bool(tools)}) ---\n") + for line in response.iter_lines(): + if line: + decoded_line = line.decode("utf-8") + if decoded_line.startswith("data: "): + data_str = decoded_line[6:] + if data_str == "[DONE]": + break + try: + data = json.loads(data_str) + if "choices" in data and len(data["choices"]) > 0: + delta = data["choices"][0].get("delta", {}) + + # Content + content_chunk = delta.get("content", "") + if content_chunk: + full_content += content_chunk + logger.info(content_chunk) + + # Reasoning + reasoning_chunk = delta.get("reasoning_content", "") + if reasoning_chunk: + reasoning_content += reasoning_chunk + logger.info(f"\x1B[3m{reasoning_chunk}\x1B[0m") + + # Tool calls + if "tool_calls" in delta: + for tc in delta["tool_calls"]: + index = tc.get("index") + if index is not None: + while len(tool_calls) <= index: + # Using "function" as type default but could be flexible + tool_calls.append( + { + "id": "", + "type": "function", + "function": { + "name": "", + "arguments": "", + }, + } + ) + + if "id" in tc: + tool_calls[index]["id"] += tc["id"] + if "function" in tc: + if "name" in tc["function"]: + tool_calls[index]["function"][ + "name" + ] += tc["function"]["name"] + if "arguments" in tc["function"]: + tool_calls[index]["function"][ + "arguments" + ] += tc["function"]["arguments"] + + except json.JSONDecodeError: + logger.info(f"Failed to decode JSON: {data_str}\n") + logger.info("\n--- End of Stream ---\n") + else: + logger.info(f"--- Non-streaming response (Tools: {bool(tools)}) ---\n") + data = response.json() + if "choices" in data and len(data["choices"]) > 0: + message = data["choices"][0].get("message", {}) + 
full_content = message.get("content", "") + reasoning_content = message.get("reasoning_content", "") + tool_calls = message.get("tool_calls", []) + logger.info(full_content) + logger.info("--- End of Response ---\n") + + return { + "content": full_content, + "reasoning_content": reasoning_content, + "tool_calls": tool_calls, + } + + +def test_chat(url, stream): + logger.info(f"\n=== Testing Chat (Stream={stream}) ===\n") + messages = [{"role": "user", "content": "What is the capital of France?"}] + result = run_query(url, messages, stream=stream) + + if result: + if result["content"]: + logger.info("PASS: Output received.\n") + else: + logger.info("WARN: No content received (valid if strict tool call, but unexpected here).\n") + + if result.get("reasoning_content"): + logger.info(f"INFO: Reasoning content detected ({len(result['reasoning_content'])} chars).\n") + else: + logger.info("INFO: No reasoning content detected (Standard model behavior).\n") + else: + logger.info("FAIL: No result.\n") + + +def test_tool_call(url, stream): + logger.info(f"\n=== Testing Tool Call (Stream={stream}) ===\n") + messages = [ + { + "role": "user", + "content": "What is the weather in London? Please use the get_weather tool.", + } + ] + tools = [ + { + "type": "function", + "function": { + "name": "get_weather", + "description": "Get the current weather in a given location", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city and state, e.g. 
San Francisco, CA", + }, + "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]}, + }, + "required": ["location"], + }, + }, + } + ] + + result = run_query(url, messages, tools=tools, tool_choice="auto", stream=stream) + + if result: + tcs = result.get("tool_calls") + if tcs and len(tcs) > 0: + logger.info("PASS: Tool calls detected.") + for tc in tcs: + func = tc.get("function", {}) + logger.info(f" Tool: {func.get('name')}, Args: {func.get('arguments')}\n") + else: + logger.info(f"FAIL: No tool calls. Content: {result['content']}\n") + + if result.get("reasoning_content"): + logger.info( + f"INFO: Reasoning content detected during tool call ({len(result['reasoning_content'])} chars).\n" + ) + else: + logger.info("FAIL: Query failed.\n") + + +def main(): + parser = argparse.ArgumentParser(description="Test llama-server functionality.") + parser.add_argument("--host", default="localhost", help="Server host") + parser.add_argument("--port", default=8080, type=int, help="Server port") + args = parser.parse_args() + + base_url = f"http://{args.host}:{args.port}/v1/chat/completions" + logger.info(f"Testing server at {base_url}\n") + + # Non-streaming tests + test_chat(base_url, stream=False) + test_tool_call(base_url, stream=False) + + # Streaming tests + test_chat(base_url, stream=True) + test_tool_call(base_url, stream=True) + + +if __name__ == "__main__": + main() diff --git a/scripts/snapdragon/qdc/tests/test_bench.py b/scripts/snapdragon/qdc/tests/test_bench.py index 651ab5b717..bd19e5d26c 100644 --- a/scripts/snapdragon/qdc/tests/test_bench.py +++ b/scripts/snapdragon/qdc/tests/test_bench.py @@ -14,7 +14,7 @@ cli_pref=f'cd {pkg_path} && LD_LIBRARY_PATH={lib_path} ADSP_LIBRARY_PATH={lib_pa def run_cmd(cmd): p = subprocess.run(cmd, text = True, stdout = subprocess.PIPE, stderr = subprocess.STDOUT) sys.stdout.write(p.stdout) - assert(p.returncode == 0) + assert (p.returncode == 0) @pytest.mark.dependency() diff --git a/src/models/models.h 
b/src/models/models.h index 3c66d32531..b15fdd2e5f 100644 --- a/src/models/models.h +++ b/src/models/models.h @@ -1,10 +1,11 @@ #pragma once -#include "../llama-model.h" #include "../llama-graph.h" +#include "../llama-model.h" // TODO: remove in follow-up PR - move to .cpp files #include "../llama-memory-recurrent.h" + #include struct llm_graph_context_mamba : public llm_graph_context { @@ -12,9 +13,16 @@ struct llm_graph_context_mamba : public llm_graph_context { virtual ~llm_graph_context_mamba() = default; - ggml_tensor * build_mamba_layer(llm_graph_input_rs * inp, ggml_tensor * cur, const llama_model & model, const llama_ubatch & ubatch, int il); - ggml_tensor * build_mamba2_layer(llm_graph_input_rs * inp, ggml_tensor * cur, const llama_model & model, const llama_ubatch & ubatch, int il) const; - + ggml_tensor * build_mamba_layer(llm_graph_input_rs * inp, + ggml_tensor * cur, + const llama_model & model, + const llama_ubatch & ubatch, + int il); + ggml_tensor * build_mamba2_layer(llm_graph_input_rs * inp, + ggml_tensor * cur, + const llama_model & model, + const llama_ubatch & ubatch, + int il) const; }; // Base class for RWKV-related models @@ -158,8 +166,7 @@ struct llm_build_ernie4_5_moe : public llm_graph_context { llm_build_ernie4_5_moe(const llama_model & model, const llm_graph_params & params); }; -template -struct llm_build_exaone4 : public llm_graph_context { +template struct llm_build_exaone4 : public llm_graph_context { llm_build_exaone4(const llama_model & model, const llm_graph_params & params); }; @@ -183,8 +190,7 @@ struct llm_build_gemma2_iswa : public llm_graph_context { llm_build_gemma2_iswa(const llama_model & model, const llm_graph_params & params); }; -template -struct llm_build_gemma3 : public llm_graph_context { +template struct llm_build_gemma3 : public llm_graph_context { llm_build_gemma3(const llama_model & model, const llm_graph_params & params); }; @@ -195,8 +201,8 @@ struct llm_build_gemma3n_iswa : public llm_graph_context { const 
int64_t n_embd_altup; const int64_t n_altup; const int i_altup_act; - const int n_layer_sparsity = 10; // number of layers using activation sparsity - const float f_sparsity_std_mul = 1.6448533535003662f; // std_multiplier = normal_dist.icdf(0.95) + const int n_layer_sparsity = 10; // number of layers using activation sparsity + const float f_sparsity_std_mul = 1.6448533535003662f; // std_multiplier = normal_dist.icdf(0.95) llm_build_gemma3n_iswa(const llama_model & model, const llm_graph_params & params); ggml_tensor * calc_magnitude(ggml_tensor * x); @@ -237,27 +243,26 @@ struct llm_build_gptneox : public llm_graph_context { struct llm_build_granite : public llm_graph_context { llm_build_granite(const llama_model & model, const llm_graph_params & params); -private: - ggml_tensor * build_attention_layer( - ggml_tensor * cur, - ggml_tensor * inp_pos, - llm_graph_input_attn_kv * inp_attn, - const llama_model & model, - const int64_t n_embd_head, - const int il); + private: + ggml_tensor * build_attention_layer(ggml_tensor * cur, + ggml_tensor * inp_pos, + llm_graph_input_attn_kv * inp_attn, + const llama_model & model, + const int64_t n_embd_head, + const int il); - ggml_tensor * build_layer_ffn( - ggml_tensor * cur, - ggml_tensor * inpSA, - const llama_model & model, - const int il); + ggml_tensor * build_layer_ffn(ggml_tensor * cur, ggml_tensor * inpSA, const llama_model & model, const int il); }; struct llm_build_granite_hybrid : public llm_graph_context_mamba { llm_build_granite_hybrid(const llama_model & model, const llm_graph_params & params); ggml_tensor * build_layer_ffn(ggml_tensor * cur, ggml_tensor * inpSA, const llama_model & model, const int il); - ggml_tensor * build_attention_layer(ggml_tensor * cur, ggml_tensor * inp_pos, llm_graph_input_attn_kv * inp_attn, - const llama_model & model,const int64_t n_embd_head, const int il); + ggml_tensor * build_attention_layer(ggml_tensor * cur, + ggml_tensor * inp_pos, + llm_graph_input_attn_kv * inp_attn, + 
const llama_model & model, + const int64_t n_embd_head, + const int il); }; struct llm_build_grok : public llm_graph_context { @@ -321,9 +326,11 @@ struct llm_build_lfm2 : public llm_graph_context { llm_build_lfm2(const llama_model & model, const llm_graph_params & params); ggml_tensor * build_moe_feed_forward(ggml_tensor * cur, int il) const; ggml_tensor * build_dense_feed_forward(ggml_tensor * cur, int il) const; - ggml_tensor * build_attn_block(ggml_tensor * cur, ggml_tensor * inp_pos, llm_graph_input_attn_kv * inp_attn, int il) const; + ggml_tensor * build_attn_block(ggml_tensor * cur, + ggml_tensor * inp_pos, + llm_graph_input_attn_kv * inp_attn, + int il) const; ggml_tensor * build_shortconv_block(ggml_tensor * cur, llm_graph_input_rs * inp_recr, int il); - }; struct llm_build_llada : public llm_graph_context { @@ -382,16 +389,18 @@ struct llm_build_nemotron : public llm_graph_context { struct llm_build_nemotron_h : public llm_graph_context_mamba { llm_build_nemotron_h(const llama_model & model, const llm_graph_params & params); ggml_tensor * build_ffn_layer(ggml_tensor * cur, const llama_model & model, const int il); - ggml_tensor * build_attention_layer(ggml_tensor * cur, llm_graph_input_attn_kv * inp_attn, - const llama_model & model, const int64_t n_embd_head, const int il); + ggml_tensor * build_attention_layer(ggml_tensor * cur, + llm_graph_input_attn_kv * inp_attn, + const llama_model & model, + const int64_t n_embd_head, + const int il); }; struct llm_build_neo_bert : public llm_graph_context { llm_build_neo_bert(const llama_model & model, const llm_graph_params & params); }; -template -struct llm_build_olmo2 : public llm_graph_context { +template struct llm_build_olmo2 : public llm_graph_context { llm_build_olmo2(const llama_model & model, const llm_graph_params & params); }; @@ -423,17 +432,23 @@ struct llm_build_phi2 : public llm_graph_context { llm_build_phi2(const llama_model & model, const llm_graph_params & params); }; -template -struct 
llm_build_phi3 : public llm_graph_context { +template struct llm_build_phi3 : public llm_graph_context { llm_build_phi3(const llama_model & model, const llm_graph_params & params); }; struct llm_build_plamo2 : public llm_graph_context_mamba { llm_build_plamo2(const llama_model & model, const llm_graph_params & params); - private: - ggml_tensor * build_plamo2_mamba_layer(llm_graph_input_rs * inp, ggml_tensor * cur, const llama_model & model, const llama_ubatch & ubatch, int il); - ggml_tensor * build_plamo2_attn_layer(llm_graph_input_attn_kv * inp, ggml_tensor * inp_pos, ggml_tensor * cur, - const llama_model & model, int il); + private: + ggml_tensor * build_plamo2_mamba_layer(llm_graph_input_rs * inp, + ggml_tensor * cur, + const llama_model & model, + const llama_ubatch & ubatch, + int il); + ggml_tensor * build_plamo2_attn_layer(llm_graph_input_attn_kv * inp, + ggml_tensor * inp_pos, + ggml_tensor * cur, + const llama_model & model, + int il); }; struct llm_build_plamo : public llm_graph_context { @@ -479,24 +494,20 @@ struct llm_build_qwen3vlmoe : public llm_graph_context { struct llm_build_qwen3next : public llm_graph_context_mamba { llm_build_qwen3next(const llama_model & model, const llm_graph_params & params); -private: - ggml_tensor * build_layer_attn( - llm_graph_input_attn_kv * inp_attn, - ggml_tensor * cur, - ggml_tensor * inp_pos, - int il); + private: + ggml_tensor * build_layer_attn(llm_graph_input_attn_kv * inp_attn, + ggml_tensor * cur, + ggml_tensor * inp_pos, + int il); - ggml_tensor * build_layer_attn_linear( - llm_graph_input_rs * inp, - ggml_tensor * cur, - ggml_tensor * causal_mask, - ggml_tensor * identity, - ggml_tensor * diag_mask, - int il); + ggml_tensor * build_layer_attn_linear(llm_graph_input_rs * inp, + ggml_tensor * cur, + ggml_tensor * causal_mask, + ggml_tensor * identity, + ggml_tensor * diag_mask, + int il); - ggml_tensor * build_layer_ffn( - ggml_tensor * cur, - int il); + ggml_tensor * build_layer_ffn(ggml_tensor * cur, int 
il); // returns pair of output and new state std::pair build_delta_net_chunking( @@ -681,8 +692,7 @@ struct llm_build_seed_oss : public llm_graph_context { llm_build_seed_oss(const llama_model & model, const llm_graph_params & params); }; -template -struct llm_build_smallthinker : public llm_graph_context { +template struct llm_build_smallthinker : public llm_graph_context { llm_build_smallthinker(const llama_model & model, const llm_graph_params & params); }; diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 350bffc315..1f29819f1f 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -187,9 +187,7 @@ if (NOT WIN32 OR NOT BUILD_SHARED_LIBS) # llama_build_and_test(test-double-float.cpp) # SLOW endif() -llama_build_and_test(test-chat-parser.cpp) llama_build_and_test(test-chat-peg-parser.cpp peg-parser/simple-tokenize.cpp) -llama_build_and_test(test-chat-template.cpp) llama_build_and_test(test-jinja.cpp) llama_test(test-jinja NAME test-jinja-py ARGS -py LABEL python) llama_build_and_test(test-json-partial.cpp) @@ -264,3 +262,5 @@ target_link_libraries(${TEST_TARGET} PRIVATE llama) llama_build_and_test(test-alloc.cpp) target_include_directories(test-alloc PRIVATE ${PROJECT_SOURCE_DIR}/ggml/src) + + diff --git a/tests/test-backend-ops.cpp b/tests/test-backend-ops.cpp index 5d5e44a0c7..9b5f644795 100644 --- a/tests/test-backend-ops.cpp +++ b/tests/test-backend-ops.cpp @@ -7756,6 +7756,8 @@ static std::vector> make_test_cases_eval() { test_cases.emplace_back(new test_mul_mat(type_a, GGML_TYPE_F32, 1, 64, 256, {1, 1}, {1, 1})); } + test_cases.emplace_back(new test_mul_mat(GGML_TYPE_Q8_0, GGML_TYPE_F32, 6, 4096, 5120, {1, 1}, {1, 1})); + #if 0 // test the mat-mat path for Metal for (int k = 1; k < 512; ++k) { diff --git a/tests/test-chat-parser.cpp b/tests/test-chat-parser.cpp deleted file mode 100644 index 6f44a2b421..0000000000 --- a/tests/test-chat-parser.cpp +++ /dev/null @@ -1,617 +0,0 @@ -// Tests chat handling, including grammar generation and 
parsing for tool calling, for various templates. -// -// Also acts as a CLI to generate a Markdown summary of the formats of Jinja templates, -// e.g. given Minja (http://github.com/google/minja) checked out in parent dir: -// -// cmake -B build && cmake --build build --parallel && ./build/bin/test-chat ../minja/build/tests/*.jinja 2>/dev/null -// -#include -#include -#include - -#include "chat-parser.h" -#include "common.h" -#include "log.h" -#include "regex-partial.h" - -template -static void assert_equals(const std::string_view label, const T & expected, const T & actual) { - if (expected != actual) { - std::cerr << label << std::endl; - std::cerr << "Expected: " << expected << std::endl; - std::cerr << "Actual: " << actual << std::endl; - std::cerr << std::flush; - throw std::runtime_error("Test failed"); - } -} - -template -static void assert_equals(const T & expected, const T & actual) { - assert_equals("", expected, actual); -} -static void assert_equals(const char * expected, const std::string & actual) { - return assert_equals(expected, actual); -} - -static void assert_throws(const std::function & fn, const std::string & expected_exception_pattern = "") { - try { - fn(); - } catch (const std::exception & e) { - if (expected_exception_pattern.empty()) { - return; - } - std::regex expected_exception_regex(expected_exception_pattern); - std::string actual_message = e.what(); - if (std::regex_search(actual_message, expected_exception_regex)) { - return; - } - throw std::runtime_error("Exception doesn't match expected pattern: " + actual_message + " (pattern: " + expected_exception_pattern + ")"); - throw std::runtime_error("Exception of unexpected type: " + std::string(e.what())); - } - throw std::runtime_error("Exception was expected but not thrown"); -} - -static void test_reasoning() { - //common_log_set_verbosity_thold(LOG_DEFAULT_DEBUG); - { - common_chat_parser_params params; - params.format = COMMON_CHAT_FORMAT_CONTENT_ONLY; - params.reasoning_format = 
COMMON_REASONING_FORMAT_NONE; - params.reasoning_in_content = false; - params.thinking_forced_open = false; - common_chat_msg_parser builder("CogitoErgo sum", /* is_partial= */ false, params); - assert_equals(false, builder.try_parse_reasoning("", "")); - assert_equals("CogitoErgo sum", builder.consume_rest()); - } - { - common_chat_parser_params params; - params.format = COMMON_CHAT_FORMAT_CONTENT_ONLY; - params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK; - params.reasoning_in_content = false; - params.thinking_forced_open = false; - common_chat_msg_parser builder("CogitoErgo sum", /* is_partial= */ false, params); - assert_equals(true, builder.try_parse_reasoning("", "")); - assert_equals(std::string("Cogito"), builder.result().reasoning_content); - assert_equals("Ergo sum", builder.consume_rest()); - } - { - common_chat_parser_params params; - params.format = COMMON_CHAT_FORMAT_CONTENT_ONLY; - params.reasoning_format = COMMON_REASONING_FORMAT_NONE; - params.reasoning_in_content = false; - params.thinking_forced_open = false; - common_chat_msg_parser builder("CogitoErgo sum", /* is_partial= */ false, params); - assert_equals(false, builder.try_parse_reasoning("", "")); - assert_equals("CogitoErgo sum", builder.consume_rest()); - } - { - common_chat_parser_params params; - params.format = COMMON_CHAT_FORMAT_CONTENT_ONLY; - params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK; - params.reasoning_in_content = false; - params.thinking_forced_open = true; - common_chat_msg_parser builder("CogitoErgo sum", /* is_partial= */ false, params); - assert_equals(true, builder.try_parse_reasoning("", "")); - assert_equals(std::string("Cogito"), builder.result().reasoning_content); - assert_equals("Ergo sum", builder.consume_rest()); - } - { - common_chat_parser_params params; - params.format = COMMON_CHAT_FORMAT_CONTENT_ONLY; - params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK; - params.reasoning_in_content = true; - params.thinking_forced_open = true; 
- common_chat_msg_parser builder("CogitoErgo sum", /* is_partial= */ false, params); - assert_equals(true, builder.try_parse_reasoning("", "")); - assert_equals("Cogito", builder.result().content); - assert_equals("Ergo sum", builder.consume_rest()); - } - { - const std::string variant("content_only_inline_think"); - common_chat_parser_params params; - params.format = COMMON_CHAT_FORMAT_CONTENT_ONLY; - params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK; - params.reasoning_in_content = false; - params.thinking_forced_open = false; - params.parse_tool_calls = false; - const std::string input = "PenseBonjour"; - auto msg = common_chat_parse(input, false, params); - assert_equals(variant, std::string("Pense"), msg.reasoning_content); - assert_equals(variant, std::string("Bonjour"), msg.content); - } - { - const std::string variant("llama_3_inline_think"); - common_chat_parser_params params; - params.format = COMMON_CHAT_FORMAT_LLAMA_3_X; - params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK; - params.reasoning_in_content = false; - params.thinking_forced_open = false; - params.parse_tool_calls = false; - const std::string input = "PlanRéponse"; - auto msg = common_chat_parse(input, false, params); - assert_equals(variant, std::string("Plan"), msg.reasoning_content); - assert_equals(variant, std::string("Réponse"), msg.content); - } - // Test DeepSeek V3.1 parsing - reasoning content followed by "" and then regular content - { - common_chat_parser_params params; - params.format = COMMON_CHAT_FORMAT_DEEPSEEK_V3_1; - params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK; - params.reasoning_in_content = false; - params.thinking_forced_open = true; - params.parse_tool_calls = true; - const std::string variant("deepseek_v3_1_reasoning_format_deepseek"); - common_chat_msg_parser builder("REASONINGok", /* is_partial= */ false, params); - assert_equals(variant, true, builder.try_parse_reasoning("", "")); - assert_equals(variant, std::string("REASONING"), 
builder.result().reasoning_content); - assert_equals(variant, std::string("ok"), builder.consume_rest()); - } - // Test DeepSeek V3.1 parsing - reasoning_format none - reasoning content followed by "" and then regular content - { - common_chat_parser_params params; - params.format = COMMON_CHAT_FORMAT_DEEPSEEK_V3_1; - params.reasoning_format = COMMON_REASONING_FORMAT_NONE; - params.reasoning_in_content = false; - params.thinking_forced_open = true; - params.parse_tool_calls = true; - const std::string variant("deepseek_v3_1_reasoning_format_none"); - const std::string input = "REASONINGok"; - auto msg = common_chat_parse(input, false, params); - assert_equals(variant, std::string("REASONINGok"), msg.content); - assert_equals(variant, std::string(""), msg.reasoning_content); - } -} - -static void test_regex() { - auto test_throws = [](const std::string & input, const std::string & regex, const std::string & expected_exception_pattern = "") { - common_chat_msg_parser builder(input, /* is_partial= */ false, {}); - assert_throws([&]() { builder.consume_regex(common_regex(regex)); }, expected_exception_pattern); - }; - - test_throws("Hello, world!", "abc", "^abc$"); - test_throws("Hello, world!", "e", "^e$"); - - { - common_chat_msg_parser builder("Hello, world!", /* is_partial= */ false, {}); - builder.consume_regex(common_regex("Hello")); - assert_equals(", world!", builder.consume_rest()); - } - - { - // When in non partial mode, we can say whether the regex was consumed or not. 
- common_chat_msg_parser builder("Hello,", /* is_partial= */ false, {}); - assert_equals(false, builder.try_consume_regex(common_regex("Hello, world!")).has_value()); - } - { - common_chat_msg_parser builder("Hello,", /* is_partial= */ false, {}); - auto res = builder.try_consume_regex(common_regex("H(el)l(?:o, world!)?")); - assert_equals(true, res.has_value()); - // Verify captures - assert_equals(2, res->groups.size()); - assert_equals("Hell", builder.str(res->groups[0])); - assert_equals("el", builder.str(res->groups[1])); - // Verify position is after the match - assert_equals(4, builder.pos()); - assert_equals("o,", builder.consume_rest()); - } - { - // But in partial mode, we have a partial final match / can't decide, so we throw a partial exception. - common_chat_msg_parser builder("Hello,", /* is_partial= */ true, {}); - assert_throws([&]() { - builder.try_consume_regex(common_regex("Hello, world!")); - }, "^Hello, world!$"); - } - - // Now regardless of the mode, we can tell these aren't a match. 
- for (const auto is_partial : {false, true}) { - common_chat_msg_parser builder("Hello,", is_partial, {}); - assert_equals(false, builder.try_consume_regex(common_regex("a(b|c)(d|e)f")).has_value()); - } - for (const auto is_partial : {false, true}) { - common_chat_msg_parser builder("Hello,", is_partial, {}); - assert_equals(false, builder.try_consume_literal("Oh")); - } -} - -const std::vector barely_healable_jsons = { - "{", - "{\"", - "{\"\\", - "{\"n", - "{\"name\"", - "{\"name\":", - "{\"name\":\"", - "{\"name\":\"\\", - "{\"name\":\"python", - "{\"name\":\"python\\", - "{\",", - "{\":", - "{\"[", - "{\"]", - "{\"{", - "{\"}", - "{\"1", - "{\"name\":\",", - "{\"name\":\":", - "{\"name\":\"[", - "{\"name\":\"]", - "{\"name\":\"{", - "{\"name\":\"}", - "{\"name\":\"1", -}; - -static void test(const std::string & input, bool is_partial, const std::vector> & args_paths, const std::vector> & content_paths, const std::string & expected) { - common_chat_msg_parser builder(input, is_partial, {}); - auto js = builder.try_consume_json_with_dumped_args(args_paths, content_paths); - assert_equals(true, js.has_value()); - assert_equals(is_partial, js->is_partial); - assert_equals(expected, args_paths.size() == 1 && args_paths[0].empty() ? 
js->value.get() : js->value.dump()); -} - -static void test_deepseek_v3_1_tool_calls() { - //common_log_set_verbosity_thold(LOG_DEFAULT_DEBUG); - // variant: happy path for when it works as the model card says it should - const std::string variant("simple"); - common_chat_parser_params params; - params.format = COMMON_CHAT_FORMAT_DEEPSEEK_V3_1; - params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK; - params.reasoning_in_content = false; - params.thinking_forced_open = false; - params.parse_tool_calls = true; - const std::string input = "<|tool▁calls▁begin|><|tool▁call▁begin|>get_time<|tool▁sep|>{\"city\": \"Tokyo\"}<|tool▁call▁end|><|tool▁calls▁end|>"; - auto msg = common_chat_parse(input, false, params); - assert_equals(variant, 1, msg.tool_calls.size()); - assert_equals(variant, std::string("get_time"), msg.tool_calls[0].name); - // JSON arguments are dumped without spaces - assert_equals(variant, std::string("{\"city\":\"Tokyo\"}"), msg.tool_calls[0].arguments); - assert_equals(variant, std::string(""), msg.content); - assert_equals(variant, std::string(""), msg.reasoning_content); - - // variant: simple + thinking open - { - common_chat_parser_params params; - params.format = COMMON_CHAT_FORMAT_DEEPSEEK_V3_1; - params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK; - params.reasoning_in_content = false; - params.thinking_forced_open = true; - params.parse_tool_calls = true; - const std::string variant("simple_thinking"); - const std::string in = "REASONING<|tool▁calls▁begin|><|tool▁call▁begin|>get_time<|tool▁sep|>{\"city\": \"Tokyo\"}<|tool▁call▁end|><|tool▁calls▁end|>"; - auto m = common_chat_parse(in, false, params); - assert_equals(variant, 1, m.tool_calls.size()); - assert_equals(variant, std::string("get_time"), m.tool_calls[0].name); - assert_equals(variant, std::string("{\"city\":\"Tokyo\"}"), m.tool_calls[0].arguments); - assert_equals(variant, std::string(""), m.content); - assert_equals(variant, std::string("REASONING"), 
m.reasoning_content); - } - // variant: simple + multiple tool calls - { - common_chat_parser_params params; - params.format = COMMON_CHAT_FORMAT_DEEPSEEK_V3_1; - params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK; - params.reasoning_in_content = false; - params.thinking_forced_open = false; - params.parse_tool_calls = true; - const std::string variant("simple_multiple_tool_calls"); - const std::string in = "CONTENT<|tool▁calls▁begin|><|tool▁call▁begin|>get_time<|tool▁sep|>{\"city\": \"Paris\"}<|tool▁call▁end|><|tool▁call▁begin|>get_weather<|tool▁sep|>{\"city\": \"Paris\"}<|tool▁call▁end|><|tool▁calls▁end|>"; - auto m = common_chat_parse(in, false, params); - assert_equals(variant, 2, m.tool_calls.size()); - assert_equals(variant, std::string("get_time"), m.tool_calls[0].name); - assert_equals(variant, std::string("{\"city\":\"Paris\"}"), m.tool_calls[0].arguments); - assert_equals(variant, std::string("get_weather"), m.tool_calls[1].name); - assert_equals(variant, std::string("{\"city\":\"Paris\"}"), m.tool_calls[1].arguments); - assert_equals(variant, std::string("CONTENT"), m.content); - assert_equals(variant, std::string(""), m.reasoning_content); - } - - - // variant: thinking forced open + tool call in reasoning content - { - common_chat_parser_params params; - params.format = COMMON_CHAT_FORMAT_DEEPSEEK_V3_1; - params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK; - params.reasoning_in_content = false; - params.thinking_forced_open = true; - params.parse_tool_calls = true; - const std::string variant("thinking_forced_open_tool_call_in_reasoning"); - const std::string in = "REASONING<|tool▁calls▁begin|><|tool▁call▁begin|>get_time2<|tool▁sep|>{\"city\": \"Tokyo2\"}<|tool▁call▁end|><|tool▁calls▁end|>REASONING<|tool▁calls▁begin|><|tool▁call▁begin|>get_time<|tool▁sep|>{\"city\": \"Tokyo\"}<|tool▁call▁end|><|tool▁calls▁end|>"; - auto m = common_chat_parse(in, false, params); - assert_equals(variant, 1, m.tool_calls.size()); - assert_equals(variant, 
std::string("get_time"), m.tool_calls[0].name); - assert_equals(variant, std::string("{\"city\":\"Tokyo\"}"), m.tool_calls[0].arguments); - assert_equals(variant, std::string(""), m.content); - assert_equals(variant, std::string("REASONING<|tool▁calls▁begin|><|tool▁call▁begin|>get_time2<|tool▁sep|>{\"city\": \"Tokyo2\"}<|tool▁call▁end|><|tool▁calls▁end|>REASONING"), m.reasoning_content); - } - - // variant: thinking forced open + tool call in reasoning content + no closing think + not partial - // This is a bit of a fine tuning issue on the model's part IMO. It really should not be attempting - // to make tool calls in reasoning content according to the model card, but it does sometimes, so - // add the reasoning content as regular content and parse the tool calls. - { - common_chat_parser_params params; - params.format = COMMON_CHAT_FORMAT_DEEPSEEK_V3_1; - params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK; - params.reasoning_in_content = false; - params.thinking_forced_open = true; - params.parse_tool_calls = true; - const std::string variant("thinking_forced_open_tool_call_in_reasoning_no_closing_think_not_partial"); - const std::string in = "REASONING<|tool▁calls▁begin|><|tool▁call▁begin|>get_time<|tool▁sep|>{\"city\": \"Tokyo\"}<|tool▁call▁end|><|tool▁calls▁end|>"; - auto m = common_chat_parse(in, false, params); - assert_equals(variant, std::string("REASONING"), m.content); - assert_equals(variant, std::string(""), m.reasoning_content); - assert_equals(variant, 1, m.tool_calls.size()); - assert_equals(variant, std::string("get_time"), m.tool_calls[0].name); - assert_equals(variant, std::string("{\"city\":\"Tokyo\"}"), m.tool_calls[0].arguments); - } - - // variant: thinking forced open + tool call in reasoning content + no closing think + partial - { - common_chat_parser_params params; - params.format = COMMON_CHAT_FORMAT_DEEPSEEK_V3_1; - params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK; - params.reasoning_in_content = false; - 
params.thinking_forced_open = true; - params.parse_tool_calls = true; - const std::string variant("thinking_forced_open_tool_call_in_reasoning_no_closing_think_partial"); - const std::string in = "REASONING<|tool▁calls▁begin|><|tool▁call▁begin|>get_time<|tool▁sep|>{\"city\": \"Tokyo\"}<|tool▁call▁end|><|tool▁calls▁end|>"; - auto m = common_chat_parse(in, /* is_partial= */ true, params); - assert_equals(variant, std::string("REASONING<|tool▁calls▁begin|><|tool▁call▁begin|>get_time<|tool▁sep|>{\"city\": \"Tokyo\"}<|tool▁call▁end|><|tool▁calls▁end|>"), m.reasoning_content); - assert_equals(variant, std::string(""), m.content); - assert_equals(variant, 0, m.tool_calls.size()); - } - - // variant: thinking not forced open + reasoning + regular content + no tool calls - { - common_chat_parser_params params; - params.format = COMMON_CHAT_FORMAT_DEEPSEEK_V3_1; - params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK; - params.reasoning_in_content = false; - params.thinking_forced_open = true; - params.parse_tool_calls = true; - const std::string variant("thinking_forced_open_reasoning_regular_content_no_tool_calls"); - const std::string in = "REASONINGCONTENT"; - auto m = common_chat_parse(in, false, params); - assert_equals(variant, 0, m.tool_calls.size()); - assert_equals(variant, std::string("CONTENT"), m.content); - assert_equals(variant, std::string("REASONING"), m.reasoning_content); - } - // variant: thinking not forced open + missing reasoning + no tool calls - { - common_chat_parser_params params; - params.format = COMMON_CHAT_FORMAT_DEEPSEEK_V3_1; - params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK; - params.reasoning_in_content = false; - params.thinking_forced_open = false; - params.parse_tool_calls = true; - const std::string variant("thinking_not_forced_open_missing_reasoning_no_tool_calls"); - const std::string in = "CONTENT"; - auto m = common_chat_parse(in, false, params); - assert_equals(variant, 0, m.tool_calls.size()); - 
assert_equals(variant, std::string("CONTENT"), m.content); - assert_equals(variant, std::string(""), m.reasoning_content); - } -} - -static void test_with_args(const std::string & input, const std::string & expected, bool parse_as_partial = true, bool is_partial = true) { - common_chat_msg_parser builder(input, parse_as_partial, {}); - auto js = builder.try_consume_json_with_dumped_args({{"args"}}, {}); - assert_equals(true, js.has_value()); - assert_equals(is_partial, js->is_partial); - assert_equals(expected, js->value.dump()); -} - -static void test_json_with_dumped_args_no_args() { - // Normal JSON, nothing to heal, nothing to dump - test("{\"name\": \"python\"}", false, {}, {}, "{\"name\":\"python\"}"); - // Full json is args - test("{\"name\": \"python\"}", false, {{}}, {}, "{\"name\":\"python\"}"); - - // If the arguments are further down, don't heal partial content. - for (const auto & src : barely_healable_jsons) { - test(src, true, {{"arguments"}}, {}, "{}"); - } - // But heal content that isn't partial. - test("{\"name\": \"python\"", true, {{"arguments"}}, {}, "{\"name\":\"python\"}"); -} - -static void test_json_with_dumped_args() { - - // Partial content. - test("{\"content\": \"t", true, {}, {{"content"}}, "{\"content\":\"t\"}"); - test("{\"content\": \"", true, {}, {{"content"}}, "{\"content\":\"\"}"); - test("{\"content\": ", true, {}, {{"content"}}, "{}"); - - // If the entire JSON is the arguments, healing it them dumping it produces the same output as the input (just reformatted). 
- test("{\"name\": \"python", true, {{}}, {}, "{\"name\":\"python"); - for (const auto & src : barely_healable_jsons) { - test(src, true, {{}}, {}, src); - } - - // Full JSON w/ args - for (auto parse_as_partial : {true, false}) { - test_with_args( - R"({"name": "python", "args": {"arg1": 1}})", - R"({"name":"python","args":"{\"arg1\":1}"})", - parse_as_partial, - /* is_partial= */ false - ); - } - - // Partial JSON w/ partial args - test_with_args( - R"({"foo": "bar", "args": {")", - R"({"foo":"bar","args":"{\""})" - ); - // Partial args broken in object key - test_with_args( - R"({"foo": "bar", "args": {"ar)", - R"({"foo":"bar","args":"{\"ar"})" - ); - // Partial args broken after object key - test_with_args( - R"({"foo": "bar", "args": {"arg1")", - R"({"foo":"bar","args":"{\"arg1\""})" - ); - // Partial args broken before object value - test_with_args( - R"({"foo": "bar", "args": {"arg1":)", - R"({"foo":"bar","args":"{\"arg1\":"})" - ); - // Partial args broken before object value (space) - test_with_args( - R"({"foo": "bar", "args": {"arg1": )", - R"({"foo":"bar","args":"{\"arg1\":"})" - ); - // Partial args broken in object value that may not be complete (int) - test_with_args( - R"({"foo": "bar", "args": {"arg1": 1)", - R"({"foo":"bar","args":"{\"arg1\":"})" - ); - // Partial args broken in object value that is complete (int) - test_with_args( - R"({"foo": "bar", "args": {"arg1": 1 )", - R"({"foo":"bar","args":"{\"arg1\":1"})" - ); - // Partial args broken in object value that is incomplete (string) - test_with_args( - R"({"foo": "bar", "args": {"arg1": ")", - R"({"foo":"bar","args":"{\"arg1\":\""})" - ); - // Partial args broken in object value that is complete (string) - test_with_args( - R"({"foo": "bar", "args": {"arg1": "1")", - R"({"foo":"bar","args":"{\"arg1\":\"1\""})" - ); - // Partial args broken on array opening - test_with_args( - R"({"foo": "bar", "args": [)", - R"({"foo":"bar","args":"["})" - ); - // Partial args broken on array value that is 
incomplete (int) - test_with_args( - R"({"foo": "bar", "args": [1)", - R"({"foo":"bar","args":"["})" - ); - // Partial args broken on array value that is complete (int) - test_with_args( - R"({"foo": "bar", "args": [1 )", - R"({"foo":"bar","args":"[1"})" - ); - // Partial args broken on array value that is complete (string) - test_with_args( - R"({"foo": "bar", "args": ["1")", - R"({"foo":"bar","args":"[\"1\""})" - ); - // Partial args broken after array value - test_with_args( - R"({"foo": "bar", "args": [1,)", - R"({"foo":"bar","args":"[1,"})" - ); - // Partial args broken on nested array - test_with_args( - R"({"foo": "bar", "args": {"arg1": [)", - R"({"foo":"bar","args":"{\"arg1\":["})" - ); - - // Unicode tests - test_with_args( - R"({"foo": "bar", "args": {"arg1": "\u)", - R"({"foo":"bar","args":"{\"arg1\":\"\\u"})" - ); - test_with_args( - R"({"foo": "bar", "args": {"arg1": "\u0)", - R"({"foo":"bar","args":"{\"arg1\":\"\\u0"})" - ); - test_with_args( - R"({"foo": "bar", "args": {"arg1": "\u00)", - R"({"foo":"bar","args":"{\"arg1\":\"\\u00"})" - ); - test_with_args( - R"({"foo": "bar", "args": {"arg1": "\u000)", - R"({"foo":"bar","args":"{\"arg1\":\"\\u000"})" - ); - test_with_args( - R"({"foo": "bar", "args": {"arg1": "\u0000)", - R"({"foo":"bar","args":"{\"arg1\":\"\\u0000"})" - ); - test_with_args( - R"({"foo": "bar", "args": {"arg1": "\ud8)", - R"({"foo":"bar","args":"{\"arg1\":\"\\ud8"})" - ); - test_with_args( - R"({"foo": "bar", "args": {"arg1": "\ud80)", - R"({"foo":"bar","args":"{\"arg1\":\"\\ud80"})" - ); - test_with_args( - R"({"foo": "bar", "args": {"arg1": "\ud800)", - R"({"foo":"bar","args":"{\"arg1\":\"\\ud800"})" - ); - test_with_args( - R"({"foo": "bar", "args": {"arg1": "\ud800\)", - R"({"foo":"bar","args":"{\"arg1\":\"\\ud800\\"})" - ); - test_with_args( - R"({"foo": "bar", "args": {"arg1": "\ud800\u)", - R"({"foo":"bar","args":"{\"arg1\":\"\\ud800\\u"})" - ); - test_with_args( - R"({"foo": "bar", "args": {"arg1": "\ud800\ud)", - 
R"({"foo":"bar","args":"{\"arg1\":\"\\ud800\\ud"})" - ); - test_with_args( - R"({"foo": "bar", "args": {"arg1": "\ud800\udc)", - R"({"foo":"bar","args":"{\"arg1\":\"\\ud800\\udc"})" - ); - test_with_args( - R"({"foo": "bar", "args": {"arg1": "\ud800\udc0)", - R"({"foo":"bar","args":"{\"arg1\":\"\\ud800\\udc0"})" - ); - test_with_args( - R"({"foo": "bar", "args": {"arg1": "\ud800\udc00)", - R"({"foo":"bar","args":"{\"arg1\":\"\\ud800\\udc00"})" - ); -} - -static void test_positions() { - { - common_chat_msg_parser builder("Hello, world!", /* is_partial= */ false, {}); - assert_equals(0, builder.pos()); - assert_throws([&]() { builder.move_to(100); }); - assert_equals(0, builder.pos()); - assert_throws([&]() { builder.move_back(1); }); - assert_equals(0, builder.pos()); - - builder.move_to(8); - assert_equals(8, builder.pos()); - builder.move_back(1); - assert_equals(7, builder.pos()); - assert_equals("world!", builder.consume_rest()); - - builder.move_to(0); - assert_equals(0, builder.pos()); - - assert_throws([&]() { builder.finish(); }); - assert_equals(0, builder.pos()); - - builder.move_to(builder.input().size()); - builder.finish(); - } - { - common_chat_msg_parser builder("Hello, world!", /* is_partial= */ true, {}); - - builder.move_to(builder.input().size()); - assert_equals(builder.input().size(), builder.pos()); - builder.finish(); - } -} - -int main() { - test_positions(); - test_json_with_dumped_args_no_args(); - test_json_with_dumped_args(); - test_reasoning(); - test_regex(); - test_deepseek_v3_1_tool_calls(); - std::cout << "All tests passed!\n"; - return 0; -} diff --git a/tests/test-chat-peg-parser.cpp b/tests/test-chat-peg-parser.cpp index f767c73c27..ae82966699 100644 --- a/tests/test-chat-peg-parser.cpp +++ b/tests/test-chat-peg-parser.cpp @@ -1,8 +1,3 @@ -#include -#include -#include - -#include "chat-parser.h" #include "chat-peg-parser.h" #include "chat.h" #include "common.h" @@ -10,6 +5,11 @@ #include "peg-parser.h" #include "testing.h" 
#include "peg-parser/simple-tokenize.h" + +#include +#include +#include + #include "nlohmann/json.hpp" using json = nlohmann::ordered_json; @@ -17,9 +17,11 @@ using json = nlohmann::ordered_json; static json create_tools(); static void test_example_native(testing & t); static void test_example_qwen3_coder(testing & t); +static void test_example_qwen3_non_coder(testing & t); static void test_command7_parser_compare(testing & t); +static void test_prefix_tool_names(testing & t); -int main(int argc, char *argv[]) { +int main(int argc, char * argv[]) { testing t(std::cout); if (argc >= 2) { t.set_filter(argv[1]); @@ -32,7 +34,9 @@ int main(int argc, char *argv[]) { t.test("native", test_example_native); t.test("qwen3 coder", test_example_qwen3_coder); + t.test("qwen3 non-coder", test_example_qwen3_non_coder); t.test("comparison", test_command7_parser_compare); + t.test("prefix tool names", test_prefix_tool_names); return t.summary(); } @@ -41,87 +45,75 @@ static json create_tools() { json tools = json::array(); json tool_weather = { - {"type", "function"}, - {"function", { - {"name", "get_current_weather"}, - {"description", "Get the current weather in a given location"}, - {"parameters", { - {"type", "object"}, - {"properties", { - {"location", { - {"type", "string"}, - {"description", "The city and state, e.g. San Francisco, CA"} - }}, - {"unit", { - {"type", "string"}, - {"enum", {"celsius", "fahrenheit"}}, - {"description", "The temperature unit to use. Infer this from the users location."} - }} - }}, - {"required", {"location", "unit"}}, - }}, - }} + { "type", "function" }, + { "function", + { + { "name", "get_current_weather" }, + { "description", "Get the current weather in a given location" }, + { "parameters", + { + { "type", "object" }, + { "properties", + { { "location", + { { "type", "string" }, { "description", "The city and state, e.g. 
San Francisco, CA" } } }, + { "unit", + { { "type", "string" }, + { "enum", { "celsius", "fahrenheit" } }, + { "description", + "The temperature unit to use. Infer this from the users location." } } } } }, + { "required", { "location", "unit" } }, + } }, + } } }; tools.push_back(tool_weather); json tool_forecast = { - {"type", "function"}, - {"function", { - {"name", "get_forecast"}, - {"description", "Get the weather forecast for a given location"}, - {"parameters", { - {"type", "object"}, - {"properties", { - {"location", { - {"type", "string"}, - {"description", "The city and state, e.g. San Francisco, CA"} - }}, - {"unit", { - {"type", "string"}, - {"enum", {"celsius", "fahrenheit"}}, - {"description", "The temperature unit to use. Infer this from the users location."} - }}, - {"days", { - {"type", "integer"}, - {"description", "Number of days to forecast (1-10)"}, - {"minimum", 1}, - {"maximum", 10} - }} - }}, - {"required", {"location", "unit"}}, - }}, - }} + { "type", "function" }, + { "function", + { + { "name", "get_forecast" }, + { "description", "Get the weather forecast for a given location" }, + { "parameters", + { + { "type", "object" }, + { "properties", + { { "location", + { { "type", "string" }, { "description", "The city and state, e.g. San Francisco, CA" } } }, + { "unit", + { { "type", "string" }, + { "enum", { "celsius", "fahrenheit" } }, + { "description", "The temperature unit to use. Infer this from the users location." 
} } }, + { "days", + { { "type", "integer" }, + { "description", "Number of days to forecast (1-10)" }, + { "minimum", 1 }, + { "maximum", 10 } } } } }, + { "required", { "location", "unit" } }, + } }, + } } }; tools.push_back(tool_forecast); json tool_search = { - {"type", "function"}, - {"function", { - {"name", "search_knowledge_base"}, - {"description", "Search the internal technical documentation knowledge base."}, - {"parameters", { - {"type", "object"}, - {"properties", { - {"query", { - {"type", "string"}, - {"description", "The search query string."} - }}, - {"max_results", { - {"type", "integer"}, - {"description", "The maximum number of results to return."}, - {"default", 5} - }}, - {"category", { - {"type", "string"}, - {"enum", {"api", "troubleshooting", "billing", "general"}}, - {"description", "Filter search by specific category."} - }} - }}, - {"required", {"query", "category"}}, - {"additionalProperties", false} - }}, - {"strict", true} - }} + { "type", "function" }, + { "function", + { { "name", "search_knowledge_base" }, + { "description", "Search the internal technical documentation knowledge base." }, + { "parameters", + { { "type", "object" }, + { "properties", + { { "query", { { "type", "string" }, { "description", "The search query string." } } }, + { "max_results", + { { "type", "integer" }, + { "description", "The maximum number of results to return." }, + { "default", 5 } } }, + { "category", + { { "type", "string" }, + { "enum", { "api", "troubleshooting", "billing", "general" } }, + { "description", "Filter search by specific category." 
} } } } }, + { "required", { "query", "category" } }, + { "additionalProperties", false } } }, + { "strict", true } } } }; tools.push_back(tool_search); @@ -131,39 +123,39 @@ static json create_tools() { struct tool_argument { std::string name; std::string type; - bool is_required; - json schema; + bool is_required; + json schema; }; struct tool_definition { - std::string name; + std::string name; std::vector arguments; - json schema; + json schema; }; // Test fictitious model output that emits arguments as JSON. static void test_example_native(testing & t) { struct test_case { // Parameters - std::string name; - json tools; + std::string name; + json tools; common_chat_tool_choice tool_choice; common_reasoning_format reasoning_format; - json json_schema; - bool parallel_tool_calls; - bool thinking_forced_open; - std::string input; + json json_schema; + bool parallel_tool_calls; + bool thinking_forced_open; + std::string input; // Expect - std::string expect_reasoning; - std::string expect_content; + std::string expect_reasoning; + std::string expect_content; std::vector expect_tool_calls; }; auto build_parser = [](const test_case & tc) { - return build_chat_peg_native_parser([&](common_chat_peg_native_builder & p) { + return build_chat_peg_unified_parser([&](common_chat_peg_unified_builder & p) { auto reasoning_in_content = (tc.reasoning_format == COMMON_REASONING_FORMAT_NONE); - auto reasoning = p.eps(); + auto reasoning = p.eps(); if (tc.thinking_forced_open) { // If thinking is forced open, expect a closing tag reasoning = p.reasoning(p.until("")) + "" + p.space(); @@ -174,231 +166,188 @@ static void test_example_native(testing & t) { // tool calling parser if (tc.tools.is_array() && !tc.tools.empty()) { - auto tools = p.choice(); - for (const auto & tool : tc.tools) { - const auto & function = tool.at("function"); - std::string name = function.at("name"); - const auto & schema = function.at("parameters"); + auto tool_call = + p.standard_json_tools("[", "]", 
tc.tools, tc.parallel_tool_calls, + tc.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED); - auto tool_name = p.json_member("name", "\"" + p.tool_name(p.literal(name)) + "\""); - auto tool_args = p.json_member("arguments", p.tool_args(p.schema(p.json(), "tool-" + name + "-schema", schema))); - - tools |= p.rule("tool-" + name, p.tool_open(p.literal("{")) << tool_name << "," << tool_args << "}"); - }; - - auto parallel_calls = p.eps(); - if (tc.parallel_tool_calls) { - parallel_calls = p.zero_or_more("," << tools); - } - - auto tool_call = p.trigger_rule("tool-call", - p.sequence({ - p.literal("["), - tools, - parallel_calls, - p.literal("]") - }) - ); - - return p.sequence({ - (reasoning_in_content ? p.eps() : reasoning), - p.content(p.until("")), - p.optional(p.space() + tool_call), - p.space(), - p.end() - }); + return p.sequence({ (reasoning_in_content ? p.eps() : reasoning), p.content(p.until("")), + p.optional(p.space() + tool_call), p.space(), p.end() }); } // response_format parser if (tc.json_schema.is_object() && !tc.json_schema.empty()) { - return p.sequence({ - (reasoning_in_content ? p.eps() : reasoning), - p.content(p.schema(p.json(), "response-output", tc.json_schema)), - p.space(), - p.end() - }); + return p.sequence({ (reasoning_in_content ? p.eps() : reasoning), + p.content(p.schema(p.json(), "response-output", tc.json_schema)), p.space(), + p.end() }); } // Content-only parser - return p.sequence({ - (reasoning_in_content ? p.eps() : reasoning), - p.content(p.rest()), - p.end() - }); + return p.sequence({ (reasoning_in_content ? 
p.eps() : reasoning), p.content(p.rest()), p.end() }); }); }; std::vector test_cases = std::vector{ { - /* .name = */ "content with thinking_forced_open = false", - /* .tools = */ {}, - /* .tool_choice = */ COMMON_CHAT_TOOL_CHOICE_NONE, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO, - /* .json_schema = */ {}, - /* .parallel_tool_calls = */ false, - /* .thinking_forced_open = */ false, - /* .input = */ ( - "The user said hello, I must say hello back\nHello" - ), - /* .expect_reasoning = */ "The user said hello, I must say hello back", - /* .expect_content = */ "Hello", - /* .expect_tool_calls = */ {}, - }, + /* .name = */ "content with thinking_forced_open = false", + /* .tools = */ {}, + /* .tool_choice = */ COMMON_CHAT_TOOL_CHOICE_NONE, + /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO, + /* .json_schema = */ {}, + /* .parallel_tool_calls = */ false, + /* .thinking_forced_open = */ false, + /* .input = */ ("The user said hello, I must say hello back\nHello"), + /* .expect_reasoning = */ "The user said hello, I must say hello back", + /* .expect_content = */ "Hello", + /* .expect_tool_calls = */ {}, + }, { - /* .name = */ "content with thinking_forced_open = false and no reasoning", - /* .tools = */ {}, - /* .tool_choice = */ COMMON_CHAT_TOOL_CHOICE_NONE, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO, - /* .json_schema = */ {}, - /* .parallel_tool_calls = */ false, - /* .thinking_forced_open = */ false, - /* .input = */ ( - "Hello" - ), - /* .expect_reasoning = */ "", - /* .expect_content = */ "Hello", - /* .expect_tool_calls = */ {}, - }, + /* .name = */ "content with thinking_forced_open = false and no reasoning", + /* .tools = */ {}, + /* .tool_choice = */ COMMON_CHAT_TOOL_CHOICE_NONE, + /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO, + /* .json_schema = */ {}, + /* .parallel_tool_calls = */ false, + /* .thinking_forced_open = */ false, + /* .input = */ ("Hello"), + /* .expect_reasoning = */ "", + /* .expect_content = */ 
"Hello", + /* .expect_tool_calls = */ {}, + }, { - /* .name = */ "content with thinking_forced_open = false and reasoning_format = none", - /* .tools = */ {}, - /* .tool_choice = */ COMMON_CHAT_TOOL_CHOICE_NONE, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_NONE, - /* .json_schema = */ {}, - /* .parallel_tool_calls = */ false, - /* .thinking_forced_open = */ true, - /* .input = */ ( - "The user said hello, I must say hello back\nHello" - ), - /* .expect_reasoning = */ "", - /* .expect_content = */ "The user said hello, I must say hello back\nHello", - /* .expect_tool_calls = */ {}, - }, + /* .name = */ "content with thinking_forced_open = false and reasoning_format = none", + /* .tools = */ {}, + /* .tool_choice = */ COMMON_CHAT_TOOL_CHOICE_NONE, + /* .reasoning_format = */ COMMON_REASONING_FORMAT_NONE, + /* .json_schema = */ {}, + /* .parallel_tool_calls = */ false, + /* .thinking_forced_open = */ true, + /* .input = */ ("The user said hello, I must say hello back\nHello"), + /* .expect_reasoning = */ "", + /* .expect_content = */ "The user said hello, I must say hello back\nHello", + /* .expect_tool_calls = */ {}, + }, { - /* .name = */ "content with thinking_forced_open = true", - /* .tools = */ {}, - /* .tool_choice = */ COMMON_CHAT_TOOL_CHOICE_NONE, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO, - /* .json_schema = */ {}, - /* .parallel_tool_calls = */ false, - /* .thinking_forced_open = */ true, - /* .input = */ ( - "The user said hello, I must say hello back\nHello" - ), - /* .expect_reasoning = */ "The user said hello, I must say hello back", - /* .expect_content = */ "Hello", - /* .expect_tool_calls = */ {}, - }, + /* .name = */ "content with thinking_forced_open = true", + /* .tools = */ {}, + /* .tool_choice = */ COMMON_CHAT_TOOL_CHOICE_NONE, + /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO, + /* .json_schema = */ {}, + /* .parallel_tool_calls = */ false, + /* .thinking_forced_open = */ true, + /* .input = */ ("The user said 
hello, I must say hello back\nHello"), + /* .expect_reasoning = */ "The user said hello, I must say hello back", + /* .expect_content = */ "Hello", + /* .expect_tool_calls = */ {}, + }, { - /* .name = */ "content with thinking_forced_open = true and reasoning_format = none", - /* .tools = */ {}, - /* .tool_choice = */ COMMON_CHAT_TOOL_CHOICE_NONE, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_NONE, - /* .json_schema = */ {}, - /* .parallel_tool_calls = */ false, - /* .thinking_forced_open = */ true, - /* .input = */ ( - "The user said hello, I must say hello back\nHello" - ), - /* .expect_reasoning = */ "", - /* .expect_content = */ "The user said hello, I must say hello back\nHello", - /* .expect_tool_calls = */ {}, - }, + /* .name = */ "content with thinking_forced_open = true and reasoning_format = none", + /* .tools = */ {}, + /* .tool_choice = */ COMMON_CHAT_TOOL_CHOICE_NONE, + /* .reasoning_format = */ COMMON_REASONING_FORMAT_NONE, + /* .json_schema = */ {}, + /* .parallel_tool_calls = */ false, + /* .thinking_forced_open = */ true, + /* .input = */ ("The user said hello, I must say hello back\nHello"), + /* .expect_reasoning = */ "", + /* .expect_content = */ "The user said hello, I must say hello back\nHello", + /* .expect_tool_calls = */ {}, + }, { - /* .name = */ "tools with tool_choice = auto and no parallel_tool_calls", - /* .tools = */ create_tools(), - /* .tool_choice = */ COMMON_CHAT_TOOL_CHOICE_AUTO, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO, - /* .json_schema = */ {}, - /* .parallel_tool_calls = */ false, - /* .thinking_forced_open = */ true, - /* .input = */ ( - "I must get the weather in New York\n" - "[" - R"({"name": "get_current_weather", "arguments": {"location": "New York City, NY", "unit": "fahrenheit"}})" - "]" - ), - /* .expect_reasoning = */ "I must get the weather in New York", - /* .expect_content = */ "", - /* .expect_tool_calls = */ {{ + /* .name = */ "tools with tool_choice = auto and no parallel_tool_calls", + 
/* .tools = */ create_tools(), + /* .tool_choice = */ COMMON_CHAT_TOOL_CHOICE_AUTO, + /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO, + /* .json_schema = */ {}, + /* .parallel_tool_calls = */ false, + /* .thinking_forced_open = */ true, + /* .input = */ + ("I must get the weather in New York\n" + "[" + R"({"name": "get_current_weather", "arguments": {"location": "New York City, NY", "unit": "fahrenheit"}})" + "]"), + /* .expect_reasoning = */ "I must get the weather in New York", + /* .expect_content = */ "", + /* .expect_tool_calls = */ + { { /* .name = */ "get_current_weather", /* .arguments = */ R"({"location": "New York City, NY", "unit": "fahrenheit"})", /* .id = */ "", - }}, - }, + } }, + }, { - /* .name = */ "tools with tool_choice = auto and parallel_tool_calls", - /* .tools = */ create_tools(), - /* .tool_choice = */ COMMON_CHAT_TOOL_CHOICE_AUTO, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO, - /* .json_schema = */ {}, - /* .parallel_tool_calls = */ true, - /* .thinking_forced_open = */ true, - /* .input = */ ( - "I must get the weather in New York and San Francisco and a 3 day forecast of each.\nLet me search that for you." 
- "[" - R"({"name": "get_current_weather", "arguments": {"location": "New York City, NY", "unit": "fahrenheit"}})" - ", " - R"({"name": "get_current_weather", "arguments": {"location": "San Francisco, CA", "unit": "fahrenheit"}})" - ", " - R"({"name": "get_forecast", "arguments": {"location": "New York City, NY", "unit": "fahrenheit", "days": 3}})" - ", " - R"({"name": "get_forecast", "arguments": {"location": "San Francisco, CA", "unit": "fahrenheit", "days": 3}})" - "]" - ), - /* .expect_reasoning = */ "I must get the weather in New York and San Francisco and a 3 day forecast of each.", - /* .expect_content = */ "Let me search that for you.", - /* .expect_tool_calls = */ {{ - /* .name = */ "get_current_weather", - /* .arguments = */ R"({"location": "New York City, NY", "unit": "fahrenheit"})", - /* .id = */ "", - }, { - /* .name = */ "get_current_weather", - /* .arguments = */ R"({"location": "San Francisco, CA", "unit": "fahrenheit"})", - /* .id = */ "", - }, { - /* .name = */ "get_forecast", - /* .arguments = */ R"({"location": "New York City, NY", "unit": "fahrenheit", "days": 3})", - /* .id = */ "", - }, { - /* .name = */ "get_forecast", - /* .arguments = */ R"({"location": "San Francisco, CA", "unit": "fahrenheit", "days": 3})", - /* .id = */ "", - }}, - }, + /* .name = */ "tools with tool_choice = auto and parallel_tool_calls", + /* .tools = */ create_tools(), + /* .tool_choice = */ COMMON_CHAT_TOOL_CHOICE_AUTO, + /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO, + /* .json_schema = */ {}, + /* .parallel_tool_calls = */ true, + /* .thinking_forced_open = */ true, + /* .input = */ + ("I must get the weather in New York and San Francisco and a 3 day forecast of each.\nLet me " + "search that for you." 
+ "[" + R"({"name": "get_current_weather", "arguments": {"location": "New York City, NY", "unit": "fahrenheit"}})" + ", " + R"({"name": "get_current_weather", "arguments": {"location": "San Francisco, CA", "unit": "fahrenheit"}})" + ", " + R"({"name": "get_forecast", "arguments": {"location": "New York City, NY", "unit": "fahrenheit", "days": 3}})" + ", " + R"({"name": "get_forecast", "arguments": {"location": "San Francisco, CA", "unit": "fahrenheit", "days": 3}})" + "]"), + /* .expect_reasoning = */ + "I must get the weather in New York and San Francisco and a 3 day forecast of each.", /* .expect_content = */ "Let me search that for you.", + /* .expect_tool_calls = */ + { { + /* .name = */ "get_current_weather", + /* .arguments = */ R"({"location": "New York City, NY", "unit": "fahrenheit"})", + /* .id = */ "", + }, + { + /* .name = */ "get_current_weather", + /* .arguments = */ R"({"location": "San Francisco, CA", "unit": "fahrenheit"})", + /* .id = */ "", + }, + { + /* .name = */ "get_forecast", + /* .arguments = */ R"({"location": "New York City, NY", "unit": "fahrenheit", "days": 3})", + /* .id = */ "", + }, + { + /* .name = */ "get_forecast", + /* .arguments = */ R"({"location": "San Francisco, CA", "unit": "fahrenheit", "days": 3})", + /* .id = */ "", + } }, + }, { - /* .name = */ "response_format with thinking_forced_open = true", - /* .tools = */ {}, - /* .tool_choice = */ COMMON_CHAT_TOOL_CHOICE_NONE, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO, - /* .json_schema = */ { - {"type", "object"}, - {"properties", { - {"invoice_number", {{"type", "string"}}}, - {"amount", {{"type", "number"}}}, - {"due_date", {{"type", "string"}}} - }}, - {"required", {"invoice_number", "amount", "due_date"}} - }, - /* .parallel_tool_calls = */ false, - /* .thinking_forced_open = */ true, - /* .input = */ ( - "I must produce the invoice in the requested format\n" - R"({"invoice_number": "INV-2025-001", "amount": 1250.50, "due_date": "2025-12-31"})" - ), - /* 
.expect_reasoning = */ "I must produce the invoice in the requested format", - /* .expect_content = */ R"({"invoice_number": "INV-2025-001", "amount": 1250.50, "due_date": "2025-12-31"})", - /* .expect_tool_calls = */ {}, - }, + /* .name = */ "response_format with thinking_forced_open = true", + /* .tools = */ {}, + /* .tool_choice = */ COMMON_CHAT_TOOL_CHOICE_NONE, + /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO, + /* .json_schema = */ + { { "type", "object" }, + { "properties", + { { "invoice_number", { { "type", "string" } } }, + { "amount", { { "type", "number" } } }, + { "due_date", { { "type", "string" } } } } }, + { "required", { "invoice_number", "amount", "due_date" } } }, + /* .parallel_tool_calls = */ false, + /* .thinking_forced_open = */ true, + /* .input = */ + ("I must produce the invoice in the requested format\n" + R"({"invoice_number": "INV-2025-001", "amount": 1250.50, "due_date": "2025-12-31"})"), + /* .expect_reasoning = */ "I must produce the invoice in the requested format", + /* .expect_content = */ + R"({"invoice_number": "INV-2025-001", "amount": 1250.50, "due_date": "2025-12-31"})", /* .expect_tool_calls = */ {}, + }, }; for (const auto & tc : test_cases) { t.test(tc.name, [&](testing & t) { - auto parser = build_parser(tc); - auto lazy = !tc.tools.empty() && tc.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED; + auto parser = build_parser(tc); + auto lazy = !tc.tools.empty() && tc.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED; auto grammar = build_grammar([&](const common_grammar_builder & builder) { - for (auto const & def : tc.tools) { - auto function = def.at("function"); + for (const auto & def : tc.tools) { + auto function = def.at("function"); auto parameters = function.at("parameters"); builder.resolve_refs(parameters); }; @@ -406,17 +355,17 @@ static void test_example_native(testing & t) { }); t.log("Grammar:"); - for (auto const & line : string_split(grammar, "\n")) { + for (const auto & line : string_split(grammar, 
"\n")) { t.log(line); } common_peg_parse_context ctx(tc.input, false); - auto result = parser.parse(ctx); + auto result = parser.parse(ctx); t.assert_true("success", result.success()); common_chat_msg msg; - auto mapper = common_chat_peg_native_mapper(msg); + auto mapper = common_chat_peg_unified_mapper(msg); mapper.from_ast(ctx.ast, result); t.assert_equal("content equal", tc.expect_content, msg.content); @@ -431,16 +380,16 @@ static void test_example_native(testing & t) { } static void test_example_qwen3_coder(testing & t) { - auto tools = create_tools(); - auto parser = build_chat_peg_constructed_parser([&](common_chat_peg_constructed_builder & p) { + auto tools = create_tools(); + auto parser = build_chat_peg_unified_parser([&](common_chat_peg_unified_builder & p) { auto content = p.rule("content", p.content(p.until(""))); std::vector tool_parsers; - for (auto const & def : tools) { - auto function = def.at("function"); - std::string name = function.at("name"); - auto parameters = function.at("parameters"); - auto properties = parameters.at("properties"); + for (const auto & def : tools) { + auto function = def.at("function"); + std::string name = function.at("name"); + auto parameters = function.at("parameters"); + auto properties = parameters.at("properties"); std::set required_properties; if (function.contains("required")) { @@ -450,59 +399,36 @@ static void test_example_qwen3_coder(testing & t) { std::vector arg_parsers; for (const auto & [param_name, param_schema] : properties.items()) { bool is_required = required_properties.find(param_name) != required_properties.end(); - auto type = param_schema.value("type", "object"); + auto type = param_schema.value("type", "object"); - auto arg = p.tool_arg(p.sequence({ - p.tool_arg_open(""), - (type == "string" ? 
- p.tool_arg_string_value( - p.schema( - p.until_one_of({ - "\n\n" - }), - "tool-" + name + "-arg-" + param_name + "-schema", - param_schema, - true - ) - ) : p.tool_arg_json_value( - p.schema( - p.json(), - "tool-" + name + "-arg-" + param_name + "-schema", - param_schema - ) - ) - ), - p.tool_arg_close( - "\n" + - p.peek(p.literal("")) - ) - })); + auto arg = p.tool_arg( + p.sequence({ p.tool_arg_open(""), + (type == "string" ? + p.tool_arg_string_value(p.schema( + p.until_one_of({ "\n\n" }), + "tool-" + name + "-arg-" + param_name + "-schema", param_schema, true)) : + p.tool_arg_json_value(p.schema( + p.json(), "tool-" + name + "-arg-" + param_name + "-schema", param_schema))), + p.tool_arg_close("\n" + + p.peek(p.literal(""))) })); - arg_parsers.push_back(is_required ? - p.rule("tool-" + name + "-arg-" + param_name, arg) : - p.optional(p.rule("tool-" + name + "-arg-" + param_name, arg))); + arg_parsers.push_back(is_required ? p.rule("tool-" + name + "-arg-" + param_name, arg) : + p.optional(p.rule("tool-" + name + "-arg-" + param_name, arg))); } - tool_parsers.push_back(p.rule("tool-" + name, - p.tool_open("") - << p.sequence(arg_parsers) - << p.tool_close(p.literal("")) - )); + tool_parsers.push_back(p.rule("tool-" + name, p.tool_open("") + << p.sequence(arg_parsers) + << p.tool_close(p.literal("")))); }; - auto tool_call = p.trigger_rule("tool-call", - "" - << p.choice(tool_parsers) - << "" - ); + auto tool_call = p.trigger_rule("tool-call", "" << p.choice(tool_parsers) << ""); return content + p.zero_or_more(p.space() + tool_call) + p.end(); }); auto grammar = build_grammar([&](const common_grammar_builder & builder) { - for (auto const & def : tools) { - auto function = def.at("function"); + for (const auto & def : tools) { + auto function = def.at("function"); auto parameters = function.at("parameters"); builder.resolve_refs(parameters); }; @@ -510,11 +436,11 @@ static void test_example_qwen3_coder(testing & t) { }); t.log("Grammar:"); - for (auto const & 
line : string_split(grammar, "\n")) { + for (const auto & line : string_split(grammar, "\n")) { t.log(line); } - t.test("incremental parsing", [&](testing &t) { + t.test("incremental parsing", [&](testing & t) { std::string input = "Let me search the knowledge base for cat pictures." "\n" @@ -538,7 +464,7 @@ static void test_example_qwen3_coder(testing & t) { } common_chat_msg msg; - auto mapper = common_chat_peg_constructed_mapper(msg); + auto mapper = common_chat_peg_unified_mapper(msg); mapper.from_ast(ctx.ast, result); //t.log("Input: " + input); @@ -554,7 +480,105 @@ static void test_example_qwen3_coder(testing & t) { try { // This shouldn't emit any runtime errors auto diffs = common_chat_msg_diff::compute_diffs(prev, msg); - } catch(const std::exception & e) { + } catch (const std::exception & e) { + t.log(in.substr(0, result.end) + "[failed->]" + in.substr(result.end)); + t.assert_true(std::string("failed with ") + e.what(), false); + } + + prev = msg; + } + }); +} + +static void test_example_qwen3_non_coder(testing & t) { + auto tools = create_tools(); + auto parser = build_chat_peg_unified_parser([&](common_chat_peg_unified_builder & p) { + // tool calling parser using standard JSON format + auto tool_call = p.standard_json_tools("", "", tools, true, false); + + return p.sequence({ p.content(p.until("")), p.optional(p.space() + tool_call), p.end() }); + }); + + auto grammar = build_grammar([&](const common_grammar_builder & builder) { + for (const auto & def : tools) { + auto function = def.at("function"); + auto parameters = function.at("parameters"); + builder.resolve_refs(parameters); + }; + parser.build_grammar(builder); + }); + + t.log("Grammar:"); + for (const auto & line : string_split(grammar, "\n")) { + t.log(line); + } + + t.test("tool call parsing", [&](testing & t) { + std::string input = + "I need to get the weather.\n" + "" + "{\"name\": \"get_current_weather\", \"arguments\": {\"location\": \"New York City, NY\", \"unit\": " + 
"\"fahrenheit\"}}" + ""; + + common_peg_parse_context ctx(input, false); + auto result = parser.parse(ctx); + + t.assert_true("success", result.success()); + + common_chat_msg msg; + auto mapper = common_chat_peg_unified_mapper(msg); + mapper.from_ast(ctx.ast, result); + + t.assert_equal("content", "I need to get the weather.", msg.content); + t.assert_equal("reasoning", "", msg.reasoning_content); + t.assert_equal("tool calls count", 1u, msg.tool_calls.size()); + if (!msg.tool_calls.empty()) { + t.assert_equal("tool name", "get_current_weather", msg.tool_calls[0].name); + t.assert_equal("tool args", "{\"location\": \"New York City, NY\", \"unit\": \"fahrenheit\"}", + msg.tool_calls[0].arguments); + } + }); + + t.test("incremental parsing", [&](testing & t) { + std::string input = + "I need to get the weather.\n" + "" + "{\"name\": \"get_current_weather\", \"arguments\": {\"location\": \"New York City, NY\", \"unit\": " + "\"fahrenheit\"}}" + ""; + + std::vector tokens = simple_tokenize(input); + + common_chat_msg prev; + for (auto it = tokens.begin(); it != tokens.end(); it++) { + std::string in = std::accumulate(tokens.begin(), it + 1, std::string()); + + common_peg_parse_context ctx(in, it + 1 < tokens.end()); + + auto result = parser.parse(ctx); + if (!t.assert_equal("not fail", false, result.fail())) { + t.log(in.substr(0, result.end) + "[failed->]" + in.substr(result.end)); + } + + common_chat_msg msg; + auto mapper = common_chat_peg_unified_mapper(msg); + mapper.from_ast(ctx.ast, result); + + //t.log("Input: " + input); + t.log("==========================================="); + t.log("Iteration " + std::to_string(in.size())); + t.log("Reasoning: " + msg.reasoning_content); + t.log("Content : " + msg.content); + for (const auto & tc : msg.tool_calls) { + t.log("Tool name: " + tc.name); + t.log("Tool args: " + tc.arguments); + } + + try { + // This shouldn't emit any runtime errors + auto diffs = common_chat_msg_diff::compute_diffs(prev, msg); + } catch (const 
std::exception & e) { t.log(in.substr(0, result.end) + "[failed->]" + in.substr(result.end)); t.assert_true(std::string("failed with ") + e.what(), false); } @@ -565,38 +589,37 @@ static void test_example_qwen3_coder(testing & t) { } void test_command7_parser_compare(testing & t) { - auto parser = build_chat_peg_native_parser([](common_chat_peg_native_builder & p) { - auto thinking = p.reasoning_block( - "<|START_THINKING|>" << p.reasoning(p.until("<|END_THINKING|>")) << "<|END_THINKING|>"); + auto parser = build_chat_peg_unified_parser([](common_chat_peg_unified_builder & p) { + auto thinking = + p.reasoning_block("<|START_THINKING|>" << p.reasoning(p.until("<|END_THINKING|>")) << "<|END_THINKING|>"); auto response = "<|START_RESPONSE|>" << p.content(p.until("<|END_RESPONSE|>")) << "<|END_RESPONSE|>"; auto tool_call_id = p.atomic("\"tool_call_id\"" << (":" << ("\"" + p.tool_id(p.json_string_content()) + "\""))); - auto tool_call_name = p.atomic("\"tool_name\"" << (":" << ("\"" + p.tool_name(p.json_string_content()) + "\""))); + auto tool_call_name = + p.atomic("\"tool_name\"" << (":" << ("\"" + p.tool_name(p.json_string_content()) + "\""))); auto tool_call_args = "\"parameters\"" << (":" << p.tool_args(p.json())); auto tool_call_fields = p.rule("tool-call-fields", tool_call_id | tool_call_name | tool_call_args); - auto tool_call = p.rule("tool-call", p.tool( - p.tool_open(p.literal("{")) - << tool_call_fields - << p.zero_or_more( p.literal(",") << tool_call_fields) - << p.tool_close(p.literal("}")) - )); + auto tool_call = + p.rule("tool-call", p.tool(p.tool_open(p.literal("{")) + << tool_call_fields << p.zero_or_more(p.literal(",") << tool_call_fields) + << p.tool_close(p.literal("}")))); - auto tool_calls = p.rule("tool-calls", - "<|START_ACTION|>" - << ("[" << tool_call << p.zero_or_more(p.literal(",") << tool_call) << "]") - << "<|END_ACTION|>"); + auto tool_calls = p.rule( + "tool-calls", "<|START_ACTION|>" << ("[" << tool_call << 
p.zero_or_more(p.literal(",") << tool_call) << "]") + << "<|END_ACTION|>"); return p.optional(thinking) << (tool_calls | response) + p.end(); }); - auto test_current = [&](const common_peg_arena & p, const std::string & input, bool is_partial, bool print_results) { + auto test_current = [&](const common_peg_arena & p, const std::string & input, bool is_partial, + bool print_results) { common_peg_parse_context ctx(input, is_partial); - auto result = p.parse(ctx); + auto result = p.parse(ctx); common_chat_msg msg; - auto mapper = common_chat_peg_native_mapper(msg); + auto mapper = common_chat_peg_unified_mapper(msg); mapper.from_ast(ctx.ast, result); if (print_results) { @@ -614,79 +637,19 @@ void test_command7_parser_compare(testing & t) { } }; - auto test_legacy = [&](const std::string & input, bool need_more_input, bool print_results) { - // Original common_chat_combinator_parser taken from chat.cpp - common_chat_parser_params params; - params.format = COMMON_CHAT_FORMAT_GENERIC; - params.reasoning_format = COMMON_REASONING_FORMAT_AUTO; - params.reasoning_in_content = false; - params.thinking_forced_open = false; - common_chat_msg_parser builder( - input, - /* .is_partial = */ need_more_input, - params - ); + std::string reasoning = + "To plan an effective trip to Japan that includes both historical sites and modern attractions within a " + "budget of $4000 for a two-week stay, we need to:\n\n" + "1. Identify key historical sites and modern attractions in Japan.\n" + "2. Find affordable accommodation options that provide a balance between comfort and cost.\n" + "3. Determine the best modes of transportation for getting around Japan.\n" + "4. Create a day-by-day itinerary that ensures the user gets to see a variety of attractions without " + "overspending.\n" + "5. 
Provide a detailed cost breakdown that includes accommodation, transportation, meals, and entry fees " + "to attractions."; - builder.try_parse_reasoning("<|START_THINKING|>", "<|END_THINKING|>"); - - static const common_regex start_action_regex("<\\|START_ACTION\\|>"); - static const common_regex end_action_regex("<\\|END_ACTION\\|>"); - static const common_regex start_response_regex("<\\|START_RESPONSE\\|>"); - static const common_regex end_response_regex("<\\|END_RESPONSE\\|>"); - - if (auto res = builder.try_find_regex(start_action_regex)) { - // If we didn't extract thoughts, prelude includes them. - auto tool_calls = builder.consume_json_with_dumped_args({ { "parameters" } }); - for (const auto & tool_call : tool_calls.value) { - std::string name = tool_call.contains("tool_name") ? tool_call.at("tool_name") : ""; - std::string id = tool_call.contains("tool_call_id") ? tool_call.at("tool_call_id") : ""; - std::string arguments = tool_call.contains("parameters") ? tool_call.at("parameters") : ""; - if (!builder.add_tool_call(name, id, arguments) || tool_calls.is_partial) { - throw common_chat_msg_partial_exception("incomplete tool call"); - } - } - if (tool_calls.is_partial) { - throw common_chat_msg_partial_exception("incomplete tool call"); - } - builder.consume_regex(end_action_regex); - } else if (auto res = builder.try_find_regex(start_response_regex)) { - if (!builder.try_find_regex(end_response_regex)) { - builder.add_content(builder.consume_rest()); - throw common_chat_msg_partial_exception(end_response_regex.str()); - } - } else { - builder.add_content(builder.consume_rest()); - } - - if (print_results) { - std::cout << "== Parsed (legacy) ==\n"; - std::cout << "=== Reasoning ===\n"; - std::cout << builder.result().reasoning_content << "\n"; - std::cout << "\n\n=== Content ===\n"; - std::cout << builder.result().content << "\n"; - std::cout << "\n\n=== Tool Calls ===\n"; - for (const auto & tc : builder.result().tool_calls) { - std::cout << "id: " << 
tc.id << "\n"; - std::cout << "name: " << tc.name << "\n"; - std::cout << "args: " << tc.arguments << "\n"; - } - } - }; - - std::string reasoning = "To plan an effective trip to Japan that includes both historical sites and modern attractions within a " - "budget of $4000 for a two-week stay, we need to:\n\n" - "1. Identify key historical sites and modern attractions in Japan.\n" - "2. Find affordable accommodation options that provide a balance between comfort and cost.\n" - "3. Determine the best modes of transportation for getting around Japan.\n" - "4. Create a day-by-day itinerary that ensures the user gets to see a variety of attractions without " - "overspending.\n" - "5. Provide a detailed cost breakdown that includes accommodation, transportation, meals, and entry fees " - "to attractions."; - - std::vector> tool_calls = {{ - "call_0", - "plan_trip", - nlohmann::json::parse(R"({ + std::vector> tool_calls = { + { "call_0", "plan_trip", nlohmann::json::parse(R"({ "destination": "Japan", "duration": 14, "budget": 4000, @@ -694,8 +657,8 @@ void test_command7_parser_compare(testing & t) { "accommodation_preferences": "affordable", "transportation_preferences": "efficient", "meal_preferences": "local cuisine" - })") - }}; + })") } + }; std::vector tokens; @@ -712,10 +675,10 @@ void test_command7_parser_compare(testing & t) { auto json = nlohmann::json::array(); for (const auto & tc : tool_calls) { - auto tc_json = nlohmann::json::object(); + auto tc_json = nlohmann::json::object(); tc_json["tool_call_id"] = std::get<0>(tc); - tc_json["tool_name"] = std::get<1>(tc); - tc_json["parameters"] = std::get<2>(tc); + tc_json["tool_name"] = std::get<1>(tc); + tc_json["parameters"] = std::get<2>(tc); json.push_back(tc_json); } @@ -727,42 +690,191 @@ void test_command7_parser_compare(testing & t) { std::string input = std::accumulate(tokens.begin(), tokens.end(), std::string()); - // Run tests - t.test("legacy_parse", [&](testing & /* t */) { - test_legacy(input, false, 
false); + t.test("current_parse", [&](testing & /* t */) { test_current(parser, input, false, false); }); + t.bench("current_parse_benchmark complete", [&]() { test_current(parser, input, false, false); }, 100); + t.bench( + "current_parse_benchmark incremental", + [&]() { + std::string in; + for (auto i = 0u; i < tokens.size(); i++) { + in += tokens[i]; + test_current(parser, in, i + 1 < tokens.size(), false); + } + }, + 20); +} + +// Test that tool names that are proper prefixes of other tool names don't cause +// premature matching during incremental parsing. +// For example, "special_function" should not match when parsing "special_function_with_opt". +static void test_prefix_tool_names(testing & t) { + // Create tools where one name is a proper prefix of another + json tools = json::array(); + + json tool_short = { + { "type", "function" }, + { "function", + { + { "name", "special_function" }, + { "description", "A special function" }, + { "parameters", + { + { "type", "object" }, + { "properties", + { + { "arg1", { { "type", "integer" } } }, + } }, + { "required", { "arg1" } }, + } }, + } } + }; + tools.push_back(tool_short); + + json tool_long = { + { "type", "function" }, + { "function", + { + { "name", "special_function_with_opt" }, + { "description", "A special function with optional params" }, + { "parameters", + { + { "type", "object" }, + { "properties", + { + { "arg1", { { "type", "integer" } } }, + { "arg2", { { "type", "integer" } } }, + } }, + { "required", { "arg1" } }, + } }, + } } + }; + tools.push_back(tool_long); + + // Use standard_constructed_tools which had the prefix matching bug + std::map markers = { + { "tool_call_start_marker", "" }, + { "tool_call_end_marker", "" }, + { "function_opener", "" }, + { "function_name_suffix", ">" }, + { "parameter_key_prefix", "" }, + { "parameter_closer", "" }, + }; + + auto parser = build_chat_peg_unified_parser([&](common_chat_peg_unified_builder & p) { + auto content = p.rule("content", 
p.content(p.until(""))); + auto tool_call = p.standard_constructed_tools(markers, tools, false, false); + return content + p.zero_or_more(p.space() + tool_call) + p.end(); }); - t.test("current_parse", [&](testing & /* t */) { - test_current(parser, input, false, false); + // Test parsing the long tool name - this should NOT trigger the short tool name + t.test("parse long tool name", [&](testing & t) { + std::string input = + "Let me call the function." + "" + "" + "42" + "" + ""; + + common_peg_parse_context ctx(input, false); + auto result = parser.parse(ctx); + + t.assert_true("success", result.success()); + + common_chat_msg msg; + auto mapper = common_chat_peg_unified_mapper(msg); + mapper.from_ast(ctx.ast, result); + + t.assert_equal("content", "Let me call the function.", msg.content); + t.assert_equal("tool calls count", 1u, msg.tool_calls.size()); + if (!msg.tool_calls.empty()) { + t.assert_equal("tool name", "special_function_with_opt", msg.tool_calls[0].name); + } }); - // Run benchmarks - t.bench("legacy_parse_benchmark complete", [&]() { - test_legacy(input, false, false); - }); + // Test incremental parsing - the key test case + // This ensures that when incrementally parsing "special_function_with_opt", + // we don't prematurely emit "special_function" as a tool call + t.test("incremental parse long tool name", [&](testing & t) { + std::string input = + "Let me call the function." 
+ "" + "" + "42" + "" + ""; - t.bench("legacy_parse_benchmark incremental", [&]() { - std::string in; - for (auto i = 0u; i < tokens.size(); i++) { - in += tokens[i]; + std::vector tokens = simple_tokenize(input); + + common_chat_msg prev; + for (auto it = tokens.begin(); it != tokens.end(); it++) { + std::string in = std::accumulate(tokens.begin(), it + 1, std::string()); + + common_peg_parse_context ctx(in, it + 1 < tokens.end()); + auto result = parser.parse(ctx); + + if (!t.assert_equal("not fail", false, result.fail())) { + t.log(in.substr(0, result.end) + "[failed->]" + in.substr(result.end)); + return; + } + + common_chat_msg msg; + auto mapper = common_chat_peg_unified_mapper(msg); + mapper.from_ast(ctx.ast, result); + + // The critical check: during incremental parsing, we should never + // see "special_function" as the tool name when parsing "special_function_with_opt" + for (const auto & tc : msg.tool_calls) { + if (!t.assert_equal("tool name should not be short prefix", false, + tc.name == "special_function")) { + t.log("Premature tool name match at input: " + in); + return; + } + } try { - test_legacy(in, i + 1 < tokens.size(), false); - } catch (common_chat_msg_partial_exception & /* e */) { - // Do nothing, this is expected + auto diffs = common_chat_msg_diff::compute_diffs(prev, msg); + } catch (const std::exception & e) { + t.log(in.substr(0, result.end) + "[failed->]" + in.substr(result.end)); + t.assert_true(std::string("diff failed with ") + e.what(), false); + return; } - } - }, 20); - t.bench("current_parse_benchmark complete", [&]() { - test_current(parser, input, false, false); - }, 100); - - t.bench("current_parse_benchmark incremental", [&]() { - std::string in; - for (auto i = 0u; i < tokens.size(); i++) { - in += tokens[i]; - test_current(parser, in, i + 1 < tokens.size(), false); + prev = msg; } - }, 20); + + // Final check: the complete parse should have the correct tool name + t.assert_equal("final tool calls count", 1u, 
prev.tool_calls.size()); + if (!prev.tool_calls.empty()) { + t.assert_equal("final tool name", "special_function_with_opt", prev.tool_calls[0].name); + } + }); + + // Test parsing the short tool name still works + t.test("parse short tool name", [&](testing & t) { + std::string input = + "Let me call the function." + "" + "" + "42" + "" + ""; + + common_peg_parse_context ctx(input, false); + auto result = parser.parse(ctx); + + t.assert_true("success", result.success()); + + common_chat_msg msg; + auto mapper = common_chat_peg_unified_mapper(msg); + mapper.from_ast(ctx.ast, result); + + t.assert_equal("content", "Let me call the function.", msg.content); + t.assert_equal("tool calls count", 1u, msg.tool_calls.size()); + if (!msg.tool_calls.empty()) { + t.assert_equal("tool name", "special_function", msg.tool_calls[0].name); + } + }); } diff --git a/tests/test-chat-template.cpp b/tests/test-chat-template.cpp deleted file mode 100644 index 27b537a036..0000000000 --- a/tests/test-chat-template.cpp +++ /dev/null @@ -1,680 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include - -#include - -#undef NDEBUG -#include - -#include "llama.h" -#include "common.h" -#include "chat.h" -#include "jinja/runtime.h" -#include "jinja/parser.h" -#include "jinja/lexer.h" -#include "jinja/caps.h" - -using json = nlohmann::ordered_json; - -int main_automated_tests(void); - -void run_multiple(std::string dir_path, bool stop_on_first_failure, json input, bool use_common = false); -void run_single(std::string contents, json input, bool use_common = false, const std::string & output_path = ""); - - - -std::string HELP = R"( -Usage: test-chat-template [OPTIONS] PATH_TO_TEMPLATE -Options: - -h, --help Show this help message and exit. - --json Path to the JSON input file. - --stop-on-first-fail Stop testing on the first failure (default: false). - --no-common Use direct Jinja engine instead of common chat templates (default: use common). 
- --output Path to output results (only for single template runs). -If PATH_TO_TEMPLATE is a file, runs that single template. -If PATH_TO_TEMPLATE is a directory, runs all .jinja files in that directory. -If PATH_TO_TEMPLATE is omitted, runs automated tests (default CI mode). -)"; - -std::string DEFAULT_JSON = R"({ - "messages": [ - { - "role": "user", - "content": "Hello, how are you?" - }, - { - "role": "assistant", - "content": "I am fine, thank you!" - } - ], - "bos_token": "", - "eos_token": "", - "add_generation_prompt": true -})"; - -int main(int argc, char ** argv) { - std::vector args(argv, argv + argc); - - std::string tmpl_path; - std::string json_path; - std::string output_path; - bool stop_on_first_fail = false; - bool use_common = true; - - for (size_t i = 1; i < args.size(); i++) { - if (args[i] == "--help" || args[i] == "-h") { - std::cout << HELP << "\n"; - return 0; - } else if (args[i] == "--json" && i + 1 < args.size()) { - json_path = args[i + 1]; - i++; - } else if (args[i] == "--stop-on-first-fail") { - stop_on_first_fail = true; - } else if (args[i] == "--output" && i + 1 < args.size()) { - output_path = args[i + 1]; - i++; - } else if (args[i] == "--no-common") { - use_common = true; - } else if (tmpl_path.empty()) { - tmpl_path = args[i]; - } else { - std::cerr << "Unknown argument: " << args[i] << "\n"; - std::cout << HELP << "\n"; - return 1; - } - } - - if (tmpl_path.empty()) { - return main_automated_tests(); - } - - json input_json; - if (!json_path.empty()) { - std::ifstream json_file(json_path); - if (!json_file) { - std::cerr << "Error: Could not open JSON file: " << json_path << "\n"; - return 1; - } - std::string content = std::string( - std::istreambuf_iterator(json_file), - std::istreambuf_iterator()); - input_json = json::parse(content); - } else { - input_json = json::parse(DEFAULT_JSON); - } - - std::filesystem::path p(tmpl_path); - if (std::filesystem::is_directory(p)) { - run_multiple(tmpl_path, stop_on_first_fail, 
input_json, use_common); - } else if (std::filesystem::is_regular_file(p)) { - std::ifstream infile(tmpl_path); - std::string contents = std::string( - std::istreambuf_iterator(infile), - std::istreambuf_iterator()); - run_single(contents, input_json, use_common, output_path); - } else { - std::cerr << "Error: PATH_TO_TEMPLATE is not a valid file or directory: " << tmpl_path << "\n"; - return 1; - } - - return 0; -} - -void run_multiple(std::string dir_path, bool stop_on_first_fail, json input, bool use_common) { - std::vector failed_tests; - - // list all files in models/templates/ and run each - size_t test_count = 0; - - for (const auto & entry : std::filesystem::directory_iterator(dir_path)) { - // only process .jinja files - if (entry.path().extension() == ".jinja" && entry.is_regular_file()) { - test_count++; - std::cout << "\n\n=== RUNNING TEMPLATE FILE: " << entry.path().string() << " ===\n"; - std::ifstream infile(entry.path()); - std::string contents((std::istreambuf_iterator(infile)), std::istreambuf_iterator()); - try { - run_single(contents, input, use_common); - } catch (const std::exception & e) { - std::cout << "Exception: " << e.what() << "\n"; - std::cout << "=== ERROR WITH TEMPLATE FILE: " << entry.path().string() << " ===\n"; - failed_tests.push_back(entry.path().string()); - if (stop_on_first_fail) { - break; - } - } - } - } - - std::cout << "\n\n=== TEST SUMMARY ===\n"; - std::cout << "Total tests run: " << test_count << "\n"; - std::cout << "Total failed tests: " << failed_tests.size() << "\n"; - for (const auto & test : failed_tests) { - std::cout << "FAILED TEST: " << test << "\n"; - } -} - - -static std::string normalize_newlines(const std::string & s) { -#ifdef _WIN32 - static const std::regex nl_regex("\r\n"); - return std::regex_replace(s, nl_regex, "\n"); -#else - return s; -#endif -} - - -static std::string format_using_common( - const std::string & template_str, - const std::string & bos_token, - const std::string & eos_token, - 
std::vector & messages, - std::vector tools = {}) { - auto tmpls = common_chat_templates_init(/* model= */ nullptr, template_str, bos_token, eos_token); - common_chat_templates_inputs inputs; - inputs.use_jinja = true; - inputs.messages = messages; - inputs.tools = tools; - inputs.add_generation_prompt = true; - auto output = common_chat_templates_apply(tmpls.get(), inputs).prompt; - output = normalize_newlines(output); - return output; -} - - -// skip libcommon, use direct jinja engine -static jinja::value_string format_using_direct_engine( - const std::string & template_str, - json & input) { - // lexing - jinja::lexer lexer; - auto lexer_res = lexer.tokenize(template_str); - - // compile to AST - jinja::program ast = jinja::parse_from_tokens(lexer_res); - - // check caps for workarounds - jinja::caps_get(ast); - - std::cout << "\n=== RUN ===\n"; - jinja::context ctx(template_str); - - jinja::global_from_json(ctx, input, true); - - jinja::runtime runtime(ctx); - const jinja::value results = runtime.execute(ast); - auto parts = runtime.gather_string_parts(results); - - std::cout << "\n=== RESULTS ===\n"; - for (const auto & part : parts->as_string().parts) { - std::cout << (part.is_input ? 
"DATA" : "TMPL") << ": " << part.val << "\n"; - } - - return parts; -} - - -void run_single(std::string contents, json input, bool use_common, const std::string & output_path) { - jinja::enable_debug(true); - - jinja::value_string output_parts; - - if (use_common) { - std::string bos_token = ""; - std::string eos_token = ""; - if (input.contains("bos_token")) { - bos_token = input["bos_token"].get(); - } - if (input.contains("eos_token")) { - eos_token = input["eos_token"].get(); - } - nlohmann::ordered_json msgs_json = input["messages"]; - nlohmann::ordered_json tools_json = input["tools"]; - auto messages = common_chat_msgs_parse_oaicompat(msgs_json); - auto tools = common_chat_tools_parse_oaicompat(tools_json); - auto output = format_using_common(contents, bos_token, eos_token, messages, tools); - std::cout << "\n=== OUTPUT ===\n"; - std::cout << output << "\n"; - output_parts = jinja::mk_val(output); - - } else { - output_parts = format_using_direct_engine(contents, input); - std::cout << "\n=== OUTPUT ===\n"; - std::cout << output_parts->as_string().str() << "\n"; - } - - if (!output_path.empty()) { - std::ofstream outfile(output_path); - if (!outfile) { - throw std::runtime_error("Could not open output file: " + output_path); - } - outfile << output_parts->as_string().str(); - outfile.close(); - std::cout << "\n=== OUTPUT WRITTEN TO " << output_path << " ===\n"; - } -} - - - - - -// -// Automated tests for chat templates -// - -#define U8C(x) (const char*)(u8##x) - -static common_chat_msg simple_msg(const std::string & role, const std::string & content) { - common_chat_msg msg; - msg.role = role; - msg.content = content; - return msg; -} - -int main_automated_tests(void) { - // jinja::enable_debug(true); - - std::vector conversation { - {"system", "You are a helpful assistant"}, - {"user", "Hello"}, - {"assistant", "Hi there"}, - {"user", "Who are you"}, - {"assistant", " I am an assistant "}, - {"user", "Another question"}, - }; - - // std::string wrong = /* 
.template_str= */ u8"[gMASK]{% for item in messages %}{% if item['tools'] is defined %}<|system|>\n你是一个名为 ChatGLM 的人工智能助手。你是基于智谱AI训练的语言模型 GLM-4 模型开发的,你的任务是针对用户的问题和要求提供适当的答复和支持。\n\n# 可用工具{% set tools = item['tools'] %}{% for tool in tools %}{% if tool['type'] == 'function' %}\n\n## {{ tool['function']['name'] }}\n\n{{ tool['function'] | tojson(indent=4) }}\n......{% endif %}{% endfor %}{% endif %}{% if item['content'] %}<|{{ item['role'] }}|>{{ item['metadata'] }}\n{{ item['content'] }}{% endif %}{% endfor %}{% if add_generation_prompt %}<|assistant|>{% endif %}"; - struct TestCase { - std::string name; - std::string template_str; - std::string expected_output; - std::string expected_output_jinja; - std::string bos_token = ""; - std::string eos_token = ""; - bool supported_with_jinja = true; - }; - std::vector test_cases { - { - /* .name= */ "teknium/OpenHermes-2.5-Mistral-7B", - /* .template_str= */ "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\\n' + message['content'] + '<|im_end|>' + '\\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\\n' }}{% endif %}", - /* .expected_output= */ "<|im_start|>system\nYou are a helpful assistant<|im_end|>\n<|im_start|>user\nHello<|im_end|>\n<|im_start|>assistant\nHi there<|im_end|>\n<|im_start|>user\nWho are you<|im_end|>\n<|im_start|>assistant\n I am an assistant <|im_end|>\n<|im_start|>user\nAnother question<|im_end|>\n<|im_start|>assistant\n", - /* .expected_output_jinja= */ "", - /* .bos_token= */ "", - /* .eos_token= */ "", - }, - { - /* .name= */ "mistralai/Mistral-7B-Instruct-v0.2 (NOTE: Old pre-v1 without a system prompt)", - /* .template_str= */ "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ 
message['content'] + eos_token}}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}", - /* .expected_output= */ "[INST] You are a helpful assistant\nHello [/INST]Hi there[INST] Who are you [/INST] I am an assistant [INST] Another question [/INST]", - /* .expected_output_jinja= */ "", - /* .bos_token= */ "", - /* .eos_token= */ "", - }, - { - /* .name= */ "TheBloke/FusionNet_34Bx2_MoE-AWQ", - /* .template_str= */ "{%- for idx in range(0, messages|length) -%}\n{%- if messages[idx]['role'] == 'user' -%}\n{%- if idx > 1 -%}\n{{- bos_token + '[INST] ' + messages[idx]['content'] + ' [/INST]' -}}\n{%- else -%}\n{{- messages[idx]['content'] + ' [/INST]' -}}\n{%- endif -%}\n{% elif messages[idx]['role'] == 'system' %}\n{{- '[INST] <>\\n' + messages[idx]['content'] + '\\n<>\\n\\n' -}}\n{%- elif messages[idx]['role'] == 'assistant' -%}\n{{- ' ' + messages[idx]['content'] + ' ' + eos_token -}}\n{% endif %}\n{% endfor %}", - /* .expected_output= */ "[INST] <>\nYou are a helpful assistant\n<>\n\nHello [/INST]Hi there[INST] Who are you [/INST] I am an assistant [INST] Another question [/INST]", - /* .expected_output_jinja= */ "[INST] <>\nYou are a helpful assistant\n<>\n\nHello [/INST] Hi there [INST] Who are you [/INST] I am an assistant [INST] Another question [/INST]", - /* .bos_token= */ "", - /* .eos_token= */ "", - }, - { - /* .name= */ "bofenghuang/vigogne-2-70b-chat", - /* .template_str= */ "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% elif true == true and not '<>' in messages[0]['content'] %}{% set loop_messages = messages %}{% set system_message = 'Vous êtes Vigogne, un assistant IA créé par Zaion Lab. Vous suivez extrêmement bien les instructions. Aidez autant que vous le pouvez.' 
%}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if loop.index0 == 0 and system_message != false %}{% set content = '<>\\n' + system_message + '\\n<>\\n\\n' + message['content'] %}{% else %}{% set content = message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + content.strip() + ' [/INST]' }}{% elif message['role'] == 'system' %}{{ '<>\\n' + content.strip() + '\\n<>\\n\\n' }}{% elif message['role'] == 'assistant' %}{{ ' ' + content.strip() + ' ' + eos_token }}{% endif %}{% endfor %}", - /* .expected_output= */ "[INST] <>\nYou are a helpful assistant\n<>\n\nHello [/INST]Hi there[INST] Who are you [/INST]I am an assistant[INST] Another question [/INST]", - /* .expected_output_jinja= */ "[INST] <>\nYou are a helpful assistant\n<>\n\nHello [/INST] Hi there [INST] Who are you [/INST] I am an assistant [INST] Another question [/INST]", - /* .bos_token= */ "", - /* .eos_token= */ "", - }, - { - /* .name= */ "mlabonne/AlphaMonarch-7B", - /* .template_str= */ "{% for message in messages %}{{bos_token + message['role'] + '\\n' + message['content'] + eos_token + '\\n'}}{% endfor %}{% if add_generation_prompt %}{{ bos_token + 'assistant\\n' }}{% endif %}", - /* .expected_output= */ "system\nYou are a helpful assistant\nuser\nHello\nassistant\nHi there\nuser\nWho are you\nassistant\n I am an assistant \nuser\nAnother question\nassistant\n", - /* .expected_output_jinja= */ "system\nYou are a helpful assistant\nuser\nHello\nassistant\nHi there\nuser\nWho are you\nassistant\n I am an assistant \nuser\nAnother question\nassistant\n", - /* .bos_token= */ "", - /* .eos_token= */ "", - }, - { - /* .name= */ "google/gemma-7b-it", - /* .template_str= */ "{% if messages[0]['role'] == 'system' %}{{ raise_exception('System 
role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '' + role + '\\n' + message['content'] | trim + '\\n' }}{% endfor %}{% if add_generation_prompt %}{{'model\\n'}}{% endif %}", - /* .expected_output= */ "user\nYou are a helpful assistant\n\nHello\nmodel\nHi there\nuser\nWho are you\nmodel\nI am an assistant\nuser\nAnother question\nmodel\n", - /* .expected_output_jinja= */ "user\nYou are a helpful assistant\nHello\nmodel\nHi there\nuser\nWho are you\nmodel\nI am an assistant\nuser\nAnother question\nmodel\n", - }, - { - /* .name= */ "OrionStarAI/Orion-14B-Chat", - /* .template_str= */ "{% for message in messages %}{% if loop.first %}{{ bos_token }}{% endif %}{% if message['role'] == 'user' %}{{ 'Human: ' + message['content'] + '\\n\\nAssistant: ' + eos_token }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token }}{% endif %}{% endfor %}", - /* .expected_output= */ "Human: You are a helpful assistant\n\nHello\n\nAssistant: Hi thereHuman: Who are you\n\nAssistant: I am an assistant Human: Another question\n\nAssistant: ", - /* .expected_output_jinja= */ "Human: You are a helpful assistant\nHello\n\nAssistant: Hi thereHuman: Who are you\n\nAssistant: I am an assistant Human: Another question\n\nAssistant: ", - /* .bos_token= */ "", - /* .eos_token= */ "", - }, - { - /* .name= */ "openchat/openchat-3.5-0106", - // The included chat_template differs from the author's suggestions here: https://huggingface.co/openchat/openchat_3.5/discussions/5#65448109b4a3f3a2f486fd9d - // So we match against the included template but implement the suggested version. 
- /* .template_str= */ "{{ bos_token }}{% for message in messages %}{{ 'GPT4 Correct ' + message['role'].title() + ': ' + message['content'] + '<|end_of_turn|>'}}{% endfor %}{% if add_generation_prompt %}{{ 'GPT4 Correct Assistant:' }}{% endif %}", - /* .expected_output= */ "You are a helpful assistant<|end_of_turn|>GPT4 Correct User: Hello<|end_of_turn|>GPT4 Correct Assistant: Hi there<|end_of_turn|>GPT4 Correct User: Who are you<|end_of_turn|>GPT4 Correct Assistant: I am an assistant <|end_of_turn|>GPT4 Correct User: Another question<|end_of_turn|>GPT4 Correct Assistant:", - /* .expected_output_jinja= */ "GPT4 Correct System: You are a helpful assistant<|end_of_turn|>GPT4 Correct User: Hello<|end_of_turn|>GPT4 Correct Assistant: Hi there<|end_of_turn|>GPT4 Correct User: Who are you<|end_of_turn|>GPT4 Correct Assistant: I am an assistant <|end_of_turn|>GPT4 Correct User: Another question<|end_of_turn|>GPT4 Correct Assistant:", - }, - { - /* .name= */ "deepseek-ai/deepseek-coder-33b-instruct", - /* .template_str= */ "{% if not add_generation_prompt is defined %}\n{% set add_generation_prompt = false %}\n{% endif %}\n{%- set ns = namespace(found=false) -%}\n{%- for message in messages -%}\n {%- if message['role'] == 'system' -%}\n {%- set ns.found = true -%}\n {%- endif -%}\n{%- endfor -%}\n{{bos_token}}{%- if not ns.found -%}\n{{'You are an AI programming assistant, utilizing the Deepseek Coder model, developed by Deepseek Company, and you only answer questions related to computer science. 
For politically sensitive questions, security and privacy issues, and other non-computer science questions, you will refuse to answer\\n'}}\n{%- endif %}\n{%- for message in messages %}\n {%- if message['role'] == 'system' %}\n{{ message['content'] }}\n {%- else %}\n {%- if message['role'] == 'user' %}\n{{'### Instruction:\\n' + message['content'] + '\\n'}}\n {%- else %}\n{{'### Response:\\n' + message['content'] + '\\n<|EOT|>\\n'}}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{% if add_generation_prompt %}\n{{'### Response:'}}\n{% endif %}", - /* .expected_output= */ "You are a helpful assistant### Instruction:\nHello\n### Response:\nHi there\n<|EOT|>\n### Instruction:\nWho are you\n### Response:\n I am an assistant \n<|EOT|>\n### Instruction:\nAnother question\n### Response:\n", - /* .expected_output_jinja= */ "", - }, - { - /* .name= */ "eachadea/vicuna-13b-1.1", - // No template included in tokenizer_config.json, so this template likely needs to be manually set. - /* .template_str= */ "{%- for message in messages %}{%- if message['role'] == 'system' -%}{{- '' + message['content'] + '\n\n' -}}{%- else -%}{%- if message['role'] == 'user' -%}{{-'USER: ' + message['content'] + '\n'-}}{%- else -%}{{-'ASSISTANT: ' + message['content'] + '\n' -}}{%- endif -%}{%- endif -%}{%- endfor -%}{%- if add_generation_prompt -%}{{-'ASSISTANT:'-}}{%- endif -%}", - /* .expected_output= */ "You are a helpful assistant\n\nUSER: Hello\nASSISTANT: Hi there\nUSER: Who are you\nASSISTANT: I am an assistant \nUSER: Another question\nASSISTANT:", - /* .expected_output_jinja= */ "", - /* .bos_token= */ "", - /* .eos_token= */ "", - }, - { - /* .name= */ "Orca-Vicuna", - // No template included in tokenizer_config.json, so this template likely needs to be manually set. 
- /* .template_str= */ "{%- for message in messages %}{%- if message['role'] == 'system' -%}{{-'SYSTEM: ' + message['content'] + '\n' -}}{%- else -%}{%- if message['role'] == 'user' -%}{{-'USER: ' + message['content'] + '\n'-}}{%- else -%}{{-'ASSISTANT: ' + message['content'] + '\n' -}}{%- endif -%}{%- endif -%}{%- endfor -%}{%- if add_generation_prompt -%}{{-'ASSISTANT:'-}}{%- endif -%}", - /* .expected_output= */ "SYSTEM: You are a helpful assistant\nUSER: Hello\nASSISTANT: Hi there\nUSER: Who are you\nASSISTANT: I am an assistant \nUSER: Another question\nASSISTANT:", - /* .expected_output_jinja= */ "", - /* .bos_token= */ "", - /* .eos_token= */ "", - }, - { - /* .name= */ "CohereForAI/c4ai-command-r-plus", - /* .template_str= */ "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% elif false == true %}{% set loop_messages = messages %}{% set system_message = 'You are Command-R, a brilliant, sophisticated, AI-assistant trained to assist human users by providing thorough responses. You are trained by Cohere.' 
%}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% if system_message != false %}{{ '<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>' + system_message + '<|END_OF_TURN_TOKEN|>' }}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ '<|START_OF_TURN_TOKEN|><|USER_TOKEN|>' + content.strip() + '<|END_OF_TURN_TOKEN|>' }}{% elif message['role'] == 'assistant' %}{{ '<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>' + content.strip() + '<|END_OF_TURN_TOKEN|>' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>' }}{% endif %}", - /* .expected_output= */ "<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>You are a helpful assistant<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Hello<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>Hi there<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Who are you<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>I am an assistant<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Another question<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>", - /* .expected_output_jinja= */ "", - }, - { - /* .name= */ "Llama-3", - /* .template_str= */ "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}", - /* .expected_output= */ "<|start_header_id|>system<|end_header_id|>\n\nYou are a helpful 
assistant<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nHello<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\nHi there<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nWho are you<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\nI am an assistant<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nAnother question<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n", - /* .expected_output_jinja= */ "", - }, - { - /* .name= */ "Phi-3-mini", - /* .template_str= */ "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') %}{{'<|user|>' + '\n' + message['content'] + '<|end|>' + '\n' + '<|assistant|>' + '\n'}}{% elif (message['role'] == 'assistant') %}{{message['content'] + '<|end|>' + '\n'}}{% endif %}{% endfor %}", - /* .expected_output= */ "<|system|>\nYou are a helpful assistant<|end|>\n<|user|>\nHello<|end|>\n<|assistant|>\nHi there<|end|>\n<|user|>\nWho are you<|end|>\n<|assistant|>\n I am an assistant <|end|>\n<|user|>\nAnother question<|end|>\n<|assistant|>\n", - /* .expected_output_jinja= */ "<|user|>\nYou are a helpful assistant\nHello<|end|>\n<|assistant|>\nHi there<|end|>\n<|user|>\nWho are you<|end|>\n<|assistant|>\n I am an assistant <|end|>\n<|user|>\nAnother question<|end|>\n<|assistant|>\n", - }, - { - /* .name= */ "Phi-3-small", - /* .template_str= */ "{{ bos_token }}{% for message in messages %}{{'<|' + message['role'] + '|>' + '\n' + message['content'] + '<|end|>\n' }}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>\n' }}{% else %}{{ eos_token }}{% endif %}", - /* .expected_output= */ "<|system|>\nYou are a helpful assistant<|end|>\n<|user|>\nHello<|end|>\n<|assistant|>\nHi there<|end|>\n<|user|>\nWho are you<|end|>\n<|assistant|>\n I am an assistant <|end|>\n<|user|>\nAnother question<|end|>\n<|assistant|>\n", - /* .expected_output_jinja= */ "", - }, - { - /* .name= */ "Phi-3-medium", - /* .template_str= */ "{% for message in messages %}{% if (message['role'] == 'user') 
%}{{'<|user|>' + '\n' + message['content'] + '<|end|>' + '\n' + '<|assistant|>' + '\n'}}{% elif (message['role'] == 'assistant') %}{{message['content'] + '<|end|>' + '\n'}}{% endif %}{% endfor %}", - /* .expected_output= */ "<|system|>\nYou are a helpful assistant<|end|>\n<|user|>\nHello<|end|>\n<|assistant|>\nHi there<|end|>\n<|user|>\nWho are you<|end|>\n<|assistant|>\n I am an assistant <|end|>\n<|user|>\nAnother question<|end|>\n<|assistant|>\n", - /* .expected_output_jinja= */ "<|user|>\nYou are a helpful assistant\nHello<|end|>\n<|assistant|>\nHi there<|end|>\n<|user|>\nWho are you<|end|>\n<|assistant|>\n I am an assistant <|end|>\n<|user|>\nAnother question<|end|>\n<|assistant|>\n", - }, - { - /* .name= */ "Phi-3-vision", - /* .template_str= */ "{% for message in messages %}{{'<|' + message['role'] + '|>' + '\n' + message['content'] + '<|end|>\n' }}{% endfor %}{% if add_generation_prompt and messages[-1]['role'] != 'assistant' %}{{- '<|assistant|>\n' -}}{% endif %}", - /* .expected_output= */ "<|system|>\nYou are a helpful assistant<|end|>\n<|user|>\nHello<|end|>\n<|assistant|>\nHi there<|end|>\n<|user|>\nWho are you<|end|>\n<|assistant|>\n I am an assistant <|end|>\n<|user|>\nAnother question<|end|>\n<|assistant|>\n", - /* .expected_output_jinja= */ "", - /* .bos_token= */ "", - /* .eos_token= */ "", - }, - { - /* .name= */ "ChatGLM3", - /* .template_str= */ "{% for message in messages %}{% if loop.first %}[gMASK]sop<|{{ message['role'] }}|>\n {{ message['content'] }}{% else %}<|{{ message['role'] }}|>\n {{ message['content'] }}{% endif %}{% endfor %}{% if add_generation_prompt %}<|assistant|>{% endif %}", - /* .expected_output= */ "[gMASK]sop<|system|>\n You are a helpful assistant<|user|>\n Hello<|assistant|>\n Hi there<|user|>\n Who are you<|assistant|>\n I am an assistant <|user|>\n Another question<|assistant|>", - /* .expected_output_jinja= */ "[gMASK]sop<|system|>\n You are a helpful assistant<|user|>\n Hello<|assistant|>\n Hi there<|user|>\n Who are 
you<|assistant|>\n I am an assistant <|user|>\n Another question<|assistant|>", - }, - { - /* .name= */ "ChatGLM4", - /* .template_str= */ U8C("[gMASK]{% for item in messages %}{% if item['tools'] is defined %}<|system|>\n你是一个名为 ChatGLM 的人工智能助手。你是基于智谱AI训练的语言模型 GLM-4 模型开发的,你的任务是针对用户的问题和要求提供适当的答复和支持。\n\n# 可用工具{% set tools = item['tools'] %}{% for tool in tools %}{% if tool['type'] == 'function' %}\n\n## {{ tool['function']['name'] }}\n\n{{ tool['function'] | tojson(indent=4) }}\n......{% endif %}{% endfor %}{% endif %}{% if item['content'] %}<|{{ item['role'] }}|>{{ item['metadata'] }}\n{{ item['content'] }}{% endif %}{% endfor %}{% if add_generation_prompt %}<|assistant|>\n{% endif %}"), - /* .expected_output= */ "[gMASK]<|system|>\nYou are a helpful assistant<|user|>\nHello<|assistant|>\nHi there<|user|>\nWho are you<|assistant|>\n I am an assistant <|user|>\nAnother question<|assistant|>\n", - /* .expected_output_jinja= */ "", - /* .bos_token= */ "", - /* .eos_token= */ "", - }, - { - /* .name= */ "GLMEdge", - /* .template_str= */ "{% for item in messages %}{% if item['role'] == 'system' %}<|system|>\n{{ item['content'] }}{% elif item['role'] == 'user' %}<|user|>\n{{ item['content'] }}{% elif item['role'] == 'assistant' %}<|assistant|>\n{{ item['content'] }}{% endif %}{% endfor %}<|assistant|>", - /* .expected_output= */ "<|system|>\nYou are a helpful assistant<|user|>\nHello<|assistant|>\nHi there<|user|>\nWho are you<|assistant|>\n I am an assistant <|user|>\nAnother question<|assistant|>", - /* .expected_output_jinja= */ "<|system|>\nYou are a helpful assistant<|user|>\nHello<|assistant|>\nHi there<|user|>\nWho are you<|assistant|>\n I am an assistant <|user|>\nAnother question<|assistant|>", - /* .bos_token= */ "", - /* .eos_token= */ "", - }, - { - /* .name= */ "MiniCPM-3B-OpenHermes-2.5-v2-GGUF", - /* .template_str= */ U8C("{% for message in messages %}{% if message['role'] == 'user' %}{{'<用户>' + message['content'].strip() + ''}}{% else 
%}{{message['content'].strip()}}{% endif %}{% endfor %}"), - /* .expected_output= */ U8C("You are a helpful assistant<用户>HelloHi there<用户>Who are youI am an assistant<用户>Another question"), - /* .expected_output_jinja= */ "", - /* .bos_token= */ "", - /* .eos_token= */ "", - }, - { - /* .name= */ "DeepSeek-V2", - /* .template_str= */ "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{{ bos_token }}{% for message in messages %}{% if message['role'] == 'user' %}{{ 'User: ' + message['content'] + '\n\n' }}{% elif message['role'] == 'assistant' %}{{ 'Assistant: ' + message['content'] + eos_token }}{% elif message['role'] == 'system' %}{{ message['content'] + '\n\n' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ 'Assistant:' }}{% endif %}", - /* .expected_output= */ U8C("You are a helpful assistant\n\nUser: Hello\n\nAssistant: Hi there<|end▁of▁sentence|>User: Who are you\n\nAssistant: I am an assistant <|end▁of▁sentence|>User: Another question\n\nAssistant:"), - /* .expected_output_jinja= */ "", - /* .bos_token= */ "", - /* .eos_token= */ "<|end▁of▁sentence|>", - }, - { - /* .name= */ "ibm-granite/granite-3.0-8b-instruct", - /* .template_str= */ "{%- if tools %}\n {{- '<|start_of_role|>available_tools<|end_of_role|>\n' }}\n {%- for tool in tools %}\n {{- tool | tojson(indent=4) }}\n {%- if not loop.last %}\n {{- '\n\n' }}\n {%- endif %}\n {%- endfor %}\n {{- '<|end_of_text|>\n' }}\n{%- endif %}\n{%- for message in messages %}\n {%- if message['role'] == 'system' %}\n {{- '<|start_of_role|>system<|end_of_role|>' + message['content'] + '<|end_of_text|>\n' }}\n {%- elif message['role'] == 'user' %}\n {{- '<|start_of_role|>user<|end_of_role|>' + message['content'] + '<|end_of_text|>\n' }}\n {%- elif message['role'] == 'assistant' %}\n {{- '<|start_of_role|>assistant<|end_of_role|>' + message['content'] + '<|end_of_text|>\n' }}\n {%- elif message['role'] == 'assistant_tool_call' %}\n {{- 
'<|start_of_role|>assistant<|end_of_role|><|tool_call|>' + message['content'] + '<|end_of_text|>\n' }}\n {%- elif message['role'] == 'tool_response' %}\n {{- '<|start_of_role|>tool_response<|end_of_role|>' + message['content'] + '<|end_of_text|>\n' }}\n {%- endif %}\n {%- if loop.last and add_generation_prompt %}\n {{- '<|start_of_role|>assistant<|end_of_role|>' }}\n {%- endif %}\n{%- endfor %}", - /* .expected_output= */ "<|start_of_role|>system<|end_of_role|>You are a helpful assistant<|end_of_text|>\n<|start_of_role|>user<|end_of_role|>Hello<|end_of_text|>\n<|start_of_role|>assistant<|end_of_role|>Hi there<|end_of_text|>\n<|start_of_role|>user<|end_of_role|>Who are you<|end_of_text|>\n<|start_of_role|>assistant<|end_of_role|> I am an assistant <|end_of_text|>\n<|start_of_role|>user<|end_of_role|>Another question<|end_of_text|>\n<|start_of_role|>assistant<|end_of_role|>", - /* .expected_output_jinja= */ "<|start_of_role|>system<|end_of_role|>You are a helpful assistant<|end_of_text|>\n<|start_of_role|>user<|end_of_role|>Hello<|end_of_text|>\n<|start_of_role|>assistant<|end_of_role|>Hi there<|end_of_text|>\n<|start_of_role|>user<|end_of_role|>Who are you<|end_of_text|>\n<|start_of_role|>assistant<|end_of_role|> I am an assistant <|end_of_text|>\n<|start_of_role|>user<|end_of_role|>Another question<|end_of_text|>\n<|start_of_role|>assistant<|end_of_role|>", - }, - { - /* .name= */ "mistralai/Mistral-7B-Instruct-v0.2 (mistralai 'v1' template with a system prompt)", - /* .template_str= */ "{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content'] %}\n {%- set loop_messages = messages[1:] %}\n{%- else %}\n {%- set loop_messages = messages %}\n{%- endif %}\n\n{{- bos_token }}\n{%- for message in loop_messages %}\n {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}\n {{- raise_exception('After the optional system message, conversation roles must alternate user/assistant/user/assistant/...') }}\n {%- endif %}\n {%- if 
message['role'] == 'user' %}\n {%- if loop.first and system_message is defined %}\n {{- ' [INST] ' + system_message + '\\n\\n' + message['content'] + ' [/INST]' }}\n {%- else %}\n {{- ' [INST] ' + message['content'] + ' [/INST]' }}\n {%- endif %}\n {%- elif message['role'] == 'assistant' %}\n {{- ' ' + message['content'] + eos_token}}\n {%- else %}\n {{- raise_exception('Only user and assistant roles are supported, with the exception of an initial optional system message!') }}\n {%- endif %}\n{%- endfor %}\n", - /* .expected_output= */ " [INST] You are a helpful assistant\n\nHello [/INST] Hi there [INST] Who are you [/INST] I am an assistant [INST] Another question [/INST]", - /* .expected_output_jinja= */ " [INST] You are a helpful assistant\n\nHello [/INST] Hi there [INST] Who are you [/INST] I am an assistant [INST] Another question [/INST]", - /* .bos_token= */ "", - /* .eos_token= */ "", - }, - { - /* .name= */ "Mistral-Large-Instruct-2407 (mistralai 'v3' template; modified to have system prompt at start)", - /* .template_str= */ "{%- if messages[0][\"role\"] == \"system\" %}\n {%- set system_message = messages[0][\"content\"] %}\n {%- set loop_messages = messages[1:] %}\n{%- else %}\n {%- set loop_messages = messages %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n{%- set user_messages = loop_messages | selectattr(\"role\", \"equalto\", \"user\") | list %}\n\n{#- This block checks for alternating user/assistant messages, skipping tool calling messages #}\n{%- set ns = namespace() %}\n{%- set ns.index = 0 %}\n{%- for message in loop_messages %}\n {%- if not (message.role == \"tool\" or message.role == \"tool_results\" or (message.tool_calls is defined and message.tool_calls is not none)) %}\n {%- if (message[\"role\"] == \"user\") != (ns.index % 2 == 0) %}\n {{- raise_exception(\"After the optional system message, conversation roles must alternate user/assistant/user/assistant/...\") }}\n {%- endif %}\n {%- set 
ns.index = ns.index + 1 %}\n {%- endif %}\n{%- endfor %}\n\n{{- bos_token }}\n{%- for message in loop_messages %}\n {%- if message[\"role\"] == \"user\" %}\n {%- if tools is not none and (message == user_messages[-1]) %}\n {{- \"[AVAILABLE_TOOLS] [\" }}\n {%- for tool in tools %}\n {%- set tool = tool.function %}\n {{- '{\"type\": \"function\", \"function\": {' }}\n {%- for key, val in tool.items() if key != \"return\" %}\n {%- if val is string %}\n {{- '\"' + key + '\": \"' + val + '\"' }}\n {%- else %}\n {{- '\"' + key + '\": ' + val|tojson }}\n {%- endif %}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- endif %}\n {%- endfor %}\n {{- \"}}\" }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- else %}\n {{- \"]\" }}\n {%- endif %}\n {%- endfor %}\n {{- \"[/AVAILABLE_TOOLS]\" }}\n {%- endif %}\n {%- if loop.last and system_message is defined %}\n {{- \"[INST] \" + system_message + \"\\n\\n\" + message[\"content\"] + \"[/INST]\" }}\n {%- else %}\n {{- \"[INST] \" + message[\"content\"] + \"[/INST]\" }}\n {%- endif %}\n {%- elif message.tool_calls is defined and message.tool_calls is not none %}\n {{- \"[TOOL_CALLS] [\" }}\n {%- for tool_call in message.tool_calls %}\n {%- set out = tool_call.function|tojson %}\n {{- out[:-1] }}\n {%- if not tool_call.id is defined or tool_call.id|length != 9 %}\n {{- raise_exception(\"Tool call IDs should be alphanumeric strings with length 9!\") }}\n {%- endif %}\n {{- ', \"id\": \"' + tool_call.id + '\"}' }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- else %}\n {{- \"]\" + eos_token }}\n {%- endif %}\n {%- endfor %}\n {%- elif message[\"role\"] == \"assistant\" %}\n {{- \" \" + message[\"content\"]|trim + eos_token}}\n {%- elif message[\"role\"] == \"tool_results\" or message[\"role\"] == \"tool\" %}\n {%- if message.content is defined and message.content.content is defined %}\n {%- set content = message.content.content %}\n {%- else %}\n {%- set content = message.content %}\n {%- endif %}\n {{- '[TOOL_RESULTS] {\"content\": ' 
+ content|string + \", \" }}\n {%- if not message.tool_call_id is defined or message.tool_call_id|length != 9 %}\n {{- raise_exception(\"Tool call IDs should be alphanumeric strings with length 9!\") }}\n {%- endif %}\n {{- '\"call_id\": \"' + message.tool_call_id + '\"}[/TOOL_RESULTS]' }}\n {%- else %}\n {{- raise_exception(\"Only user and assistant roles are supported, with the exception of an initial optional system message!\") }}\n {%- endif %}\n{%- endfor %}\n", - /* .expected_output= */ "[INST] You are a helpful assistant\n\nHello[/INST] Hi there[INST] Who are you[/INST] I am an assistant[INST] Another question[/INST]", - /* .expected_output_jinja= */ "[INST] Hello[/INST] Hi there[INST] Who are you[/INST] I am an assistant[INST] You are a helpful assistant\n\nAnother question[/INST]", - /* .bos_token= */ "", - /* .eos_token= */ "", - }, - { - /* .name= */ "Mistral-Nemo-Instruct-2407 (mistralai 'v3-tekken' template; modified to have system prompt at start)", - /* .template_str= */ "{%- if messages[0][\"role\"] == \"system\" %}\n {%- set system_message = messages[0][\"content\"] %}\n {%- set loop_messages = messages[1:] %}\n{%- else %}\n {%- set loop_messages = messages %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n{%- set user_messages = loop_messages | selectattr(\"role\", \"equalto\", \"user\") | list %}\n\n{#- This block checks for alternating user/assistant messages, skipping tool calling messages #}\n{%- set ns = namespace() %}\n{%- set ns.index = 0 %}\n{%- for message in loop_messages %}\n {%- if not (message.role == \"tool\" or message.role == \"tool_results\" or (message.tool_calls is defined and message.tool_calls is not none)) %}\n {%- if (message[\"role\"] == \"user\") != (ns.index % 2 == 0) %}\n {{- raise_exception(\"After the optional system message, conversation roles must alternate user/assistant/user/assistant/...\") }}\n {%- endif %}\n {%- set ns.index = ns.index + 1 %}\n {%- endif %}\n{%- endfor 
%}\n\n{{- bos_token }}\n{%- for message in loop_messages %}\n {%- if message[\"role\"] == \"user\" %}\n {%- if tools is not none and (message == user_messages[-1]) %}\n {{- \"[AVAILABLE_TOOLS][\" }}\n {%- for tool in tools %}\n {%- set tool = tool.function %}\n {{- '{\"type\": \"function\", \"function\": {' }}\n {%- for key, val in tool.items() if key != \"return\" %}\n {%- if val is string %}\n {{- '\"' + key + '\": \"' + val + '\"' }}\n {%- else %}\n {{- '\"' + key + '\": ' + val|tojson }}\n {%- endif %}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- endif %}\n {%- endfor %}\n {{- \"}}\" }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- else %}\n {{- \"]\" }}\n {%- endif %}\n {%- endfor %}\n {{- \"[/AVAILABLE_TOOLS]\" }}\n {%- endif %}\n {%- if loop.last and system_message is defined %}\n {{- \"[INST]\" + system_message + \"\\n\\n\" + message[\"content\"] + \"[/INST]\" }}\n {%- else %}\n {{- \"[INST]\" + message[\"content\"] + \"[/INST]\" }}\n {%- endif %}\n {%- elif (message.tool_calls is defined and message.tool_calls is not none) %}\n {{- \"[TOOL_CALLS][\" }}\n {%- for tool_call in message.tool_calls %}\n {%- set out = tool_call.function|tojson %}\n {{- out[:-1] }}\n {%- if not tool_call.id is defined or tool_call.id|length != 9 %}\n {{- raise_exception(\"Tool call IDs should be alphanumeric strings with length 9!\") }}\n {%- endif %}\n {{- ', \"id\": \"' + tool_call.id + '\"}' }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- else %}\n {{- \"]\" + eos_token }}\n {%- endif %}\n {%- endfor %}\n {%- elif message[\"role\"] == \"assistant\" %}\n {{- message[\"content\"] + eos_token}}\n {%- elif message[\"role\"] == \"tool_results\" or message[\"role\"] == \"tool\" %}\n {%- if message.content is defined and message.content.content is defined %}\n {%- set content = message.content.content %}\n {%- else %}\n {%- set content = message.content %}\n {%- endif %}\n {{- '[TOOL_RESULTS]{\"content\": ' + content|string + \", \" }}\n {%- if not message.tool_call_id is 
defined or message.tool_call_id|length != 9 %}\n {{- raise_exception(\"Tool call IDs should be alphanumeric strings with length 9!\") }}\n {%- endif %}\n {{- '\"call_id\": \"' + message.tool_call_id + '\"}[/TOOL_RESULTS]' }}\n {%- else %}\n {{- raise_exception(\"Only user and assistant roles are supported, with the exception of an initial optional system message!\") }}\n {%- endif %}\n{%- endfor %}\n", - /* .expected_output= */ "[INST]You are a helpful assistant\n\nHello[/INST]Hi there[INST]Who are you[/INST] I am an assistant [INST]Another question[/INST]", - /* .expected_output_jinja= */ "[INST]Hello[/INST]Hi there[INST]Who are you[/INST] I am an assistant [INST]You are a helpful assistant\n\nAnother question[/INST]", - /* .bos_token= */ "", - /* .eos_token= */ "", - }, - { - /* .name= */ "mistralai/Mistral-Large-Instruct-2411 (mistralai 'v7' template)", - /* .template_str= */ "{{ bos_token }}{% for message in messages %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + '[/INST]' }}{% elif message['role'] == 'system' %}{{ '[SYSTEM_PROMPT] ' + message['content'] + '[/SYSTEM_PROMPT]' }}{% elif message['role'] == 'assistant' %}{{ ' ' + message['content'] + eos_token }}{% else %}{{ raise_exception('Only user, system and assistant roles are supported!') }}{% endif %}{% endfor %}", - /* .expected_output= */ "[SYSTEM_PROMPT] You are a helpful assistant[/SYSTEM_PROMPT][INST] Hello[/INST] Hi there[INST] Who are you[/INST] I am an assistant [INST] Another question[/INST]", - /* .expected_output_jinja= */ "", - /* .bos_token= */ "", - /* .eos_token= */ "", - }, - { - /* .name= */ "ai-sage/GigaChat-20B-A3B-instruct", - /* .template_str= */ "{% if messages[0]['role'] == 'system' -%}\n {%- set loop_messages = messages[1:] -%}\n {%- set system_message = bos_token + messages[0]['content'] + additional_special_tokens[1] -%}\n{%- else -%}\n {%- set loop_messages = messages -%}\n {%- set system_message = bos_token + '' -%}\n{%- endif -%}\n{%- for message in 
loop_messages %}\n {% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}\n {{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}\n {% endif %}\n \n {%- if loop.index0 == 0 -%}\n {{ system_message -}}\n {%- endif -%}\n {%- if message['role'] == 'user' -%}\n {{ message['role'] + additional_special_tokens[0] + message['content'] + additional_special_tokens[1] -}}\n {{ 'available functions' + additional_special_tokens[0] + additional_special_tokens[2] + additional_special_tokens[3] + additional_special_tokens[1] -}}\n {%- endif -%}\n {%- if message['role'] == 'assistant' -%}\n {{ message['role'] + additional_special_tokens[0] + message['content'] + additional_special_tokens[1] -}}\n {%- endif -%}\n {%- if loop.last and add_generation_prompt -%}\n {{ 'assistant' + additional_special_tokens[0] -}}\n {%- endif -%}\n{%- endfor %}", - /* .expected_output= */ "You are a helpful assistant<|message_sep|>user<|role_sep|>Hello<|message_sep|>available functions<|role_sep|>[]<|message_sep|>assistant<|role_sep|>Hi there<|message_sep|>user<|role_sep|>Who are you<|message_sep|>available functions<|role_sep|>[]<|message_sep|>assistant<|role_sep|> I am an assistant <|message_sep|>user<|role_sep|>Another question<|message_sep|>available functions<|role_sep|>[]<|message_sep|>assistant<|role_sep|>", - /* .expected_output_jinja= */ "", - /* .bos_token= */ "", - /* .eos_token= */ "", - /* .supported_with_jinja= */ false, // Requires additional_special_tokens as extra context - }, - { - /* .name= */ "Infinigence/Megrez-3B-Instruct", - /* .template_str= */ U8C("{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|role_start|>system<|role_end|>你是Megrez-3B-Instruct,将针对用户的问题给出详细的、积极的回答。<|turn_end|>' }}{% endif %}{{ '<|role_start|>' + message['role'] + '<|role_end|>' + message['content'] + '<|turn_end|>' }}{% endfor %}{% if add_generation_prompt %}{{ '<|role_start|>assistant<|role_end|>' }}{% endif %}"), - /* 
.expected_output= */ "<|role_start|>system<|role_end|>You are a helpful assistant<|turn_end|><|role_start|>user<|role_end|>Hello<|turn_end|><|role_start|>assistant<|role_end|>Hi there<|turn_end|><|role_start|>user<|role_end|>Who are you<|turn_end|><|role_start|>assistant<|role_end|> I am an assistant <|turn_end|><|role_start|>user<|role_end|>Another question<|turn_end|><|role_start|>assistant<|role_end|>", - /* .expected_output_jinja= */ "", - /* .bos_token= */ "", - /* .eos_token= */ "", - }, - { - /* .name= */ "phi-4", - /* .template_str= */ "{% for message in messages %}{% if (message['role'] == 'system') %}{{'<|im_start|>system<|im_sep|>' + message['content'] + '<|im_end|>'}}{% elif (message['role'] == 'user') %}{{'<|im_start|>user<|im_sep|>' + message['content'] + '<|im_end|><|im_start|>assistant<|im_sep|>'}}{% elif (message['role'] == 'assistant') %}{{message['content'] + '<|im_end|>'}}{% endif %}{% endfor %}", - /* .expected_output= */ "<|im_start|>system<|im_sep|>You are a helpful assistant<|im_end|><|im_start|>user<|im_sep|>Hello<|im_end|><|im_start|>assistant<|im_sep|>Hi there<|im_end|><|im_start|>user<|im_sep|>Who are you<|im_end|><|im_start|>assistant<|im_sep|> I am an assistant <|im_end|><|im_start|>user<|im_sep|>Another question<|im_end|><|im_start|>assistant<|im_sep|>", - /* .expected_output_jinja= */ "", - /* .bos_token= */ "", - /* .eos_token= */ "", - }, - { - /* .name= */ "yandex/YandexGPT-5-Lite-8B-instruct", - /* .template_str= */ "{%- set names = {'assistant': ' Ассистент:', 'user': ' Пользователь:'} %}\n{%- set tools_prefix = 'Тебе доступны следующие функции:' %}\n{%- macro __render_tool(tool) %}\n {%- set name = tool.function.name %}\n {%- set description = tool.function.description|default('') %}\n {%- set parameters = tool.function.parameters|tojson %}\n {{- '\\n' }}function {{ '{' }}'name':'{{ name }}',\n {%- if tool.function.description %}'description':'{{ description }}',{% endif %}\n'parameters':{{ parameters }}\n {{- '}' }}\n{%- 
endmacro %}\n{%- macro __render_tools(tools) %}\n {{- tools_prefix }}\n {%- for tool in tools %}\n {{- __render_tool(tool) }}\n {%- endfor %}\n {{- '\\n\\n' }}\n{%- endmacro %}\n{%- macro __render_tool_message(message) %}\n {{- '\\n\\nРезультат вызова' }} {{ message.name }}: {{ message.content }} {{ '\\n\\n' }}\n{%- endmacro %}\n{%- if tools -%}\n {{- __render_tools(tools) }}\n{%- endif -%}\n{%- macro __render_user_message(message) %}\n{{ names.user }} {{ message.content + '\\n\\n' }}\n{%- endmacro %}\n{%- macro __render_assistant_message(message) %}\n {{- names.assistant }}\n {%- set call = message['function_call'] %}\n {%- if call %}\n {{- '\\n[TOOL_CALL_START]' }}{{ call.name }}{{ '\\n' }}{{ call.arguments|tojson }}\n {%- else %}\n {{- ' ' + message.content + '\\n\\n' }}\n {%- endif %}\n{%- endmacro %}\n{%- if not add_generation_prompt is defined %}\n{%- set add_generation_prompt = false %}\n{%- endif %}\n{%- for message in messages %}\n {%- if message['role'] == 'user' %}\n {{- __render_user_message(message) }}\n {%- endif %}\n {%- if message.role == 'assistant' and not loop.last %}\n {{- __render_assistant_message(message) }}\n {%- endif %}\n {%- if message.role == 'tool' %}\n {{- __render_tool_message(message) }}\n {%- endif %}\n {%- if loop.last %}\n {{- ' Ассистент:[SEP]' }}\n {%- endif %}\n{%- endfor %}\n", - /* .expected_output= */ " Пользователь: Hello\n\n Ассистент: Hi there\n\n Пользователь: Who are you\n\n Ассистент: I am an assistant \n\n Пользователь: Another question\n\n Ассистент:[SEP]", - /* .expected_output_jinja= */ " Пользователь: You are a helpful assistant\nHello\n\n Ассистент: Hi there\n\n Пользователь: Who are you\n\n Ассистент: I am an assistant \n\n Пользователь: Another question\n\n Ассистент:[SEP]", - /* .bos_token= */ "", - /* .eos_token= */ "", - }, - { - /* .name= */ "inclusionAI/Ling-lite", - /* .template_str */ "{% for message in messages %}{% set role = message['role'] | lower %}{% if role == 'user' %}{% set role = 'HUMAN' %}{% 
endif %}{% set role = role | upper %}{{ '' + role + '' + message['content'] }}{% endfor %}{% if add_generation_prompt %}{{ 'ASSISTANT' }}{% endif %}", - /* .expected_output= */ "SYSTEMYou are a helpful assistantHUMANHelloASSISTANTHi thereHUMANWho are youASSISTANT I am an assistant HUMANAnother questionASSISTANT", - /* .expected_output_jinja= */ "", - /* .bos_token= */ "", - /* .eos_token= */ "", - }, - { - /* .name= */ "ByteDance-Seed/Seed-OSS-36B-Instruct", - /* .template_str */ "{# #}{%- for message in messages %}{%- if message.role in [\"user\", \"system\"] %}{{ bos_token + message.role + \"\\n\" + message.content + eos_token }}{%- elif message.role == \"assistant\" %}{{ bos_token + message.role }}{%- if message.content is defined and message.content is string and message.content|trim|length > 0 %}{{ \"\\n\" + message.content|trim + eos_token }}{%- endif %}{%- else %}{{ bos_token + message.role + \"\\n\" + message.content + eos_token }}{%- endif %}{%- endfor %}{%- if add_generation_prompt %}{{ bos_token + \"assistant\\n\" }}{%- endif %}", - /* .expected_output= */ "system\nYou are a helpful assistantuser\nHelloassistant\nHi thereuser\nWho are youassistant\nI am an assistantuser\nAnother questionassistant\n", - /* .expected_output_jinja= */ "system\nYou are a helpful assistantuser\nHelloassistant\nHi thereuser\nWho are youassistant\nI am an assistantuser\nAnother questionassistant\n", - /* .bos_token= */ "", - /* .eos_token= */ "", - } - }; - std::vector formatted_chat(1024); - int32_t res; - - // list all supported templates - std::vector supported_tmpl; - res = llama_chat_builtin_templates(nullptr, 0); - assert(res > 0); - supported_tmpl.resize(res); - res = llama_chat_builtin_templates(supported_tmpl.data(), supported_tmpl.size()); - std::cout << "Built-in chat templates:\n"; - for (auto tmpl : supported_tmpl) { - std::cout << " " << tmpl << "\n"; - } - - // test invalid chat template - res = llama_chat_apply_template("INVALID TEMPLATE", conversation.data(), 
conversation.size(), true, formatted_chat.data(), formatted_chat.size()); - assert(res < 0); - const auto add_generation_prompt = true; - - for (const auto & test_case : test_cases) { - std::cout << "\n\n=== " << test_case.name << " ===\n\n"; - formatted_chat.resize(1024); - res = llama_chat_apply_template( - test_case.template_str.c_str(), - conversation.data(), - conversation.size(), - add_generation_prompt, - formatted_chat.data(), - formatted_chat.size() - ); - formatted_chat.resize(res); - std::string output(formatted_chat.data(), formatted_chat.size()); - if (output != test_case.expected_output) { - std::cout << "Expected:\n" << test_case.expected_output << "\n"; - std::cout << "-------------------------\n"; - std::cout << "Actual:\n" << output << "\n"; - std::cout.flush(); - assert(output == test_case.expected_output); - } - } - - std::vector messages; - for (const auto & msg : conversation) { - messages.push_back(simple_msg(msg.role, msg.content)); - } - for (const auto & test_case : test_cases) { - if (!test_case.supported_with_jinja) { - continue; - } - std::cout << "\n\n=== " << test_case.name << " (jinja) ===\n\n"; - try { - auto output = format_using_common( - test_case.template_str, - test_case.bos_token, - test_case.eos_token, - messages); - auto expected_output = normalize_newlines(test_case.expected_output_jinja.empty() ? 
test_case.expected_output : test_case.expected_output_jinja); - if (output != expected_output) { - std::cout << "Template:```\n" << test_case.template_str << "\n```"; - std::cout << "-------------------------\n"; - std::cout << "Expected:```\n" << expected_output << "\n```"; - std::cout << "-------------------------\n"; - std::cout << "Actual:```\n" << output << "\n```"; - std::cout.flush(); - assert(output == expected_output); - } - } catch (const std::exception & e) { - std::cerr << "ERROR: " << e.what() << "\n"; - assert(false); - } - } - - // TODO: llama_chat_format_single will be deprecated, remove these tests later - - // test llama_chat_format_single for system message - std::cout << "\n\n=== llama_chat_format_single (system message) ===\n\n"; - std::vector chat2; - auto sys_msg = simple_msg("system", "You are a helpful assistant"); - - auto fmt_sys = [&](std::string tmpl_str) { - auto tmpls = common_chat_templates_init(/* model= */ nullptr, tmpl_str); - auto output = common_chat_format_single(tmpls.get(), chat2, sys_msg, false, /* use_jinja= */ false); - std::cout << "fmt_sys(" << tmpl_str << ") : " << output << "\n"; - std::cout << "-------------------------\n"; - return output; - }; - assert(fmt_sys("chatml") == "<|im_start|>system\nYou are a helpful assistant<|im_end|>\n"); - assert(fmt_sys("mistral-v1") == " [INST] You are a helpful assistant\n\n"); - assert(fmt_sys("mistral-v3") == "[INST] You are a helpful assistant\n\n"); - assert(fmt_sys("mistral-v3-tekken") == "[INST]You are a helpful assistant\n\n"); - assert(fmt_sys("mistral-v7") == "[SYSTEM_PROMPT] You are a helpful assistant[/SYSTEM_PROMPT]"); - assert(fmt_sys("llama2") == "[INST] You are a helpful assistant\n"); - assert(fmt_sys("llama2-sys") == "[INST] <>\nYou are a helpful assistant\n<>\n\n"); - assert(fmt_sys("mistral") == "[INST] You are a helpful assistant\n"); // for old pre-v1 templates - assert(fmt_sys("gemma") == ""); // for gemma, system message is merged with user message - 
assert(fmt_sys("llama3") == "<|start_header_id|>system<|end_header_id|>\n\nYou are a helpful assistant<|eot_id|>"); - assert(fmt_sys("gigachat") == "You are a helpful assistant<|message_sep|>"); - - - // test llama_chat_format_single for user message - std::cout << "\n\n=== llama_chat_format_single (user message) ===\n\n"; - chat2.push_back(simple_msg("system", "You are a helpful assistant")); - chat2.push_back(simple_msg("user", "Hello")); - chat2.push_back(simple_msg("assistant", "I am assistant")); - auto new_msg = simple_msg("user", "How are you"); - - auto fmt_single = [&](const std::string & tmpl_str) { - auto tmpls = common_chat_templates_init(/* model= */ nullptr, tmpl_str.c_str()); - auto output = common_chat_format_single(tmpls.get(), chat2, new_msg, true, /* use_jinja= */ false); - std::cout << "fmt_single(" << tmpl_str << ") : " << output << "\n"; - std::cout << "-------------------------\n"; - return output; - }; - assert(fmt_single("chatml") == "\n<|im_start|>user\nHow are you<|im_end|>\n<|im_start|>assistant\n"); - assert(fmt_single("mistral-v1") == " [INST] How are you [/INST]"); - assert(fmt_single("mistral-v3") == "[INST] How are you[/INST]"); - assert(fmt_single("mistral-v3-tekken") == "[INST]How are you[/INST]"); - assert(fmt_single("mistral-v7") == "[INST] How are you[/INST]"); - assert(fmt_single("llama2") == "[INST] How are you [/INST]"); - assert(fmt_single("mistral") == "[INST] How are you [/INST]"); // for old pre-v1 templates - assert(fmt_single("gemma") == "\nuser\nHow are you\nmodel\n"); - assert(fmt_single("llama3") == "<|start_header_id|>user<|end_header_id|>\n\nHow are you<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"); - // assert(fmt_single("gigachat") == "user<|role_sep|>How are you<|message_sep|>available functions<|role_sep|>[]<|message_sep|>assistant<|role_sep|>"); - - std::cout << "\nOK: All tests passed successfully.\n"; - - return 0; -} diff --git a/tests/test-chat.cpp b/tests/test-chat.cpp index 
4378a8db71..ad2953f6da 100644 --- a/tests/test-chat.cpp +++ b/tests/test-chat.cpp @@ -5,18 +5,20 @@ // // cmake -B build && cmake --build build --parallel && ./build/bin/test-chat ../minja/build/tests/*.jinja 2>/dev/null // +#include "../src/llama-grammar.h" +#include "../src/unicode.h" +#include "chat-auto-parser.h" #include "chat.h" - +#include "common.h" +#include "ggml.h" #include "log.h" -#include "../src/unicode.h" -#include "../src/llama-grammar.h" - -#include - +#include #include -#include #include +#include +#include +#include #include using json = nlohmann::ordered_json; @@ -33,6 +35,7 @@ static std::ostream & operator<<(std::ostream & os, const common_chat_msg_diff & os << "}"; return os; } + // operator<< for vector: static std::ostream & operator<<(std::ostream & os, const std::vector & diffs) { os << "[\n"; @@ -42,6 +45,7 @@ static std::ostream & operator<<(std::ostream & os, const std::vector -bool equals(const common_chat_msg & expected, const common_chat_msg & actual) { +template <> bool equals(const common_chat_msg & expected, const common_chat_msg & actual) { return normalize(expected) == normalize(actual); } template static void assert_equals(const T & expected, const T & actual) { if (!equals(expected, actual)) { - std::cerr << "Expected:```\n" << expected << "\n```" << std::endl; - std::cerr << "Actual:```\n" << actual << "\n```" << std::endl; - std::cerr << std::flush; + std::ostringstream oss_expected; + oss_expected << expected; + std::ostringstream oss_actual; + oss_actual << actual; + LOG_ERR("Expected: %s\n", oss_expected.str().c_str()); + LOG_ERR("Actual: %s\n", oss_actual.str().c_str()); + common_log_flush(common_log_main()); throw std::runtime_error("Test failed"); } } static std::string read_file(const std::string & path) { - std::cerr << "# Reading: " << path << '\n' << std::flush; std::ifstream fs(path, std::ios_base::binary); if (!fs.is_open()) { fs = std::ifstream("../" + path, std::ios_base::binary); @@ -146,11 +151,13 @@ static 
std::string renormalize_json(const std::string & json_str) { auto json_obj = json::parse(json_str); return json_obj.dump(); } catch (const std::exception & e) { - std::cerr << "Failed to parse JSON: " << e.what() << '\n'; - return json_str; + return ""; // ignore partial JSON contents for comparison purposes } } -static void assert_msg_equals(const common_chat_msg & expected, const common_chat_msg & actual, bool ignore_whitespace_differences = false) { + +static void assert_msg_equals(const common_chat_msg & expected, + const common_chat_msg & actual, + bool ignore_whitespace_differences = false) { assert_equals(expected.role, actual.role); if (ignore_whitespace_differences) { assert_equals(string_strip(expected.content), string_strip(actual.content)); @@ -183,7 +190,7 @@ static void assert_msg_equals(const common_chat_msg & expected, const common_cha } } -common_chat_tool special_function_tool { +static common_chat_tool special_function_tool{ /* .name = */ "special_function", /* .description = */ "I'm special", /* .parameters = */ R"({ @@ -197,7 +204,7 @@ common_chat_tool special_function_tool { "required": ["arg1"] })", }; -common_chat_tool special_function_tool_with_optional_param { +static common_chat_tool special_function_tool_with_optional_param{ /* .name = */ "special_function_with_opt", /* .description = */ "I'm special but have optional stuff", /* .parameters = */ R"({ @@ -215,7 +222,7 @@ common_chat_tool special_function_tool_with_optional_param { "required": ["arg1"] })", }; -common_chat_tool python_tool { +static common_chat_tool python_tool{ /* .name = */ "python", /* .description = */ "an ipython interpreter", /* .parameters = */ R"({ @@ -229,44 +236,229 @@ common_chat_tool python_tool { "required": ["code"] })", }; -common_chat_tool code_interpreter_tool { - /* .name = */ "code_interpreter", - /* .description = */ "an ipython interpreter", + +static common_chat_tool html_tool{ + /* .name = */ "html", + /* .description = */ "an html validator", /*
.parameters = */ R"({ "type": "object", "properties": { - "code": { + "markup": { "type": "string", - "description": "Python code to execute." + "description": "HTML markup to validate." } }, - "required": ["code"] + "required": ["markup"] })", }; -std::vector tools { special_function_tool, special_function_tool_with_optional_param, python_tool }; -std::vector llama_3_1_tools { special_function_tool, code_interpreter_tool }; + +static common_chat_tool get_time_tool{ + /* .name = */ "get_time", + /* .description = */ "Get the current time in a city", + /* .parameters = */ R"({ + "type": "object", + "properties": { + "city": { + "type": "string", + "description": "City name" + } + }, + "required": ["city"] + })", +}; + +static common_chat_tool get_weather_tool{ + /* .name = */ "get_weather", + /* .description = */ "Get the current weather in a city", + /* .parameters = */ R"({ + "type": "object", + "properties": { + "city": { + "type": "string", + "description": "City name" + } + }, + "required": ["city"] + })", +}; + +static common_chat_tool todo_list{ + /* .name = */ "todo_list", + /* .description = */ "Create or update the todo list", + /* .parameters = */ R"({ + "type": "object", + "properties": { + "todos": { + "type": "array", + "description": "List of TODO list items" + } + }, + "required": ["todos"] + })", +}; + +static common_chat_tool edit_tool{ + /* .name = */ "edit", + /* .description = */ "Edit file", + /* .parameters = */ R"({ + "type": "object", + "properties": { + "filename": { + "type": "string", + "description": "Path of file to edit" + }, + "oldString": { + "type": "string", + "description": "String to replace" + }, + "newString": { + "type": "string", + "description": "New (replacement) value" + } + }, + "required": ["filename", "oldString", "newString"] + })", +}; + +static std::vector tools{ special_function_tool, special_function_tool_with_optional_param, + python_tool, html_tool, todo_list }; + +const common_chat_msg message_user{ + "user", + 
"Hey there!", + /* .content_parts = */ {}, + /* .tool_calls = */ {}, + /* .reasoning_content = */ "", + /* .tool_name = */ "", + /* .tool_call_id = */ "", +}; + +const common_chat_msg message_user_parts{ + "user", + /* .content = */ "", + /* .content_parts = */ + { + { "text", "Hey" }, + { "text", "there" }, + }, + /* .tool_calls = */ + { }, + /* .reasoning_content = */ + "", + /* .tool_name = */ "", + /* .tool_call_id = */ "", +}; + +static common_chat_msg simple_assist_msg(const std::string & content, + const std::string & reasoning_content = "", + const std::string & tool_name = "", + const std::string & arguments = "", + const std::string & id = "") { + common_chat_msg msg; + msg.role = "assistant"; + msg.content = content; + msg.reasoning_content = reasoning_content; + if (!tool_name.empty() || !id.empty()) { + msg.tool_calls.push_back({ tool_name, arguments, id }); + } + return msg; +} + +static common_chat_msg message_with_tool_calls(const std::string & tool_name, const std::string & arguments) { + return simple_assist_msg("", "", tool_name, arguments); +} + +static common_chat_msg message_with_tool_calls_and_reasoning(const std::string & tool_name, + const std::string & arguments, + const std::string & reasoning) { + return simple_assist_msg("", reasoning, tool_name, arguments); +} + +static common_chat_msg message_with_reasoning_content_and_multiple_tool_calls( + const std::string & reasoning, + const std::string & content, + const std::vector> & tool_calls) { + common_chat_msg msg; + msg.role = "assistant"; + msg.content = content; + msg.reasoning_content = reasoning; + for (const auto & [name, args] : tool_calls) { + msg.tool_calls.push_back({ name, args, "" }); + } + return msg; +} + +static common_chat_msg message_with_content_and_tool_call(const std::string & content, + const std::string & tool_name, + const std::string & arguments) { + return simple_assist_msg(content, "", tool_name, arguments); +} + +static common_chat_msg 
message_with_reasoning_and_tool_call(const std::string & reasoning, + const std::string & tool_name, + const std::string & arguments) { + return simple_assist_msg("", reasoning, tool_name, arguments); +} + +const common_chat_msg message_assist = simple_assist_msg("Hello, world!\nWhat's up?"); +const common_chat_msg message_assist_empty = simple_assist_msg(""); +const common_chat_msg message_assist_thoughts_unparsed_deepseek = + simple_assist_msg("I'm\nthinkingHello, world!\nWhat's up?"); +const common_chat_msg message_assist_thoughts_unparsed_md = + simple_assist_msg("I'm\nthinkingHello, world!\nWhat's up?\n```json\n{}```"); +const common_chat_msg message_assist_thoughts_unparsed_md_partial = + simple_assist_msg("I'm\nthinkingHello, world!\nWhat's up?\n```json\n{}"); + +const common_chat_msg message_assist_thoughts_unparsed_r7b = + simple_assist_msg("<|START_THINKING|>I'm\nthinking<|END_THINKING|>Hello, world!\nWhat's up?"); +const common_chat_msg message_assist_thoughts_unparsed_magistral = + simple_assist_msg("[THINK]raisonnement[/THINK]Réponse"); +const common_chat_msg message_assist_thoughts = simple_assist_msg("Hello, world!\nWhat's up?", "I'm\nthinking"); +const common_chat_msg message_assist_thoughts_unopened_unparsed = + simple_assist_msg("I'm\nthinkingHello, world!\nWhat's up?"); +const common_chat_msg message_assist_thoughts_no_content = simple_assist_msg("", "I'm\nthinking"); +const common_chat_msg message_assist_call = simple_assist_msg("", "", "special_function", "{\"arg1\": 1}"); +const common_chat_msg message_assist_call_noopt = + simple_assist_msg("", "", "special_function_with_opt", "{\"arg1\": 1}"); +const common_chat_msg message_assist_call_withopt = + simple_assist_msg("", "", "special_function_with_opt", "{\"arg1\": 1, \"arg2\": 2}"); +const common_chat_msg message_assist_call_content = + simple_assist_msg("Hello, world!\nWhat's up?", "", "special_function", "{\"arg1\":1}"); +const common_chat_msg message_assist_call_empty_args = 
simple_assist_msg("", "", "special_function"); +const common_chat_msg message_assist_call_cutoff_args = simple_assist_msg("", "", "special_function", "{\"arg"); +const common_chat_msg message_assist_call_thoughts = + simple_assist_msg("", "I'm\nthinking", "special_function", "{\"arg1\":1}"); +const common_chat_msg message_assist_call_thoughts_unparsed = + simple_assist_msg("I'm\nthinking\n\n", "", "special_function", "{\"arg1\": 1}"); +const common_chat_msg message_assist_call_thoughts_content = + simple_assist_msg("Hello, world!\nWhat's up?", "I'm\nthinking", "special_function", "{\"arg1\": 1}"); +const common_chat_msg message_assist_call_id = + simple_assist_msg("", "", "special_function", "{\"arg1\":1}", /* .id = */ "123456789"); +const common_chat_msg message_assist_call_idx = + simple_assist_msg("", "", "special_function", "{\"arg1\":1}", /* .id = */ "0"); +const common_chat_msg message_assist_thoughts_call_idx = + simple_assist_msg("", "I'm\nthinking", "special_function", "{\"arg1\": 1}", /* id = */ "0"); +const common_chat_msg message_assist_thoughts_partial_call = + simple_assist_msg("", "I'm\nthinking", "", "", /* id = */ "0"); +const common_chat_msg message_assist_call_python = simple_assist_msg("", "", "python", "{\"code\":\"print('hey')\"}"); +const common_chat_msg message_assist_call_python_lines = + simple_assist_msg("", "", "python", "{\"code\":\"# This is a program:\\nprint('hey')\"}"); +const common_chat_msg message_assist_call_python_lines_unclosed = + simple_assist_msg("", "", "python", "{\"code\":\"# This is a program:\\nprint('hey')"); +const common_chat_msg message_assist_json_content = + simple_assist_msg("{\n \"response\": \"Hello, world!\\nWhat's up?\"\n}"); struct delta_data { std::string delta; common_chat_params params; }; -static common_chat_msg simple_assist_msg(const std::string & content, const std::string & reasoning_content = "", const std::string & tool_name = "", const std::string & arguments = "", const std::string & id = "") { 
- common_chat_msg msg; - msg.role = "assistant"; - msg.content = content; - msg.reasoning_content = reasoning_content; - if (!tool_name.empty()) { - msg.tool_calls.push_back({ tool_name, arguments, id }); - } - return msg; -} - -static delta_data init_delta(const struct common_chat_templates * tmpls, const std::vector & end_tokens, - const common_chat_msg & user_message, - const common_chat_msg & delta_message, +static delta_data init_delta(const struct common_chat_templates * tmpls, + const std::vector & end_tokens, + const common_chat_msg & user_message, + const common_chat_msg & delta_message, const std::vector & tools, - const common_chat_tool_choice & tool_choice) { + const common_chat_tool_choice & tool_choice) { common_chat_templates_inputs inputs; inputs.parallel_tool_calls = true; inputs.messages.push_back(user_message); @@ -317,20 +509,27 @@ static delta_data init_delta(const struct common_chat_templates * tmpls, const s gets the diff, removes any end tokens and parses the result w/ the grammar, checking that the parsed message is the same as the test_message */ -static void test_templates(const struct common_chat_templates * tmpls, const std::vector & end_tokens, - const common_chat_msg & test_message, - const std::vector & tools = {}, - const std::string & expected_delta = "", - bool expect_grammar_triggered = true, - bool test_grammar_if_triggered = true, - common_reasoning_format reasoning_format = COMMON_REASONING_FORMAT_NONE, - bool ignore_whitespace_differences = false - ) { +static void test_templates(const struct common_chat_templates * tmpls, + const std::vector & end_tokens, + const common_chat_msg & test_message, + const std::vector & tools = {}, + const std::string & expected_delta = "", + bool expect_grammar_triggered = true, + bool test_grammar_if_triggered = true, + common_reasoning_format reasoning_format = COMMON_REASONING_FORMAT_NONE, + bool ignore_whitespace_differences = false) { common_chat_msg user_message; - user_message.role = 
"user"; + user_message.role = "user"; user_message.content = "Hello, world!"; - for (const auto & tool_choice : std::vector {COMMON_CHAT_TOOL_CHOICE_AUTO, COMMON_CHAT_TOOL_CHOICE_REQUIRED}) { + common_chat_templates_inputs inputs_tools; + inputs_tools.messages = { message_user }; + inputs_tools.tools = { special_function_tool }; + + common_chat_params params = common_chat_templates_apply(tmpls, inputs_tools); + + for (const auto & tool_choice : + std::vector{ COMMON_CHAT_TOOL_CHOICE_AUTO, COMMON_CHAT_TOOL_CHOICE_REQUIRED }) { auto data = init_delta(tmpls, end_tokens, user_message, test_message, tools, tool_choice); if (!expected_delta.empty()) { if (ignore_whitespace_differences) { @@ -343,8 +542,12 @@ static void test_templates(const struct common_chat_templates * tmpls, const std if (expect_grammar_triggered) { // TODO @ngxson : refactor common_chat_parse to avoid passing format/reasoning_format every time common_chat_parser_params params; - params.format = data.params.format; + params.format = data.params.format; params.reasoning_format = reasoning_format; + if (!params.parser.empty()) { + syntax.parser = common_peg_arena(); + syntax.parser.load(params.parser); + } const auto msg = common_chat_parse(data.delta, /* is_partial= */ false, params); assert_msg_equals(test_message, msg, ignore_whitespace_differences); } @@ -358,43 +561,43 @@ static void test_templates(const struct common_chat_templates * tmpls, const std throw std::runtime_error("Failed to build grammar"); } auto earliest_trigger_pos = std::string::npos; - auto constrained = data.delta; + auto constrained = data.delta; for (const auto & trigger : data.params.grammar_triggers) { - size_t pos = std::string::npos; + size_t pos = std::string::npos; std::smatch match; switch (trigger.type) { case COMMON_GRAMMAR_TRIGGER_TYPE_WORD: - { - const auto & word = trigger.value; - pos = constrained.find(word); - break; - } + { + const auto & word = trigger.value; + pos = constrained.find(word); + break; + } case 
COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN: - { - const auto & pattern = trigger.value; - if (std::regex_search(constrained, match, std::regex(pattern))) { - pos = match.position(1); + { + const auto & pattern = trigger.value; + if (std::regex_search(constrained, match, std::regex(pattern))) { + pos = match.position(1); + } + break; } - break; - } case COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL: - { - const auto & pattern = trigger.value; - if (std::regex_match(constrained, match, std::regex(pattern))) { - auto mpos = std::string::npos; - for (size_t i = 1; i < match.size(); ++i) { - if (match[i].length() > 0) { - mpos = match.position(i); - break; + { + const auto & pattern = trigger.value; + if (std::regex_match(constrained, match, std::regex(pattern))) { + auto mpos = std::string::npos; + for (size_t i = 1; i < match.size(); ++i) { + if (match[i].length() > 0) { + mpos = match.position(i); + break; + } } + if (mpos == std::string::npos) { + mpos = match.position(0); + } + pos = mpos; } - if (mpos == std::string::npos) { - mpos = match.position(0); - } - pos = mpos; + break; } - break; - } default: throw std::runtime_error("Unknown trigger type"); } @@ -407,7 +610,7 @@ static void test_templates(const struct common_chat_templates * tmpls, const std } auto grammar_triggered = false; if (earliest_trigger_pos != std::string::npos) { - constrained = constrained.substr(earliest_trigger_pos); + constrained = constrained.substr(earliest_trigger_pos); grammar_triggered = true; } if (data.params.grammar_lazy) { @@ -416,8 +619,7 @@ static void test_templates(const struct common_chat_templates * tmpls, const std if (grammar_triggered && test_grammar_if_triggered && !match_string(constrained, grammar.get())) { throw std::runtime_error("Failed to match delta against grammar:\n\n" + data.delta + - "\n\nConstrained: " + constrained + - "\n\nGrammar: " + data.params.grammar); + "\n\nConstrained: " + constrained + "\n\nGrammar: " + data.params.grammar); } } } @@ -431,24 +633,31 @@ 
template static void test_parser_with_streaming(const common_chat_msg & expected, const std::string & raw_message, T parse_msg) { constexpr auto utf8_truncate_safe_len = [](const std::string_view s) -> size_t { auto len = s.size(); - if (len == 0) return 0; + if (len == 0) { + return 0; + } auto i = len; for (size_t back = 0; back < 4 && i > 0; ++back) { --i; unsigned char c = s[i]; if ((c & 0x80) == 0) { return len; - } else if ((c & 0xC0) == 0xC0) { + } + if ((c & 0xC0) == 0xC0) { size_t expected_len = 0; - if ((c & 0xE0) == 0xC0) expected_len = 2; - else if ((c & 0xF0) == 0xE0) expected_len = 3; - else if ((c & 0xF8) == 0xF0) expected_len = 4; - else return i; - if (len - i >= expected_len) { - return len; + if ((c & 0xE0) == 0xC0) { + expected_len = 2; + } else if ((c & 0xF0) == 0xE0) { + expected_len = 3; + } else if ((c & 0xF8) == 0xF0) { + expected_len = 4; } else { return i; } + if (len - i >= expected_len) { + return len; + } + return i; } } return len - std::min(len, size_t(3)); @@ -457,14 +666,14 @@ static void test_parser_with_streaming(const common_chat_msg & expected, const s return s.substr(0, utf8_truncate_safe_len(s)); }; - auto merged = simple_assist_msg(""); + auto merged = simple_assist_msg(""); auto last_msg = parse_msg(""); for (size_t i = 1; i <= raw_message.size(); ++i) { auto curr_msg = parse_msg(std::string(utf8_truncate_safe_view(std::string_view(raw_message).substr(0, i)))); if (curr_msg == simple_assist_msg("")) continue; - LOG_INF("Streaming msg: %s\n", common_chat_msgs_to_json_oaicompat({curr_msg}).dump().c_str()); + LOG_INF("Streaming msg: %s\n", common_chat_msgs_to_json_oaicompat({curr_msg}).dump().c_str()); for (auto diff: common_chat_msg_diff::compute_diffs(last_msg, curr_msg)) { - LOG_INF("Streaming diff: %s\n", common_chat_msg_diff_to_json_oaicompat(diff).dump().c_str()); + LOG_INF("Streaming diff: %s\n", common_chat_msg_diff_to_json_oaicompat(diff).dump().c_str()); if (!diff.reasoning_content_delta.empty()) { 
merged.reasoning_content += diff.reasoning_content_delta; } @@ -473,14 +682,14 @@ static void test_parser_with_streaming(const common_chat_msg & expected, const s } if (diff.tool_call_index != std::string::npos) { if (!diff.tool_call_delta.name.empty()) { - merged.tool_calls.push_back({diff.tool_call_delta.name, "", ""}); + merged.tool_calls.push_back({ diff.tool_call_delta.name, "", "" }); } if (!diff.tool_call_delta.arguments.empty()) { GGML_ASSERT(!merged.tool_calls.empty()); merged.tool_calls.back().arguments += diff.tool_call_delta.arguments; } } - LOG_INF("Streaming merged: %s\n", common_chat_msgs_to_json_oaicompat({merged}).dump().c_str()); + LOG_INF("Streaming merged: %s\n", common_chat_msgs_to_json_oaicompat({merged}).dump().c_str()); } assert_msg_equals(curr_msg, merged, true); last_msg = curr_msg; @@ -489,99 +698,90 @@ static void test_parser_with_streaming(const common_chat_msg & expected, const s assert_msg_equals(expected, merged, true); } -const common_chat_msg message_user { - "user", - "Hey there!", - /* .content_parts = */ {}, - /* .tool_calls = */ {}, - /* .reasoning_content = */ "", - /* .tool_name = */ "", - /* .tool_call_id = */ "", -}; - -const common_chat_msg message_user_parts { - "user", - /* .content = */ "", - /* .content_parts = */ { - { "text", "Hey" }, - { "text", "there" }, - }, - /* .tool_calls = */ {}, - /* .reasoning_content = */ "", - /* .tool_name = */ "", - /* .tool_call_id = */ "", -}; - -const common_chat_msg message_assist = simple_assist_msg("Hello, world!\nWhat's up?"); -const common_chat_msg message_assist_empty = simple_assist_msg(""); -const common_chat_msg message_assist_thoughts_unparsed_deepseek = simple_assist_msg("I'm\nthinkingHello, world!\nWhat's up?"); -const common_chat_msg message_assist_thoughts_unparsed_md = simple_assist_msg("I'm\nthinkingHello, world!\nWhat's up?\n```json\n{}```"); -const common_chat_msg message_assist_thoughts_unparsed_md_partial = simple_assist_msg("I'm\nthinkingHello, world!\nWhat's 
up?\n```json\n{}"); - -const common_chat_msg message_assist_thoughts_unparsed_r7b = simple_assist_msg("<|START_THINKING|>I'm\nthinking<|END_THINKING|>Hello, world!\nWhat's up?"); -const common_chat_msg message_assist_thoughts_unparsed_magistral = simple_assist_msg("[THINK]raisonnement[/THINK]Réponse"); -const common_chat_msg message_assist_thoughts = simple_assist_msg("Hello, world!\nWhat's up?", "I'm\nthinking"); -const common_chat_msg message_assist_thoughts_unopened_unparsed = simple_assist_msg("I'm\nthinkingHello, world!\nWhat's up?"); -const common_chat_msg message_assist_thoughts_no_content = simple_assist_msg("", "I'm\nthinking"); -const common_chat_msg message_assist_call = simple_assist_msg("", "", "special_function", "{\"arg1\": 1}"); -const common_chat_msg message_assist_call_noopt = simple_assist_msg("", "", "special_function_with_opt", "{\"arg1\": 1}"); -const common_chat_msg message_assist_call_withopt = simple_assist_msg("", "", "special_function_with_opt", "{\"arg1\": 1, \"arg2\": 2}"); -const common_chat_msg message_assist_call_content = simple_assist_msg("Hello, world!\nWhat's up?", "", "special_function", "{\"arg1\":1}"); -const common_chat_msg message_assist_call_empty_args = simple_assist_msg("", "", "special_function"); -const common_chat_msg message_assist_call_cutoff_args = simple_assist_msg("", "", "special_function", "{\"arg"); -const common_chat_msg message_assist_call_thoughts = simple_assist_msg("", "I'm\nthinking", "special_function", "{\"arg1\":1}"); -const common_chat_msg message_assist_call_thoughts_unparsed = simple_assist_msg("I'm\nthinking\n\n", "", "special_function", "{\"arg1\": 1}"); -const common_chat_msg message_assist_call_thoughts_content = simple_assist_msg("Hello, world!\nWhat's up?", "I'm\nthinking", "special_function", "{\"arg1\": 1}"); -const common_chat_msg message_assist_call_id = simple_assist_msg("", "", "special_function", "{\"arg1\":1}", /* .id = */ "123456789"); -const common_chat_msg message_assist_call_idx = 
simple_assist_msg("", "", "special_function", "{\"arg1\":1}", /* .id = */ "0"); -const common_chat_msg message_assist_thoughts_call_idx = simple_assist_msg("", "I'm\nthinking", "special_function", "{\"arg1\": 1}", /* id = */ "0"); -const common_chat_msg message_assist_call_python = simple_assist_msg("", "", "python", "{\"code\":\"print('hey')\"}"); -const common_chat_msg message_assist_call_python_lines = simple_assist_msg("", "", "python", "{\"code\":\"# This is a program:\\nprint('hey')\"}"); -const common_chat_msg message_assist_call_python_lines_unclosed = simple_assist_msg("", "", "python", "{\"code\":\"# This is a program:\\nprint('hey')"); -const common_chat_msg message_assist_call_code_interpreter = simple_assist_msg("", "", "code_interpreter", "{\"code\":\"print('hey')\"}"); - // Use for PEG parser implementations struct peg_test_case { common_chat_templates_inputs params; - std::string input; - common_chat_msg expect; + std::string input; + common_chat_msg expect; + bool is_partial = false; }; struct make_peg_parser { common_chat_params params_; - common_peg_arena arena_; + common_peg_arena arena_; + bool detailed_debug_; - make_peg_parser(common_chat_templates * tmpls, const common_chat_templates_inputs & inputs) { - params_ = common_chat_templates_apply(tmpls, inputs); + make_peg_parser(common_chat_templates * tmpls, + const common_chat_templates_inputs & inputs, + bool detailed_debug = false) { + detailed_debug_ = detailed_debug; + params_ = common_chat_templates_apply(tmpls, inputs); arena_.load(params_.parser); } common_chat_msg parse(const std::string & msg, bool is_partial) { - common_chat_parser_params parser_params; +common_chat_parser_params parser_params; parser_params.format = params_.format; return common_chat_peg_parse(arena_, msg, is_partial, parser_params); - } }; -static void test_peg_parser(common_chat_templates * tmpls, const std::function & init) { +static void test_peg_parser(common_chat_templates * tmpls, + const std::function & 
init, + bool detailed_debug) { + // UTF-8-safe truncation helper (same as in test_parser_with_streaming) + constexpr auto utf8_truncate_safe_len = [](const std::string_view s) -> size_t { + auto len = s.size(); + if (len == 0) { + return 0; + } + auto i = len; + for (size_t back = 0; back < 4 && i > 0; ++back) { + --i; + unsigned char c = s[i]; + if ((c & 0x80) == 0) { + return len; + } + if ((c & 0xC0) == 0xC0) { + size_t expected_len = 0; + if ((c & 0xE0) == 0xC0) { + expected_len = 2; + } else if ((c & 0xF0) == 0xE0) { + expected_len = 3; + } else if ((c & 0xF8) == 0xF0) { + expected_len = 4; + } else { + return i; + } + if (len - i >= expected_len) { + return len; + } + return i; + } + } + return len - std::min(len, size_t(3)); + }; + peg_test_case tc; init(tc); if (tc.params.messages.empty()) { - tc.params.messages = {message_user}; + tc.params.messages = { message_user }; } if (tc.expect.role.empty()) { tc.expect.role = "assistant"; } - auto parser = make_peg_parser(tmpls, tc.params); + auto parser = make_peg_parser(tmpls, tc.params, detailed_debug); common_chat_msg msg_accum; common_chat_msg msg_prev; msg_accum.role = msg_prev.role = "assistant"; for (size_t i = 1; i <= tc.input.size(); ++i) { - auto is_partial = i < tc.input.size(); - common_chat_msg msg_current = parser.parse(tc.input.substr(0, i), is_partial); + auto is_partial = i < tc.input.size() || tc.is_partial; + // Use UTF-8 safe truncation to avoid corrupting multi-byte characters + size_t safe_len = utf8_truncate_safe_len(std::string_view(tc.input).substr(0, i)); + std::string prefix = tc.input.substr(0, safe_len); + common_chat_msg msg_current = parser.parse(prefix, is_partial); for (const auto & diff : common_chat_msg_diff::compute_diffs(msg_prev, msg_current)) { if (!diff.reasoning_content_delta.empty()) { @@ -591,11 +791,21 @@ static void test_peg_parser(common_chat_templates * tmpls, const std::function tools) { + tc_.params.tools = std::move(tools); + return *this; + } + + peg_test_builder 
& enable_thinking(bool val) { + tc_.params.enable_thinking = val; + return *this; + } + + peg_test_builder & parallel_tool_calls(bool val) { + tc_.params.parallel_tool_calls = val; + return *this; + } + + peg_test_builder & json_schema(const std::string & schema) { + tc_.params.json_schema = schema; + return *this; + } + + peg_test_builder & is_partial(bool val) { + tc_.is_partial = val; + return *this; + } + + // Expect setters + peg_test_builder & expect(const common_chat_msg & msg) { + tc_.expect = msg; + return *this; + } + + peg_test_builder & expect_content(const std::string & content) { + tc_.expect.content = content; + return *this; + } + + peg_test_builder & expect_reasoning(const std::string & reasoning) { + tc_.expect.reasoning_content = reasoning; + return *this; + } + + peg_test_builder & expect_tool_calls(std::vector calls) { + tc_.expect.tool_calls = std::move(calls); + return *this; + } + + // Execute the test + void run() { + // Check template filter + if (!g_template_filter.empty()) { + // Case-insensitive substring match + std::string template_path_lower = tester_.template_path(); + std::string filter_lower = g_template_filter; + std::transform(template_path_lower.begin(), template_path_lower.end(), template_path_lower.begin(), + ::tolower); + std::transform(filter_lower.begin(), filter_lower.end(), filter_lower.begin(), ::tolower); + if (template_path_lower.find(filter_lower) == std::string::npos) { + // Skip this test + return; + } + } + LOG_DBG("\n================================\nRunning test for template: %s\n================================\n", + tester_.template_path().c_str()); + test_peg_parser(tester_.tmpls_.get(), [this](peg_test_case & t) { t = tc_; }, tester_.detailed_debug_); + } +}; + +peg_test_builder peg_tester::test(const std::string & input) { + return peg_test_builder(*this, input); +} + static void test_msgs_oaicompat_json_conversion() { - printf("[%s]\n", __func__); + LOG_DBG("%s\n", __func__); std::vector msgs{ 
message_user, message_user_parts, @@ -619,13 +938,12 @@ static void test_msgs_oaicompat_json_conversion() { message_assist_call_id, message_assist_call_idx, message_assist_call_python, - message_assist_call_code_interpreter, }; for (const auto & msg : msgs) { - auto oai_json = common_chat_msgs_to_json_oaicompat({msg}); + auto oai_json = common_chat_msgs_to_json_oaicompat({msg}); auto msgs2 = common_chat_msgs_parse_oaicompat(oai_json); assert_equals((size_t) 1, msgs2.size()); - auto msg2 = msgs2[0]; + const auto & msg2 = msgs2[0]; assert_msg_equals(msg, msg2); } assert_equals( @@ -646,7 +964,7 @@ static void test_msgs_oaicompat_json_conversion() { " }\n" "]" ), - common_chat_msgs_to_json_oaicompat({message_user_parts}).dump(2)); + common_chat_msgs_to_json_oaicompat({message_user_parts}).dump(2)); assert_equals( std::string( @@ -666,7 +984,7 @@ static void test_msgs_oaicompat_json_conversion() { " }\n" "]" ), - common_chat_msgs_to_json_oaicompat({message_assist_call_python}).dump(2)); + common_chat_msgs_to_json_oaicompat({message_assist_call_python}).dump(2)); auto res = common_chat_msgs_parse_oaicompat(json::parse("[{\"role\": \"assistant\", \"tool_calls\": []}]")); assert_equals(1, res.size()); @@ -685,15 +1003,14 @@ static void test_msgs_oaicompat_json_conversion() { } static void test_tools_oaicompat_json_conversion() { - printf("[%s]\n", __func__); + LOG_DBG("%s\n", __func__); std::vector tools{ special_function_tool, python_tool, - code_interpreter_tool, }; for (const auto & tool : tools) { - auto oai_json = common_chat_tools_to_json_oaicompat({tool}); + auto oai_json = common_chat_tools_to_json_oaicompat({tool}); auto tools2 = common_chat_tools_parse_oaicompat(oai_json); assert_equals((size_t) 1, tools2.size()); auto tool2 = tools2[0]; @@ -726,7 +1043,7 @@ static void test_tools_oaicompat_json_conversion() { " }\n" "]" ), - common_chat_tools_to_json_oaicompat({special_function_tool}).dump(2)); + 
common_chat_tools_to_json_oaicompat({special_function_tool}).dump(2)); { auto tools_no_params = common_chat_tools_parse_oaicompat(json::parse( @@ -1010,14 +1327,14 @@ static void test_template_output_parsers() { // Test parsing assert_msg_equals( simple_assist_msg("", "", "python", ""), - test_chat_parse( + common_chat_parse( "```json\n" " { \"name\" : \"python\"", /* is_partial= */ true, {COMMON_CHAT_FORMAT_HERMES_2_PRO})); assert_msg_equals( simple_assist_msg("Let's call something\n"), - test_chat_parse( + common_chat_parse( "Let's call something\n" "{\"name\"", /* is_partial= */ true, @@ -1027,7 +1344,7 @@ static void test_template_output_parsers() { })); assert_msg_equals( simple_assist_msg("Let's call something\n"), - test_chat_parse( + common_chat_parse( "Let's call something\n" "{\"name", /* is_partial= */ true, @@ -1036,7 +1353,7 @@ static void test_template_output_parsers() { /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, })); assert_msg_equals(message_assist_call_thoughts, - test_chat_parse( + common_chat_parse( // QwQ-32B's template adds a trailing if add_generation_prompt "I'm\nthinking\n" "{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}", @@ -1049,580 +1366,39 @@ static void test_template_output_parsers() { })); assert_msg_equals( message_assist_call, - test_chat_parse( - "\n" - "{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n" - "", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_HERMES_2_PRO})); - assert_msg_equals(message_assist_call_content, - test_chat_parse( - "Hello, world!\nWhat's up?\n" - "{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n" - "", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_HERMES_2_PRO})); - assert_msg_equals( - message_assist_call, - test_chat_parse( - "{\"arg1\": 1}", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_HERMES_2_PRO})); - assert_msg_equals( - message_assist_call, - test_chat_parse( - "\n" - "{\"arg1\": 1}\n" - "", - /* is_partial= */ false, - 
{COMMON_CHAT_FORMAT_HERMES_2_PRO})); - assert_msg_equals( - message_assist_call, - test_chat_parse( - "\n" - " {\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n" - "", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_HERMES_2_PRO})); - assert_msg_equals( - message_assist_call, - test_chat_parse( - "\n" - " {\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n" - "", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_HERMES_2_PRO})); - assert_msg_equals( - message_assist_call, - test_chat_parse( - "\n" - " {\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n" - "", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_HERMES_2_PRO})); - assert_msg_equals( - message_assist_call, - test_chat_parse( - "```xml\n" - "\n" - " {\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n" - "\n" - "```", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_HERMES_2_PRO})); - assert_msg_equals( - message_assist_call, - test_chat_parse( - "```xml\n" - " {\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n" - "```", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_HERMES_2_PRO})); - assert_msg_equals( - message_assist_call, - test_chat_parse( - "```\n" - " {\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n" - "```", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_HERMES_2_PRO})); - assert_msg_equals( - message_assist_call, - test_chat_parse( - "```\n" - "{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n" - "```", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_HERMES_2_PRO})); - assert_msg_equals( - message_assist_call, - test_chat_parse( - "```json\n" - " {\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n" - "```", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_HERMES_2_PRO})); - assert_msg_equals( - message_assist_call, - test_chat_parse( - "```json\n" - "\n" - " {\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}} \n" - " \n" - "``` ", - /* is_partial= */ false, - 
{COMMON_CHAT_FORMAT_HERMES_2_PRO})); - assert_msg_equals( - message_assist_call, - test_chat_parse( - "\n" - " {\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n" - "", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_HERMES_2_PRO})); - assert_msg_equals( - message_assist_call, - test_chat_parse( - "\n" - " {\n" - " \"name\": \"special_function\", \"arguments\": {\"arg1\": 1}\n" - " }\n" - "", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_HERMES_2_PRO})); - assert_msg_equals( - message_assist_call, - test_chat_parse( - "\n" - " {\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n" - "", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_HERMES_2_PRO})); - assert_msg_equals( - message_assist_call, - test_chat_parse( - "{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_HERMES_2_PRO})); - assert_msg_equals( - message_assist_call, - test_chat_parse( - "{\n \"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_HERMES_2_PRO})); - - // Test multiple tool calls - common_chat_msg message_assist_multiple_calls; - message_assist_multiple_calls.role = "assistant"; - message_assist_multiple_calls.content = ""; - message_assist_multiple_calls.tool_calls.push_back({"special_function", "{\"arg1\": 1}", ""}); - message_assist_multiple_calls.tool_calls.push_back({"python", "{\"code\":\"print('hello')\"}", ""}); - - assert_msg_equals( - message_assist_multiple_calls, - test_chat_parse( - "\n" - "{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n" - "\n" - "\n" - "{\"name\": \"python\", \"arguments\": {\"code\":\"print('hello')\"}}\n" - "", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_HERMES_2_PRO})); - - assert_msg_equals( - message_assist_multiple_calls, - test_chat_parse( - "{\"arg1\": 1}\n" - "{\"code\":\"print('hello')\"}", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_HERMES_2_PRO})); - - assert_msg_equals( - 
simple_assist_msg( - "This is not a tool call:", - "", - "special_function", - "{\"arg1\": 1}"), - test_chat_parse( - "This is not a tool call:\n" - "{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_HERMES_2_PRO})); - assert_msg_equals(message_assist, - test_chat_parse( - "Hello, world!\nWhat's up?", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_HERMES_2_PRO})); - assert_msg_equals(message_assist_thoughts_unparsed_deepseek, - test_chat_parse( - "I'm\nthinkingHello, world!\nWhat's up?", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_HERMES_2_PRO})); - // assert_msg_equals(message_assist_thoughts_unparsed_deepseek, - // test_chat_parse( - // "I'm\nthinkingHello, world!\nWhat's up?", - // COMMON_CHAT_FORMAT_HERMES_2_PRO)); - assert_msg_equals(message_assist_thoughts, - test_chat_parse( - "I'm\nthinkingHello, world!\nWhat's up?", - /* is_partial= */ false, - { - /* .format = */ COMMON_CHAT_FORMAT_HERMES_2_PRO, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, - })); - assert_msg_equals(message_assist_thoughts, - test_chat_parse( - "I'm\nthinkingHello, world!\nWhat's up?", - /* is_partial= */ true, - { - /* .format = */ COMMON_CHAT_FORMAT_HERMES_2_PRO, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, - })); - assert_msg_equals(message_assist_thoughts_unparsed_md, - test_chat_parse( - "I'm\nthinkingHello, world!\nWhat's up?\n```json\n{}```", - /* is_partial= */ false, - { - /* .format = */ COMMON_CHAT_FORMAT_HERMES_2_PRO, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, - /* .reasoning_in_content = */ true, - /* .thinking_forced_open = */ false, - /* .parse_tool_calls = */ false, - })); - assert_msg_equals(message_assist_thoughts_unparsed_md_partial, - test_chat_parse( - "I'm\nthinkingHello, world!\nWhat's up?\n```json\n{}```", - /* is_partial= */ true, - { - /* .format = */ COMMON_CHAT_FORMAT_HERMES_2_PRO, - /* .reasoning_format = */ 
COMMON_REASONING_FORMAT_DEEPSEEK, - /* .reasoning_in_content = */ true, - /* .thinking_forced_open = */ false, - })); - assert_msg_equals(message_assist_thoughts_unopened_unparsed, - test_chat_parse( - "I'm\nthinkingHello, world!\nWhat's up?", - /* is_partial= */ false, - { - /* .format = */ COMMON_CHAT_FORMAT_HERMES_2_PRO, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, - })); - assert_msg_equals(message_assist_thoughts, - test_chat_parse( - "I'm\nthinkingHello, world!\nWhat's up?", - /* is_partial= */ false, - { - /* .format = */ COMMON_CHAT_FORMAT_HERMES_2_PRO, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, - /* .reasoning_in_content = */ false, - /* .thinking_forced_open = */ true, - })); - - test_templates(tmpls.get(), end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false); - test_templates(tmpls.get(), end_tokens, message_assist_call, tools, - "\n" - "{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n" - ""); - - // Test multiple tool calls with template - common_chat_msg message_assist_multiple_calls_template; - message_assist_multiple_calls_template.role = "assistant"; - message_assist_multiple_calls_template.content = ""; - message_assist_multiple_calls_template.tool_calls.push_back({"special_function", "{\"arg1\": 1}", ""}); - message_assist_multiple_calls_template.tool_calls.push_back({"python", "{\"code\":\"print('test')\"}", ""}); - - test_templates(tmpls.get(), end_tokens, message_assist_multiple_calls_template, tools, - "\n" - "{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n" - "\n" - "\n" - "{\"name\": \"python\", \"arguments\": {\"code\":\"print('test')\"}}\n" - ""); - - test_templates(tmpls.get(), end_tokens, message_assist_call_python_lines, tools, - "\n" - "{\"name\": \"python\", \"arguments\": {\"code\":\"# This is a program:\\nprint('hey')\"}}\n" - ""); - assert_msg_equals( - simple_assist_msg("", /* reasoning_content= */ "nah uhg"), - 
test_chat_parse( - "nah uhg", - /* is_partial= */ false, - { - /* .format = */ COMMON_CHAT_FORMAT_HERMES_2_PRO, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, - })); + common_chat_parse( + "\n" + "{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n" + "") + .enable_thinking(true) + .reasoning_format(COMMON_REASONING_FORMAT_AUTO) + .tools({ special_function_tool }) + .expect(simple_assist_msg("", "I should use a tool", "special_function", R"({"arg1": 1})")) + .run(); } + { - auto tmpls = read_templates("models/templates/meta-llama-Llama-3.1-8B-Instruct.jinja"); - std::vector end_tokens{ "<|eom_id|>", "<|eot_id|>" }; + // NousResearch-Hermes-2-Pro and Hermes-3 (tool calling models) + auto tst = peg_tester("models/templates/NousResearch-Hermes-2-Pro-Llama-3-8B-tool_use.jinja", detailed_debug); - assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format); - assert_equals(COMMON_CHAT_FORMAT_LLAMA_3_X, common_chat_templates_apply(tmpls.get(), inputs_tools).format); - assert_equals(COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS, - common_chat_templates_apply(tmpls.get(), inputs_tools_builtin).format); - assert_equals(COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS, - common_chat_templates_apply( - read_templates("models/templates/meta-llama-Llama-3.3-70B-Instruct.jinja").get(), - inputs_tools_builtin) - .format); + tst.test( + "\n" + "{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n" + "") + .tools({ special_function_tool }) + .expect(message_assist_call) + .run(); - assert_equals( - message_assist_call, - test_chat_parse( - "{\"name\": \"special_function\", \"parameters\": {\"arg1\": 1}}", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_LLAMA_3_X})); + tst.test( + "Hello, world!\nWhat's up?\n" + "{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n" + "") + .tools({ special_function_tool }) + .expect(message_assist_call_content) + .run(); - // 
test_templates(tmpls.get(), end_tokens, message_assist, tools, R"(?)", /* expect_grammar_triggered= */ false); - test_templates(tmpls.get(), end_tokens, message_assist_call_code_interpreter, llama_3_1_tools, - "<|python_tag|>code_interpreter.call(code=\"print('hey')\")"); - test_templates(tmpls.get(), end_tokens, message_assist_call_python, tools, - "<|python_tag|>python.call(code=\"print('hey')\")"); - test_templates(tmpls.get(), end_tokens, message_assist_call, tools, - "{\"name\": \"special_function\", \"parameters\": {\"arg1\": 1}}"); - } - { - auto tmpls = read_templates("models/templates/meta-llama-Llama-3.2-3B-Instruct.jinja"); - std::vector end_tokens{ "<|eom_id|>", "<|eot_id|>" }; - - assert_equals(COMMON_CHAT_FORMAT_LLAMA_3_X, common_chat_templates_apply(tmpls.get(), inputs_tools).format); - assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format); - - test_templates(tmpls.get(), end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false); - test_templates(tmpls.get(), end_tokens, message_assist_call, tools, - "{\"name\": \"special_function\", \"parameters\": {\"arg1\": 1}}"); - } - { - auto tmpls = read_templates("models/templates/meetkai-functionary-medium-v3.1.jinja"); - std::vector end_tokens{ "<|eom_id|>", "<|eot_id|>" }; - - assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY, - common_chat_templates_apply(tmpls.get(), inputs_no_tools).format); - assert_equals(COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1, - common_chat_templates_apply(tmpls.get(), inputs_tools).format); - assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY, - common_chat_templates_apply(tmpls.get(), inputs_no_tools).format); - - for (auto is_partial : { false, true }) { - assert_equals( - message_assist_call, - test_chat_parse( - "{\"arg1\": 1}", - is_partial, - {COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1})); - } - - assert_equals( - message_assist_call, - test_chat_parse( - "{\"arg1\": 1}<", 
- /* is_partial= */ true, - {COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1})); - - test_templates(tmpls.get(), end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false); - test_templates(tmpls.get(), end_tokens, message_assist_call, tools, - "{\"arg1\": 1}"); - } - { - auto tmpls = read_templates("models/templates/meetkai-functionary-medium-v3.2.jinja"); - std::vector end_tokens{ "<|eom_id|>", "<|eot_id|>" }; - - assert_equals(COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format); - assert_equals(COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2, common_chat_templates_apply(tmpls.get(), inputs_tools).format); - - assert_msg_equals( - simple_assist_msg( - "Hello, world!\nnono\nWhat's up?", - "", - "special_function", - "{\"arg1\": 1}"), - test_chat_parse( - "all\n" - "Hello, world!\n" - "nono\n" - "What's up?>>>special_function\n" - "{\"arg1\": 1}\n", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2})); - assert_msg_equals(message_assist_call_python_lines, - test_chat_parse( - "python\n" - "# This is a program:\n" - "print('hey')", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2})); - assert_msg_equals(message_assist_call_python_lines_unclosed, - test_chat_parse( - "python\n" - "# This is a program:\n" - "print('hey')", - /* is_partial= */ true, - {COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2})); - assert_msg_equals(message_assist_call, - test_chat_parse( - "special_function\n" - "{\"arg1\": 1} \n ", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2})); - assert_msg_equals(message_assist, - test_chat_parse( - "all\n" - "Hello, world!\nWhat's up?", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2})); - - test_templates(tmpls.get(), end_tokens, message_assist, {}, - "all\n" - "Hello, world!\n" - "What's up?", - /* expect_grammar_triggered= */ false); - test_templates(tmpls.get(), end_tokens, message_assist_call, tools, - 
"special_function\n" - "{\"arg1\": 1}"); - } - { - auto tmpls = read_templates("models/templates/fireworks-ai-llama-3-firefunction-v2.jinja"); - std::vector end_tokens{ "<|eot_id|>" }; - - assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format); - assert_equals(COMMON_CHAT_FORMAT_FIREFUNCTION_V2, common_chat_templates_apply(tmpls.get(), inputs_tools).format); - - test_templates(tmpls.get(), end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false); - test_templates(tmpls.get(), end_tokens, message_assist_call, tools, - " functools[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}]"); - } - { - // Original DeepSeek R1 template. Leaves <|tool▁calls▁begin|> and others unclosed. Our logic fixes the prompt. - auto tmpls = read_templates("models/templates/deepseek-ai-DeepSeek-R1-Distill-Llama-8B.jinja"); - std::vector end_tokens{ "<|end▁of▁sentence|>" }; - - for (const auto & inputs : { inputs_no_tools, inputs_tools }) { - auto params = common_chat_templates_apply(tmpls.get(), inputs); - assert_equals(COMMON_CHAT_FORMAT_DEEPSEEK_R1, params.format); - assert_equals(true, params.thinking_forced_open); - } - - test_templates(tmpls.get(), end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false); - test_templates(tmpls.get(), end_tokens, message_assist_thoughts, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false); - assert_msg_equals( - simple_assist_msg("Hello, world!\nWhat's up?", "I'm\nthinking"), - test_chat_parse( - "I'm\nthinkingHello, world!\nWhat's up?", - /* is_partial= */ false, - { - COMMON_CHAT_FORMAT_DEEPSEEK_R1, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, - /* .reasoning_in_content = */ false, - /* .thinking_forced_open = */ true, - })); - assert_msg_equals( - simple_assist_msg("", "I need to remember the correct syntax. 
It starts with <|tool▁calls▁begin|> and ends with"), - test_chat_parse( - "I need to remember the correct syntax. It starts with <|tool▁calls▁begin|> and ends with", - /* is_partial= */ true, - { - COMMON_CHAT_FORMAT_DEEPSEEK_R1, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, - /* .reasoning_in_content = */ false, - /* .thinking_forced_open = */ true, - })); - assert_msg_equals(message_assist_thoughts, - test_chat_parse( - "I'm\nthinkingHello, world!\nWhat's up?", - /* is_partial= */ false, - { - /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_R1, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, - })); - assert_msg_equals(message_assist_thoughts_unopened_unparsed, - test_chat_parse( - "I'm\nthinkingHello, world!\nWhat's up?", - /* is_partial= */ false, - { - /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_R1, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, - })); - assert_msg_equals(message_assist_thoughts, - test_chat_parse( - "I'm\nthinkingHello, world!\nWhat's up?", - /* is_partial= */ false, - { - /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_R1, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, - /* .reasoning_in_content = */ false, - /* .thinking_forced_open = */ true, - })); - assert_msg_equals(message_assist_thoughts, - // Latest template update (ast of 20250209) adds a trailing \n if add_generation_prompt is true. 
- test_chat_parse( - "I'm\nthinkingHello, world!\nWhat's up?", - /* is_partial= */ false, - { - /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_R1, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, - /* .reasoning_in_content = */ false, - /* .thinking_forced_open = */ true, - })); - // test_templates(tmpls.get(), end_tokens, message_assist_call, tools, - // "<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>special_function\n" - // "```json\n" - // "{\"arg1\": 1}\n" - // // Look what's not here: <|tool▁calls▁end|> (also missing the <|end▁of▁sentence|>, but that is removed lazily by the test's delta logic) - // "```<|tool▁call▁end|>", - // /* expect_grammar_triggered= */ true, - // /* test_grammar_if_triggered= */ false); - } - { - // Replacement DeepSeek R1 template. Makes the Distill Qwen 7B/32B models happy to call tools and all. - auto tmpls = read_templates("models/templates/llama-cpp-deepseek-r1.jinja"); - std::vector end_tokens{ "<|end▁of▁sentence|>" }; - - assert_equals(COMMON_CHAT_FORMAT_DEEPSEEK_R1, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format); - assert_equals(COMMON_CHAT_FORMAT_DEEPSEEK_R1, common_chat_templates_apply(tmpls.get(), inputs_tools).format); - - test_templates(tmpls.get(), end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false); - test_templates(tmpls.get(), end_tokens, message_assist_thoughts, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false); - assert_msg_equals(message_assist_thoughts_unparsed_deepseek, - test_chat_parse( - "I'm\nthinkingHello, world!\nWhat's up?", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_DEEPSEEK_R1})); - assert_msg_equals(message_assist_thoughts, - test_chat_parse( - "I'm\nthinkingHello, world!\nWhat's up?", - /* is_partial= */ false, - { - /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_R1, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, - })); - assert_msg_equals(message_assist_thoughts, - 
test_chat_parse( - "I'm\nthinkingHello, world!\nWhat's up?", - /* is_partial= */ false, - { - /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_R1, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, - /* .reasoning_in_content = */ false, - /* .thinking_forced_open = */ true, - })); - - assert_msg_equals(message_assist_call_thoughts_unparsed, - test_chat_parse( - "I'm\nthinking\n\n" - "<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>special_function\n" - "```json\n" - "{\"arg1\": 1}\n" - "```<|tool▁call▁end|><|tool▁calls▁end|>", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_DEEPSEEK_R1})); - assert_msg_equals(message_assist_call, - test_chat_parse( - "<|tool▁calls|>function<|tool▁sep|>special_function\n" - "```json\n" - "{\"arg1\": 1}\n" - "```<|tool▁call▁end|><|tool▁calls▁end|>", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_DEEPSEEK_R1})); - - assert_msg_equals(message_assist_call_thoughts, - test_chat_parse( - "I'm\nthinking\n\n" - "<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>special_function\n" - "```json\n" - "{\"arg1\": 1}\n" - "```<|tool▁call▁end|><|tool▁calls▁end|>", - /* is_partial= */ false, - { - /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_R1, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, - })); - test_templates(tmpls.get(), end_tokens, message_assist_call, tools, - "<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>special_function\n" - "```json\n" - "{\"arg1\": 1}\n" - "```<|tool▁call▁end|><|tool▁calls▁end|>"); + // Note: Hermes template doesn't support thinking/reasoning natively + // Note: We only support one tool calling format per template, no alternate formats } { auto tmpls = read_templates("models/templates/ibm-granite-granite-3.3-2B-Instruct.jinja"); @@ -1634,20 +1410,20 @@ static void test_template_output_parsers() { // Test parsing regular content assert_msg_equals(message_assist, - test_chat_parse( + common_chat_parse( "Hello, world!\nWhat's up?", /* is_partial= */ false, 
{COMMON_CHAT_FORMAT_GRANITE})); assert_msg_equals( message_assist, - test_chat_parse( + common_chat_parse( "Hello, world!\nWhat's up?", /* is_partial= */ true, {COMMON_CHAT_FORMAT_GRANITE})); // Test parsing content with thinking assert_msg_equals(message_assist_thoughts, - test_chat_parse( + common_chat_parse( "I'm\nthinkingHello, world!\nWhat's up?", /* is_partial= */ false, { @@ -1655,12 +1431,12 @@ static void test_template_output_parsers() { /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, })); assert_msg_equals(message_assist_thoughts_unparsed_deepseek, - test_chat_parse( + common_chat_parse( "I'm\nthinkingHello, world!\nWhat's up?", /* is_partial= */ false, {COMMON_CHAT_FORMAT_GRANITE})); assert_msg_equals(message_assist_thoughts, - test_chat_parse( + common_chat_parse( "I'm\nthinkingHello, world!\nWhat's up?", /* is_partial= */ true, { @@ -1668,7 +1444,7 @@ static void test_template_output_parsers() { /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, })); assert_msg_equals(message_assist_thoughts, - test_chat_parse( + common_chat_parse( "I'm\nthinkingHello, world!\nWhat's up?", /* is_partial= */ false, { @@ -1676,12 +1452,12 @@ static void test_template_output_parsers() { /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, })); assert_msg_equals(simple_assist_msg("I'm\nthinkingHello, world!\nWhat's up?"), - test_chat_parse( + common_chat_parse( "I'm\nthinkingHello, world!\nWhat's up?", /* is_partial= */ false, {COMMON_CHAT_FORMAT_GRANITE})); assert_msg_equals(message_assist_empty, - test_chat_parse( + common_chat_parse( "I'm\nthinking", /* is_partial= */ true, { @@ -1703,329 +1479,57 @@ static void test_template_output_parsers() { })); assert_msg_equals( message_assist_empty, - test_chat_parse( + common_chat_parse( "I'm\nthinking[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}]", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_GRANITE})); - assert_msg_equals( - message_assist_call_empty_args, - test_chat_parse( - 
"<|tool_call|>[{\"name\": \"special_function\"", - /* is_partial= */ true, - {COMMON_CHAT_FORMAT_GRANITE})); - assert_msg_equals( - message_assist_call_cutoff_args, - test_chat_parse( - "<|tool_call|>[{\"name\": \"special_function\", \"arguments\": {\"arg", - /* is_partial= */ true, - {COMMON_CHAT_FORMAT_GRANITE})); - assert_msg_equals( - message_assist_call_cutoff_args, - test_chat_parse( - "<|tool_call|>[{\"name\": \"special_function\", \"arguments\": {\"arg", - /* is_partial= */ true, - { - /* .format = */ COMMON_CHAT_FORMAT_GRANITE, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, - })); + // Note: Functionary and Firefunction have dedicated handlers, not tested with auto-parser - // Test parsing tool calls with thinking - assert_msg_equals( - message_assist_call_thoughts, - test_chat_parse( - "I'm\nthinking<|tool_call|>[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}, {", - /* is_partial= */ true, - { - /* .format = */ COMMON_CHAT_FORMAT_GRANITE, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, - })); - - // Test template generation for regular content - test_templates(tmpls.get(), end_tokens, message_assist, tools, - "Hello, world!\nWhat's up?", - /* expect_grammar_triggered= */ false); - // TODO @ngxson : generic tool call should be removed in the future -#if 0 - // Test template generation for tool calls - test_templates(tmpls.get(), end_tokens, message_assist_call_id, tools, - "{\n" - " \"tool_calls\": [\n" - " {\n" - " \"name\": \"special_function\",\n" - " \"arguments\": {\n" - " \"arg1\": 1\n" - " },\n" - " \"id\": \"123456789\"\n" - " }\n" - " ],\n" - " \"content\": \"\"\n" - "}", - /* expect_grammar_triggered= */ false - ); -#endif - } { - auto tmpls = read_templates("models/templates/openai-gpt-oss-120b.jinja"); - std::vector end_tokens{ "<|return|>", "<|call|>" }; + // Test simple content-only template + auto tst = peg_tester("models/templates/google-gemma-2-2b-it.jinja", detailed_debug); - 
assert_equals(COMMON_CHAT_FORMAT_GPT_OSS, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format); - assert_equals(COMMON_CHAT_FORMAT_GPT_OSS, common_chat_templates_apply(tmpls.get(), inputs_tools).format); - - assert_msg_equals(simple_assist_msg("", "I'm\nthink"), - test_chat_parse( - "<|channel|>analysis<|message|>I'm\nthink", - /* is_partial= */ true, - { - /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO, - })); - assert_msg_equals(simple_assist_msg("", "I'm\nthinking"), - test_chat_parse( - "<|channel|>analysis<|message|>I'm\nthinking<|end|>", - /* is_partial= */ true, - { - /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO, - })); - assert_msg_equals(simple_assist_msg("Hello, world!\nWhat's up?", "I'm\nthinking"), - test_chat_parse( - "<|channel|>analysis<|message|>I'm\nthinking<|end|>" - "<|start|>assistant<|channel|>final<|message|>Hello, world!\nWhat's up?", - /* is_partial= */ false, - { - /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO, - })); - assert_msg_equals(simple_assist_msg("", "I'm\nthinking", "special_function", "{\"arg1"), - test_chat_parse( - "<|channel|>analysis<|message|>I'm\nthinking<|end|>" - "<|start|>assistant<|channel|>commentary to=functions.special_function <|constrain|>json<|message|>{\"arg1", - /* is_partial= */ true, - { - /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO, - })); - assert_msg_equals(simple_assist_msg("", "I'm\nthinking", "special_function", "{\"arg1"), - test_chat_parse( - "<|channel|>analysis<|message|>I'm\nthinking<|end|>" - "<|start|>assistant<|channel|>commentary to=functions.special_function<|message|>{\"arg1", - /* is_partial= */ true, - { - /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO, - })); - assert_msg_equals(simple_assist_msg("", 
"I'm\nthinking", "special_function", "{\"arg1\": 1}"), - test_chat_parse( - "<|channel|>analysis<|message|>I'm\nthinking<|end|>" - "<|start|>assistant<|channel|>commentary to=functions.special_function <|constrain|>json<|message|>{\"arg1\": 1}", - /* is_partial= */ false, - { - /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO, - })); - assert_msg_equals(simple_assist_msg("", "I'm\nthinking", "special_function", "{\"arg1\": 1}"), - test_chat_parse( - "<|channel|>analysis<|message|>I'm\nthinking<|end|>" - "<|start|>assistant<|channel|>analysis to=functions.special_function <|constrain|>json<|message|>{\"arg1\": 1}", - /* is_partial= */ false, - { - /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO, - })); - assert_msg_equals(simple_assist_msg("Hello, world!\nWhat's up?", "I'm\nthinking"), - test_chat_parse( - "<|channel|>analysis<|message|>I'm\nthinking<|end|>" - "<|start|>assistant<|channel|>commentary<|message|>Hello, world!\nWhat's up?", - /* is_partial= */ true, - { - /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO, - })); - assert_msg_equals(simple_assist_msg("Hello, world!\nWhat's up?", "I'm\nthinking", "special_function", "{\"arg1\": 1}"), - test_chat_parse( - "<|channel|>analysis<|message|>I'm\nthinking<|end|>" - "<|start|>assistant<|channel|>commentary<|message|>Hello, world!\nWhat's up?<|end|>" - "<|start|>assistant<|channel|>commentary to=functions.special_function <|constrain|>json<|message|>{\"arg1\": 1}", - /* is_partial= */ true, - { - /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO, - })); - - // Test parse_tool_calls == false - assert_msg_equals( - simple_assist_msg("Hello, world!\nWhat's up?", "I'm\nthinking"), - test_chat_parse( - "<|channel|>analysis<|message|>I'm\nthinking<|end|>" - "<|start|>assistant<|channel|>final<|message|>Hello, 
world!\nWhat's up?", - /* is_partial= */ true, - { - /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO, - /* .reasoning_in_content = */ false, - /* .thinking_forced_open = */ false, - /* .parse_tool_calls = */ false, - })); - assert_msg_equals( - simple_assist_msg("", "I'm\nthinking"), - test_chat_parse( - "<|channel|>analysis<|message|>I'm\nthinking<|end|>" - "<|start|>assistant<|channel|>commentary to=functions.special_function<|message|>{\"arg1", - /* is_partial= */ true, - { - /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO, - /* .reasoning_in_content = */ false, - /* .thinking_forced_open = */ false, - /* .parse_tool_calls = */ false, - })); - assert_msg_equals( - simple_assist_msg("", "I'm\nthinking"), - test_chat_parse( - "<|channel|>analysis<|message|>I'm\nthinking<|end|>" - "<|start|>assistant<|channel|>commentary to=functions.special_function <|constrain|>json<|message|>{\"arg1\": 1}", - /* is_partial= */ false, - { - /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO, - /* .reasoning_in_content = */ false, - /* .thinking_forced_open = */ false, - /* .parse_tool_calls = */ false, - })); - - // Test reasoning formats - assert_msg_equals( - simple_assist_msg( - "<|channel|>analysis<|message|>I'm\nthinking<|end|>Hello, world!\nWhat's up?"), - test_chat_parse( - "<|channel|>analysis<|message|>I'm\nthinking<|end|>" - "<|start|>assistant<|channel|>final<|message|>Hello, world!\nWhat's up?", - /* is_partial= */ false, - { - /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_NONE, - })); - - assert_msg_equals( - simple_assist_msg( - "<|channel|>analysis<|message|>I'm\nthinking<|end|>Hello, world!\nWhat's up?"), - test_chat_parse( - "<|channel|>analysis<|message|>I'm\nthinking<|end|>" - "<|start|>assistant<|channel|>final<|message|>Hello, world!\nWhat's up?", - /* 
is_partial= */ false, - { - /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO, - /* .reasoning_in_content = */ true, - })); - - // Test tool calling in role header - assert_msg_equals(simple_assist_msg("", "", "special_function", "{\"arg1\": 1}"), - test_chat_parse( - " to=functions.special_function<|channel|>commentary <|constrain|>json<|message|>{\"arg1\": 1}", - /* is_partial= */ false, - { - /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO, - })); - assert_msg_equals(simple_assist_msg("", "", "special_function", "{\"arg1\": 1}"), - test_chat_parse( - " to=functions.special_function<|channel|>analysis <|constrain|>json<|message|>{\"arg1\": 1}", - /* is_partial= */ false, - { - /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO, - })); - assert_msg_equals(simple_assist_msg("", "I'm\nthinking", "special_function", "{\"arg1\": 1}"), - test_chat_parse( - "<|channel|>analysis<|message|>I'm\nthinking<|end|>" - "<|start|>assistant to=functions.special_function<|channel|>analysis <|constrain|>json<|message|>{\"arg1\": 1}", - /* is_partial= */ false, - { - /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO, - })); + tst.test("Hello, world!\nWhat's up?").expect(message_assist).run(); } + { - // Seed-OSS format tests - auto tmpls = read_templates("models/templates/ByteDance-Seed-OSS.jinja"); - std::vector end_tokens{ "" }; + // IBM Granite (reasoning and tool calling model) + auto tst = peg_tester("models/templates/ibm-granite-granite-3.3-2B-Instruct.jinja", detailed_debug); - assert_equals(COMMON_CHAT_FORMAT_SEED_OSS, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format); - assert_equals(COMMON_CHAT_FORMAT_SEED_OSS, common_chat_templates_apply(tmpls.get(), inputs_tools).format); + tst.test("Hello, world!\nWhat's up?").expect(message_assist).run(); - 
test_templates(tmpls.get(), end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false); + tst.test("I'm\nthinkingHello, world!\nWhat's up?") + .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK) + .expect(message_assist_thoughts) + .run(); - // Test simple reasoning content - assert_msg_equals( - simple_assist_msg("Hello, world!", "I'm thinking about the answer"), - test_chat_parse( - "I'm thinking about the answerHello, world!", - /* is_partial= */ false, - { - /* .format = */ COMMON_CHAT_FORMAT_SEED_OSS, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, - })); + tst.test("I'm\nthinkingHello, world!\nWhat's up?") + .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK) + .expect(message_assist_thoughts) + .run(); + } - // Test budget reflection tags - common_chat_msg msg_budget_reflect; - msg_budget_reflect.role = "assistant"; - msg_budget_reflect.content = "Token usage: 45/1000\nI should continue thinking to find the best solution.I need to calculate this step by step."; - msg_budget_reflect.reasoning_content = "Token usage: 45/1000\nI should continue thinking to find the best solution."; - assert_msg_equals( - msg_budget_reflect, - test_chat_parse( - "Token usage: 45/1000\nI should continue thinking to find the best solution." - "Token usage: 45/1000\nI should continue thinking to find the best solution." 
- "I need to calculate this step by step.", - /* is_partial= */ false, - { - /* .format = */ COMMON_CHAT_FORMAT_SEED_OSS, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, - })); + { + // ByteDance-Seed-OSS (reasoning and tool calling model) + auto tst = peg_tester("models/templates/ByteDance-Seed-OSS.jinja", detailed_debug); - // Test tool calls with Seed-OSS format - common_chat_msg msg_tool_call; - msg_tool_call.role = "assistant"; - msg_tool_call.tool_calls.push_back({"calculate_sum", "{\"numbers\": [1, 2, 3]}", ""}); - assert_msg_equals( - msg_tool_call, - test_chat_parse( - "\n" - "\n" - "[1, 2, 3]\n" - "\n" - "", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_SEED_OSS})); + tst.test("Hello, world!\nWhat's up?").expect(message_assist).run(); - // Test reasoning + tool call combination - common_chat_msg msg_reasoning_tool; - msg_reasoning_tool.role = "assistant"; - msg_reasoning_tool.content = ""; - msg_reasoning_tool.reasoning_content = "I need to calculate the sum of these numbers"; - msg_reasoning_tool.tool_calls.push_back({"calculate_sum", "{\"numbers\": [1, 2, 3]}", ""}); - assert_msg_equals( - msg_reasoning_tool, - test_chat_parse( - "I need to calculate the sum of these numbers" - "\n" - "\n" - "[1, 2, 3]\n" - "\n" - "", - /* is_partial= */ false, - { - /* .format = */ COMMON_CHAT_FORMAT_SEED_OSS, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, - })); + tst.test("I'm thinking about the answerHello, world!") + .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK) + .expect(simple_assist_msg("Hello, world!", "I'm thinking about the answer")) + .run(); + + tst.test( + "\n" + "\n" + "1\n" + "\n" + "") + .tools({ special_function_tool }) + .expect(message_assist_call) + .run(); // Test deltas: the number of tool calls in partial parses should never decrease std::string tool_msg = "\n" @@ -2035,7 +1539,7 @@ static void test_template_output_parsers() { std::size_t previousToolCalls = 0; for (std::size_t i = std::string("").length(); i 
< tool_msg.length() - 1; i++) { auto partial = tool_msg.substr(0, i); - auto partial_res = test_chat_parse(partial, true, { COMMON_CHAT_FORMAT_SEED_OSS, COMMON_REASONING_FORMAT_DEEPSEEK }); + auto partial_res = common_chat_parse(partial, true, { COMMON_CHAT_FORMAT_SEED_OSS, COMMON_REASONING_FORMAT_DEEPSEEK }); if (partial_res.tool_calls.size() < previousToolCalls) { throw std::runtime_error("Tool call size decreased on partial: " + partial + " from " + std::to_string(previousToolCalls) + " to " + std::to_string(partial_res.tool_calls.size())); } @@ -2048,1889 +1552,738 @@ static void test_template_output_parsers() { msg_multi_param.tool_calls.push_back({"process_data", "{\"input\": \"test\", \"format\": \"json\"}", ""}); assert_msg_equals( msg_multi_param, - test_chat_parse( - "\n" - "\n" - "test\n" - "json\n" - "\n" - "", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_SEED_OSS})); + common_chat_parse( + "\n" + "\n" + "1\n" + "\n" + "") + .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK) + .tools({ special_function_tool }) + .expect(simple_assist_msg("", "I need to call a function", "special_function", R"({"arg1": 1})")) + .run(); - // Test partial parsing for incomplete tool call - don't actually add the call until parsing parameters is done - assert_msg_equals( - simple_assist_msg("", "", "calculate_sum", "{\"numbers\":"), - test_chat_parse( - "\n" - "\n" - "[1,\n", - /* is_partial= */ true, - {COMMON_CHAT_FORMAT_SEED_OSS})); + tst.test( + "\n" + "\n" + "1\n" + "\n" + "\n" + "\n" + "\n" + "1\n" + "2\n" + "\n" + "") + .parallel_tool_calls(true) + .tools({ + special_function_tool, special_function_tool_with_optional_param + }) + .expect_tool_calls({ + { "special_function", R"({"arg1": 1})", {} }, + { "special_function_with_opt", R"({"arg1": 1, "arg2": 2})", {} }, + }) + .run(); - // Test incomplete reasoning tag - assert_msg_equals( - simple_assist_msg("", "I was thinking"), - test_chat_parse( - "I was thinking", - /* is_partial= */ true, - { - /* .format = */ 
COMMON_CHAT_FORMAT_SEED_OSS, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, - })); + tst.test( + "\n" + "\n" + "\n" + "[{\"item\": \"Check stuff\", \"selected\": false}, {\"item\": \"Prepare stuff\", \"selected\": true}]\n" + "\n" + "\n" + "") + .tools({ + todo_list + }) + .expect_tool_calls({ + { "todo_list", "{\"todos\": [{\"item\": \"Check stuff\", \"selected\": false}, {\"item\": \"Prepare stuff\", \"selected\": true}]}", {} }, + }) + .run(); - // Test content without reasoning - assert_msg_equals( - simple_assist_msg("This is a simple response without reasoning."), - test_chat_parse( - "This is a simple response without reasoning.", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_SEED_OSS})); - } - { - auto tmpls = read_templates("models/templates/NVIDIA-Nemotron-Nano-v2.jinja"); - std::vector end_tokens{ "" }; + // single-quote normalization + tst.test( + "\n" + "\n" + "\n" + "[{'item': 'Check stuff', 'selected': false}, {'item': 'Prepare stuff', 'selected': true}]\n" + "\n" + "\n" + "") + .tools({ + todo_list + }) + .expect_tool_calls({ + { "todo_list", "{\"todos\": [{\"item\": \"Check stuff\", \"selected\": false}, {\"item\": \"Prepare stuff\", \"selected\": true}]}", {} }, + }) + .run(); - assert_equals(COMMON_CHAT_FORMAT_NEMOTRON_V2, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format); - assert_equals(COMMON_CHAT_FORMAT_NEMOTRON_V2, common_chat_templates_apply(tmpls.get(), inputs_tools).format); - - // Test parsing regular content - assert_msg_equals(message_assist, - test_chat_parse( - "Hello, world!\nWhat's up?", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_NEMOTRON_V2})); - - // Test parsing content with thinking - assert_msg_equals(message_assist_thoughts, - test_chat_parse( - "I'm\nthinkingHello, world!\nWhat's up?", - /* is_partial= */ false, - { - /* .format = */ COMMON_CHAT_FORMAT_NEMOTRON_V2, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, - })); - - // Test parsing tool calls - 
assert_msg_equals(message_assist_call, - test_chat_parse( - "[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}]", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_NEMOTRON_V2})); - - // Test parsing tool calls with thinking - assert_msg_equals(message_assist_call_thoughts, - test_chat_parse( - "I'm\nthinking[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}]", - /* is_partial= */ false, - { - /* .format = */ COMMON_CHAT_FORMAT_NEMOTRON_V2, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK - })); - - // Test tool calls with extra content - assert_msg_equals(message_assist_call_content, - test_chat_parse( - "[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}]Hello, world!\nWhat's up?", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_NEMOTRON_V2} - )); - - // Test tool calls with extra content AND thinking - assert_msg_equals(message_assist_call_thoughts_content, - test_chat_parse( - "I'm\nthinking[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}]Hello, world!\nWhat's up?", - /* is_partial= */ false, - { - /* .format = */ COMMON_CHAT_FORMAT_NEMOTRON_V2, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK - })); - - // Test template generation for regular content - test_templates(tmpls.get(), end_tokens, message_assist, tools, - "Hello, world!\nWhat's up?\n", - /* expect_grammar_triggered= */ false); - - // Test template generation for tool calls - test_templates(tmpls.get(), end_tokens, message_assist_call, tools, - "[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}]", - /* expect_grammar_triggered= */ true - ); - } - { - auto tmpls = read_templates("models/templates/deepseek-ai-DeepSeek-V3.1.jinja"); - std::vector end_tokens{ "<|end▁of▁sentence|>" }; - - for (const auto & inputs : { inputs_no_tools, inputs_tools }) { - auto params = common_chat_templates_apply(tmpls.get(), inputs); - assert_equals(COMMON_CHAT_FORMAT_DEEPSEEK_V3_1, params.format); - assert_equals(true, 
params.thinking_forced_open); - } - - test_templates(tmpls.get(), end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false); - test_templates(tmpls.get(), end_tokens, message_assist_thoughts, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false); - assert_msg_equals( - simple_assist_msg("Hello, world!\nWhat's up?", "I'm\nthinking"), - test_chat_parse( - "I'm\nthinkingHello, world!\nWhat's up?", - /* is_partial= */ false, - { - COMMON_CHAT_FORMAT_DEEPSEEK_V3_1, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, - /* .reasoning_in_content = */ false, - /* .thinking_forced_open = */ true, - })); - // variant: thinking forced open, reasoning_format none - assert_msg_equals( - simple_assist_msg("REASONINGok", ""), - test_chat_parse( - "REASONINGok", - /* is_partial= */ false, - { - COMMON_CHAT_FORMAT_DEEPSEEK_V3_1, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_NONE, - /* .reasoning_in_content = */ false, - /* .thinking_forced_open = */ true, - /* .parse_tool_calls = */ true, - })); - // variant: happy path for when it works as the model card says it should - assert_msg_equals( - simple_assist_msg("", "", "get_time", "{\"city\":\"Tokyo\"}"), - test_chat_parse( - "<|tool▁calls▁begin|><|tool▁call▁begin|>get_time<|tool▁sep|>{\"city\": \"Tokyo\"}<|tool▁call▁end|><|tool▁calls▁end|>", - /* is_partial= */ false, - { - COMMON_CHAT_FORMAT_DEEPSEEK_V3_1, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, - /* .reasoning_in_content = */ false, - /* .thinking_forced_open = */ false, - /* .parse_tool_calls = */ true, - })); - // variant: simple + thinking open - assert_msg_equals( - simple_assist_msg("", "REASONING", "get_time", "{\"city\":\"Tokyo\"}"), - test_chat_parse( - "REASONING<|tool▁calls▁begin|><|tool▁call▁begin|>get_time<|tool▁sep|>{\"city\": \"Tokyo\"}<|tool▁call▁end|><|tool▁calls▁end|>", - /* is_partial= */ false, - { - COMMON_CHAT_FORMAT_DEEPSEEK_V3_1, - /* .reasoning_format = 
*/ COMMON_REASONING_FORMAT_DEEPSEEK, - /* .reasoning_in_content = */ false, - /* .thinking_forced_open = */ true, - /* .parse_tool_calls = */ true, - })); - // variant: simple + multiple tool calls - common_chat_msg message_assist_multiple_calls; - message_assist_multiple_calls.role = "assistant"; - message_assist_multiple_calls.content = "CONTENT"; - message_assist_multiple_calls.tool_calls.push_back({"get_time", "{\"city\":\"Paris\"}", ""}); - message_assist_multiple_calls.tool_calls.push_back({"get_weather", "{\"city\":\"Paris\"}", ""}); - assert_msg_equals( - message_assist_multiple_calls, - test_chat_parse( - "CONTENT<|tool▁calls▁begin|><|tool▁call▁begin|>get_time<|tool▁sep|>{\"city\": \"Paris\"}<|tool▁call▁end|><|tool▁call▁begin|>get_weather<|tool▁sep|>{\"city\": \"Paris\"}<|tool▁call▁end|><|tool▁calls▁end|>", - /* is_partial= */ false, - { - COMMON_CHAT_FORMAT_DEEPSEEK_V3_1, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, - /* .reasoning_in_content = */ false, - /* .thinking_forced_open = */ false, - /* .parse_tool_calls = */ true, - })); - // variant: thinking forced open + tool call in reasoning content - assert_msg_equals( - simple_assist_msg("", "REASONING<|tool▁calls▁begin|><|tool▁call▁begin|>get_time2<|tool▁sep|>{\"city\": \"Tokyo2\"}<|tool▁call▁end|><|tool▁calls▁end|>REASONING", "get_time", "{\"city\":\"Tokyo\"}"), - test_chat_parse( - "REASONING<|tool▁calls▁begin|><|tool▁call▁begin|>get_time2<|tool▁sep|>{\"city\": \"Tokyo2\"}<|tool▁call▁end|><|tool▁calls▁end|>REASONING<|tool▁calls▁begin|><|tool▁call▁begin|>get_time<|tool▁sep|>{\"city\": \"Tokyo\"}<|tool▁call▁end|><|tool▁calls▁end|>", - /* is_partial= */ false, - { - COMMON_CHAT_FORMAT_DEEPSEEK_V3_1, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, - /* .reasoning_in_content = */ false, - /* .thinking_forced_open = */ true, - /* .parse_tool_calls = */ true, - })); - // variant: thinking forced open + tool call in reasoning content + no closing think + not partial - // This 
is a bit of a fine tuning issue on the model's part IMO. It really should not be attempting - // to make tool calls in reasoning content according to the model card, but it does sometimes, so - // add the reasoning content as regular content and parse the tool calls. - assert_msg_equals( - simple_assist_msg("REASONING", "", "get_time", "{\"city\":\"Tokyo\"}"), - test_chat_parse( - "REASONING<|tool▁calls▁begin|><|tool▁call▁begin|>get_time<|tool▁sep|>{\"city\": \"Tokyo\"}<|tool▁call▁end|><|tool▁calls▁end|>", - /* is_partial= */ false, - { - COMMON_CHAT_FORMAT_DEEPSEEK_V3_1, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, - /* .reasoning_in_content = */ false, - /* .thinking_forced_open = */ true, - /* .parse_tool_calls = */ true, - })); - // variant: thinking forced open + tool call in reasoning content + no closing think + partial - assert_msg_equals( - simple_assist_msg("", "REASONING<|tool▁calls▁begin|><|tool▁call▁begin|>get_time<|tool▁sep|>{\"city\": \"Tokyo\"}<|tool▁call▁end|><|tool▁calls▁end|>", "", ""), - test_chat_parse( - "REASONING<|tool▁calls▁begin|><|tool▁call▁begin|>get_time<|tool▁sep|>{\"city\": \"Tokyo\"}<|tool▁call▁end|><|tool▁calls▁end|>", - /* is_partial= */ true, - { - COMMON_CHAT_FORMAT_DEEPSEEK_V3_1, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, - /* .reasoning_in_content = */ false, - /* .thinking_forced_open = */ true, - /* .parse_tool_calls = */ true, - })); - // variant: thinking not forced open + missing reasoning + no tool calls - assert_msg_equals( - simple_assist_msg("CONTENT", ""), - test_chat_parse( - "CONTENT", - /* is_partial= */ false, - { - COMMON_CHAT_FORMAT_DEEPSEEK_V3_1, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, - /* .reasoning_in_content = */ false, - /* .thinking_forced_open = */ false, - /* .parse_tool_calls = */ true, - })); - } - { - auto tmpls = read_templates("models/templates/Apertus-8B-Instruct.jinja"); - std::vector end_tokens{ "<|assistant_end|>" }; - - 
assert_equals(COMMON_CHAT_FORMAT_APERTUS, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format); - assert_equals(COMMON_CHAT_FORMAT_APERTUS, common_chat_templates_apply(tmpls.get(), inputs_tools).format); - - // Test parsing regular content - assert_msg_equals(message_assist, - test_chat_parse( - "Hello, world!\nWhat's up?", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_APERTUS})); - - // Test parsing content with thinking - assert_msg_equals(message_assist_thoughts, - test_chat_parse( - "<|inner_prefix|>I'm\nthinking<|inner_suffix|>Hello, world!\nWhat's up?", - /* is_partial= */ false, - { - /* .format = */ COMMON_CHAT_FORMAT_APERTUS, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, - })); - - // Test parsing tool calls - assert_msg_equals(message_assist_call, - test_chat_parse( - "<|tools_prefix|>[{\"special_function\": {\"arg1\": 1}}]<|tools_suffix|>", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_APERTUS})); - - // Test parsing tool calls with thinking - assert_msg_equals(message_assist_call_thoughts, - test_chat_parse( - "<|inner_prefix|>I'm\nthinking<|inner_suffix|><|tools_prefix|>[{\"special_function\": {\"arg1\": 1}}]<|tools_suffix|>", - /* is_partial= */ false, - { - /* .format = */ COMMON_CHAT_FORMAT_APERTUS, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK - })); - - // Test tool calls with extra content - assert_msg_equals(message_assist_call_content, - test_chat_parse( - "<|tools_prefix|>[{\"special_function\": {\"arg1\": 1}}]<|tools_suffix|>Hello, world!\nWhat's up?", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_APERTUS} - )); - - // Test tool calls with extra content AND thinking - assert_msg_equals(message_assist_call_thoughts_content, - test_chat_parse( - "<|inner_prefix|>I'm\nthinking<|inner_suffix|><|tools_prefix|>[{\"special_function\": {\"arg1\": 1}}]<|tools_suffix|>Hello, world!\nWhat's up?", - /* is_partial= */ false, - { - /* .format = */ COMMON_CHAT_FORMAT_APERTUS, - /* .reasoning_format = */ 
COMMON_REASONING_FORMAT_DEEPSEEK - })); - - // Test template generation for regular content - test_templates(tmpls.get(), end_tokens, message_assist, tools, - "Hello, world!\nWhat's up?", - /* expect_grammar_triggered= */ false); - - // Test template generation for tool calls - test_templates(tmpls.get(), end_tokens, message_assist_call, tools, - "<|tools_prefix|>[{\"special_function\": {\"arg1\": 1}}]<|tools_suffix|>", - /* expect_grammar_triggered= */ true - ); - - // TODO @ngxson : not sure why this fails, but not very important for now - // assert_equals(true, common_chat_templates_support_enable_thinking(tmpls.get())); - } - { - // LFM2 format tests - auto tmpls = read_templates("models/templates/llama-cpp-lfm2.jinja"); - std::vector end_tokens{ "<|im_end|>" }; - - auto inputs_tools_forced_json_schema = std::invoke([&]() -> common_chat_templates_inputs { - common_chat_templates_inputs inputs; - inputs.messages = { - std::invoke([&]() -> common_chat_msg { - common_chat_msg msg; - msg.role = "system"; - msg.content = "force json schema.\n"; - return msg; - }), - message_user, - }; - inputs.tools = {special_function_tool}; - return inputs; - }); - - { - auto params = common_chat_templates_apply(tmpls.get(), inputs_no_tools); - assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY, params.format); - assert_equals(false, params.grammar_lazy); - assert_equals(std::string(R"(<|im_start|>user -Hey there!<|im_end|> -<|im_start|>assistant -)"), params.prompt); - } - - { - auto params = common_chat_templates_apply(tmpls.get(), inputs_tools); - assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY, params.format); - assert_equals(false, params.grammar_lazy); - assert_equals(std::string(R"(<|im_start|>system -List of tools: <|tool_list_start|>[{"type": "function", "function": {"name": "special_function", "description": "I'm special", "parameters": {"type": "object", "properties": {"arg1": {"type": "integer", "description": "The arg."}}, "required": ["arg1"]}}}]<|tool_list_end|><|im_end|> 
-<|im_start|>user -Hey there!<|im_end|> -<|im_start|>assistant -)"), params.prompt); - assert_equals(true, params.grammar.empty()); - } - - { - auto params = common_chat_templates_apply(tmpls.get(), inputs_tools_forced_json_schema); - assert_equals(COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS, params.format); - assert_equals(true, params.grammar_lazy); - assert_equals(std::string(R"(<|im_start|>system -List of tools: <|tool_list_start|>[{"type": "function", "function": {"name": "special_function", "description": "I'm special", "parameters": {"type": "object", "properties": {"arg1": {"type": "integer", "description": "The arg."}}, "required": ["arg1"]}}}]<|tool_list_end|><|im_end|> -<|im_start|>user -Hey there!<|im_end|> -<|im_start|>assistant -)"), params.prompt); - assert_equals(false, params.grammar.empty()); - } - - // Test parsing regular content - assert_msg_equals(message_assist, - test_chat_parse( - "Hello, world!\nWhat's up?", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS})); - - // Test single tool call with JSON format - common_chat_msg msg_single_tool_call; - msg_single_tool_call.role = "assistant"; - msg_single_tool_call.tool_calls.push_back({"special_function", "{\"arg1\":1}", ""}); - assert_msg_equals( - msg_single_tool_call, - test_chat_parse( - "<|tool_call_start|>[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}]<|tool_call_end|>", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS})); - - // Test tool call with string argument - common_chat_msg msg_tool_call_string; - msg_tool_call_string.role = "assistant"; - msg_tool_call_string.tool_calls.push_back({"get_weather", "{\"location\":\"Paris\"}", ""}); - assert_msg_equals( - msg_tool_call_string, - test_chat_parse( - "<|tool_call_start|>[{\"name\": \"get_weather\", \"arguments\": {\"location\": \"Paris\"}}]<|tool_call_end|>", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS})); - - // Test tool call with multiple arguments - 
common_chat_msg msg_multi_args; - msg_multi_args.role = "assistant"; - msg_multi_args.tool_calls.push_back({"calculate", "{\"x\":10,\"y\":20,\"operation\":\"add\"}", ""}); - assert_msg_equals( - msg_multi_args, - test_chat_parse( - "<|tool_call_start|>[{\"name\": \"calculate\", \"arguments\": {\"x\": 10, \"y\": 20, \"operation\": \"add\"}}]<|tool_call_end|>", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS})); - - // Test multiple tool calls in single array - common_chat_msg msg_multiple_tools; - msg_multiple_tools.role = "assistant"; - msg_multiple_tools.tool_calls.push_back({"get_weather", "{\"location\":\"Paris\"}", ""}); - msg_multiple_tools.tool_calls.push_back({"get_time", "{\"timezone\":\"UTC\"}", ""}); - assert_msg_equals( - msg_multiple_tools, - test_chat_parse( - "<|tool_call_start|>[{\"name\": \"get_weather\", \"arguments\": {\"location\": \"Paris\"}}, {\"name\": \"get_time\", \"arguments\": {\"timezone\": \"UTC\"}}]<|tool_call_end|>", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS})); - - // Test tool call with content before - common_chat_msg msg_content_before_tool; - msg_content_before_tool.role = "assistant"; - msg_content_before_tool.content = "Let me check the weather for you."; - msg_content_before_tool.tool_calls.push_back({"get_weather", "{\"location\":\"Paris\"}", ""}); - assert_msg_equals( - msg_content_before_tool, - test_chat_parse( - "Let me check the weather for you.<|tool_call_start|>[{\"name\": \"get_weather\", \"arguments\": {\"location\": \"Paris\"}}]<|tool_call_end|>", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS})); - - // Test tool call with content after - common_chat_msg msg_content_after_tool; - msg_content_after_tool.role = "assistant"; - msg_content_after_tool.content = "Here's the result."; - msg_content_after_tool.tool_calls.push_back({"get_weather", "{\"location\":\"Paris\"}", ""}); - assert_msg_equals( - msg_content_after_tool, - test_chat_parse( - 
"<|tool_call_start|>[{\"name\": \"get_weather\", \"arguments\": {\"location\": \"Paris\"}}]<|tool_call_end|>Here's the result.", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS})); - - // Test tool call with newlines (common in LLM output) - common_chat_msg msg_tool_call_newlines; - msg_tool_call_newlines.role = "assistant"; - msg_tool_call_newlines.tool_calls.push_back({"get_current_time", "{\"location\":\"Paris\"}", ""}); - assert_msg_equals( - msg_tool_call_newlines, - test_chat_parse( - "<|tool_call_start|>[{\n \"name\": \"get_current_time\",\n \"arguments\": {\n \"location\": \"Paris\"\n }\n}]<|tool_call_end|>", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS})); - - // Note: LFM2 uses JSON format for tool calls: [{"name": "...", "arguments": {...}}] - // Unlike other formats, LFM2 template does not render tool calls in conversation history, - // so we don't use test_templates() for tool call generation. Instead, the parsing tests - // above verify edge cases and format variations for the tool call output format. 
+ // single-quote normalization and tool call with inside quotes + tst.test( + "\n" + "\n" + "\n" + "foo.cpp\n" + "\n" + "\n" + "def foo(arg = \"14\"):\n" + " return arg + \"bar\"\n" + "\n" + "\n" + "\n" + "def foo(arg = \"15\"):\n" + " pass\n" + "\n" + "\n" + "\n" + "") + .tools({ + edit_tool + }) + .expect_tool_calls({ + { "edit", "{\"filename\": \"foo.cpp\", " + "\"oldString\": \"def foo(arg = \\\"14\\\"):\\n return arg + \\\"bar\\\"\\n\", " + "\"newString\": \"def foo(arg = \\\"15\\\"):\\n pass\\n\"}", {} + } + }) + .run(); } { - auto tmpls = read_templates("models/templates/MiniMax-M2.jinja"); - std::vector end_tokens{ "[e~[" }; + // Qwen3-Coder (tool calling with XML-style format) + auto tst = peg_tester("models/templates/Qwen3-Coder.jinja", detailed_debug); - assert_equals(COMMON_CHAT_FORMAT_MINIMAX_M2, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format); - assert_equals(COMMON_CHAT_FORMAT_MINIMAX_M2, common_chat_templates_apply(tmpls.get(), inputs_tools).format); + tst.test("Hello, world!\nWhat's up?").expect(message_assist).run(); - // Test parsing regular content - assert_msg_equals(message_assist, - test_chat_parse( - "Hello, world!\nWhat's up?", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_MINIMAX_M2})); + tst.test( + "\n" + "\n" + "\n" + "1\n" + "\n" + "\n" + "") + .tools({ special_function_tool }) + .expect(message_assist_call) + .run(); - // Test parsing content with thinking - assert_msg_equals(message_assist_thoughts, - test_chat_parse( - "I'm\nthinkingHello, world!\nWhat's up?", - /* is_partial= */ false, - { - /* .format = */ COMMON_CHAT_FORMAT_MINIMAX_M2, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, - })); + tst.test( + "\n" + "\n" + "\n" + "1\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "1\n" + "\n" + "\n" + "2\n" + "\n" + "\n" + "") + .parallel_tool_calls(true) + .tools({ + special_function_tool, special_function_tool_with_optional_param + }) + .expect_tool_calls({ + { "special_function", R"({"arg1": 1})", 
{} }, + { "special_function_with_opt", R"({"arg1": 1, "arg2": 2})", {} }, + }) + .run(); - // Test parsing tool calls - assert_msg_equals(message_assist_call, - test_chat_parse( - "1", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_MINIMAX_M2})); + // Test with code content (multiline) + tst.test( + "\n" + "\n" + "\n" + "def hello():\n" + " print(\"Hello, world!\")\n" + "\n" + "hello()\n" + "\n" + "\n" + "") + .tools({ + python_tool + }) + .expect_tool_calls({ + { "python", "{\"code\": \"def hello():\\n print(\\\"Hello, world!\\\")\\n\\nhello()\"}", {} }, + }) + .run(); - // Test parsing tool calls with thinking - assert_msg_equals(message_assist_call_thoughts, - test_chat_parse( - "I'm\nthinking1", - /* is_partial= */ false, - { - /* .format = */ COMMON_CHAT_FORMAT_MINIMAX_M2, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK - })); + // Test with HTML tag content + tst.test( + "\n" + "\n" + "\n" + "\n" + " \n" + " Hello!\n" + " \n" + "\n" + "\n" + "\n" + "") + .tools({ + html_tool + }) + .expect_tool_calls({ + { "html", "{\"markup\": \"\\n \\n Hello!\\n \\n\"}", {} }, + }) + .run(); - // Test tool calls with extra content - assert_msg_equals(message_assist_call_content, - test_chat_parse( - "1Hello, world!\nWhat's up?", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_MINIMAX_M2} - )); - - // Test tool calls with extra content AND thinking - assert_msg_equals(message_assist_call_thoughts_content, - test_chat_parse( - "I'm\nthinking1Hello, world!\nWhat's up?", - /* is_partial= */ false, - { - /* .format = */ COMMON_CHAT_FORMAT_MINIMAX_M2, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK - })); - - // Test streaming - test_parser_with_streaming(message_assist_call_thoughts_content, - "I'm\nthinking\nHello, world!\nWhat's up?\n1", - [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, { - /* .format = */ COMMON_CHAT_FORMAT_MINIMAX_M2, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK - }); }); - 
test_parser_with_streaming(message_assist_call_thoughts_unparsed, - "I'm\nthinking\n\n1", - [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, { - /* .format = */ COMMON_CHAT_FORMAT_MINIMAX_M2, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_NONE - }); }); - test_parser_with_streaming(message_assist_call_thoughts_content, - "I'm\nthinking\n\n\nHello, world!\nWhat's up?\n\n\n\n1\n\n\n", - [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, { - /* .format = */ COMMON_CHAT_FORMAT_MINIMAX_M2, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK - }); }); - test_parser_with_streaming(message_assist_call_withopt, - "\n\n1\n2\n\n", - [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, { - /* .format = */ COMMON_CHAT_FORMAT_MINIMAX_M2, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_NONE - }); }); - - // Test template generation for regular content - test_templates(tmpls.get(), end_tokens, message_assist, tools, - "Hello, world!\nWhat's up?", - /* expect_grammar_triggered= */ false); - - // Test template generation for tool calls - test_templates(tmpls.get(), end_tokens, message_assist_call, tools, - "\n\n1\n\n", - /* expect_grammar_triggered= */ true, - /* test_grammar_if_triggered= */ true, - /* common_reasoning_format= */ COMMON_REASONING_FORMAT_NONE, - /* ignore_whitespace_differences= */ true - ); - - // Test template generation for tools with optional parameters - test_templates(tmpls.get(), end_tokens, message_assist_call_noopt, tools, - "\n\n1\n\n", - /* expect_grammar_triggered= */ true, - /* test_grammar_if_triggered= */ true, - /* common_reasoning_format= */ COMMON_REASONING_FORMAT_NONE, - /* ignore_whitespace_differences= */ true - ); - test_templates(tmpls.get(), end_tokens, message_assist_call_withopt, tools, - "\n\n1\n2\n\n", - /* expect_grammar_triggered= */ true, - /* test_grammar_if_triggered= */ true, - /* common_reasoning_format= */ 
COMMON_REASONING_FORMAT_NONE, - /* ignore_whitespace_differences= */ true - ); + // Test with TODO list (array of objects) + tst.test( + "\n" + "\n" + "\n" + "[{\"item\": \"Check stuff\", \"selected\": false}, {\"item\": \"Prepare stuff\", \"selected\": true}]\n" + "\n" + "\n" + "") + .tools({ + todo_list + }) + .expect_tool_calls({ + { "todo_list", "{\"todos\": [{\"item\": \"Check stuff\", \"selected\": false}, {\"item\": \"Prepare stuff\", \"selected\": true}]}", {} }, + }) + .run(); + } + { + auto tst = peg_tester("models/templates/deepseek-ai-DeepSeek-V3.1.jinja", detailed_debug); + tst.test( + "<|tool▁calls▁begin|><|tool▁call▁begin|>get_time<|tool▁sep|>{\"city\": " + "\"XYZCITY\"}<|tool▁call▁end|><|tool▁calls▁end|>") + .tools({ get_time_tool }) + .expect(message_with_tool_calls("get_time", "{\"city\":\"XYZCITY\"}")) + .run(); } { - auto tmpls = read_templates("models/templates/GLM-4.6.jinja"); - std::vector end_tokens{ "<|assistant|>", "<|observation|>" }; - - assert_equals(COMMON_CHAT_FORMAT_GLM_4_5, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format); - assert_equals(COMMON_CHAT_FORMAT_GLM_4_5, common_chat_templates_apply(tmpls.get(), inputs_tools).format); - - // Test parsing regular content - assert_msg_equals(message_assist, - test_chat_parse( - "Hello, world!\nWhat's up?", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_GLM_4_5})); - - // Test parsing content with thinking - assert_msg_equals(message_assist_thoughts, - test_chat_parse( - "\nI'm\nthinking\nHello, world!\nWhat's up?", - /* is_partial= */ false, - { - /* .format = */ COMMON_CHAT_FORMAT_GLM_4_5, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, - }), true); - - // Test parsing tool calls - assert_msg_equals(message_assist_call, - test_chat_parse( - "\nspecial_function\narg1\n1\n", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_GLM_4_5}), true); - - // Test parsing tool calls with thinking - assert_msg_equals(message_assist_call_thoughts, - test_chat_parse( - 
"\nI'm\nthinking\nspecial_function\narg1\n1\n", - /* is_partial= */ false, - { - /* .format = */ COMMON_CHAT_FORMAT_GLM_4_5, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK - }), true); - - // Test tool calls with extra content - assert_msg_equals(message_assist_call_content, - test_chat_parse( - "\nspecial_function\narg1\n1\nHello, world!\nWhat's up?", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_GLM_4_5} - ), true); - - // Test tool calls with extra content AND thinking - assert_msg_equals(message_assist_call_thoughts_content, - test_chat_parse( - "\nI'm\nthinkingHello, world!\nWhat's up?\nspecial_function\narg1\n1\n", - /* is_partial= */ false, - { - /* .format = */ COMMON_CHAT_FORMAT_GLM_4_5, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK - }), true); - - // Test streaming - test_parser_with_streaming(message_assist_call_thoughts_content, - "\nI'm\nthinkingHello, world!\nWhat's up?\nspecial_function\narg1\n1\n", - [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, { - /* .format = */ COMMON_CHAT_FORMAT_GLM_4_5, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK - }); }); - test_parser_with_streaming(message_assist_call_thoughts_unparsed, - "\nI'm\nthinking\n\nspecial_function\narg1\n1\n", - [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, { - /* .format = */ COMMON_CHAT_FORMAT_GLM_4_5, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_NONE - }); }); - test_parser_with_streaming(message_assist_call_withopt, - "\n\nspecial_function_with_opt\narg1\n1\narg2\n2\n\n", - [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, { - /* .format = */ COMMON_CHAT_FORMAT_GLM_4_5, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK - }); }); - test_parser_with_streaming( - simple_assist_msg("", "", "complex_function", "{\"name\":\"John Doe\",\"age\":30,\"active\":true,\"score\":95.5}"), - "complex_function\n" - "name\n" - "John Doe\n" - 
"age\n" - "30\n" - "active\n" - "true\n" - "score\n" - "95.5\n" - "", - [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_GLM_4_5}); }); - test_parser_with_streaming( - simple_assist_msg("", "", "web_search", "{\"query\":\"\\\"From Zero\\\" Linkin Park album tracklist complete songs\",\"limit\":3,\"type\":\"text\"}"), - "web_search\n" - "query\n" - "\"From Zero\" Linkin Park album tracklist complete songs\n" - "limit\n" - "3\n" - "type\n" - "text\n" - "", - [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_GLM_4_5}); }); - - // Test interleaved thinking - test_parser_with_streaming(simple_assist_msg("Hello, world!\n\nWhat's up?", "I'm\nthinkingThinking2", "special_function", "{\"arg1\": 1}"), - "\nI'm\nthinkingHello, world!\nThinking2What's up?\nspecial_function\narg1\n1\n", - [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, { - /* .format = */ COMMON_CHAT_FORMAT_GLM_4_5, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK - }); }); - test_parser_with_streaming(simple_assist_msg("\nI'm\nthinkingHello, world!\nThinking2What's up?", "", "special_function", "{\"arg1\": 1}"), - "\nI'm\nthinkingHello, world!\nThinking2What's up?\nspecial_function\narg1\n1\n", - [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, { - /* .format = */ COMMON_CHAT_FORMAT_GLM_4_5, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_NONE - }); }); - - // Test template generation for regular content - test_templates(tmpls.get(), end_tokens, message_assist, tools, - "\n\nHello, world!\nWhat's up?", - /* expect_grammar_triggered= */ false); - - // Test template generation for tool calls - test_templates(tmpls.get(), end_tokens, message_assist_call, tools, - "\n\nspecial_function\narg1\n1\n\n", - /* expect_grammar_triggered= */ true, - /* test_grammar_if_triggered= */ false, - /* common_reasoning_format= */ 
COMMON_REASONING_FORMAT_DEEPSEEK, - /* ignore_whitespace_differences= */ true - ); - - // Test template generation for tools with optional parameters - test_templates(tmpls.get(), end_tokens, message_assist_call_noopt, tools, - "\n\nspecial_function_with_opt\narg1\n1\n\n", - /* expect_grammar_triggered= */ true, - /* test_grammar_if_triggered= */ false, - /* common_reasoning_format= */ COMMON_REASONING_FORMAT_DEEPSEEK, - /* ignore_whitespace_differences= */ true - ); - test_templates(tmpls.get(), end_tokens, message_assist_call_withopt, tools, - "\n\nspecial_function_with_opt\narg1\n1\narg2\n2\n\n", - /* expect_grammar_triggered= */ true, - /* test_grammar_if_triggered= */ false, - /* common_reasoning_format= */ COMMON_REASONING_FORMAT_DEEPSEEK, - /* ignore_whitespace_differences= */ true - ); + auto tst = peg_tester("models/templates/deepseek-ai-DeepSeek-V3.1.jinja", detailed_debug); + tst.test( + "REASONING<|tool▁calls▁begin|><|tool▁call▁begin|>get_time<|tool▁sep|>{\"city\": " + "\"Tokyo\"}<|tool▁call▁end|><|tool▁calls▁end|>") + .enable_thinking(true) + .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK) + .tools({ get_time_tool }) + .expect(message_with_tool_calls_and_reasoning("get_time", "{\"city\":\"Tokyo\"}", "REASONING")) + .run(); } { - auto tmpls = read_templates("models/templates/Kimi-K2-Thinking.jinja"); - std::vector end_tokens{ "<|im_end|>" }; + auto tst = peg_tester("models/templates/deepseek-ai-DeepSeek-V3.1.jinja", detailed_debug); + tst.test( + "REASONINGCONTENT<|tool▁calls▁begin|><|tool▁call▁begin|>get_time<|tool▁sep|>{\"city\": " + "\"Paris\"}<|tool▁call▁end|><|tool▁call▁begin|>get_weather<|tool▁sep|>{\"city\": " + "\"Paris\"}<|tool▁call▁end|><|tool▁calls▁end|>") + .tools({ + get_time_tool, get_weather_tool + }) + .enable_thinking(true) + .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK) + .parallel_tool_calls(true) + .expect(message_with_reasoning_content_and_multiple_tool_calls( + "REASONING", "CONTENT", + { { "get_time", 
"{\"city\":\"Paris\"}" }, { "get_weather", "{\"city\":\"Paris\"}" } })) + .run(); + } - assert_equals(COMMON_CHAT_FORMAT_KIMI_K2, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format); - assert_equals(COMMON_CHAT_FORMAT_KIMI_K2, common_chat_templates_apply(tmpls.get(), inputs_tools).format); + { + auto tst = peg_tester("models/templates/deepseek-ai-DeepSeek-V3.1.jinja", detailed_debug); + tst.test("REASONING\nCONTENT") + .enable_thinking(true) + .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK) + .expect(simple_assist_msg("CONTENT", "REASONING\n")) + .run(); + } - // Test parsing regular content - assert_msg_equals(message_assist, - test_chat_parse( - "Hello, world!\nWhat's up?", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_KIMI_K2})); + { + auto tst = peg_tester("models/templates/deepseek-ai-DeepSeek-V3.1.jinja", detailed_debug); + tst.test("CONTENT").expect(simple_assist_msg("CONTENT", "")).run(); + } - // Test parsing content with thinking - assert_msg_equals(message_assist_thoughts, - test_chat_parse( - "I'm\nthinkingHello, world!\nWhat's up?", - /* is_partial= */ false, + // GLM-4.6 tests - format: function_name\n...\n...\n + { + auto tst = peg_tester("models/templates/GLM-4.6.jinja"); + tst.test( + "special_function\n" + "arg1\n1\n" + "") + .tools({ special_function_tool }) + .expect(message_assist_call) + .run(); + } + + // GLM-4.7-Flash tests - format: function_name...... 
+ // Note: Template uses forced-open thinking mode (prompt ends with ) + { + auto tst = peg_tester("models/templates/GLM-4.7-Flash.jinja", detailed_debug); + + // Pure content (no reasoning) + tst.test("Hello, world!\nWhat's up?") + .enable_thinking(false) + .expect(message_assist) + .run(); + + // Reasoning with content (forced-open mode - input starts after ) + tst.test("I'm\nthinkingHello, world!\nWhat's up?") + .enable_thinking(true) + .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK) + .expect(message_assist_thoughts) + .run(); + + // Tool call without reasoning + tst.test( + "special_function" + "arg11" + "") + .enable_thinking(false) + .tools({ special_function_tool }) + .expect(message_assist_call) + .run(); + + // Tool call with reasoning (forced-open mode) + tst.test( + "I'm\nthinking" + "special_function" + "arg11" + "") + .enable_thinking(true) + .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK) + .tools({ special_function_tool }) + .expect(message_assist_call_thoughts) + .run(); + + // String argument starting with '[' - should NOT be treated as JSON array + // This tests the fix for Godot scene files and similar content + tst.test( + "html" + "markup[gd_scene load_steps=3 format=3]" + "") + .enable_thinking(false) + .tools({ html_tool }) + .expect_tool_calls({ + { "html", "{\"markup\": \"[gd_scene load_steps=3 format=3]\"}", {} }, + }) + .run(); + + // Multiple tool calls + // Note: Parallel tool calls streaming test skipped - the KEY_VALUE_TAGS format has + // partial parsing edge cases when function names share common prefixes (special_function vs special_function_with_opt) + // The grammar and full parsing work correctly, but incremental streaming detection needs more work. 
+ } + + // Kimi-K2-Thinking tests - FUNC_PREFIXED_INDEXED format + { + auto tst = peg_tester("models/templates/Kimi-K2-Thinking.jinja", detailed_debug); + tst.test( + "<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>" + "{\"arg1\": 1}<|tool_call_end|><|tool_calls_section_end|>") + .tools({ special_function_tool }) + .expect(message_assist_call) + .run(); + } + + // Apertus-8B-Instruct tests - FUNC_NAME_AS_KEY format + // Format: <|tools_prefix|>[{"function_name": {...arguments...}}]<|tools_suffix|> + { + auto tst = peg_tester("models/templates/Apertus-8B-Instruct.jinja", detailed_debug); + tst.test("<|tools_prefix|>[{\"special_function\": {\"arg1\": 1}}]<|tools_suffix|>") + .tools({ special_function_tool }) + .expect(message_assist_call) + .run(); + } + + // MiniMax-M2 tests - XML invoke format with parameter tags + // Format: value + { + auto tst = peg_tester("models/templates/MiniMax-M2.jinja", detailed_debug); + tst.test( + "\n1\n") + .tools({ special_function_tool }) + .expect(message_assist_call) + .run(); + } + + // NVIDIA-Nemotron-Nano-v2 tests - ... 
format + // Format: [{"name": "func", "arguments": {...}}] + { + auto tst = peg_tester("models/templates/NVIDIA-Nemotron-Nano-v2.jinja", detailed_debug); + tst.test("[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}]") + .tools({ special_function_tool }) + .expect(message_assist_call) + .run(); + } + + // CohereForAI-c4ai-command-r7b (uses START_RESPONSE/END_RESPONSE, START_THINKING/END_THINKING, START_ACTION/END_ACTION) + { + auto tst = peg_tester("models/templates/CohereForAI-c4ai-command-r7b-12-2024-tool_use.jinja", detailed_debug); + tst.test("<|START_RESPONSE|>Hello, world!\nWhat's up?<|END_RESPONSE|>").expect(message_assist).run(); + tst.test( + "<|START_THINKING|>I'm\nthinking<|END_THINKING|>" + "<|START_ACTION|>[\n" + " {\"tool_call_id\": \"0\", \"tool_name\": \"special_function\", \"parameters\": {\"arg1\": 1}}\n" + "]<|END_ACTION|>") + .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK) + .tools({ special_function_tool }) + .expect(message_assist_thoughts_call_idx) + .run(); + } + // CohereForAI-c4ai-command-r-plus (uses markdown code block format) + { + auto tst = peg_tester("models/templates/CohereForAI-c4ai-command-r-plus-tool_use.jinja", detailed_debug); + tst.test("<|CHATBOT_TOKEN|>Hello, world!\nWhat's up?<|END_OF_TURN_TOKEN|>").expect(message_assist).run(); + // Tool calls: Action: followed by JSON code block + tst.test( + "Action:\n" + "```json\n" + "[{\"tool_name\": \"special_function\", \"parameters\": {\"arg1\": 1}}]\n" + "```") + .tools({ special_function_tool }) + .expect(message_assist_call) + .run(); + } + + // mistralai-Mistral-Nemo-Instruct-2407.jinja + { + auto tst = peg_tester("models/templates/mistralai-Mistral-Nemo-Instruct-2407.jinja", detailed_debug); + tst.test("Hello, world!\nWhat's up?").expect(message_assist).run(); + tst.test("[TOOL_CALLS][{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}, \"id\": \"123456789\"}]") + .tools({ special_function_tool }) + .expect(message_assist_call_id) + .run(); + } + { 
+ auto tst = peg_tester("models/templates/meetkai-functionary-medium-v3.1.jinja", detailed_debug); + tst.test("Hello, world!\nWhat's up?").expect(message_assist).run(); + tst.test("{\"arg1\": 1}") + .tools({ special_function_tool }) + .expect(message_assist_call) + .run(); + } + // Functionary v3.2 - recipient-based format: >>>recipient\n{content} + { + auto tst = peg_tester("models/templates/meetkai-functionary-medium-v3.2.jinja", detailed_debug); + tst.test(">>>all\nHello, world!\nWhat's up?").expect(message_assist).run(); + tst.test(">>>special_function\n{\"arg1\": 1}") + .tools({ special_function_tool }) + .expect(message_assist_call) + .run(); + } + + // FireFunction + { + auto tst = peg_tester("models/templates/fireworks-ai-llama-3-firefunction-v2.jinja", detailed_debug); + tst.test("Hello, world!\nWhat's up?").expect(message_assist).run(); + tst.test(" functools[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}]") + .tools({ special_function_tool }) + .expect(message_assist_call) + .run(); + } + + // DeepSeek R1 Distill Llama 8B - reasoning tests only (forced open thinking) + // Note: Template uses forced-open mode (prompt ends with ), so input shouldn't include opening tag + { + auto tst = peg_tester("models/templates/deepseek-ai-DeepSeek-R1-Distill-Llama-8B.jinja", detailed_debug); + tst.test("Hello, world!\nWhat's up?") + .enable_thinking(true) // Forced open + .expect(message_assist) + .run(); + tst.test("I'm\nthinkingHello, world!\nWhat's up?") + .enable_thinking(true) + .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK) + .expect(message_assist_thoughts) + .run(); + } + // llama-cpp DeepSeek R1 template (always forced-open thinking) + { + auto tst = peg_tester("models/templates/llama-cpp-deepseek-r1.jinja", detailed_debug); + tst.test("Hello, world!\nWhat's up?").expect(message_assist).run(); + tst.test("I'm\nthinkingHello, world!\nWhat's up?") + .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK) + .expect(message_assist_thoughts) + 
.run(); + tst.test( + "<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>special_function\n" + "```json\n{\"arg1\": 1}\n```<|tool▁call▁end|><|tool▁calls▁end|>") + .tools({ special_function_tool }) + .parallel_tool_calls(true) + .expect(message_assist_call) + .run(); + } + // DeepSeek R1 Distill Qwen 32B - reasoning tests only (forced open thinking) + // Note: Template uses forced-open mode (prompt ends with ), so input shouldn't include opening tag + { + auto tst = peg_tester("models/templates/deepseek-ai-DeepSeek-R1-Distill-Qwen-32B.jinja", detailed_debug); + tst.test("Hello, world!\nWhat's up?").enable_thinking(true).expect(message_assist).run(); + tst.test("I'm\nthinkingHello, world!\nWhat's up?") + .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK) + .expect(message_assist_thoughts) + .run(); + tst.test( + "<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>special_function\n" + "```json\n{\"arg1\": 1}\n```<|tool▁call▁end|><|tool▁calls▁end|>") + .tools({ special_function_tool }) + .expect(message_assist_call) + .run(); + } + // Kimi-K2 (moonshotai) - FUNC_PREFIXED_INDEXED format + { + auto tst = peg_tester("models/templates/moonshotai-Kimi-K2.jinja", detailed_debug); + tst.test("Hello, world!\nWhat's up?").expect(message_assist).run(); + tst.test( + "<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>" + "{\"arg1\": 1}<|tool_call_end|><|tool_calls_section_end|>") + .tools({ special_function_tool }) + .expect(message_assist_call) + .run(); + } + // Kimi-K2-Instruct - FUNC_PREFIXED_INDEXED format + { + auto tst = peg_tester("models/templates/Kimi-K2-Instruct.jinja", detailed_debug); + tst.test("Hello, world!\nWhat's up?").expect(message_assist).run(); + tst.test( + "<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>" + "{\"arg1\": 1}<|tool_call_end|><|tool_calls_section_end|>") + .tools({ special_function_tool }) + 
.expect(message_assist_call) + .run(); + } + + // MiMo-VL / Hermes 3 / Qwen 2.5 (Common JSON format) + for (const auto & path : + { "models/templates/MiMo-VL.jinja", "models/templates/NousResearch-Hermes-3-Llama-3.1-8B-tool_use.jinja", + "models/templates/Qwen-Qwen2.5-7B-Instruct.jinja" }) { + auto tst = peg_tester(path, detailed_debug); + tst.test("Hello, world!\nWhat's up?").expect(message_assist).run(); + tst.test("\n{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n") + .tools({ special_function_tool }) + .expect(message_assist_call) + .run(); + } + + // Apriel 1.5 + { + auto tst = peg_tester("models/templates/unsloth-Apriel-1.5.jinja", detailed_debug); + tst.test("Hello, world!\nWhat's up?").expect(message_assist).run(); + tst.test("[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}]") + .tools({ special_function_tool }) + .expect(message_assist_call) + .run(); + } + + // Apriel 1.6 Thinker (reasoning-only support) + { + auto tst = peg_tester("models/templates/Apriel-1.6-15b-Thinker-fixed.jinja", detailed_debug); + tst.test("Hello, world!\nWhat's up?").expect(message_assist).run(); + + // Implicit reasoning start (forced open) + tst.test("I'm\nthinking\n[BEGIN FINAL RESPONSE]\nHello, world!\nWhat's up?") + .reasoning_format(COMMON_REASONING_FORMAT_AUTO) + .expect(message_assist_thoughts) + .run(); + + // Reasoning + Tool calls + tst.test( + "I'm\nthinking\n[BEGIN FINAL RESPONSE]\n[{\"name\": \"special_function\", \"arguments\": " + "{\"arg1\": 1}}]") + .reasoning_format(COMMON_REASONING_FORMAT_AUTO) + .tools({ special_function_tool }) + .expect(message_assist_call_thoughts) + .run(); + } + + // Mistral Small 3.2 - FUNC_BRACKET_TAG format: [TOOL_CALLS]func_name[CALL_ID]id[ARGS]{...} + { + auto tst = peg_tester("models/templates/Mistral-Small-3.2-24B-Instruct-2506.jinja", detailed_debug); + tst.test("Hello, world!\nWhat's up?").expect(message_assist).run(); + tst.test("[TOOL_CALLS]special_function[CALL_ID]123456789[ARGS]{\"arg1\": 1}") 
+ .tools({ special_function_tool }) + .expect(message_assist_call_id) + .run(); + } + // Devstral - FUNC_BRACKET_TAG format (no ID marker): [TOOL_CALLS]func_name[ARGS]{...} + { + auto tst = peg_tester("models/templates/unsloth-mistral-Devstral-Small-2507.jinja", detailed_debug); + tst.test("Hello, world!\nWhat's up?").expect(message_assist).run(); + tst.test("[TOOL_CALLS]special_function[ARGS]{\"arg1\": 1}") + .tools({ special_function_tool }) + .expect(message_assist_call) + .run(); + tst.test("Hello, world!\nWhat's up?[TOOL_CALLS]special_function[ARGS]{\"arg1\": 1}") + .tools({ special_function_tool }) + .expect(message_assist_call_content) + .run(); + } + + { + // Llama 3.1 + auto tst = peg_tester("models/templates/meta-llama-Llama-3.1-8B-Instruct.jinja", detailed_debug); + tst.test("Hello, world!\nWhat's up?").tools({ special_function_tool }).expect(message_assist).run(); + } + + { + // Llama 3.2 + auto tst = peg_tester("models/templates/meta-llama-Llama-3.2-3B-Instruct.jinja", detailed_debug); + tst.test("Hello, world!\nWhat's up?").tools({ special_function_tool }).expect(message_assist).run(); + } + + { + // Llama 3.3 + auto tst = peg_tester("models/templates/meta-llama-Llama-3.3-70B-Instruct.jinja", detailed_debug); + tst.test("Hello, world!\nWhat's up?").tools({ python_tool }).expect(message_assist).run(); + } + + // GPT-OSS format tests + { + auto tst = peg_tester("models/templates/openai-gpt-oss-120b.jinja", detailed_debug); + + // Basic content only - final channel + tst.test("<|channel|>final<|message|>Hello, world!\nWhat's up?").expect(message_assist).run(); + + // Basic content only - commentary channel + tst.test("<|channel|>commentary<|message|>Hello, world!\nWhat's up?").expect(message_assist).run(); + + // Analysis channel (reasoning) with final channel (content) + tst.test( + "<|channel|>analysis<|message|>I'm\nthinking<|end|>\n<|channel|>final<|message|>Hello, world!\nWhat's " + "up?") + .reasoning_format(COMMON_REASONING_FORMAT_AUTO) + 
.expect(message_assist_thoughts) + .run(); + + // Analysis channel only (partial) - still works when reasoning format is set + tst.test("<|channel|>analysis<|message|>I'm\nthinking") + .reasoning_format(COMMON_REASONING_FORMAT_AUTO) + .is_partial(true) + .expect_reasoning("I'm\nthinking") + .run(); + + // Reasoning format none - reasoning stays in content + tst.test( + "<|channel|>analysis<|message|>I'm\nthinking<|end|>\n<|channel|>final<|message|>Hello, world!\nWhat's " + "up?") + .reasoning_format(COMMON_REASONING_FORMAT_NONE) + .expect_content( + "<|channel|>analysis<|message|>I'm\nthinking<|end|>Hello, world!\nWhat's up?") + .run(); + + // Tool call with recipient in role header: " to=functions.NAME<|channel|>analysis<|message|>JSON" + tst.test(" to=functions.special_function<|channel|>analysis<|message|>{\"arg1\": 1}") + .tools({ special_function_tool }) + .expect(message_assist_call) + .run(); + + // Tool call with recipient in channel header: "<|channel|>analysis to=functions.NAME<|message|>JSON" + tst.test("<|channel|>analysis to=functions.special_function<|message|>{\"arg1\": 1}") + .tools({ special_function_tool }) + .expect(message_assist_call) + .run(); + + // Tool call with constraint: " to=functions.NAME<|channel|>analysis <|constrain|>json<|message|>JSON" + tst.test(" to=functions.special_function<|channel|>analysis <|constrain|>json<|message|>{\"arg1\": 1}") + .tools({ special_function_tool }) + .expect(message_assist_call) + .run(); + + // Tool call in commentary channel (channel header variant) + tst.test("<|channel|>commentary to=functions.special_function<|message|>{\"arg1\": 1}") + .tools({ special_function_tool }) + .expect(message_assist_call) + .run(); + + // Tool call with reasoning + content (analysis first, then tool call) + tst.test( + "<|channel|>analysis<|message|>I'm\nthinking<|end|>\n" + "<|start|>assistant to=functions.special_function<|channel|>analysis<|message|>{\"arg1\": 1}") + .reasoning_format(COMMON_REASONING_FORMAT_AUTO) + 
.tools({ special_function_tool }) + .expect(message_assist_call_thoughts) + .run(); + + // Tool calling with extra channel before + tst.test( + "<|channel|>analysis<|message|>I'm\nthinking<|end|><|start|>assistant<|channel|>commentary" + " to=functions.special_function <|message|>{\"arg1\": 1}") + .reasoning_format(COMMON_REASONING_FORMAT_AUTO) + .tools({ special_function_tool }) + .expect(message_assist_call_thoughts) + .run(); + + // Reasoning after final channel + // Tool calling after final channel + tst.test( + "<|channel|>final<|message|><|end|>" + "<|start|>assistant<|channel|>analysis<|message|>Thinking about edit..." + ) + .reasoning_format(COMMON_REASONING_FORMAT_AUTO) + .expect_reasoning("Thinking about edit...") + .expect_content("") + .run(); + + // Tool calling after final channel + tst.test( + "<|channel|>final<|message|><|end|>" + "<|start|>assistant<|channel|>analysis<|message|>Thinking about edit...<|end|>" + "<|start|>assistant<|channel|>commentary to=functions.edit <|constrain|>json" + "<|message|>{\"filePath\": \"file.js\", \"oldString\": \"if (part < railCount - 1) {\", \"newString\": \"if (part < 4) {\", \"replaceAll\": false}" + ) + .reasoning_format(COMMON_REASONING_FORMAT_AUTO) + .tools({ { - /* .format = */ COMMON_CHAT_FORMAT_KIMI_K2, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, - })); - - // Test parsing tool calls - assert_msg_equals(message_assist_call, - test_chat_parse( - "<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>{\"arg1\": 1}<|tool_call_end|><|tool_calls_section_end|>", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_KIMI_K2})); - - // Test parsing tool calls with thinking - assert_msg_equals(message_assist_call_thoughts, - test_chat_parse( - "I'm\nthinking<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>{\"arg1\": 1}<|tool_call_end|><|tool_calls_section_end|>", - /* is_partial= */ false, - { - /* 
.format = */ COMMON_CHAT_FORMAT_KIMI_K2, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK - })); - - // Test tool calls with extra content - assert_msg_equals(message_assist_call_content, - test_chat_parse( - "<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>{\"arg1\": 1}<|tool_call_end|><|tool_calls_section_end|>Hello, world!\nWhat's up?", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_KIMI_K2} - )); - - // Test tool calls with extra content AND thinking - assert_msg_equals(message_assist_call_thoughts_content, - test_chat_parse( - "I'm\nthinking<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>{\"arg1\": 1}<|tool_call_end|><|tool_calls_section_end|>Hello, world!\nWhat's up?", - /* is_partial= */ false, - { - /* .format = */ COMMON_CHAT_FORMAT_KIMI_K2, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK - })); - - // Test streaming - test_parser_with_streaming(message_assist_call_thoughts_content, - "I'm\nthinking\nHello, world!\nWhat's up?\n<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>{\"arg1\": 1}<|tool_call_end|><|tool_calls_section_end|>", - [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, { - /* .format = */ COMMON_CHAT_FORMAT_KIMI_K2, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK - }); }); - test_parser_with_streaming(message_assist_call_thoughts_unparsed, - "I'm\nthinking\n\n<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>{\"arg1\": 1}<|tool_call_end|><|tool_calls_section_end|>", - [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, { - /* .format = */ COMMON_CHAT_FORMAT_KIMI_K2, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_NONE - }); }); - test_parser_with_streaming(message_assist_call_thoughts_content, - "I'm\nthinking\n\n\nHello, 
world!\nWhat's up?\n\n<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>{\"arg1\": 1}<|tool_call_end|><|tool_calls_section_end|>\n", - [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, { - /* .format = */ COMMON_CHAT_FORMAT_KIMI_K2, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK - }); }); - test_parser_with_streaming(message_assist_call_withopt, - "<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function_with_opt:0<|tool_call_argument_begin|>{\"arg1\": 1, \"arg2\": 2}<|tool_call_end|><|tool_calls_section_end|>", - [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, { - /* .format = */ COMMON_CHAT_FORMAT_KIMI_K2, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_NONE - }); }); - test_parser_with_streaming(simple_assist_msg("Hello, world!\nWhat's up?", "I'm\nthinking", "special_function", "{\"arg1\": \"123456\"}"), - "I'm\nthinkingHello, world!\nWhat's up?\n<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>{\"arg1\": \"123456\"}<|tool_call_end|><|tool_calls_section_end|>", - [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, { - /* .format = */ COMMON_CHAT_FORMAT_KIMI_K2, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK - }); }); - test_parser_with_streaming(simple_assist_msg("Hello, world!\nWhat's up?", "I'm\nthinking", "special_function", "{\"arg1\": [1, 2, \"345\", 6]}"), - "I'm\nthinkingHello, world!\nWhat's up?\n<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>{\"arg1\": [1, 2, \"345\", 6]}<|tool_call_end|><|tool_calls_section_end|>", - [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, { - /* .format = */ COMMON_CHAT_FORMAT_KIMI_K2, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK - }); }); - 
test_parser_with_streaming(simple_assist_msg("Hello, world!\nWhat's up?", "I'm\nthinking", "special_function", "{\"arg1\": {\"12\": 34, \"5\": [67, 8], \"9\": \"10\"}}"), - "I'm\nthinkingHello, world!\nWhat's up?\n<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>{\"arg1\": {\"12\": 34, \"5\": [67, 8], \"9\": \"10\"}}<|tool_call_end|><|tool_calls_section_end|>", - [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, { - /* .format = */ COMMON_CHAT_FORMAT_KIMI_K2, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK - }); }); - test_parser_with_streaming( - simple_assist_msg("", "", "complex_function", "{\"name\":\"John Doe\",\"age\":30,\"active\":true,\"score\":95.5}"), - "<|tool_calls_section_begin|><|tool_call_begin|>functions.complex_function:0<|tool_call_argument_begin|>" - "{\"name\": \"John Doe\", \"age\": 30, \"active\": true, \"score\": 95.5}" - "<|tool_call_end|><|tool_calls_section_end|>", - [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_KIMI_K2}); }); - test_parser_with_streaming( - simple_assist_msg("", "", "web_search", "{\"query\":\"\\\"From Zero\\\" Linkin Park album tracklist complete songs\",\"limit\":3,\"type\":\"text\"}"), - "<|tool_calls_section_begin|><|tool_call_begin|>functions.web_search:0<|tool_call_argument_begin|>" - "{\"query\":\"\\\"From Zero\\\" Linkin Park album tracklist complete songs\",\"limit\":3,\"type\":\"text\"}" - "<|tool_call_end|><|tool_calls_section_end|>", - [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_KIMI_K2}); }); - test_parser_with_streaming( - simple_assist_msg("", "", "read_file", "{\"args\": [{\"path\": \"src/providers/ThemeProvider.tsx\"}, {\"path\": \"src/components/Header.tsx\"}, {\"path\": \"src/components/ThemeToggle.tsx\"}, {\"path\": \"src/app/globals.css\"}, {\"path\": \"src/app/layout.tsx\"}]}"), - 
"<|tool_calls_section_begin|><|tool_call_begin|>functions.read_file:0<|tool_call_argument_begin|>" - "{\"args\": [{\"path\": \"src/providers/ThemeProvider.tsx\"}, {\"path\": \"src/components/Header.tsx\"}, {\"path\": \"src/components/ThemeToggle.tsx\"}, {\"path\": \"src/app/globals.css\"}, {\"path\": \"src/app/layout.tsx\"}]}" - "<|tool_call_end|><|tool_calls_section_end|>", - [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_KIMI_K2}); }); - test_parser_with_streaming( - simple_assist_msg( - "Let me start by examining the relevant files to understand the current implementation.", "", - "read_file", - "{\"files\": [{\"path\": \"src/app/Partners.tsx\", \"line_ranges\": [\"1-100\"]}]}"), - "Let me start by examining the relevant files to understand the current implementation." - "<|tool_calls_section_begin|><|tool_call_begin|>functions.read_file:0<|tool_call_argument_begin|>" - "{\"files\":[{\"path\":\"src/app/Partners.tsx\",\"line_ranges\":[\"1-100\"]}]}" - "<|tool_call_end|><|tool_calls_section_end|>", - [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_KIMI_K2}); }); - auto multi_tool_msg = simple_assist_msg("Let me call multiple tools.", "I'm thinking."); - multi_tool_msg.tool_calls.push_back({ "read_file", "{\"files\": [{\"path\": \"src/app/Partners.tsx\", \"line_ranges\": [\"1-100\"]}]}", "" }); - multi_tool_msg.tool_calls.push_back({ "web_search", "{\"query\":\"\\\"From Zero\\\" Linkin Park album tracklist complete songs\",\"limit\":3,\"type\":\"text\"}", "" }); - multi_tool_msg.tool_calls.push_back({ "complex_function", "{\"name\": \"John Doe\", \"age\": 30, \"active\": true, \"score\": 95.5}", "" }); - multi_tool_msg.tool_calls.push_back({ "emoji_function", "{\"message\":\"Hello! 👋 🌟 🚀 Testing emojis: 😀😃😄😁 and symbols: ∑∏∆∇\"}", "" }); - test_parser_with_streaming(multi_tool_msg, - "I'm thinking.Let me call multiple tools." 
- "<|tool_calls_section_begin|>" - "<|tool_call_begin|>functions.read_file:0<|tool_call_argument_begin|>" - "{\"files\":[{\"path\":\"src/app/Partners.tsx\",\"line_ranges\":[\"1-100\"]}]}" - "<|tool_call_end|>" - "<|tool_call_begin|>functions.web_search:1<|tool_call_argument_begin|>" - "{\"query\":\"\\\"From Zero\\\" Linkin Park album tracklist complete songs\",\"limit\":3,\"type\":\"text\"}" - "<|tool_call_end|>" - "<|tool_call_begin|>functions.complex_function:2<|tool_call_argument_begin|>" - "{\"name\": \"John Doe\", \"age\": 30, \"active\": true, \"score\": 95.5}" - "<|tool_call_end|>" - "<|tool_call_begin|>functions.emoji_function:3<|tool_call_argument_begin|>" - "{\"message\":\"Hello! 👋 🌟 🚀 Testing emojis: 😀😃😄😁 and symbols: ∑∏∆∇\"}" - "<|tool_call_end|>" - "<|tool_calls_section_end|>", - [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, { - COMMON_CHAT_FORMAT_KIMI_K2, - COMMON_REASONING_FORMAT_DEEPSEEK - }); }); - test_parser_with_streaming( - simple_assist_msg("", "I'm thinking", "complex_function_in_think", "{\"name\":\"John Doe\",\"age\":30,\"active\":true,\"score\":95.5}"), - "I'm thinking<|tool_calls_section_begin|><|tool_call_begin|>functions.complex_function_in_think:0<|tool_call_argument_begin|>" - "{\"name\": \"John Doe\", \"age\": 30, \"active\": true, \"score\": 95.5}" - "<|tool_call_end|><|tool_calls_section_end|>", - [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, { - COMMON_CHAT_FORMAT_KIMI_K2, - COMMON_REASONING_FORMAT_DEEPSEEK - }); }); - test_parser_with_streaming( - simple_assist_msg("Hello", "I'm thinkingI'm still thinking", "complex_function_in_think", "{\"name\":\"John Doe\",\"age\":30,\"active\":true,\"score\":95.5}"), - "I'm thinking<|tool_calls_section_begin|><|tool_call_begin|>functions.complex_function_in_think:0<|tool_call_argument_begin|>" - "{\"name\": \"John Doe\", \"age\": 30, \"active\": true, \"score\": 95.5}" - "<|tool_call_end|><|tool_calls_section_end|>I'm 
still thinkingHello", - [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, { - COMMON_CHAT_FORMAT_KIMI_K2, - COMMON_REASONING_FORMAT_DEEPSEEK - }); }); - - // Test template rendering - common_chat_templates_inputs conversation_with_tools = inputs_tools; - conversation_with_tools.messages.push_back(simple_assist_msg("Let's do it", "Think first", "complex_function", "{\"name\":\"John Doe\",\"age\":30,\"active\":true,\"score\":95.5}")); - conversation_with_tools.messages.push_back({ - "tool", - "Tool response 1", - /* .content_parts = */ {}, - /* .tool_calls = */ {}, - /* .reasoning_content = */ "", - /* .tool_name = */ "complex_function", - /* .tool_call_id = */ "", - }); - conversation_with_tools.messages.push_back(simple_assist_msg("Continue", "Think next", "web_search", "{\"query\":\"\\\"From Zero\\\" Linkin Park album tracklist complete songs\",\"limit\":3,\"type\":\"text\"}")); - conversation_with_tools.messages.push_back({ - "tool", - "Tool response 2", - /* .content_parts = */ {}, - /* .tool_calls = */ {}, - /* .reasoning_content = */ "", - /* .tool_name = */ "web_search", - /* .tool_call_id = */ "", - }); - conversation_with_tools.messages.push_back(simple_assist_msg("CC", "Think last", "read_file", "{\"args\": [{\"path\": \"src/providers/ThemeProvider.tsx\"}, {\"path\": \"src/components/Header.tsx\"}, {\"path\": \"src/components/ThemeToggle.tsx\"}, {\"path\": \"src/app/globals.css\"}, {\"path\": \"src/app/layout.tsx\"}]}")); - conversation_with_tools.messages.push_back({ - "tool", - "Tool response 3", - /* .content_parts = */ {}, - /* .tool_calls = */ {}, - /* .reasoning_content = */ "", - /* .tool_name = */ "read_file", - /* .tool_call_id = */ "", - }); - assert_equals(common_chat_templates_apply(tmpls.get(), conversation_with_tools).prompt, std::string("<|im_system|>tool_declare<|im_middle|>[{\"type\": \"function\", \"function\": {\"name\": \"special_function\", \"description\": \"I'm special\", \"parameters\": {\"type\": 
\"object\", \"properties\": {\"arg1\": {\"type\": \"integer\", \"description\": \"The arg.\"}}, \"required\": [\"arg1\"]}}}]<|im_end|><|im_system|>system<|im_middle|>You are Kimi, an AI assistant created by Moonshot AI.<|im_end|><|im_user|>user<|im_middle|>Hey there!<|im_end|><|im_assistant|>assistant<|im_middle|>Think firstLet's do it<|tool_calls_section_begin|><|tool_call_begin|>functions.complex_function:0<|tool_call_argument_begin|>{\"name\":\"John Doe\",\"age\":30,\"active\":true,\"score\":95.5}<|tool_call_end|><|tool_calls_section_end|><|im_end|><|im_system|>complex_function<|im_middle|>## Return of functions.complex_function:0\nTool response 1<|im_end|><|im_assistant|>assistant<|im_middle|>Think nextContinue<|tool_calls_section_begin|><|tool_call_begin|>functions.web_search:1<|tool_call_argument_begin|>{\"query\":\"\\\"From Zero\\\" Linkin Park album tracklist complete songs\",\"limit\":3,\"type\":\"text\"}<|tool_call_end|><|tool_calls_section_end|><|im_end|><|im_system|>web_search<|im_middle|>## Return of functions.web_search:1\nTool response 2<|im_end|><|im_assistant|>assistant<|im_middle|>Think lastCC<|tool_calls_section_begin|><|tool_call_begin|>functions.read_file:2<|tool_call_argument_begin|>{\"args\": [{\"path\": \"src/providers/ThemeProvider.tsx\"}, {\"path\": \"src/components/Header.tsx\"}, {\"path\": \"src/components/ThemeToggle.tsx\"}, {\"path\": \"src/app/globals.css\"}, {\"path\": \"src/app/layout.tsx\"}]}<|tool_call_end|><|tool_calls_section_end|><|im_end|><|im_system|>read_file<|im_middle|>## Return of functions.read_file:2\nTool response 3<|im_end|><|im_assistant|>assistant<|im_middle|>")); - - // Test template generation for regular content - test_templates(tmpls.get(), end_tokens, message_assist, tools, - "Hello, world!\nWhat's up?", - /* expect_grammar_triggered= */ false); - - // Test template generation for tool calls - test_templates(tmpls.get(), end_tokens, message_assist_call, tools, - 
"<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>{\"arg1\": 1}<|tool_call_end|><|tool_calls_section_end|>", - /* expect_grammar_triggered= */ true, - /* test_grammar_if_triggered= */ true, - /* common_reasoning_format= */ COMMON_REASONING_FORMAT_DEEPSEEK, - /* ignore_whitespace_differences= */ true - ); - - // Test template generation for tools with optional parameters - test_templates(tmpls.get(), end_tokens, message_assist_call_noopt, tools, - "<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function_with_opt:0<|tool_call_argument_begin|>{\"arg1\": 1}<|tool_call_end|><|tool_calls_section_end|>", - /* expect_grammar_triggered= */ true, - /* test_grammar_if_triggered= */ true, - /* common_reasoning_format= */ COMMON_REASONING_FORMAT_DEEPSEEK, - /* ignore_whitespace_differences= */ true - ); - test_templates(tmpls.get(), end_tokens, message_assist_call_withopt, tools, - "<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function_with_opt:0<|tool_call_argument_begin|>{\"arg1\": 1, \"arg2\": 2}<|tool_call_end|><|tool_calls_section_end|>", - /* expect_grammar_triggered= */ true, - /* test_grammar_if_triggered= */ true, - /* common_reasoning_format= */ COMMON_REASONING_FORMAT_DEEPSEEK, - /* ignore_whitespace_differences= */ true - ); - } - - // Test Qwen3-Coder XML format - { - // Basic XML tool call parsing - assert_msg_equals( - message_assist_call, - test_chat_parse( - "\n" - " \n" - " \n" - " 1\n" - " \n" - " \n" - "", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_QWEN3_CODER_XML})); - - // Multiple parameters with different types - common_chat_msg expected_multi_param; - expected_multi_param.role = "assistant"; - expected_multi_param.tool_calls = { - { "complex_function", "{\"name\":\"John Doe\",\"age\":30,\"active\":true,\"score\":95.5}", "" } - }; - - test_parser_with_streaming(expected_multi_param, - "\n" - " \n" - " \n" - " John Doe\n" - " \n" - " \n" - " 30\n" - " \n" - " 
\n" - " true\n" - " \n" - " \n" - " 95.5\n" - " \n" - " \n" - "", - [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); }); - - // Special characters and Unicode - common_chat_msg expected_special_chars; - expected_special_chars.role = "assistant"; - expected_special_chars.tool_calls = { - { "unicode_function", "{\"message\":\"Hello 世界! 🌍 Special chars: @#$%^&*()\"}", "" } - }; - - test_parser_with_streaming(expected_special_chars, - "\n" - " \n" - " \n" - " Hello 世界! 🌍 Special chars: @#$%^&*()\n" - " \n" - " \n" - "", - [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); }); - - // Multiline content with newlines and indentation - common_chat_msg expected_multiline; - expected_multiline.role = "assistant"; - expected_multiline.tool_calls = { - { "code_function", "{\"code\":\"def hello():\\n print(\\\"Hello, World!\\\")\\n return True\"}", "" } - }; - - test_parser_with_streaming(expected_multiline, - "\n" - " \n" - " \n" - "def hello():\n" - " print(\"Hello, World!\")\n" - " return True\n" - " \n" - " \n" - "", - [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); }); - - // JSON object as parameter value - common_chat_msg expected_json_param; - expected_json_param.role = "assistant"; - expected_json_param.tool_calls = { - { "json_function", "{\"config\":{\"host\":\"localhost\",\"port\":8080,\"ssl\":false}}", "" } - }; - - test_parser_with_streaming( - expected_json_param, - "\n" - " \n" - " \n" - " {\"host\": \"localhost\", \"port\": 8080, \"ssl\": false}\n" - " \n" - " \n" - "", - [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); }); - - // Array as parameter value - common_chat_msg expected_array_param; - expected_array_param.role = "assistant"; - expected_array_param.tool_calls = { - { 
"array_function", "{\"items\":[\"apple\",\"banana\",\"cherry\"]}", "" } - }; - - test_parser_with_streaming( - expected_array_param, - "\n" - " \n" - " \n" - " [\"apple\", \"banana\", \"cherry\"]\n" - " \n" - " \n" - "", - [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); }); - - // Empty parameter - common_chat_msg expected_empty_param; - expected_empty_param.role = "assistant"; - expected_empty_param.tool_calls = { - { "empty_function", "{\"empty_param\":\"\"}", "" } - }; - - test_parser_with_streaming( - expected_empty_param, - "\n" - " \n" - " \n" - " \n" - " \n" - "", - [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); }); - - // Boolean values (true/false) - common_chat_msg expected_boolean; - expected_boolean.role = "assistant"; - expected_boolean.tool_calls = { - { "boolean_function", "{\"enabled\":true,\"debug\":false}", "" } - }; - - test_parser_with_streaming( - expected_boolean, - "\n" - " \n" - " \n" - " true\n" - " \n" - " \n" - " false\n" - " \n" - " \n" - "", - [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); }); - - // Null value - common_chat_msg expected_null; - expected_null.role = "assistant"; - expected_null.tool_calls = { - { "null_function", "{\"optional_param\":null}", "" } - }; - - test_parser_with_streaming( - expected_null, - "\n" - " \n" - " \n" - " null\n" - " \n" - " \n" - "", - [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); }); - - // Negative numbers and scientific notation - common_chat_msg expected_numbers; - expected_numbers.role = "assistant"; - expected_numbers.tool_calls = { - { "math_function", "{\"negative\":-42,\"decimal\":-3.14,\"scientific\":1.23e-4}", "" } - }; - - test_parser_with_streaming( - expected_numbers, - "\n" - " \n" - " \n" - " 
-42\n" - " \n" - " \n" - " -3.14\n" - " \n" - " \n" - " 1.23e-4\n" - " \n" - " \n" - "", - [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); }); - - // XML-like content in parameters (should be escaped) - common_chat_msg expected_xml_content; - expected_xml_content.role = "assistant"; - expected_xml_content.tool_calls = { - { "xml_function", "{\"xml_content\":\"value\"}", "" } - }; - - test_parser_with_streaming( - expected_xml_content, - "\n" - " \n" - " \n" - " value\n" - " \n" - " \n" - "", - [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); }); - - // Quotes and escape characters - common_chat_msg expected_quotes; - expected_quotes.role = "assistant"; - expected_quotes.tool_calls = { - { "quote_function", "{\"message\":\"She said \\\"Hello!\\\" and left.\"}", "" } - }; - - test_parser_with_streaming( - expected_quotes, - "\n" - " \n" - " \n" - " She said \"Hello!\" and left.\n" - " \n" - " \n" - "", - [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); }); - - // Long parameter value (simplified) - std::string long_text = "This is a long text parameter that should test the parser's ability to handle larger amounts of text data."; - - common_chat_msg expected_long_text; - expected_long_text.role = "assistant"; - expected_long_text.tool_calls = { - { "long_function", "{\"long_text\":\"" + long_text + "\"}", "" } - }; - - test_parser_with_streaming( - expected_long_text, - "\n" - " \n" - " \n" - " " + long_text + "\n" - " \n" - " \n" - "", - [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); }); - - // Mixed content with text before and after tool call - common_chat_msg expected_mixed_content; - expected_mixed_content.role = "assistant"; - expected_mixed_content.content = "I'll help you 
search for products. "; - expected_mixed_content.tool_calls = { - { "search_function", "{\"query\":\"laptops\"}", "" } - }; - - test_parser_with_streaming( - expected_mixed_content, - "I'll help you search for products. \n" - " \n" - " \n" - " laptops\n" - " \n" - " \n" - "", - [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); }); - - // Compact format (no extra whitespace) - common_chat_msg expected_compact; - expected_compact.role = "assistant"; - expected_compact.tool_calls = { - { "compact_function", "{\"param\":\"value\"}", "" } - }; - - test_parser_with_streaming( - expected_compact, - "value", - [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); }); - - // Function name with underscores and numbers - common_chat_msg expected_complex_name; - expected_complex_name.role = "assistant"; - expected_complex_name.tool_calls = { - { "get_user_data_v2", "{\"user_id\":12345}", "" } - }; - - test_parser_with_streaming( - expected_complex_name, - "\n" - " \n" - " \n" - " 12345\n" - " \n" - " \n" - "", - [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); }); - - // Parameter names with underscores and numbers - common_chat_msg expected_complex_params; - expected_complex_params.role = "assistant"; - expected_complex_params.tool_calls = { - { "test_function", "{\"param_1\":\"value1\",\"param_2_name\":\"value2\",\"param3\":123}", "" } - }; - - test_parser_with_streaming( - expected_complex_params, - "\n" - " \n" - " \n" - " value1\n" - " \n" - " \n" - " value2\n" - " \n" - " \n" - " 123\n" - " \n" - " \n" - "", - [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); }); - - // Very deeply nested XML content in parameter - common_chat_msg expected_deep_xml; - expected_deep_xml.role = "assistant"; - 
expected_deep_xml.tool_calls = { - { "xml_parser", "{\"xml\":\"deep content\"}", "" } - }; - - test_parser_with_streaming( - expected_deep_xml, - "\n" - " \n" - " \n" - " deep content\n" - " \n" - " \n" - "", - [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); }); - - // Parameter with only whitespace - common_chat_msg expected_whitespace_param; - expected_whitespace_param.role = "assistant"; - expected_whitespace_param.tool_calls = { - { "whitespace_function", "{\"spaces\":\"\"}", "" } - }; - - test_parser_with_streaming( - expected_whitespace_param, - "\n" - " \n" - " \n" - " \n" - " \n" - " \n" - "", - [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); }); - - // Parameter with tabs and mixed whitespace - common_chat_msg expected_mixed_whitespace; - expected_mixed_whitespace.role = "assistant"; - expected_mixed_whitespace.tool_calls = { - { "tab_function", "{\"content\":\"line1\\n\\tindented line\\n spaces\"}", "" } - }; - - test_parser_with_streaming( - expected_mixed_whitespace, - "\n" - " \n" - " \n" - "line1\n" - "\tindented line\n" - " spaces\n" - " \n" - " \n" - "", - [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); }); - - // Control characters and special Unicode - common_chat_msg expected_control_chars; - expected_control_chars.role = "assistant"; - expected_control_chars.tool_calls = { - { "control_function", "{\"text\":\"Line1\\nLine2\\tTabbed\\rCarriage return\"}", "" } - }; - - test_parser_with_streaming( - expected_control_chars, - "\n" - " \n" - " \n" - "Line1\nLine2\tTabbed\rCarriage return\n" - " \n" - " \n" - "", - [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); }); - - // Emoji and extended Unicode characters - common_chat_msg expected_emoji; - 
expected_emoji.role = "assistant"; - expected_emoji.tool_calls = { - { "emoji_function", "{\"message\":\"Hello! 👋 🌟 🚀 Testing emojis: 😀😃😄😁 and symbols: ∑∏∆∇\"}", "" } - }; - - test_parser_with_streaming( - expected_emoji, - "\n" - " \n" - " \n" - " Hello! 👋 🌟 🚀 Testing emojis: 😀😃😄😁 and symbols: ∑∏∆∇\n" - " \n" - " \n" - "", - [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); }); - - // Mathematical expressions and formulas - common_chat_msg expected_math; - expected_math.role = "assistant"; - expected_math.tool_calls = { - { "math_function", "{\"formula\":\"E = mc² and ∫f(x)dx = F(x) + C\"}", "" } - }; - - test_parser_with_streaming( - expected_math, - "\n" - " \n" - " \n" - " E = mc² and ∫f(x)dx = F(x) + C\n" - " \n" - " \n" - "", - [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); }); - - // SQL injection-like content (should be safely escaped) - common_chat_msg expected_sql; - expected_sql.role = "assistant"; - expected_sql.tool_calls = { - { "sql_function", "{\"query\":\"SELECT * FROM users WHERE id = 1; DROP TABLE users; --\"}", "" } - }; - - test_parser_with_streaming( - expected_sql, - "\n" - " \n" - " \n" - " SELECT * FROM users WHERE id = 1; DROP TABLE users; --\n" - " \n" - " \n" - "", - [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); }); - - // HTML/XML injection content - common_chat_msg expected_html; - expected_html.role = "assistant"; - expected_html.tool_calls = { - { "html_function", "{\"content\":\"\"}", "" } - }; - - test_parser_with_streaming( - expected_html, - "\n" - " \n" - " \n" - " \n" - " \n" - " \n" - "", - [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); }); - - // Binary-like content (base64) - common_chat_msg expected_binary; - expected_binary.role = 
"assistant"; - expected_binary.tool_calls = { - { "binary_function", "{\"data\":\"SGVsbG8gV29ybGQhIFRoaXMgaXMgYmFzZTY0IGVuY29kZWQgdGV4dC4=\"}", "" } - }; - - test_parser_with_streaming( - expected_binary, - "\n" - " \n" - " \n" - " SGVsbG8gV29ybGQhIFRoaXMgaXMgYmFzZTY0IGVuY29kZWQgdGV4dC4=\n" - " \n" - " \n" - "", - [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); }); - - // Very large numbers (should be parsed as scientific notation) - common_chat_msg expected_large_numbers; - expected_large_numbers.role = "assistant"; - expected_large_numbers.tool_calls = { - { "number_function", "{\"big_int\":1e+60}", "" } // Large number becomes scientific notation - }; - - test_parser_with_streaming( - expected_large_numbers, - "\n" - " \n" - " \n" - " 999999999999999999999999999999999999999999999999999999999999\n" - " \n" - " \n" - "", - [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); }); - } - - { - // Qwen3-Coder template - auto tmpls = read_templates("models/templates/Qwen3-Coder.jinja"); - common_chat_templates_inputs inputs; - inputs.messages = { message_user }; - - common_chat_tool qwen_union_tool { - /* .name = */ "qwen_union", - /* .description = */ "Test tool for union/anyOf handling", - /* .parameters = */ R"({ - "type": "object", - "properties": { - "priority": { "type": ["number", "null"] }, - "maybe_text": { "anyOf": [ { "type": "string" } ] }, - "config": { "anyOf": [ { "type": "object" }, { "type": "null" } ] } - }, - "required": [] - })", - }; - inputs.tools = { qwen_union_tool }; - - auto params = common_chat_templates_apply(tmpls.get(), inputs); - assert_equals(COMMON_CHAT_FORMAT_QWEN3_CODER_XML, params.format); - assert_equals(false, params.grammar.empty()); - - // Grammar should compile successfully - auto grammar = build_grammar(params.grammar); - GGML_ASSERT(grammar && "Failed to build Qwen3-Coder grammar with 
union types"); - } -} - -static void test_template_output_peg_parsers() { - printf("[%s]\n", __func__); - - // JSON schemas - const char * invoice_schema = R"({ - "type": "object", - "properties": { - "amount": {"type": "number"}, - "date": {"type": "string"} - } - })"; - - { - // Ministral-3-14B-Reasoning-2512 - auto tmpls = read_templates("models/templates/mistralai-Ministral-3-14B-Reasoning-2512.jinja"); - - // Test basic message - test_peg_parser(tmpls.get(), [&](auto & t) { - t.input = "Hello, world!\nWhat's up?"; - t.expect = message_assist; - }); - - // Test basic message and reasoning with reasoning_format = none - test_peg_parser(tmpls.get(), [&](auto & t) { - t.input = "[THINK]I'm\nthinking[/THINK]Hello, world!\nWhat's up?"; - t.expect.content = "[THINK]I'm\nthinking[/THINK]Hello, world!\nWhat's up?"; - }); - - // Test basic message and reasoning with reasoning_format = auto - test_peg_parser(tmpls.get(), [&](auto & t) { - t.input = "[THINK]I'm\nthinking[/THINK]Hello, world!\nWhat's up?"; - t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO; - - t.expect = message_assist_thoughts; - }); - - // Test tool call - test_peg_parser(tmpls.get(), [&](auto & t) { - t.input = R"([TOOL_CALLS]special_function[ARGS]{"arg1":1})"; - t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO; - t.params.tools = {special_function_tool}; - - t.expect = message_assist_call; - }); - - // Test tool call with reasoning - test_peg_parser(tmpls.get(), [&](auto & t) { - t.input = "[THINK]I'm\nthinking[/THINK]" - R"([TOOL_CALLS]special_function[ARGS]{"arg1":1})"; - t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO; - t.params.tools = {special_function_tool}; - - t.expect = message_assist_call_thoughts; - }); - - // Test parallel tool calls - test_peg_parser(tmpls.get(), [&](auto & t) { - t.input = R"([TOOL_CALLS]special_function[ARGS]{"arg1": 1})" - R"([TOOL_CALLS]special_function_with_opt[ARGS]{"arg1": 1, "arg2": 2})"; - t.params.reasoning_format = 
COMMON_REASONING_FORMAT_AUTO; - t.params.parallel_tool_calls = true; - t.params.tools = {special_function_tool, special_function_tool_with_optional_param}; - - t.expect.tool_calls = {{ - /* .name = */ "special_function", - /* .arguments = */ R"({"arg1": 1})", - /* .id = */ {}, - }, { - /* .name = */ "special_function_with_opt", - /* .arguments = */ R"({"arg1": 1, "arg2": 2})", - /* .id = */ {}, - }}; - }); - - // Test response format - test_peg_parser(tmpls.get(), [&](auto & t) { - t.input = "[THINK]I need to output the invoice details in JSON[/THINK]" - "```json\n" - R"({"amount": 123.45, "date": "2025-12-03"})" - "\n```"; - t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO; - t.params.json_schema = invoice_schema; - - t.expect.reasoning_content = "I need to output the invoice details in JSON"; - t.expect.content =R"({"amount": 123.45, "date": "2025-12-03"})"; - }); - } - - { - // NVIDIA Nemotron-3 Nano - auto tmpls = read_templates("models/templates/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16.jinja"); - - // Test basic message - test_peg_parser(tmpls.get(), [&](auto & t) { - t.input = "Hello, world!\nWhat's up?"; - t.expect = message_assist; - }); - - // Test basic message and reasoning with reasoning_format = none - test_peg_parser(tmpls.get(), [&](auto & t) { - t.input = "I'm\nthinking\n\nHello, world!\nWhat's up?"; - t.expect.content = "I'm\nthinking\n\nHello, world!\nWhat's up?"; - }); - - // Test basic message and reasoning with reasoning_format = auto - test_peg_parser(tmpls.get(), [&](auto & t) { - t.input = "I'm\nthinking\n\nHello, world!\nWhat's up?"; - t.params.enable_thinking = true; - t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO; - - t.expect = message_assist_thoughts; - }); - - // Test tool call - test_peg_parser(tmpls.get(), [&](auto & t) { - t.input = - "\n" - "\n" - "\n" - "1\n" - "\n" - "\n" - ""; - t.params.enable_thinking = false; - t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO; - t.params.tools = {special_function_tool}; 
- - t.expect = message_assist_call; - }); - - // Test tool call with reasoning - test_peg_parser(tmpls.get(), [&](auto & t) { - t.input = - "I'm\nthinking\n\n" - "\n" - "\n" - "\n" - "1\n" - "\n" - "\n" - ""; - t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO; - t.params.tools = {special_function_tool}; - - t.expect = message_assist_call_thoughts; - }); - - // Test parallel tool calls - test_peg_parser(tmpls.get(), [&](auto & t) { - t.input = - "\n" - "\n" - "\n" - "1\n" - "\n" - "\n" - "\n" - "\n" - "\n" - "\n" - "1\n" - "\n" - "\n" - "2\n" - "\n" - "\n" - ""; - t.params.enable_thinking = false; - t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO; - t.params.parallel_tool_calls = true; - t.params.tools = {special_function_tool, special_function_tool_with_optional_param}; - - t.expect.tool_calls = {{ - /* .name = */ "special_function", - /* .arguments = */ R"({"arg1": 1})", - /* .id = */ {}, - }, { - /* .name = */ "special_function_with_opt", - /* .arguments = */ R"({"arg1": 1, "arg2": 2})", - /* .id = */ {}, - }}; - }); - - // Test tool call with string parameter - test_peg_parser(tmpls.get(), [&](auto & t) { - t.input = - "\n" - "\n" - "\n" - "def hello():\n" - " print(\"Hello, world!\")\n" - "\n" - "hello()\n" - "\n" - "\n" - ""; - t.params.enable_thinking = false; - t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO; - t.params.tools = {python_tool}; - - t.expect.tool_calls = {{ - /* .name = */ "python", - /* .arguments = */ "{\"code\": \"def hello():\\n print(\\\"Hello, world!\\\")\\n\\nhello()\"}", - /* .id = */ {}, - }}; - }); - - // Test tool call with string parameter and no closing tag - test_peg_parser(tmpls.get(), [&](auto & t) { - t.input = - "\n" - "\n" - "\n" - "def hello():\n" - " print(\"Hello, world!\")\n" - "\n" - "hello()\n" - "\n" - ""; - t.params.enable_thinking = false; - t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO; - t.params.tools = {python_tool}; - - t.expect.tool_calls = {{ - /* .name = */ "python", - /* 
.arguments = */ "{\"code\": \"def hello():\\n print(\\\"Hello, world!\\\")\\n\\nhello()\"}", - /* .id = */ {}, - }}; - }); - - // Test response format - test_peg_parser(tmpls.get(), [&](auto & t) { - t.input = - "I need to output the invoice details in JSON\n" - "\n" - R"({"amount": 123.45, "date": "2025-12-03"})"; - t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO; - t.params.json_schema = invoice_schema; - - t.expect.reasoning_content = "I need to output the invoice details in JSON"; - t.expect.content = R"({"amount": 123.45, "date": "2025-12-03"})"; - }); - } - - { - // Solar-Open-100B - auto tmpls = read_templates("models/templates/upstage-Solar-Open-100B.jinja"); - - // Test basic message - test_peg_parser(tmpls.get(), [&](auto & t) { - t.input = "<|content|>Hello, world!\nWhat's up?"; - t.expect = message_assist; - }); - - // Test basic message and reasoning - test_peg_parser(tmpls.get(), [&](auto & t) { - t.input = "<|think|>I'm\nthinking<|end|><|begin|>assistant<|content|>Hello, world!\nWhat's up?"; - t.expect = message_assist_thoughts; - }); - - // Test basic message and reasoning_effort = low - test_peg_parser(tmpls.get(), [&](auto & t) { - t.input = "<|content|>Hello, world!\nWhat's up?"; - t.params.chat_template_kwargs["reasoning_effort"] = "\"low\""; - t.expect = message_assist; - }); - - // Test tool call - test_peg_parser(tmpls.get(), [&](auto & t) { - t.input = "<|tool_calls|>" - "<|tool_call:begin|>123456789" - "<|tool_call:name|>special_function" - "<|tool_call:args|>{\"arg1\":1}" - "<|tool_call:end|>"; - - t.params.chat_template_kwargs["reasoning_effort"] = "\"low\""; - t.params.tools = {special_function_tool}; - t.expect = message_assist_call_id; - }); - - // Test tool call with reasoning - test_peg_parser(tmpls.get(), [&](auto & t) { - t.input = "<|think|>I'm\nthinking<|end|>" - "<|begin|>assistant<|tool_calls|>" - "<|tool_call:begin|>0" - "<|tool_call:name|>special_function" - "<|tool_call:args|>{\"arg1\":1}" - "<|tool_call:end|>"; - - 
t.params.tools = {special_function_tool}; - t.expect = message_assist_thoughts_call_idx; - }); - - // Test tool call with reasoning and tool_choice = required - test_peg_parser(tmpls.get(), [&](auto & t) { - t.input = "<|think|>I'm\nthinking<|end|>" - "<|begin|>assistant<|tool_calls|>" - "<|tool_call:begin|>0" - "<|tool_call:name|>special_function" - "<|tool_call:args|>{\"arg1\":1}" - "<|tool_call:end|>"; - - t.params.tools = {special_function_tool}; - t.params.tool_choice = COMMON_CHAT_TOOL_CHOICE_REQUIRED; - t.expect = message_assist_thoughts_call_idx; - }); - - // Test tool call without reasoning and tool_choice = required - test_peg_parser(tmpls.get(), [&](auto & t) { - t.input = "<|tool_calls|>" - "<|tool_call:begin|>0" - "<|tool_call:name|>special_function" - "<|tool_call:args|>{\"arg1\":1}" - "<|tool_call:end|>"; - - t.params.tools = {special_function_tool}; - t.params.tool_choice = COMMON_CHAT_TOOL_CHOICE_REQUIRED; - t.params.chat_template_kwargs["reasoning_effort"] = "\"low\""; - t.expect = message_assist_call_idx; - }); - - // Test parallel tool calls - test_peg_parser(tmpls.get(), [&](auto & t) { - t.input = "<|think|>I'm\nthinking<|end|>" - "<|begin|>assistant<|tool_calls|>" - "<|tool_call:begin|>0" - "<|tool_call:name|>special_function" - "<|tool_call:args|>{\"arg1\":1}" - "<|tool_call:end|>" - "<|tool_call:begin|>1" - "<|tool_call:name|>special_function_with_opt" - "<|tool_call:args|>{\"arg1\": 1, \"arg2\": 2}" - "<|tool_call:end|>"; - - t.params.parallel_tool_calls = true; - t.params.tools = {special_function_tool, special_function_tool_with_optional_param}; - - t.expect.reasoning_content = "I'm\nthinking"; - t.expect.tool_calls = {{ - /* .name = */ "special_function", - /* .arguments = */ R"({"arg1": 1})", - /* .id = */ "0", - }, { - /* .name = */ "special_function_with_opt", - /* .arguments = */ R"({"arg1": 1, "arg2": 2})", - /* .id = */ "1", - }}; - }); - - // Test response format - test_peg_parser(tmpls.get(), [&](auto & t) { - t.input = 
"<|think|>I need to output the invoice details in JSON<|end|>" - "<|begin|>assistant<|content|>" - R"({"amount": 123.45, "date": "2025-12-03"})"; - - t.params.json_schema = invoice_schema; - - t.expect.reasoning_content = "I need to output the invoice details in JSON"; - t.expect.content =R"({"amount": 123.45, "date": "2025-12-03"})"; - }); - - // Test response format no reasoning - test_peg_parser(tmpls.get(), [&](auto & t) { - t.input = "<|content|>" - R"({"amount": 123.45, "date": "2025-12-03"})"; - - t.params.chat_template_kwargs["reasoning_effort"] = "\"low\""; - t.params.json_schema = invoice_schema; - - t.expect.content =R"({"amount": 123.45, "date": "2025-12-03"})"; - }); + /* .name = */ "edit", + /* .description = */ "Edit a file", + /* .parameters = */ R"({ + "type": "object", + "properties": { + "oldString": { + "type": "string", + "description": "Old string to replace." + }, + "newString": { + "type": "string", + "description": "New replacement string." + }, + "replaceAll": { + "type": "boolean", + "description": "Whether to replace all occurences." 
+ } + }, + "required": ["oldString", "newString"] + })", + } + }) + .expect_reasoning("Thinking about edit...") + .expect_tool_calls({ + { "edit", R"({"filePath": "file.js", "oldString": "if (part < railCount - 1) {", "newString": "if (part < 4) {", "replaceAll": false})", {} } + }) + .run(); + + // Parallel tool calls + tst.test( + " to=functions.special_function<|channel|>analysis<|message|>{\"arg1\": 1}\n" + "<|start|>assistant to=functions.special_function_with_opt<|channel|>analysis<|message|>{\"arg1\": 1, " + "\"arg2\": 2}") + .parallel_tool_calls(true) + .tools({ + special_function_tool, special_function_tool_with_optional_param + }) + .expect_tool_calls({ + { "special_function", R"({"arg1": 1})", {} }, + { "special_function_with_opt", R"({"arg1": 1, "arg2": 2})", {} }, + }) + .run(); } } static void test_msg_diffs_compute() { - printf("[%s]\n", __func__); + LOG_DBG("%s\n", __func__); { common_chat_msg msg1; @@ -3940,9 +2293,7 @@ static void test_msg_diffs_compute() { common_chat_msg_diff diff; diff.content_delta = "Hello, world!"; - assert_equals( - {diff}, - common_chat_msg_diff::compute_diffs(msg1, msg2)); + assert_equals({ diff }, common_chat_msg_diff::compute_diffs(msg1, msg2)); } { common_chat_msg msg1; @@ -3954,37 +2305,35 @@ static void test_msg_diffs_compute() { common_chat_msg_diff diff; diff.content_delta = " world!"; - assert_equals( - {diff}, - common_chat_msg_diff::compute_diffs(msg1, msg2)); + assert_equals({ diff }, common_chat_msg_diff::compute_diffs(msg1, msg2)); } { common_chat_msg msg0; common_chat_msg msg1; - msg1.tool_calls = { { "special_function", "{\"ar", /* .id = */ "123" } }; + msg1.tool_calls = { + { "special_function", "{\"ar", /* .id = */ "123" } + }; common_chat_msg msg2; - msg2.tool_calls = { { "special_function", "{\"arg1\": 1}", /* .id = */ "123" } }; + msg2.tool_calls = { + { "special_function", "{\"arg1\": 1}", /* .id = */ "123" } + }; common_chat_msg_diff diff01; - diff01.tool_call_index = 0; - diff01.tool_call_delta.name 
= "special_function"; - diff01.tool_call_delta.id = "123"; + diff01.tool_call_index = 0; + diff01.tool_call_delta.name = "special_function"; + diff01.tool_call_delta.id = "123"; diff01.tool_call_delta.arguments = "{\"ar"; - assert_equals( - {diff01}, - common_chat_msg_diff::compute_diffs(msg0, msg1)); + assert_equals({ diff01 }, common_chat_msg_diff::compute_diffs(msg0, msg1)); common_chat_msg_diff diff12; - diff12.tool_call_index = 0; + diff12.tool_call_index = 0; // Note: neither id nor name change here. diff12.tool_call_delta.arguments = "g1\": 1}"; - assert_equals( - {diff12}, - common_chat_msg_diff::compute_diffs(msg1, msg2)); + assert_equals({ diff12 }, common_chat_msg_diff::compute_diffs(msg1, msg2)); } { common_chat_msg msg0; @@ -3996,68 +2345,81 @@ static void test_msg_diffs_compute() { }; common_chat_msg_diff diff1; - diff1.tool_call_index = 0; - diff1.tool_call_delta.name = "f1"; - diff1.tool_call_delta.id = "123"; + diff1.tool_call_index = 0; + diff1.tool_call_delta.name = "f1"; + diff1.tool_call_delta.id = "123"; diff1.tool_call_delta.arguments = "{\"arg1\": 1}"; common_chat_msg_diff diff2; - diff2.tool_call_index = 1; - diff2.tool_call_delta.name = "f2"; - diff2.tool_call_delta.id = "222"; + diff2.tool_call_index = 1; + diff2.tool_call_delta.name = "f2"; + diff2.tool_call_delta.id = "222"; diff2.tool_call_delta.arguments = "{\"arg2\": 2}"; - assert_equals( - {diff1, diff2}, - common_chat_msg_diff::compute_diffs(msg0, msg2)); + assert_equals({ diff1, diff2 }, common_chat_msg_diff::compute_diffs(msg0, msg2)); } } int main(int argc, char ** argv) { common_log_set_verbosity_thold(999); + bool detailed_debug = false; + bool only_run_filtered = false; - // try { -#ifndef _WIN32 - if (argc > 1) { - common_chat_templates_inputs inputs; - common_chat_msg msg; - msg.role = "user"; - msg.content = "Hey"; - inputs.messages = {msg}; - inputs.tools = { special_function_tool }; - - std::cout << "| Template | Format |\n"; - std::cout << "|----------|--------|\n"; - - 
for (int i = 1; i < argc; i++) { - try { - std::string path = argv[i]; - if (path.rfind(".jinja") != path.size() - 6) { - std::cerr << "Skipping non-jinja file: " << path << '\n'; - continue; - } - auto tmpls = read_templates(path); - auto parts = string_split(path, "/"); - auto name = parts[parts.size() - 1]; - auto format = common_chat_format_name(common_chat_templates_apply(tmpls.get(), inputs).format); - std::cout << "| " << name << " | " << format << " |\n"; - } catch (const std::exception & e) { - std::cerr << "Failed to process " << argv[i] << ": " << e.what() << '\n'; - } - } - } else -#endif - { - test_msg_diffs_compute(); - test_msgs_oaicompat_json_conversion(); - test_tools_oaicompat_json_conversion(); - test_template_output_parsers(); - test_template_output_peg_parsers(); - std::cout << "\n[chat] All tests passed!" << '\n'; + // Check for --template flag + for (int i = 1; i < argc; i++) { + std::string arg = argv[i]; + if (arg == "--template" && i + 1 < argc) { + g_template_filter = argv[++i]; + // Only run PEG parser tests with the filter + only_run_filtered = true; } + if (arg == "--detailed") { + detailed_debug = true; + } + } + + if (only_run_filtered) { + test_template_output_peg_parsers(detailed_debug); + std::cout << "\n[chat] All template tests passed!" 
<< '\n'; return 0; - // } catch (const std::exception & e) { - // std::cerr << "Error: " << e.what() << '\n'; - // return 1; - // } + } + +#ifndef _WIN32 + if (argc > 1) { + common_chat_templates_inputs inputs; + common_chat_msg msg; + msg.role = "user"; + msg.content = "Hey"; + inputs.messages = { msg }; + inputs.tools = { special_function_tool }; + + std::cout << "| Template | Format |\n"; + std::cout << "|----------|--------|\n"; + + for (int i = 1; i < argc; i++) { + try { + std::string path = argv[i]; + if (path.rfind(".jinja") != path.size() - 6) { + std::cerr << "Skipping non-jinja file: " << path << '\n'; + continue; + } + auto tmpls = read_templates(path); + auto parts = string_split(path, "/"); + const auto & name = parts[parts.size() - 1]; + const auto * format = common_chat_format_name(common_chat_templates_apply(tmpls.get(), inputs).format); + std::cout << "| " << name << " | " << format << " |\n"; + } catch (const std::exception & e) { + std::cerr << "Failed to process " << argv[i] << ": " << e.what() << '\n'; + } + } + } else +#endif + { + test_msg_diffs_compute(); + test_msgs_oaicompat_json_conversion(); + test_tools_oaicompat_json_conversion(); + test_template_output_peg_parsers(detailed_debug); + std::cout << "\n[chat] All tests passed!" 
<< '\n'; + } + return 0; } diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt index 518f8b9ae7..7c63b3aae5 100644 --- a/tools/CMakeLists.txt +++ b/tools/CMakeLists.txt @@ -26,6 +26,7 @@ else() add_subdirectory(server) endif() add_subdirectory(tokenize) + add_subdirectory(parser) add_subdirectory(tts) add_subdirectory(mtmd) if (GGML_RPC) diff --git a/tools/parser/CMakeLists.txt b/tools/parser/CMakeLists.txt new file mode 100644 index 0000000000..4bf40a8717 --- /dev/null +++ b/tools/parser/CMakeLists.txt @@ -0,0 +1,8 @@ +set(TARGET llama-debug-template-parser) +add_executable(${TARGET} debug-template-parser.cpp) +target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT}) +target_compile_features(${TARGET} PRIVATE cxx_std_17) + +if(LLAMA_TOOLS_INSTALL) + install(TARGETS ${TARGET} RUNTIME) +endif() diff --git a/tools/parser/debug-template-parser.cpp b/tools/parser/debug-template-parser.cpp new file mode 100644 index 0000000000..551d2bcf9d --- /dev/null +++ b/tools/parser/debug-template-parser.cpp @@ -0,0 +1,531 @@ +#include "../src/llama-grammar.h" +#include "chat-auto-parser.h" +#include "chat.h" +#include "common.h" +#include "gguf.h" +#include "log.h" + +#include +#include +#include + +#include "nlohmann/json.hpp" + +using json = nlohmann::ordered_json; + +enum class output_mode { + ANALYSIS, // Only output analysis results (default) + TEMPLATE, // Only output rendered template + BOTH // Output both +}; + +enum class input_message_type { + NONE, // Don't render any message scenarios (only analysis) + CONTENT_ONLY, // Simple assistant message with content + REASONING_CONTENT, // Message with reasoning_content + content + TOOL_CALL_ONLY, // Message with tool_calls only + CONTENT_TOOL_CALL, // Message with content + tool_calls + REASONING_TOOL_CALL, // Message with reasoning_content + tool_calls + CONTENT_FAKE_TOOL_CALL, // Message with content but no actual tool_calls (for testing) + ALL // Render all scenarios +}; + +struct debug_options { 
+ std::string template_path; + bool with_tools = true; + bool generation_prompt = true; + bool enable_reasoning = true; + output_mode mode = output_mode::BOTH; + input_message_type input_message = input_message_type::NONE; +}; + +static std::string read_file(const std::string & path) { + std::ifstream fin(path, std::ios::binary); + if (!fin.is_open()) { + throw std::runtime_error("Could not open file: " + path); + } + std::ostringstream buf; + buf << fin.rdbuf(); + return buf.str(); +} + +static std::string read_gguf_chat_template(const std::string & path) { + struct gguf_init_params params = { /*no_alloc =*/true, // We only need metadata, not tensor data + /*ctx=*/nullptr }; + + struct gguf_context * ctx = gguf_init_from_file(path.c_str(), params); + if (ctx == nullptr) { + throw std::runtime_error("Could not open GGUF file: " + path); + } + + const char * key = "tokenizer.chat_template"; + int64_t key_id = gguf_find_key(ctx, key); + + if (key_id == -1) { + gguf_free(ctx); + throw std::runtime_error("GGUF file does not contain chat template key: " + std::string(key)); + } + + const char * template_str = gguf_get_val_str(ctx, key_id); + if (template_str == nullptr) { + gguf_free(ctx); + throw std::runtime_error("GGUF file contains chat template key but value is null"); + } + + std::string result = template_str; + gguf_free(ctx); + return result; +} + +static void print_usage(const char * program_name) { + LOG_ERR("Usage: %s [options]\n", program_name); + LOG_ERR("\nOptions:\n"); + LOG_ERR(" --no-tools Disable tool definitions\n"); + LOG_ERR(" --generation-prompt=0|1 Set add_generation_prompt (default: 1)\n"); + LOG_ERR(" --enable-reasoning=0|1 Enable reasoning parsing (default: 1)\n"); + LOG_ERR(" --output=MODE Output mode: analysis, template, both (default: both)\n"); + LOG_ERR(" --input-message=TYPE Message type to render:\n"); + LOG_ERR(" content_only, reasoning_content, tool_call_only,\n"); + LOG_ERR(" content_tool_call, reasoning_tool_call,\n"); + LOG_ERR(" 
content_fake_tool_call, all\n"); + LOG_ERR("\nExamples:\n"); + LOG_ERR(" %s template.jinja --input-message=all --generation-prompt=1\n", program_name); + LOG_ERR(" %s template.jinja --output=template --input-message=tool_call_only\n", program_name); +} + +static bool parse_bool_option(const std::string & value) { + return value == "1" || value == "true" || value == "yes"; +} + +static bool parse_options(int argc, char ** argv, debug_options & opts) { + if (argc < 2) { + print_usage(argv[0]); + return false; + } + + opts.template_path = argv[1]; + + for (int i = 2; i < argc; ++i) { + std::string arg = argv[i]; + + if (arg == "--no-tools") { + opts.with_tools = false; + } else if (arg.rfind("--generation-prompt=", 0) == 0) { + opts.generation_prompt = parse_bool_option(arg.substr(20)); + } else if (arg.rfind("--enable-reasoning=", 0) == 0) { + opts.enable_reasoning = parse_bool_option(arg.substr(19)); + } else if (arg.rfind("--output=", 0) == 0) { + std::string mode = arg.substr(9); + if (mode == "analysis") { + opts.mode = output_mode::ANALYSIS; + } else if (mode == "template") { + opts.mode = output_mode::TEMPLATE; + } else if (mode == "both") { + opts.mode = output_mode::BOTH; + } else { + LOG_ERR("Unknown output mode: %s\n", mode.c_str()); + return false; + } + } else if (arg.rfind("--input-message=", 0) == 0) { + std::string type = arg.substr(16); + if (type == "content_only") { + opts.input_message = input_message_type::CONTENT_ONLY; + } else if (type == "reasoning_content") { + opts.input_message = input_message_type::REASONING_CONTENT; + } else if (type == "tool_call_only") { + opts.input_message = input_message_type::TOOL_CALL_ONLY; + } else if (type == "content_tool_call") { + opts.input_message = input_message_type::CONTENT_TOOL_CALL; + } else if (type == "reasoning_tool_call") { + opts.input_message = input_message_type::REASONING_TOOL_CALL; + } else if (type == "content_fake_tool_call") { + opts.input_message = input_message_type::CONTENT_FAKE_TOOL_CALL; 
+ } else if (type == "all") { + opts.input_message = input_message_type::ALL; + } else { + LOG_ERR("Unknown input message type: %s\n", type.c_str()); + return false; + } + } else { + LOG_ERR("Unknown option: %s\n", arg.c_str()); + print_usage(argv[0]); + return false; + } + } + + return true; +} + +static json build_user_message() { + return json{ + { "role", "user" }, + { "content", "Hello, please help me with a task." } + }; +} + +static json build_content_only_message() { + return json{ + { "role", "assistant" }, + { "content", "Hello! I'm here to help you with your task." } + }; +} + +static json build_reasoning_content_message() { + return json{ + { "role", "assistant" }, + { "content", "Hello! I'm here to help you with your task." }, + { "reasoning_content", "The user is greeting me and asking for help. I should respond politely." } + }; +} + +static json build_tool_call_only_message() { + return json{ + { "role", "assistant" }, + { "content", nullptr }, + { "tool_calls", + json::array({ json{ + { "type", "function" }, + { "function", json{ { "name", "test_function_name" }, + { "arguments", json::object({ { "param1", "value1" }, { "param2", "value2" } }) } } }, + { "id", "123456789" } } }) } + }; +} + +static json build_content_tool_call_message() { + return json{ + { "role", "assistant" }, + { "content", "I'll help you by calling a function." }, + { "tool_calls", + json::array({ json{ + { "type", "function" }, + { "function", + json{ { "name", "test_function_name" }, + { "arguments", json::object({ { "param1", "value1" }, { "param2", "value2" } }) } } } } }) } + }; +} + +static json build_reasoning_tool_call_message() { + return json{ + { "role", "assistant" }, + { "content", nullptr }, + { "reasoning_content", "I need to call a function to help with this task." 
}, + { "tool_calls", + json::array({ json{ + { "type", "function" }, + { "function", + json{ { "name", "test_function_name" }, + { "arguments", json::object({ { "param1", "value1" }, { "param2", "value2" } }) } } } } }) } + }; +} + +static json build_content_fake_tool_call_message() { + // This message has content but NO tool_calls field + // It's used to test if a template renders tool definitions but not tool calls + return json{ + { "role", "assistant" }, + { "content", "I'll help you by calling a function." } + }; +} + +static json build_tools_definition() { + json parameters_schema = json::object(); + parameters_schema["type"] = "object"; + parameters_schema["properties"] = json::object(); + parameters_schema["properties"]["param1"] = json::object({ + { "type", "string" }, + { "description", "First parameter" } + }); + parameters_schema["properties"]["param2"] = json::object({ + { "type", "string" }, + { "description", "Second parameter" } + }); + parameters_schema["required"] = json::array({ "param1", "param2" }); + + return json::array({ + json{ { "type", "function" }, + { "function", json{ { "name", "test_function_name" }, + { "description", "A test function for debugging" }, + { "parameters", parameters_schema } } } } + }); +} + +static void render_scenario(const common_chat_template & tmpl, + const std::string & scenario_name, + const json & messages, + const json & tools, + bool add_generation_prompt, + bool enable_thinking) { + LOG_ERR("\n=== Scenario: %s ===\n", scenario_name.c_str()); + LOG_ERR("add_generation_prompt: %s, enable_thinking: %s\n", add_generation_prompt ? "true" : "false", + enable_thinking ? 
"true" : "false"); + + // When add_generation_prompt is true, add a trailing user message to trigger the prompt + json final_messages = messages; + if (add_generation_prompt && !messages.empty() && messages.back().value("role", "") == "assistant") { + final_messages.push_back(json{ + { "role", "user" }, + { "content", "Now please continue with another response." } + }); + } + + LOG_ERR("Messages:\n%s\n", final_messages.dump(2).c_str()); + + try { + templates_params inputs; + inputs.messages = final_messages; + inputs.add_generation_prompt = add_generation_prompt; + inputs.extra_context["enable_thinking"] = enable_thinking; + + if (!tools.is_null() && tools.is_array() && !tools.empty()) { + inputs.tools = tools; + } + + std::string output = common_chat_template_direct_apply(tmpl, inputs); + + LOG_ERR("\n--- Rendered Output ---\n"); + LOG_ERR("%s\n", output.c_str()); + LOG_ERR("--- End Output (length: %zu) ---\n", output.length()); + } catch (const std::exception & e) { + LOG_ERR("Rendering failed: %s\n", e.what()); + } +} + +static void render_all_scenarios(const common_chat_template & tmpl, + const json & tools, + bool add_generation_prompt, + bool enable_thinking, + input_message_type message_type) { + json user_msg = build_user_message(); + + auto render_if = [&](input_message_type type, const std::string & name, const json & assistant_msg) { + if (message_type == input_message_type::ALL || message_type == type) { + json messages = json::array({ user_msg, assistant_msg }); + render_scenario(tmpl, name, messages, tools, add_generation_prompt, enable_thinking); + } + }; + + render_if(input_message_type::CONTENT_ONLY, "content_only", build_content_only_message()); + render_if(input_message_type::REASONING_CONTENT, "reasoning_content", build_reasoning_content_message()); + render_if(input_message_type::TOOL_CALL_ONLY, "tool_call_only", build_tool_call_only_message()); + render_if(input_message_type::CONTENT_TOOL_CALL, "content_tool_call", 
build_content_tool_call_message()); + render_if(input_message_type::REASONING_TOOL_CALL, "reasoning_tool_call", build_reasoning_tool_call_message()); + render_if(input_message_type::CONTENT_FAKE_TOOL_CALL, "content_fake_tool_call", + build_content_fake_tool_call_message()); + + // Also render with add_generation_prompt=true to show the prompt ending + if (message_type == input_message_type::ALL) { + LOG_ERR("\n\n=== Generation Prompt Scenarios (add_generation_prompt=true) ===\n"); + + json prompt_messages = json::array({ user_msg }); + render_scenario(tmpl, "generation_prompt_only", prompt_messages, tools, true, enable_thinking); + + // With enable_thinking toggled + render_scenario(tmpl, "generation_prompt_thinking_disabled", prompt_messages, tools, true, false); + } +} + +static const char * reasoning_mode_to_str(content_structure::reasoning_mode_type mode) { + switch (mode) { + case content_structure::REASONING_NONE: + return "NONE"; + case content_structure::REASONING_OPTIONAL: + return "OPTIONAL"; + case content_structure::REASONING_FORCED_OPEN: + return "FORCED_OPEN"; + } + return "UNKNOWN"; +} + +static const char * content_mode_to_str(content_structure::content_mode_type mode) { + switch (mode) { + case content_structure::CONTENT_PLAIN: + return "PLAIN"; + case content_structure::CONTENT_ALWAYS_WRAPPED: + return "ALWAYS_WRAPPED"; + case content_structure::CONTENT_WRAPPED_WITH_REASONING: + return "WRAPPED_WITH_REASONING"; + } + return "UNKNOWN"; +} + +static const char * function_format_to_str(enum tool_call_structure::function_format fmt) { + switch (fmt) { + case tool_call_structure::FUNC_JSON_OBJECT: + return "JSON_OBJECT"; + case tool_call_structure::FUNC_TAG_WITH_NAME: + return "TAG_WITH_NAME"; + case tool_call_structure::FUNC_TAG_NAME_ONLY: + return "TAG_NAME_ONLY"; + case tool_call_structure::FUNC_PREFIXED_INDEXED: + return "PREFIXED_INDEXED"; + case tool_call_structure::FUNC_NAME_AS_KEY: + return "NAME_AS_KEY"; + case 
tool_call_structure::FUNC_BRACKET_TAG: + return "BRACKET_TAG"; + case tool_call_structure::FUNC_RECIPIENT_BASED: + return "RECIPIENT_BASED"; + case tool_call_structure::FUNC_MARKDOWN_CODE_BLOCK: + return "MARKDOWN_CODE_BLOCK"; + } + return "UNKNOWN"; +} + +static const char * argument_format_to_str(enum tool_call_structure::argument_format fmt) { + switch (fmt) { + case tool_call_structure::ARGS_JSON: + return "JSON"; + case tool_call_structure::ARGS_TAGGED: + return "TAGGED"; + case tool_call_structure::ARGS_KEY_VALUE_TAGS: + return "KEY_VALUE_TAGS"; + } + return "UNKNOWN"; +} + +int main(int argc, char ** argv) { + // Set log level to most verbose to capture all debug output + common_log_set_verbosity_thold(99); + + debug_options opts; + if (!parse_options(argc, argv, opts)) { + return 1; + } + + std::string template_source; + try { + // Check if the file is a GGUF file + if (opts.template_path.size() >= 5 && + opts.template_path.compare(opts.template_path.size() - 5, 5, ".gguf") == 0) { + template_source = read_gguf_chat_template(opts.template_path); + } else { + template_source = read_file(opts.template_path); + } + } catch (const std::exception & e) { + LOG_ERR("Error reading template: %s\n", e.what()); + return 1; + } + + LOG_ERR("Analyzing template: %s\n", opts.template_path.c_str()); + LOG_ERR("Options: with_tools=%s, generation_prompt=%s, enable_reasoning=%s\n", opts.with_tools ? "true" : "false", + opts.generation_prompt ? "true" : "false", opts.enable_reasoning ? "true" : "false"); + + try { + common_chat_template chat_template(template_source, "", ""); + + // Build tools definition + json tools = opts.with_tools ? 
build_tools_definition() : json(); + + // Render template scenarios if requested + if (opts.input_message != input_message_type::NONE && + (opts.mode == output_mode::TEMPLATE || opts.mode == output_mode::BOTH)) { + LOG_ERR("\n"); + LOG_ERR("================================================================================\n"); + LOG_ERR(" TEMPLATE RENDERING OUTPUT\n"); + LOG_ERR("================================================================================\n"); + + render_all_scenarios(chat_template, tools, opts.generation_prompt, opts.enable_reasoning, + opts.input_message); + } + + // Output analysis if requested + if (opts.mode == output_mode::ANALYSIS || opts.mode == output_mode::BOTH) { + LOG_ERR("\n"); + LOG_ERR("================================================================================\n"); + LOG_ERR(" TEMPLATE ANALYSIS\n"); + LOG_ERR("================================================================================\n"); + + template_analysis_result analysis = template_analyzer::analyze_template(chat_template); + + LOG_ERR("\n=== Analysis Results ===\n"); + + LOG_ERR("\n--- Content Structure (Phase 1) ---\n"); + LOG_ERR("reasoning_mode: %s\n", reasoning_mode_to_str(analysis.content.reasoning_mode)); + LOG_ERR("reasoning_start: '%s'\n", analysis.content.reasoning_start.c_str()); + LOG_ERR("reasoning_end: '%s'\n", analysis.content.reasoning_end.c_str()); + LOG_ERR("content_mode: %s\n", content_mode_to_str(analysis.content.content_mode)); + LOG_ERR("content_start: '%s'\n", analysis.content.content_start.c_str()); + LOG_ERR("content_end: '%s'\n", analysis.content.content_end.c_str()); + + LOG_ERR("\n--- Tool Structure (Phase 2) ---\n"); + LOG_ERR("supports_tools: %s\n", analysis.tools.supports_tools ? 
"true" : "false"); + LOG_ERR("function_format: %s\n", function_format_to_str(analysis.tools.function_format)); + LOG_ERR("argument_format: %s\n", argument_format_to_str(analysis.tools.argument_format)); + LOG_ERR("tool_section_start: '%s'\n", analysis.tools.tool_section_start.c_str()); + LOG_ERR("tool_section_end: '%s'\n", analysis.tools.tool_section_end.c_str()); + LOG_ERR("function_prefix: '%s'\n", analysis.tools.function_prefix.c_str()); + LOG_ERR("function_suffix: '%s'\n", analysis.tools.function_suffix.c_str()); + LOG_ERR("function_close: '%s'\n", analysis.tools.function_close.c_str()); + LOG_ERR("arg_prefix: '%s'\n", analysis.tools.arg_prefix.c_str()); + LOG_ERR("arg_suffix: '%s'\n", analysis.tools.arg_suffix.c_str()); + LOG_ERR("arg_close: '%s'\n", analysis.tools.arg_close.c_str()); + LOG_ERR("name_field: '%s'\n", analysis.tools.name_field.c_str()); + LOG_ERR("args_field: '%s'\n", analysis.tools.args_field.c_str()); + LOG_ERR("id_field: '%s'\n", analysis.tools.id_field.c_str()); + + // Additional fields for special formats + if (analysis.tools.function_format == tool_call_structure::FUNC_PREFIXED_INDEXED) { + LOG_ERR("\n--- Prefixed-Indexed Format Details ---\n"); + LOG_ERR("per_call_start: '%s'\n", analysis.tools.per_call_start.c_str()); + LOG_ERR("function_namespace: '%s'\n", analysis.tools.function_namespace.c_str()); + LOG_ERR("args_marker: '%s'\n", analysis.tools.args_marker.c_str()); + LOG_ERR("per_call_end: '%s'\n", analysis.tools.per_call_end.c_str()); + } + if (analysis.tools.function_format == tool_call_structure::FUNC_BRACKET_TAG) { + LOG_ERR("\n--- Bracket-Tag Format Details ---\n"); + LOG_ERR("per_call_start: '%s'\n", analysis.tools.per_call_start.c_str()); + LOG_ERR("id_marker: '%s'\n", analysis.tools.id_marker.c_str()); + LOG_ERR("args_marker: '%s'\n", analysis.tools.args_marker.c_str()); + } + + // Generate Parser + templates_params params; + params.messages = json::array(); + params.reasoning_format = + opts.enable_reasoning ? 
COMMON_REASONING_FORMAT_DEEPSEEK : COMMON_REASONING_FORMAT_NONE; + params.enable_thinking = opts.enable_reasoning; + params.add_generation_prompt = opts.generation_prompt; + + if (opts.with_tools) { + params.tools = tools; + params.tool_choice = COMMON_CHAT_TOOL_CHOICE_AUTO; + } else { + params.tools = json(); + params.tool_choice = COMMON_CHAT_TOOL_CHOICE_NONE; + } + params.parallel_tool_calls = false; + + auto parser_data = universal_peg_generator::generate_parser(analysis, chat_template, params); + + LOG_ERR("\n=== Generated Parser ===\n"); + LOG_ERR("%s\n", json::parse(parser_data.parser).dump(4).c_str()); + + LOG_ERR("\n=== Generated Grammar ===\n"); + LOG_ERR("%s\n", parser_data.grammar.c_str()); + + LOG_ERR("\n=== Generated Lazy Grammar ===\n"); + LOG_ERR("%d\n", parser_data.grammar_lazy); + + LOG_ERR("\n=== Generated Grammar Triggers ===\n"); + for (const common_grammar_trigger & cgt : parser_data.grammar_triggers) { + LOG_ERR("Token: %d | Type: %d | Value: %s\n", cgt.token, cgt.type, cgt.value.c_str()); + } + + LOG_ERR("\n=== Preserved Tokens ===\n"); + for (const std::string & token : parser_data.preserved_tokens) { + LOG_ERR(" '%s'\n", token.c_str()); + } + + LOG_ERR("\n=== Verifying created grammar ===\n"); + auto * grammar = llama_grammar_init_impl(nullptr, parser_data.grammar.c_str(), "root", + parser_data.grammar_lazy, nullptr, 0, nullptr, 0); + if (grammar != nullptr) { + LOG_ERR("\n=== Grammar successfully created ===\n"); + } + } + } catch (const std::exception & e) { + LOG_ERR("Analysis failed: %s\n", e.what()); + return 1; + } + + return 0; +} diff --git a/tools/server/server-context.cpp b/tools/server/server-context.cpp index ceafcac179..f82a6cce56 100644 --- a/tools/server/server-context.cpp +++ b/tools/server/server-context.cpp @@ -15,6 +15,7 @@ #include #include #include +#include #include // fix problem with std::min and std::max @@ -2747,7 +2748,15 @@ private: slot.i_batch = -1; - common_sampler_accept(slot.smpl.get(), id, true); + try { + 
common_sampler_accept(slot.smpl.get(), id, true); + } catch (std::runtime_error & e) { + SLT_ERR(slot, "Error when accepting token for sampler: %s\n", e.what()); + send_error(slot, std::string("Error when accepting token for sampler: ") + e.what(), ERROR_TYPE_SERVER); + slot.release(); + slot.i_batch = -1; + continue; // continue loop of slots + } // here we have synchronized the llama_context (due to the sampling above), so we can do time measurement const int64_t t_current = ggml_time_us(); diff --git a/tools/server/server-task.cpp b/tools/server/server-task.cpp index a137427c69..61e5ec5729 100644 --- a/tools/server/server-task.cpp +++ b/tools/server/server-task.cpp @@ -1,12 +1,12 @@ -#include "server-common.h" #include "server-task.h" -#include "common.h" -#include "llama.h" #include "chat.h" +#include "common.h" +#include "json-schema-to-grammar.h" +#include "llama.h" #include "sampling.h" #include "speculative.h" -#include "json-schema-to-grammar.h" +#include "server-common.h" using json = nlohmann::ordered_json; @@ -18,8 +18,8 @@ json task_params::format_logit_bias(const std::vector & logit_ json data = json::array(); for (const auto & lb : logit_bias) { data.push_back(json{ - {"bias", lb.bias}, - {"token", lb.token}, + { "bias", lb.bias }, + { "token", lb.token }, }); } return data; @@ -34,41 +34,44 @@ json task_params::to_json(bool only_metrics) const { json lora = json::array(); for (auto & it : this->lora) { - lora.push_back({{"id", it.first}, {"scale", it.second}}); + lora.push_back({ + { "id", it.first }, + { "scale", it.second } + }); } if (only_metrics) { - return json { - {"seed", sampling.seed}, - {"temperature", sampling.temp}, - {"dynatemp_range", sampling.dynatemp_range}, - {"dynatemp_exponent", sampling.dynatemp_exponent}, - {"top_k", sampling.top_k}, - {"top_p", sampling.top_p}, - {"min_p", sampling.min_p}, - {"top_n_sigma", sampling.top_n_sigma}, - {"xtc_probability", sampling.xtc_probability}, - {"xtc_threshold", sampling.xtc_threshold}, - 
{"typical_p", sampling.typ_p}, - {"repeat_last_n", sampling.penalty_last_n}, - {"repeat_penalty", sampling.penalty_repeat}, - {"presence_penalty", sampling.penalty_present}, - {"frequency_penalty", sampling.penalty_freq}, - {"dry_multiplier", sampling.dry_multiplier}, - {"dry_base", sampling.dry_base}, - {"dry_allowed_length", sampling.dry_allowed_length}, - {"dry_penalty_last_n", sampling.dry_penalty_last_n}, - {"mirostat", sampling.mirostat}, - {"mirostat_tau", sampling.mirostat_tau}, - {"mirostat_eta", sampling.mirostat_eta}, - {"max_tokens", n_predict}, - {"n_predict", n_predict}, // TODO: deduplicate? - {"n_keep", n_keep}, - {"n_discard", n_discard}, - {"ignore_eos", sampling.ignore_eos}, - {"stream", stream}, - {"n_probs", sampling.n_probs}, - {"min_keep", sampling.min_keep}, + return json{ + { "seed", sampling.seed }, + { "temperature", sampling.temp }, + { "dynatemp_range", sampling.dynatemp_range }, + { "dynatemp_exponent", sampling.dynatemp_exponent }, + { "top_k", sampling.top_k }, + { "top_p", sampling.top_p }, + { "min_p", sampling.min_p }, + { "top_n_sigma", sampling.top_n_sigma }, + { "xtc_probability", sampling.xtc_probability }, + { "xtc_threshold", sampling.xtc_threshold }, + { "typical_p", sampling.typ_p }, + { "repeat_last_n", sampling.penalty_last_n }, + { "repeat_penalty", sampling.penalty_repeat }, + { "presence_penalty", sampling.penalty_present }, + { "frequency_penalty", sampling.penalty_freq }, + { "dry_multiplier", sampling.dry_multiplier }, + { "dry_base", sampling.dry_base }, + { "dry_allowed_length", sampling.dry_allowed_length }, + { "dry_penalty_last_n", sampling.dry_penalty_last_n }, + { "mirostat", sampling.mirostat }, + { "mirostat_tau", sampling.mirostat_tau }, + { "mirostat_eta", sampling.mirostat_eta }, + { "max_tokens", n_predict }, + { "n_predict", n_predict }, // TODO: deduplicate? 
+ { "n_keep", n_keep }, + { "n_discard", n_discard }, + { "ignore_eos", sampling.ignore_eos }, + { "stream", stream }, + { "n_probs", sampling.n_probs }, + { "min_keep", sampling.min_keep }, {"chat_format", common_chat_format_name(chat_parser_params.format)}, {"reasoning_format", common_reasoning_format_name(chat_parser_params.reasoning_format)}, {"reasoning_in_content", chat_parser_params.reasoning_in_content}, @@ -94,44 +97,44 @@ json task_params::to_json(bool only_metrics) const { grammar_triggers.push_back(ct.to_json()); } - return json { - {"seed", sampling.seed}, - {"temperature", sampling.temp}, - {"dynatemp_range", sampling.dynatemp_range}, - {"dynatemp_exponent", sampling.dynatemp_exponent}, - {"top_k", sampling.top_k}, - {"top_p", sampling.top_p}, - {"min_p", sampling.min_p}, - {"top_n_sigma", sampling.top_n_sigma}, - {"xtc_probability", sampling.xtc_probability}, - {"xtc_threshold", sampling.xtc_threshold}, - {"typical_p", sampling.typ_p}, - {"repeat_last_n", sampling.penalty_last_n}, - {"repeat_penalty", sampling.penalty_repeat}, - {"presence_penalty", sampling.penalty_present}, - {"frequency_penalty", sampling.penalty_freq}, - {"dry_multiplier", sampling.dry_multiplier}, - {"dry_base", sampling.dry_base}, - {"dry_allowed_length", sampling.dry_allowed_length}, - {"dry_penalty_last_n", sampling.dry_penalty_last_n}, - {"dry_sequence_breakers", sampling.dry_sequence_breakers}, - {"mirostat", sampling.mirostat}, - {"mirostat_tau", sampling.mirostat_tau}, - {"mirostat_eta", sampling.mirostat_eta}, - {"stop", antiprompt}, - {"max_tokens", n_predict}, - {"n_predict", n_predict}, // TODO: deduplicate? 
- {"n_keep", n_keep}, - {"n_discard", n_discard}, - {"ignore_eos", sampling.ignore_eos}, - {"stream", stream}, - {"logit_bias", format_logit_bias(sampling.logit_bias)}, - {"n_probs", sampling.n_probs}, - {"min_keep", sampling.min_keep}, - {"grammar", sampling.grammar}, - {"grammar_lazy", sampling.grammar_lazy}, - {"grammar_triggers", grammar_triggers}, - {"preserved_tokens", sampling.preserved_tokens}, + return json{ + { "seed", sampling.seed }, + { "temperature", sampling.temp }, + { "dynatemp_range", sampling.dynatemp_range }, + { "dynatemp_exponent", sampling.dynatemp_exponent }, + { "top_k", sampling.top_k }, + { "top_p", sampling.top_p }, + { "min_p", sampling.min_p }, + { "top_n_sigma", sampling.top_n_sigma }, + { "xtc_probability", sampling.xtc_probability }, + { "xtc_threshold", sampling.xtc_threshold }, + { "typical_p", sampling.typ_p }, + { "repeat_last_n", sampling.penalty_last_n }, + { "repeat_penalty", sampling.penalty_repeat }, + { "presence_penalty", sampling.penalty_present }, + { "frequency_penalty", sampling.penalty_freq }, + { "dry_multiplier", sampling.dry_multiplier }, + { "dry_base", sampling.dry_base }, + { "dry_allowed_length", sampling.dry_allowed_length }, + { "dry_penalty_last_n", sampling.dry_penalty_last_n }, + { "dry_sequence_breakers", sampling.dry_sequence_breakers }, + { "mirostat", sampling.mirostat }, + { "mirostat_tau", sampling.mirostat_tau }, + { "mirostat_eta", sampling.mirostat_eta }, + { "stop", antiprompt }, + { "max_tokens", n_predict }, + { "n_predict", n_predict }, // TODO: deduplicate? 
+ { "n_keep", n_keep }, + { "n_discard", n_discard }, + { "ignore_eos", sampling.ignore_eos }, + { "stream", stream }, + { "logit_bias", format_logit_bias(sampling.logit_bias) }, + { "n_probs", sampling.n_probs }, + { "min_keep", sampling.min_keep }, + { "grammar", sampling.grammar }, + { "grammar_lazy", sampling.grammar_lazy }, + { "grammar_triggers", grammar_triggers }, + { "preserved_tokens", sampling.preserved_tokens }, {"chat_format", common_chat_format_name(chat_parser_params.format)}, {"reasoning_format", common_reasoning_format_name(chat_parser_params.reasoning_format)}, {"reasoning_in_content", chat_parser_params.reasoning_in_content}, @@ -154,21 +157,75 @@ json task_params::to_json(bool only_metrics) const { // // task_result_state // -common_chat_msg task_result_state::update_chat_msg( - const std::string & text_added, - bool is_partial, - std::vector & diffs) { +common_chat_msg task_result_state::update_chat_msg(const std::string & text_added, + bool is_partial, + std::vector & diffs, + bool filter_tool_calls) { generated_text += text_added; auto msg_prv_copy = chat_msg; SRV_DBG("Parsing chat message: %s\n", generated_text.c_str()); - auto new_msg = common_chat_parse( - generated_text, - is_partial, - chat_parser_params); + auto new_msg = common_chat_parse(generated_text, is_partial, chat_parser_params); if (!new_msg.empty()) { new_msg.set_tool_call_ids(generated_tool_call_ids, gen_tool_call_id); - chat_msg = new_msg; - diffs = common_chat_msg_diff::compute_diffs(msg_prv_copy, new_msg.empty() ? 
msg_prv_copy : new_msg); + chat_msg = new_msg; + auto all_diffs = common_chat_msg_diff::compute_diffs(msg_prv_copy, chat_msg); + + if (!filter_tool_calls) { + diffs = std::move(all_diffs); + } else { + for (auto & d : all_diffs) { + // If this is a new type of delta, flush all currently pending tool call names + for (size_t i = 0; i < chat_msg.tool_calls.size(); ++i) { + if (sent_tool_call_names.count(i) || chat_msg.tool_calls[i].name.empty()) { + continue; + } + if (d.tool_call_index != i || !d.tool_call_delta.arguments.empty()) { + common_chat_msg_diff header; + header.tool_call_index = i; + header.tool_call_delta.id = chat_msg.tool_calls[i].id; + header.tool_call_delta.name = chat_msg.tool_calls[i].name; + diffs.push_back(std::move(header)); + sent_tool_call_names.insert(i); + } + } + + if (d.tool_call_index == std::string::npos) { + diffs.push_back(std::move(d)); + } else { + size_t i = d.tool_call_index; + if (sent_tool_call_names.count(i)) { + if (!d.tool_call_delta.arguments.empty()) { + d.tool_call_delta.name = ""; + d.tool_call_delta.id = ""; + diffs.push_back(std::move(d)); + } + } else { + // Not sent yet. 
+ if (!d.tool_call_delta.arguments.empty() || !is_partial) { + d.tool_call_delta.name = chat_msg.tool_calls[i].name; + d.tool_call_delta.id = chat_msg.tool_calls[i].id; + diffs.push_back(std::move(d)); + sent_tool_call_names.insert(i); + } else { + // Suppress + } + } + } + } + // Final check at EOF + if (!is_partial) { + for (size_t i = 0; i < chat_msg.tool_calls.size(); ++i) { + if (!sent_tool_call_names.count(i) && !chat_msg.tool_calls[i].name.empty()) { + common_chat_msg_diff header; + header.tool_call_index = i; + header.tool_call_delta.id = chat_msg.tool_calls[i].id; + header.tool_call_delta.name = chat_msg.tool_calls[i].name; + diffs.push_back(std::move(header)); + sent_tool_call_names.insert(i); + } + } + } + } } return chat_msg; } @@ -177,11 +234,10 @@ common_chat_msg task_result_state::update_chat_msg( // server_task // -task_params server_task::params_from_json_cmpl( - const llama_vocab * vocab, - const common_params & params_base, - const int n_ctx_slot, - const json & data) { +task_params server_task::params_from_json_cmpl(const llama_vocab * vocab, + const common_params & params_base, + const int n_ctx_slot, + const json & data) { task_params params; // Sampling parameter defaults are loaded from the global server context (but individual requests can still them) @@ -211,8 +267,8 @@ task_params server_task::params_from_json_cmpl( params.n_cmpl = json_value(data, "n_cmpl", json_value(data, "n", 1)); params.n_cache_reuse = json_value(data, "n_cache_reuse", defaults.n_cache_reuse); //params.t_max_prompt_ms = json_value(data, "t_max_prompt_ms", defaults.t_max_prompt_ms); // TODO: implement - params.t_max_predict_ms = json_value(data, "t_max_predict_ms", defaults.t_max_predict_ms); - params.response_fields = json_value(data, "response_fields", std::vector()); + params.t_max_predict_ms = json_value(data, "t_max_predict_ms", defaults.t_max_predict_ms); + params.response_fields = json_value(data, "response_fields", std::vector()); params.sampling.top_k = 
json_value(data, "top_k", defaults.sampling.top_k); params.sampling.top_p = json_value(data, "top_p", defaults.sampling.top_p); @@ -262,7 +318,7 @@ task_params server_task::params_from_json_cmpl( params.speculative.ngram_min_hits = std::max(std::min(1, (int) params.speculative.ngram_min_hits), 1024); // Use OpenAI API logprobs only if n_probs wasn't provided - if (data.contains("logprobs") && params.sampling.n_probs == defaults.sampling.n_probs){ + if (data.contains("logprobs") && params.sampling.n_probs == defaults.sampling.n_probs) { params.sampling.n_probs = json_value(data, "logprobs", defaults.sampling.n_probs); } @@ -305,7 +361,8 @@ task_params server_task::params_from_json_cmpl( // Ref: https://github.com/oobabooga/text-generation-webui/blob/d1af7a41ade7bd3c3a463bfa640725edb818ebaf/extensions/openai/typing.py#L39 if (data.contains("dry_sequence_breakers")) { - params.sampling.dry_sequence_breakers = json_value(data, "dry_sequence_breakers", std::vector()); + params.sampling.dry_sequence_breakers = + json_value(data, "dry_sequence_breakers", std::vector()); if (params.sampling.dry_sequence_breakers.empty()) { throw std::runtime_error("Error: dry_sequence_breakers must be a non-empty array of strings"); } @@ -315,15 +372,15 @@ task_params server_task::params_from_json_cmpl( // process "json_schema" and "grammar" if (data.contains("json_schema") && !data.contains("grammar")) { try { - auto schema = json_value(data, "json_schema", json::object()); + auto schema = json_value(data, "json_schema", json::object()); SRV_DBG("JSON schema: %s\n", schema.dump(2).c_str()); - params.sampling.grammar = json_schema_to_grammar(schema); + params.sampling.grammar = json_schema_to_grammar(schema); SRV_DBG("Converted grammar: %s\n", params.sampling.grammar.c_str()); } catch (const std::exception & e) { throw std::runtime_error(std::string("\"json_schema\": ") + e.what()); } } else { - params.sampling.grammar = json_value(data, "grammar", defaults.sampling.grammar); + 
params.sampling.grammar = json_value(data, "grammar", defaults.sampling.grammar); SRV_DBG("Grammar: %s\n", params.sampling.grammar.c_str()); params.sampling.grammar_lazy = json_value(data, "grammar_lazy", defaults.sampling.grammar_lazy); SRV_DBG("Grammar lazy: %s\n", params.sampling.grammar_lazy ? "true" : "false"); @@ -342,9 +399,10 @@ task_params server_task::params_from_json_cmpl( reasoning_format = common_reasoning_format_from_name(data.at("reasoning_format").get()); } params.chat_parser_params.reasoning_format = reasoning_format; - params.chat_parser_params.reasoning_in_content = params.stream && (reasoning_format == COMMON_REASONING_FORMAT_DEEPSEEK_LEGACY); + params.chat_parser_params.reasoning_in_content = + params.stream && (reasoning_format == COMMON_REASONING_FORMAT_DEEPSEEK_LEGACY); params.chat_parser_params.thinking_forced_open = json_value(data, "thinking_forced_open", false); - params.chat_parser_params.parse_tool_calls = json_value(data, "parse_tool_calls", false); + params.chat_parser_params.parse_tool_calls = json_value(data, "parse_tool_calls", false); if (data.contains("chat_parser")) { params.chat_parser_params.parser.load(data.at("chat_parser").get()); } @@ -354,7 +412,8 @@ task_params server_task::params_from_json_cmpl( const auto preserved_tokens = data.find("preserved_tokens"); if (preserved_tokens != data.end()) { for (const auto & t : *preserved_tokens) { - auto ids = common_tokenize(vocab, t.get(), /* add_special= */ false, /* parse_special= */ true); + auto ids = + common_tokenize(vocab, t.get(), /* add_special= */ false, /* parse_special= */ true); if (ids.size() == 1) { SRV_DBG("Preserved token: %d\n", ids[0]); params.sampling.preserved_tokens.insert(ids[0]); @@ -373,18 +432,20 @@ task_params server_task::params_from_json_cmpl( auto ids = common_tokenize(vocab, word, /* add_special= */ false, /* parse_special= */ true); if (ids.size() == 1) { auto token = ids[0]; - if (std::find(params.sampling.preserved_tokens.begin(), 
params.sampling.preserved_tokens.end(), (llama_token) token) == params.sampling.preserved_tokens.end()) { - throw std::runtime_error("Grammar trigger word should be marked as preserved token: " + word); + if (std::find(params.sampling.preserved_tokens.begin(), params.sampling.preserved_tokens.end(), + (llama_token) token) == params.sampling.preserved_tokens.end()) { + throw std::runtime_error("Grammar trigger word should be marked as preserved token: " + + word); } SRV_DBG("Grammar trigger token: %d (`%s`)\n", token, word.c_str()); common_grammar_trigger trigger; - trigger.type = COMMON_GRAMMAR_TRIGGER_TYPE_TOKEN; + trigger.type = COMMON_GRAMMAR_TRIGGER_TYPE_TOKEN; trigger.value = word; trigger.token = token; params.sampling.grammar_triggers.push_back(std::move(trigger)); } else { SRV_DBG("Grammar trigger word: `%s`\n", word.c_str()); - params.sampling.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, word}); + params.sampling.grammar_triggers.push_back({ COMMON_GRAMMAR_TRIGGER_TYPE_WORD, word }); } } else { if (ct.value.type == COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN) { @@ -424,12 +485,12 @@ task_params server_task::params_from_json_cmpl( if (el[0].is_number_integer()) { llama_token tok = el[0].get(); if (tok >= 0 && tok < n_vocab) { - params.sampling.logit_bias.push_back({tok, bias}); + params.sampling.logit_bias.push_back({ tok, bias }); } } else if (el[0].is_string()) { auto toks = common_tokenize(vocab, el[0].get(), false); for (auto tok : toks) { - params.sampling.logit_bias.push_back({tok, bias}); + params.sampling.logit_bias.push_back({ tok, bias }); } } } @@ -437,8 +498,8 @@ task_params server_task::params_from_json_cmpl( } else if (logit_bias != data.end() && logit_bias->is_object()) { const int n_vocab = llama_vocab_n_tokens(vocab); for (const auto & el : logit_bias->items()) { - float bias; - const auto & key = el.key(); + float bias; + const auto & key = el.key(); const auto & value = el.value(); if (value.is_number()) { bias = value.get(); @@ 
-448,16 +509,16 @@ task_params server_task::params_from_json_cmpl( continue; } - char *end; + char * end; llama_token tok = strtol(key.c_str(), &end, 10); if (*end == 0) { if (tok >= 0 && tok < n_vocab) { - params.sampling.logit_bias.push_back({tok, bias}); + params.sampling.logit_bias.push_back({ tok, bias }); } } else { auto toks = common_tokenize(vocab, key, false); for (auto tok : toks) { - params.sampling.logit_bias.push_back({tok, bias}); + params.sampling.logit_bias.push_back({ tok, bias }); } } } @@ -465,9 +526,9 @@ task_params server_task::params_from_json_cmpl( params.sampling.ignore_eos = json_value(data, "ignore_eos", params_base.sampling.ignore_eos); if (params.sampling.ignore_eos) { - params.sampling.logit_bias.insert( - params.sampling.logit_bias.end(), - defaults.sampling.logit_bias_eog.begin(), defaults.sampling.logit_bias_eog.end()); + params.sampling.logit_bias.insert(params.sampling.logit_bias.end(), + defaults.sampling.logit_bias_eog.begin(), + defaults.sampling.logit_bias_eog.end()); } } @@ -493,7 +554,7 @@ task_params server_task::params_from_json_cmpl( if (samplers != data.end()) { if (samplers->is_array()) { params.sampling.samplers = common_sampler_types_from_names(*samplers, false); - } else if (samplers->is_string()){ + } else if (samplers->is_string()) { params.sampling.samplers = common_sampler_types_from_chars(samplers->get()); } } else { @@ -514,21 +575,21 @@ task_params server_task::params_from_json_cmpl( json result_timings::to_json() const { json base = { - {"cache_n", cache_n}, + { "cache_n", cache_n }, - {"prompt_n", prompt_n}, - {"prompt_ms", prompt_ms}, - {"prompt_per_token_ms", prompt_per_token_ms}, - {"prompt_per_second", prompt_per_second}, + { "prompt_n", prompt_n }, + { "prompt_ms", prompt_ms }, + { "prompt_per_token_ms", prompt_per_token_ms }, + { "prompt_per_second", prompt_per_second }, - {"predicted_n", predicted_n}, - {"predicted_ms", predicted_ms}, - {"predicted_per_token_ms", predicted_per_token_ms}, - 
{"predicted_per_second", predicted_per_second}, + { "predicted_n", predicted_n }, + { "predicted_ms", predicted_ms }, + { "predicted_per_token_ms", predicted_per_token_ms }, + { "predicted_per_second", predicted_per_second }, }; if (draft_n > 0) { - base["draft_n"] = draft_n; + base["draft_n"] = draft_n; base["draft_n_accepted"] = draft_n_accepted; } @@ -539,20 +600,24 @@ json result_timings::to_json() const { // result_prompt_progress // json result_prompt_progress::to_json() const { - return json { - {"total", total}, - {"cache", cache}, - {"processed", processed}, - {"time_ms", time_ms}, + return json{ + { "total", total }, + { "cache", cache }, + { "processed", processed }, + { "time_ms", time_ms }, }; } static inline std::string stop_type_to_str(stop_type type) { switch (type) { - case STOP_TYPE_EOS: return "eos"; - case STOP_TYPE_WORD: return "word"; - case STOP_TYPE_LIMIT: return "limit"; - default: return "none"; + case STOP_TYPE_EOS: + return "eos"; + case STOP_TYPE_WORD: + return "word"; + case STOP_TYPE_LIMIT: + return "limit"; + default: + return "none"; } } @@ -565,36 +630,28 @@ json completion_token_output::to_json(bool post_sampling_probs) const { for (const auto & p : probs) { std::string txt(p.txt); txt.resize(validate_utf8(txt)); - probs_for_token.push_back(json { - {"id", p.tok}, - {"token", txt}, - {"bytes", str_to_bytes(p.txt)}, - { - post_sampling_probs ? "prob" : "logprob", - post_sampling_probs ? p.prob : logarithm(p.prob) - }, + probs_for_token.push_back(json{ + { "id", p.tok }, + { "token", txt }, + { "bytes", str_to_bytes(p.txt) }, + { post_sampling_probs ? "prob" : "logprob", post_sampling_probs ? 
p.prob : logarithm(p.prob) }, }); } return probs_for_token; } -json completion_token_output::probs_vector_to_json(const std::vector & probs, bool post_sampling_probs) { +json completion_token_output::probs_vector_to_json(const std::vector & probs, + bool post_sampling_probs) { json out = json::array(); for (const auto & p : probs) { std::string txt(p.text_to_send); txt.resize(validate_utf8(txt)); - out.push_back(json { - {"id", p.tok}, - {"token", txt}, - {"bytes", str_to_bytes(p.text_to_send)}, - { - post_sampling_probs ? "prob" : "logprob", - post_sampling_probs ? p.prob : logarithm(p.prob) - }, - { - post_sampling_probs ? "top_probs" : "top_logprobs", - p.to_json(post_sampling_probs) - }, + out.push_back(json{ + { "id", p.tok }, + { "token", txt }, + { "bytes", str_to_bytes(p.text_to_send) }, + { post_sampling_probs ? "prob" : "logprob", post_sampling_probs ? p.prob : logarithm(p.prob) }, + { post_sampling_probs ? "top_probs" : "top_logprobs", p.to_json(post_sampling_probs) }, }); } return out; @@ -635,61 +692,58 @@ json server_task_result_cmpl_final::to_json() { } json server_task_result_cmpl_final::to_json_non_oaicompat() { - json res = json { - {"index", index}, - {"content", content}, - {"tokens", tokens}, - {"id_slot", id_slot}, - {"stop", true}, - {"model", oaicompat_model}, - {"tokens_predicted", n_decoded}, - {"tokens_evaluated", n_prompt_tokens}, - {"generation_settings", generation_params.to_json()}, - {"prompt", prompt}, - {"has_new_line", has_new_line}, - {"truncated", truncated}, - {"stop_type", stop_type_to_str(stop)}, - {"stopping_word", stopping_word}, - {"tokens_cached", n_tokens_cached}, - {"timings", timings.to_json()}, + json res = json{ + { "index", index }, + { "content", content }, + { "tokens", tokens }, + { "id_slot", id_slot }, + { "stop", true }, + { "model", oaicompat_model }, + { "tokens_predicted", n_decoded }, + { "tokens_evaluated", n_prompt_tokens }, + { "generation_settings", generation_params.to_json() }, + { "prompt", prompt 
}, + { "has_new_line", has_new_line }, + { "truncated", truncated }, + { "stop_type", stop_type_to_str(stop) }, + { "stopping_word", stopping_word }, + { "tokens_cached", n_tokens_cached }, + { "timings", timings.to_json() }, }; if (!stream && !probs_output.empty()) { - res["completion_probabilities"] = completion_token_output::probs_vector_to_json(probs_output, post_sampling_probs); + res["completion_probabilities"] = + completion_token_output::probs_vector_to_json(probs_output, post_sampling_probs); } return response_fields.empty() ? res : json_get_nested_values(response_fields, res); } json server_task_result_cmpl_final::to_json_oaicompat() { - std::time_t t = std::time(0); - json logprobs = json(nullptr); // OAI default to null + std::time_t t = std::time(0); + json logprobs = json(nullptr); // OAI default to null if (!stream && probs_output.size() > 0) { logprobs = json{ - {"content", completion_token_output::probs_vector_to_json(probs_output, post_sampling_probs)}, + { "content", completion_token_output::probs_vector_to_json(probs_output, post_sampling_probs) }, }; } json finish_reason = "length"; if (stop == STOP_TYPE_WORD || stop == STOP_TYPE_EOS) { finish_reason = "stop"; } - json res = json { - {"choices", json::array({ - json{ - {"text", content}, - {"index", index}, - {"logprobs", logprobs}, - {"finish_reason", finish_reason}, - } - })}, - {"created", t}, - {"model", oaicompat_model}, - {"system_fingerprint", build_info}, - {"object", "text_completion"}, - {"usage", json { - {"completion_tokens", n_decoded}, - {"prompt_tokens", n_prompt_tokens}, - {"total_tokens", n_decoded + n_prompt_tokens} - }}, - {"id", oaicompat_cmpl_id} + json res = json{ + { "choices", json::array({ json{ + { "text", content }, + { "index", index }, + { "logprobs", logprobs }, + { "finish_reason", finish_reason }, + } }) }, + { "created", t }, + { "model", oaicompat_model }, + { "system_fingerprint", build_info }, + { "object", "text_completion" }, + { "usage", json{ { 
"completion_tokens", n_decoded }, + { "prompt_tokens", n_prompt_tokens }, + { "total_tokens", n_decoded + n_prompt_tokens } } }, + { "id", oaicompat_cmpl_id } }; // extra fields for debugging purposes @@ -697,19 +751,19 @@ json server_task_result_cmpl_final::to_json_oaicompat() { res["__verbose"] = to_json_non_oaicompat(); } if (timings.prompt_n >= 0) { - res.push_back({"timings", timings.to_json()}); + res.push_back({ "timings", timings.to_json() }); } return res; } json server_task_result_cmpl_final::to_json_oaicompat_chat() { - std::string finish_reason = "length"; + std::string finish_reason = "length"; common_chat_msg msg; if (!oaicompat_msg.empty()) { msg = oaicompat_msg; } else { - msg.role = "assistant"; + msg.role = "assistant"; msg.content = content; } if (stop == STOP_TYPE_WORD || stop == STOP_TYPE_EOS) { @@ -724,24 +778,22 @@ json server_task_result_cmpl_final::to_json_oaicompat_chat() { if (!stream && probs_output.size() > 0) { choice["logprobs"] = json{ - {"content", completion_token_output::probs_vector_to_json(probs_output, post_sampling_probs)}, + { "content", completion_token_output::probs_vector_to_json(probs_output, post_sampling_probs) }, }; } std::time_t t = std::time(0); - json res = json { - {"choices", json::array({choice})}, - {"created", t}, - {"model", oaicompat_model}, - {"system_fingerprint", build_info}, - {"object", "chat.completion"}, - {"usage", json { - {"completion_tokens", n_decoded}, - {"prompt_tokens", n_prompt_tokens}, - {"total_tokens", n_decoded + n_prompt_tokens} - }}, - {"id", oaicompat_cmpl_id} + json res = json{ + { "choices", json::array({ choice }) }, + { "created", t }, + { "model", oaicompat_model }, + { "system_fingerprint", build_info }, + { "object", "chat.completion" }, + { "usage", json{ { "completion_tokens", n_decoded }, + { "prompt_tokens", n_prompt_tokens }, + { "total_tokens", n_decoded + n_prompt_tokens } } }, + { "id", oaicompat_cmpl_id } }; // extra fields for debugging purposes @@ -749,14 +801,14 @@ 
json server_task_result_cmpl_final::to_json_oaicompat_chat() { res["__verbose"] = to_json_non_oaicompat(); } if (timings.prompt_n >= 0) { - res.push_back({"timings", timings.to_json()}); + res.push_back({ "timings", timings.to_json() }); } return res; } json server_task_result_cmpl_final::to_json_oaicompat_chat_stream() { - std::time_t t = std::time(0); + std::time_t t = std::time(0); std::string finish_reason = "length"; if (stop == STOP_TYPE_WORD || stop == STOP_TYPE_EOS) { finish_reason = oaicompat_msg.tool_calls.empty() ? "stop" : "tool_calls"; @@ -781,40 +833,41 @@ json server_task_result_cmpl_final::to_json_oaicompat_chat_stream() { } deltas.push_back({ - {"choices", json::array({ - json { - {"finish_reason", finish_reason}, - {"index", 0}, - {"delta", json::object()}, - }, - })}, - {"created", t}, - {"id", oaicompat_cmpl_id}, - {"model", oaicompat_model}, - {"system_fingerprint", build_info}, - {"object", "chat.completion.chunk"}, + { "choices", json::array({ + json{ + { "finish_reason", finish_reason }, + { "index", 0 }, + { "delta", json::object() }, + }, + }) }, + { "created", t }, + { "id", oaicompat_cmpl_id }, + { "model", oaicompat_model }, + { "system_fingerprint", build_info }, + { "object", "chat.completion.chunk" }, }); if (include_usage) { // OpenAI API spec for chat.completion.chunks specifies an empty `choices` array for the last chunk when including usage // https://platform.openai.com/docs/api-reference/chat_streaming/streaming#chat_streaming/streaming-choices deltas.push_back({ - {"choices", json::array()}, - {"created", t}, - {"id", oaicompat_cmpl_id}, - {"model", oaicompat_model}, - {"system_fingerprint", build_info}, - {"object", "chat.completion.chunk"}, - {"usage", json { - {"completion_tokens", n_decoded}, - {"prompt_tokens", n_prompt_tokens}, - {"total_tokens", n_decoded + n_prompt_tokens}, - }}, + { "choices", json::array() }, + { "created", t }, + { "id", oaicompat_cmpl_id }, + { "model", oaicompat_model }, + { "system_fingerprint", 
build_info }, + { "object", "chat.completion.chunk" }, + { "usage", + json{ + { "completion_tokens", n_decoded }, + { "prompt_tokens", n_prompt_tokens }, + { "total_tokens", n_decoded + n_prompt_tokens }, + } }, }); } if (timings.prompt_n >= 0) { - deltas.back().push_back({"timings", timings.to_json()}); + deltas.back().push_back({ "timings", timings.to_json() }); } // extra fields for debugging purposes @@ -1017,7 +1070,7 @@ json server_task_result_cmpl_final::to_json_anthropic() { if (!oaicompat_msg.empty()) { msg = oaicompat_msg; } else { - msg.role = "assistant"; + msg.role = "assistant"; msg.content = content; } @@ -1032,16 +1085,16 @@ json server_task_result_cmpl_final::to_json_anthropic() { if (!msg.content.empty()) { content_blocks.push_back({ - {"type", "text"}, - {"text", msg.content} + { "type", "text" }, + { "text", msg.content } }); } for (const auto & tool_call : msg.tool_calls) { json tool_use_block = { - {"type", "tool_use"}, - {"id", tool_call.id}, - {"name", tool_call.name} + { "type", "tool_use" }, + { "id", tool_call.id }, + { "name", tool_call.name } }; try { @@ -1054,17 +1107,14 @@ json server_task_result_cmpl_final::to_json_anthropic() { } json res = { - {"id", oaicompat_cmpl_id}, - {"type", "message"}, - {"role", "assistant"}, - {"content", content_blocks}, - {"model", oaicompat_model}, - {"stop_reason", stop_reason}, - {"stop_sequence", stopping_word.empty() ? nullptr : json(stopping_word)}, - {"usage", { - {"input_tokens", n_prompt_tokens}, - {"output_tokens", n_decoded} - }} + { "id", oaicompat_cmpl_id }, + { "type", "message" }, + { "role", "assistant" }, + { "content", content_blocks }, + { "model", oaicompat_model }, + { "stop_reason", stop_reason }, + { "stop_sequence", stopping_word.empty() ? 
nullptr : json(stopping_word) }, + { "usage", { { "input_tokens", n_prompt_tokens }, { "output_tokens", n_decoded } } } }; return res; @@ -1159,31 +1209,27 @@ json server_task_result_cmpl_final::to_json_anthropic_stream() { const auto & full_tool_call = oaicompat_msg.tool_calls[diff.tool_call_index]; events.push_back({ - {"event", "content_block_start"}, - {"data", { - {"type", "content_block_start"}, - {"index", content_block_index}, - {"content_block", { - {"type", "tool_use"}, - {"id", full_tool_call.id}, - {"name", full_tool_call.name} - }} - }} + { "event", "content_block_start" }, + { "data", + { { "type", "content_block_start" }, + { "index", content_block_index }, + { "content_block", + { { "type", "tool_use" }, + { "id", full_tool_call.id }, + { "name", full_tool_call.name } } } } } }); tool_calls_started.insert(diff.tool_call_index); } if (!diff.tool_call_delta.arguments.empty()) { events.push_back({ - {"event", "content_block_delta"}, - {"data", { - {"type", "content_block_delta"}, - {"index", content_block_index}, - {"delta", { - {"type", "input_json_delta"}, - {"partial_json", diff.tool_call_delta.arguments} - }} - }} + { "event", "content_block_delta" }, + { "data", + { { "type", "content_block_delta" }, + { "index", content_block_index }, + { "delta", + { { "type", "input_json_delta" }, + { "partial_json", diff.tool_call_delta.arguments } } } } } }); } } @@ -1226,33 +1272,24 @@ json server_task_result_cmpl_final::to_json_anthropic_stream() { for (size_t i = 0; i < num_tool_calls; i++) { size_t content_block_index = (has_thinking ? 1 : 0) + (has_text ? 
1 : 0) + i; events.push_back({ - {"event", "content_block_stop"}, - {"data", { - {"type", "content_block_stop"}, - {"index", content_block_index} - }} + { "event", "content_block_stop" }, + { "data", { { "type", "content_block_stop" }, { "index", content_block_index } } } }); } events.push_back({ - {"event", "message_delta"}, - {"data", { - {"type", "message_delta"}, - {"delta", { - {"stop_reason", stop_reason}, - {"stop_sequence", stopping_word.empty() ? nullptr : json(stopping_word)} - }}, - {"usage", { - {"output_tokens", n_decoded} - }} - }} + { "event", "message_delta" }, + { "data", + { { "type", "message_delta" }, + { "delta", + { { "stop_reason", stop_reason }, + { "stop_sequence", stopping_word.empty() ? nullptr : json(stopping_word) } } }, + { "usage", { { "output_tokens", n_decoded } } } } } }); events.push_back({ - {"event", "message_stop"}, - {"data", { - {"type", "message_stop"} - }} + { "event", "message_stop" }, + { "data", { { "type", "message_stop" } } } }); return events; @@ -1311,50 +1348,49 @@ json server_task_result_cmpl_partial::to_json() { json server_task_result_cmpl_partial::to_json_non_oaicompat() { // non-OAI-compat JSON - json res = json { - {"index", index}, - {"content", content}, - {"tokens", tokens}, - {"stop", false}, - {"id_slot", id_slot}, - {"tokens_predicted", n_decoded}, - {"tokens_evaluated", n_prompt_tokens}, + json res = json{ + { "index", index }, + { "content", content }, + { "tokens", tokens }, + { "stop", false }, + { "id_slot", id_slot }, + { "tokens_predicted", n_decoded }, + { "tokens_evaluated", n_prompt_tokens }, }; // populate the timings object when needed (usually for the last response or with timings_per_token enabled) if (timings.prompt_n > 0) { - res.push_back({"timings", timings.to_json()}); + res.push_back({ "timings", timings.to_json() }); } if (is_progress) { - res.push_back({"prompt_progress", progress.to_json()}); + res.push_back({ "prompt_progress", progress.to_json() }); } if 
(!prob_output.probs.empty()) { - res["completion_probabilities"] = completion_token_output::probs_vector_to_json({prob_output}, post_sampling_probs); + res["completion_probabilities"] = + completion_token_output::probs_vector_to_json({ prob_output }, post_sampling_probs); } return res; } json server_task_result_cmpl_partial::to_json_oaicompat() { - std::time_t t = std::time(0); - json logprobs = json(nullptr); // OAI default to null + std::time_t t = std::time(0); + json logprobs = json(nullptr); // OAI default to null if (prob_output.probs.size() > 0) { logprobs = json{ - {"content", completion_token_output::probs_vector_to_json({prob_output}, post_sampling_probs)}, + { "content", completion_token_output::probs_vector_to_json({ prob_output }, post_sampling_probs) }, }; } - json res = json { - {"choices", json::array({ - json{ - {"text", content}, - {"index", index}, - {"logprobs", logprobs}, - {"finish_reason", nullptr}, - } - })}, - {"created", t}, - {"model", oaicompat_model}, - {"system_fingerprint", build_info}, - {"object", "text_completion"}, - {"id", oaicompat_cmpl_id} + json res = json{ + { "choices", json::array({ json{ + { "text", content }, + { "index", index }, + { "logprobs", logprobs }, + { "finish_reason", nullptr }, + } }) }, + { "created", t }, + { "model", oaicompat_model }, + { "system_fingerprint", build_info }, + { "object", "text_completion" }, + { "id", oaicompat_cmpl_id } }; // extra fields for debugging purposes @@ -1362,42 +1398,42 @@ json server_task_result_cmpl_partial::to_json_oaicompat() { res["__verbose"] = to_json_non_oaicompat(); } if (timings.prompt_n >= 0) { - res.push_back({"timings", timings.to_json()}); + res.push_back({ "timings", timings.to_json() }); } if (is_progress) { - res.push_back({"prompt_progress", progress.to_json()}); + res.push_back({ "prompt_progress", progress.to_json() }); } return res; } json server_task_result_cmpl_partial::to_json_oaicompat_chat() { - bool first = n_decoded == 1; - std::time_t t = 
std::time(0); - json choices; + bool first = n_decoded == 1; + std::time_t t = std::time(0); + json choices; std::vector deltas; - auto add_delta = [&](const json & delta) { + auto add_delta = [&](const json & delta) { deltas.push_back({ - {"choices", json::array({ - json { - {"finish_reason", nullptr}, - {"index", index}, - {"delta", delta}, - }, - })}, - {"created", t}, - {"id", oaicompat_cmpl_id}, - {"model", oaicompat_model}, - {"system_fingerprint", build_info}, - {"object", "chat.completion.chunk"}, + { "choices", json::array({ + json{ + { "finish_reason", nullptr }, + { "index", index }, + { "delta", delta }, + }, + }) }, + { "created", t }, + { "id", oaicompat_cmpl_id }, + { "model", oaicompat_model }, + { "system_fingerprint", build_info }, + { "object", "chat.completion.chunk" }, }); }; // We have to send an initial update to conform to openai behavior if (first || is_progress) { add_delta({ - {"role", "assistant"}, - {"content", nullptr}, + { "role", "assistant" }, + { "content", nullptr }, }); } @@ -1410,16 +1446,16 @@ json server_task_result_cmpl_partial::to_json_oaicompat_chat() { GGML_ASSERT(last_json.at("choices").size() >= 1); if (prob_output.probs.size() > 0) { - last_json.at("choices").at(0)["logprobs"] = json { - {"content", completion_token_output::probs_vector_to_json({prob_output}, post_sampling_probs)}, + last_json.at("choices").at(0)["logprobs"] = json{ + { "content", completion_token_output::probs_vector_to_json({ prob_output }, post_sampling_probs) }, }; } if (timings.prompt_n >= 0) { - last_json.push_back({"timings", timings.to_json()}); + last_json.push_back({ "timings", timings.to_json() }); } if (is_progress) { - last_json.push_back({"prompt_progress", progress.to_json()}); + last_json.push_back({ "prompt_progress", progress.to_json() }); } } @@ -1560,23 +1596,18 @@ json server_task_result_cmpl_partial::to_json_anthropic() { if (first) { events.push_back({ - {"event", "message_start"}, - {"data", { - {"type", "message_start"}, - 
{"message", { - {"id", oaicompat_cmpl_id}, - {"type", "message"}, - {"role", "assistant"}, - {"content", json::array()}, - {"model", oaicompat_model}, - {"stop_reason", nullptr}, - {"stop_sequence", nullptr}, - {"usage", { - {"input_tokens", n_prompt_tokens}, - {"output_tokens", 0} - }} - }} - }} + { "event", "message_start" }, + { "data", + { { "type", "message_start" }, + { "message", + { { "id", oaicompat_cmpl_id }, + { "type", "message" }, + { "role", "assistant" }, + { "content", json::array() }, + { "model", oaicompat_model }, + { "stop_reason", nullptr }, + { "stop_sequence", nullptr }, + { "usage", { { "input_tokens", n_prompt_tokens }, { "output_tokens", 0 } } } } } } } }); } @@ -1658,30 +1689,26 @@ json server_task_result_cmpl_partial::to_json_anthropic() { if (!diff.tool_call_delta.name.empty()) { events.push_back({ - {"event", "content_block_start"}, - {"data", { - {"type", "content_block_start"}, - {"index", content_block_index}, - {"content_block", { - {"type", "tool_use"}, - {"id", diff.tool_call_delta.id}, - {"name", diff.tool_call_delta.name} - }} - }} + { "event", "content_block_start" }, + { "data", + { { "type", "content_block_start" }, + { "index", content_block_index }, + { "content_block", + { { "type", "tool_use" }, + { "id", diff.tool_call_delta.id }, + { "name", diff.tool_call_delta.name } } } } } }); } if (!diff.tool_call_delta.arguments.empty()) { events.push_back({ - {"event", "content_block_delta"}, - {"data", { - {"type", "content_block_delta"}, - {"index", content_block_index}, - {"delta", { - {"type", "input_json_delta"}, - {"partial_json", diff.tool_call_delta.arguments} - }} - }} + { "event", "content_block_delta" }, + { "data", + { { "type", "content_block_delta" }, + { "index", content_block_index }, + { "delta", + { { "type", "input_json_delta" }, + { "partial_json", diff.tool_call_delta.arguments } } } } } }); } } @@ -1741,28 +1768,28 @@ json server_task_result_error::to_json() { // server_task_result_metrics // json 
server_task_result_metrics::to_json() { - return json { - { "idle", n_idle_slots }, - { "processing", n_processing_slots }, - { "deferred", n_tasks_deferred }, - { "t_start", t_start }, + return json{ + { "idle", n_idle_slots }, + { "processing", n_processing_slots }, + { "deferred", n_tasks_deferred }, + { "t_start", t_start }, { "n_prompt_tokens_processed_total", n_prompt_tokens_processed_total }, - { "t_tokens_generation_total", t_tokens_generation_total }, - { "n_tokens_predicted_total", n_tokens_predicted_total }, - { "t_prompt_processing_total", t_prompt_processing_total }, + { "t_tokens_generation_total", t_tokens_generation_total }, + { "n_tokens_predicted_total", n_tokens_predicted_total }, + { "t_prompt_processing_total", t_prompt_processing_total }, - { "n_tokens_max", n_tokens_max }, + { "n_tokens_max", n_tokens_max }, - { "n_prompt_tokens_processed", n_prompt_tokens_processed }, - { "t_prompt_processing", t_prompt_processing }, - { "n_tokens_predicted", n_tokens_predicted }, - { "t_tokens_generation", t_tokens_generation }, + { "n_prompt_tokens_processed", n_prompt_tokens_processed }, + { "t_prompt_processing", t_prompt_processing }, + { "n_tokens_predicted", n_tokens_predicted }, + { "t_tokens_generation", t_tokens_generation }, - { "n_decode_total", n_decode_total }, - { "n_busy_slots_total", n_busy_slots_total }, + { "n_decode_total", n_decode_total }, + { "n_busy_slots_total", n_busy_slots_total }, - { "slots", slots_data }, + { "slots", slots_data }, }; } @@ -1771,25 +1798,21 @@ json server_task_result_metrics::to_json() { // json server_task_result_slot_save_load::to_json() { if (is_save) { - return json { - { "id_slot", id_slot }, - { "filename", filename }, - { "n_saved", n_tokens }, - { "n_written", n_bytes }, - { "timings", { - { "save_ms", t_ms } - }}, + return json{ + { "id_slot", id_slot }, + { "filename", filename }, + { "n_saved", n_tokens }, + { "n_written", n_bytes }, + { "timings", { { "save_ms", t_ms } } }, }; } - return json { - { 
"id_slot", id_slot }, - { "filename", filename }, - { "n_restored", n_tokens }, - { "n_read", n_bytes }, - { "timings", { - { "restore_ms", t_ms } - }}, + return json{ + { "id_slot", id_slot }, + { "filename", filename }, + { "n_restored", n_tokens }, + { "n_read", n_bytes }, + { "timings", { { "restore_ms", t_ms } } }, }; } @@ -1797,8 +1820,8 @@ json server_task_result_slot_save_load::to_json() { // server_task_result_slot_erase // json server_task_result_slot_erase::to_json() { - return json { - { "id_slot", id_slot }, + return json{ + { "id_slot", id_slot }, { "n_erased", n_erased }, }; } @@ -1810,13 +1833,13 @@ json server_task_result_slot_erase::to_json() { json server_task_result_get_lora::to_json() { json result = json::array(); for (size_t i = 0; i < loras.size(); ++i) { - auto & lora = loras[i]; - json entry = { - {"id", i}, - {"path", lora.info.path}, - {"scale", lora.info.scale}, - {"task_name", lora.info.task_name}, - {"prompt_prefix", lora.info.prompt_prefix}, + auto & lora = loras[i]; + json entry = { + { "id", i }, + { "path", lora.info.path }, + { "scale", lora.info.scale }, + { "task_name", lora.info.task_name }, + { "prompt_prefix", lora.info.prompt_prefix }, }; if (!lora.alora_invocation_tokens.empty()) { entry["alora_invocation_string"] = lora.alora_invocation_string; @@ -1832,7 +1855,9 @@ json server_task_result_get_lora::to_json() { // json server_task_result_apply_lora::to_json() { - return json {{ "success", true }}; + return json{ + { "success", true } + }; } // @@ -1890,7 +1915,7 @@ server_prompt * server_prompt_cache::alloc(const server_prompt & prompt, size_t } catch (const std::bad_alloc & e) { SRV_ERR("failed to allocate memory for prompt cache state: %s\n", e.what()); - limit_size = std::max(1, 0.4*size()); + limit_size = std::max(1, 0.4 * size()); SRV_WRN(" - cache size limit reduced to %.3f MiB\n", limit_size / (1024.0 * 1024.0)); @@ -1901,16 +1926,19 @@ server_prompt * server_prompt_cache::alloc(const server_prompt & prompt, size_t 
// TODO: for some reason we can't copy server_tokens, so we have to do this workaround auto & cur = states.emplace_back(); - cur = { - /*.tokens =*/ server_tokens(prompt.tokens.get_text_tokens(), false), - /*.data =*/ std::move(state_data), - /*.checkpoints =*/ prompt.checkpoints, + cur = { + /*.tokens =*/server_tokens(prompt.tokens.get_text_tokens(), false), + /*.data =*/std::move(state_data), + /*.checkpoints =*/prompt.checkpoints, }; return &cur; } -bool server_prompt_cache::load(server_prompt & prompt, const server_tokens & tokens_new, llama_context * ctx, int32_t id_slot) { +bool server_prompt_cache::load(server_prompt & prompt, + const server_tokens & tokens_new, + llama_context * ctx, + int32_t id_slot) { const int lcp_best = prompt.tokens.get_common_prefix(tokens_new); float f_keep_best = float(lcp_best) / prompt.tokens.size(); @@ -1944,7 +1972,7 @@ bool server_prompt_cache::load(server_prompt & prompt, const server_tokens & tok SRV_WRN(" - found better prompt with f_keep = %.3f, sim = %.3f\n", f_keep_best, sim_best); const size_t size = it_best->data.size(); - const size_t n = llama_state_seq_set_data_ext(ctx, it_best->data.data(), size, id_slot, 0); + const size_t n = llama_state_seq_set_data_ext(ctx, it_best->data.data(), size, id_slot, 0); if (n != size) { SRV_WRN("failed to restore state with size %zu\n", size); @@ -1970,7 +1998,8 @@ void server_prompt_cache::update() { break; } - SRV_WRN(" - cache size limit reached, removing oldest entry (size = %.3f MiB)\n", states.front().size() / (1024.0 * 1024.0)); + SRV_WRN(" - cache size limit reached, removing oldest entry (size = %.3f MiB)\n", + states.front().size() / (1024.0 * 1024.0)); states.pop_front(); } @@ -1980,7 +2009,8 @@ void server_prompt_cache::update() { const float size_per_token = std::max(1.0f, float(size()) / (std::max(1, n_tokens()))); // dynamically increase the token limit if it can fit in the memory limit - const size_t limit_tokens_cur = limit_size > 0 ? 
std::max(limit_tokens, limit_size/size_per_token) : limit_tokens; + const size_t limit_tokens_cur = + limit_size > 0 ? std::max(limit_tokens, limit_size / size_per_token) : limit_tokens; if (limit_tokens > 0) { while (states.size() > 1 && n_tokens() > limit_tokens_cur) { @@ -1995,11 +2025,11 @@ void server_prompt_cache::update() { } } - SRV_WRN(" - cache state: %zu prompts, %.3f MiB (limits: %.3f MiB, %zu tokens, %zu est)\n", - states.size(), size() / (1024.0 * 1024.0), limit_size / (1024.0 * 1024.0), limit_tokens, limit_tokens_cur); + SRV_WRN(" - cache state: %zu prompts, %.3f MiB (limits: %.3f MiB, %zu tokens, %zu est)\n", states.size(), + size() / (1024.0 * 1024.0), limit_size / (1024.0 * 1024.0), limit_tokens, limit_tokens_cur); for (const auto & state : states) { - SRV_WRN(" - prompt %p: %7d tokens, checkpoints: %2zu, %9.3f MiB\n", - (const void *)&state, state.n_tokens(), state.checkpoints.size(), state.size() / (1024.0 * 1024.0)); + SRV_WRN(" - prompt %p: %7d tokens, checkpoints: %2zu, %9.3f MiB\n", (const void *) &state, state.n_tokens(), + state.checkpoints.size(), state.size() / (1024.0 * 1024.0)); } } diff --git a/tools/server/server-task.h b/tools/server/server-task.h index a69e8f1a3d..7ccaf3c31b 100644 --- a/tools/server/server-task.h +++ b/tools/server/server-task.h @@ -3,10 +3,10 @@ #include "common.h" #include "llama.h" -#include -#include #include #include +#include +#include // TODO: prevent including the whole server-common.h as we only use server_tokens #include "server-common.h" @@ -30,7 +30,7 @@ enum server_task_type { // TODO: change this to more generic "response_format" to replace the "format_response_*" in server-common enum task_response_type { - TASK_RESPONSE_TYPE_NONE, // llama.cpp native format + TASK_RESPONSE_TYPE_NONE, // llama.cpp native format TASK_RESPONSE_TYPE_OAI_CHAT, TASK_RESPONSE_TYPE_OAI_CMPL, TASK_RESPONSE_TYPE_OAI_RESP, @@ -48,22 +48,23 @@ enum stop_type { struct task_params { bool stream = true; bool include_usage = 
false; - bool cache_prompt = true; // remember the prompt to avoid reprocessing all prompt + bool cache_prompt = true; // remember the prompt to avoid reprocessing all prompt bool return_tokens = false; bool return_progress = false; - int32_t n_keep = 0; // number of tokens to keep from initial prompt - int32_t n_discard = 0; // number of tokens after n_keep that may be discarded when shifting context, 0 defaults to half - int32_t n_predict = -1; // new tokens to predict - int32_t n_indent = 0; // minimum line indentation for the generated text in number of whitespace characters - int32_t n_cmpl = 1; // number of completions to generate from this prompt + int32_t n_keep = 0; // number of tokens to keep from initial prompt + int32_t n_discard = + 0; // number of tokens after n_keep that may be discarded when shifting context, 0 defaults to half + int32_t n_predict = -1; // new tokens to predict + int32_t n_indent = 0; // minimum line indentation for the generated text in number of whitespace characters + int32_t n_cmpl = 1; // number of completions to generate from this prompt - int32_t n_cache_reuse = 0; // min chunk size to attempt reusing from the cache via KV shifting (0 = disabled) + int32_t n_cache_reuse = 0; // min chunk size to attempt reusing from the cache via KV shifting (0 = disabled) - int64_t t_max_prompt_ms = -1; // TODO: implement - int64_t t_max_predict_ms = -1; // if positive, limit the generation phase to this time limit + int64_t t_max_prompt_ms = -1; // TODO: implement + int64_t t_max_predict_ms = -1; // if positive, limit the generation phase to this time limit - std::map lora; // mapping adapter ID -> scale + std::map lora; // mapping adapter ID -> scale std::vector antiprompt; std::vector response_fields; @@ -71,7 +72,7 @@ struct task_params { bool timings_per_token = false; bool post_sampling_probs = false; - struct common_params_sampling sampling; + struct common_params_sampling sampling; struct common_params_speculative speculative; // 
response formatting @@ -84,7 +85,7 @@ struct task_params { common_chat_parser_params chat_parser_params; // Embeddings - int32_t embd_normalize = 2; // (-1=none, 0=max absolute int16, 1=taxicab, 2=Euclidean/L2, >2=p-norm) + int32_t embd_normalize = 2; // (-1=none, 0=max absolute int16, 1=taxicab, 2=Euclidean/L2, >2=p-norm) json format_logit_bias(const std::vector & logit_bias) const; json to_json(bool only_metrics = false) const; @@ -95,9 +96,10 @@ struct task_result_state { // tracking diffs for partial tool calls std::vector diffs; common_chat_parser_params chat_parser_params; - common_chat_msg chat_msg; - std::string generated_text; // append new chunks of generated text here - std::vector generated_tool_call_ids; + common_chat_msg chat_msg; + std::string generated_text; // append new chunks of generated text here + std::vector generated_tool_call_ids; + std::unordered_set sent_tool_call_names; // for OpenAI Responses and Anthropic streaming API: // track output item / content block state across chunks @@ -117,17 +119,17 @@ struct task_result_state { , oai_resp_message_id("msg_" + random_string()) {} // parse partial tool calls and update the internal state - common_chat_msg update_chat_msg( - const std::string & text_added, - bool is_partial, - std::vector & diffs); + common_chat_msg update_chat_msg(const std::string & text_added, + bool is_partial, + std::vector & diffs, + bool filter_tool_calls = false); }; struct server_task { - int id = -1; // to be filled by server_queue + int id = -1; // to be filled by server_queue // TODO @ngxson : remove this field and implement a mapping task_id -> idx in the response_reader - size_t index = 0; // used when there are multiple prompts (batch request) + size_t index = 0; // used when there are multiple prompts (batch request) // used by SERVER_TASK_TYPE_CANCEL int id_target = -1; @@ -157,13 +159,14 @@ struct server_task { std::string filename; std::string filepath; }; + slot_action slot_action; // used by 
SERVER_TASK_TYPE_METRICS bool metrics_reset_bucket = false; // used by SERVER_TASK_TYPE_SET_LORA - std::map set_lora; // mapping adapter ID -> scale + std::map set_lora; // mapping adapter ID -> scale server_task() = default; @@ -203,11 +206,10 @@ struct server_task { } } - static task_params params_from_json_cmpl( - const llama_vocab * vocab, - const common_params & params_base, - const int n_ctx_slot, - const json & data); + static task_params params_from_json_cmpl(const llama_vocab * vocab, + const common_params & params_base, + const int n_ctx_slot, + const json & data); // utility function static std::unordered_set get_list_id(const std::vector & tasks) { @@ -259,50 +261,53 @@ struct result_timings { int32_t cache_n = -1; int32_t prompt_n = -1; - double prompt_ms; - double prompt_per_token_ms; - double prompt_per_second; + double prompt_ms; + double prompt_per_token_ms; + double prompt_per_second; int32_t predicted_n = -1; - double predicted_ms; - double predicted_per_token_ms; - double predicted_per_second; + double predicted_ms; + double predicted_per_token_ms; + double predicted_per_second; // Optional speculative metrics - only included when > 0 - int32_t draft_n = 0; + int32_t draft_n = 0; int32_t draft_n_accepted = 0; json to_json() const; }; struct result_prompt_progress { - int32_t total = 0; - int32_t cache = 0; + int32_t total = 0; + int32_t cache = 0; int32_t processed = 0; - int64_t time_ms = 0; + int64_t time_ms = 0; json to_json() const; }; struct server_task_result { - int id = -1; - int id_slot = -1; + int id = -1; + int id_slot = -1; // TODO @ngxson : remove this field and implement a mapping task_id -> idx in the response_reader - size_t index = 0; // to be used for batched tasks + size_t index = 0; // to be used for batched tasks virtual bool is_error() { // only used by server_task_result_error return false; } + virtual bool is_stop() { // only used by server_task_result_cmpl_* return true; } + virtual void update(task_result_state &) { // 
only used by server_task_result_cmpl_* } - virtual json to_json() = 0; + + virtual json to_json() = 0; virtual ~server_task_result() = default; }; @@ -311,13 +316,15 @@ using server_task_result_ptr = std::unique_ptr; struct completion_token_output { llama_token tok; - float prob; + float prob; std::string text_to_send; + struct prob_info { llama_token tok; std::string txt; - float prob; + float prob; }; + std::vector probs; json to_json(bool post_sampling_probs) const; @@ -327,29 +334,28 @@ struct completion_token_output { static float logarithm(float x); static std::vector str_to_bytes(const std::string & str); - }; struct server_task_result_cmpl_final : server_task_result { - std::string content; + std::string content; llama_tokens tokens; - bool stream; - bool include_usage; + bool stream; + bool include_usage; result_timings timings; - std::string prompt; + std::string prompt; - bool truncated; - int32_t n_decoded; - int32_t n_prompt_tokens; - int32_t n_tokens_cached; - bool has_new_line; + bool truncated; + int32_t n_decoded; + int32_t n_prompt_tokens; + int32_t n_tokens_cached; + bool has_new_line; std::string stopping_word; - stop_type stop = STOP_TYPE_NONE; + stop_type stop = STOP_TYPE_NONE; - bool post_sampling_probs; + bool post_sampling_probs; std::vector probs_output; - std::vector response_fields; + std::vector response_fields; task_params generation_params; @@ -358,7 +364,7 @@ struct server_task_result_cmpl_final : server_task_result { task_response_type res_type = TASK_RESPONSE_TYPE_NONE; std::string oaicompat_model; std::string oaicompat_cmpl_id; - common_chat_msg oaicompat_msg; // to be populated by update() + common_chat_msg oaicompat_msg; // to be populated by update() std::vector oaicompat_msg_diffs; // to be populated by update() bool is_updated = false; @@ -369,7 +375,7 @@ struct server_task_result_cmpl_final : server_task_result { std::string oai_resp_message_id; virtual bool is_stop() override { - return true; // in stream mode, final 
responses are considered stop + return true; // in stream mode, final responses are considered stop } virtual json to_json() override; @@ -407,11 +413,11 @@ struct server_task_result_cmpl_partial : server_task_result { int32_t n_decoded; int32_t n_prompt_tokens; - bool post_sampling_probs; - bool is_progress = false; + bool post_sampling_probs; + bool is_progress = false; completion_token_output prob_output; - result_timings timings; - result_prompt_progress progress; + result_timings timings; + result_prompt_progress progress; // response formatting bool verbose = false; @@ -435,7 +441,7 @@ struct server_task_result_cmpl_partial : server_task_result { bool anthropic_has_reasoning = false; virtual bool is_stop() override { - return false; // in stream mode, partial responses are not considered stop + return false; // in stream mode, partial responses are not considered stop } virtual void update(task_result_state & state) override; @@ -477,24 +483,22 @@ struct server_task_result_rerank : server_task_result { }; struct server_task_result_error : server_task_result { - error_type err_type = ERROR_TYPE_SERVER; + error_type err_type = ERROR_TYPE_SERVER; std::string err_msg; // for ERROR_TYPE_EXCEED_CONTEXT_SIZE int32_t n_prompt_tokens = 0; int32_t n_ctx = 0; - virtual bool is_error() override { - return true; - } + virtual bool is_error() override { return true; } virtual json to_json() override; }; struct server_task_result_metrics : server_task_result { - int n_idle_slots; - int n_processing_slots; - int n_tasks_deferred; + int n_idle_slots; + int n_processing_slots; + int n_tasks_deferred; int64_t t_start; // TODO: somehow reuse server_metrics in the future, instead of duplicating the fields @@ -523,7 +527,7 @@ struct server_task_result_metrics : server_task_result { struct server_task_result_slot_save_load : server_task_result { std::string filename; - bool is_save; // true = save, false = load + bool is_save; // true = save, false = load size_t n_tokens; size_t 
n_bytes; @@ -541,9 +545,10 @@ struct server_task_result_slot_erase : server_task_result { struct server_task_result_get_lora : server_task_result { struct lora { common_adapter_lora_info info; - std::string alora_invocation_string; - llama_tokens alora_invocation_tokens; + std::string alora_invocation_string; + llama_tokens alora_invocation_tokens; }; + std::vector loras; virtual json to_json() override; @@ -559,9 +564,7 @@ struct server_prompt_checkpoint { std::vector data; - size_t size() const { - return data.size(); - } + size_t size() const { return data.size(); } }; struct server_prompt { @@ -581,22 +584,14 @@ struct server_prompt { return res; } - int n_tokens() const { - return tokens.size(); - } + int n_tokens() const { return tokens.size(); } - server_prompt clone() const { - return server_prompt { - tokens.clone(), - data, - checkpoints - }; - } + server_prompt clone() const { return server_prompt{ tokens.clone(), data, checkpoints }; } }; struct server_prompt_cache { server_prompt_cache(int32_t limit_size_mib, size_t limit_tokens) { - this->limit_size = 1024ull*1024ull*(limit_size_mib < 0 ? 0 : limit_size_mib); + this->limit_size = 1024ull * 1024ull * (limit_size_mib < 0 ? 
0 : limit_size_mib); this->limit_tokens = limit_tokens; } From 1662fa5bea355d7c2cb04e06c763f31a687e08f8 Mon Sep 17 00:00:00 2001 From: Piotr Wilkin Date: Wed, 21 Jan 2026 14:28:18 +0100 Subject: [PATCH 06/39] ANOTHER GIANT POST-FIXUP SQUISH --- 1 | 12 + CMakePresets.json | 421 +- common/CMakeLists.txt | 3 +- common/chat-auto-parser-analyzer.cpp | 1461 ---- common/chat-auto-parser-generator.cpp | 553 +- common/chat-auto-parser-helpers.cpp | 1647 +--- common/chat-auto-parser-helpers.h | 139 +- common/chat-auto-parser.h | 181 +- common/chat-diff-analyzer.cpp | 1670 ++++ common/chat-diff-analyzer.h | 347 + common/chat-peg-parser.cpp | 931 +- common/chat-peg-parser.h | 63 +- common/chat.cpp | 176 +- common/chat.h | 9 +- common/jinja/caps.cpp | 108 +- common/json-schema-to-grammar.cpp | 148 +- common/peg-parser.cpp | 127 +- common/peg-parser.h | 28 + docs/autoparser.md | 521 +- ...AI-c4ai-command-r7b-12-2024-tool_use.jinja | 2 +- ...seek-ai-DeepSeek-R1-Distill-Qwen-32B.jinja | 2 +- template.ans | 7774 +++++++++++++++++ tests/CMakeLists.txt | 2 + tests/peg-parser/test-python-dict-parser.cpp | 279 + tests/peg-parser/tests.h | 1 + tests/test-chat-auto-parser.cpp | 1845 ++++ tests/test-chat.cpp | 870 +- tests/test-peg-parser.cpp | 1 + tools/parser/CMakeLists.txt | 9 + tools/parser/debug-template-parser.cpp | 153 +- tools/parser/template-analysis.cpp | 610 ++ 31 files changed, 15067 insertions(+), 5026 deletions(-) create mode 100644 1 delete mode 100644 common/chat-auto-parser-analyzer.cpp create mode 100644 common/chat-diff-analyzer.cpp create mode 100644 common/chat-diff-analyzer.h create mode 100644 template.ans create mode 100644 tests/peg-parser/test-python-dict-parser.cpp create mode 100644 tests/test-chat-auto-parser.cpp create mode 100644 tools/parser/template-analysis.cpp diff --git a/1 b/1 new file mode 100644 index 0000000000..b77756c404 --- /dev/null +++ b/1 @@ -0,0 +1,12 @@ +Unknown option: 2 +Usage: llama-template-analysis [options] + +Options: + --template 
Analyze specific template from test suite (e.g., 'deepseek' or 'DeepSeek-V3.1') + --template-file Analyze custom template file + --all Analyze all templates from test suite + +Examples: + llama-template-analysis --all + llama-template-analysis --template deepseek + llama-template-analysis --template-file my-template.jinja diff --git a/CMakePresets.json b/CMakePresets.json index b5afeb3c0f..accdd72d18 100644 --- a/CMakePresets.json +++ b/CMakePresets.json @@ -1,95 +1,332 @@ { - "version": 4, - "configurePresets": [ - { - "name": "base", - "hidden": true, - "generator": "Ninja", - "binaryDir": "${sourceDir}/build-${presetName}", - "cacheVariables": { - "CMAKE_EXPORT_COMPILE_COMMANDS": "ON", - "CMAKE_INSTALL_RPATH": "$ORIGIN;$ORIGIN/.." + "version": 4, + "configurePresets": [ + { + "name": "base", + "hidden": true, + "generator": "Ninja", + "binaryDir": "${sourceDir}/build-${presetName}", + "cacheVariables": { + "CMAKE_EXPORT_COMPILE_COMMANDS": "ON", + "CMAKE_INSTALL_RPATH": "$ORIGIN;$ORIGIN/.." + } + }, + { + "name": "sycl-base", + "hidden": true, + "generator": "Ninja", + "binaryDir": "${sourceDir}/build-${presetName}", + "cacheVariables": { + "CMAKE_EXPORT_COMPILE_COMMANDS": "ON", + "CMAKE_CXX_COMPILER": "icx", + "CMAKE_C_COMPILER": "cl", + "GGML_SYCL": "ON", + "CMAKE_INSTALL_RPATH": "$ORIGIN;$ORIGIN/.." 
+ } + }, + { + "name": "debug", + "hidden": true, + "cacheVariables": { + "CMAKE_BUILD_TYPE": "Debug" + } + }, + { + "name": "release", + "hidden": true, + "cacheVariables": { + "CMAKE_BUILD_TYPE": "Release" + } + }, + { + "name": "reldbg", + "hidden": true, + "cacheVariables": { + "CMAKE_BUILD_TYPE": "RelWithDebInfo" + } + }, + { + "name": "static", + "hidden": true, + "cacheVariables": { + "GGML_STATIC": "ON" + } + }, + { + "name": "sycl_f16", + "hidden": true, + "cacheVariables": { + "GGML_SYCL_F16": "ON" + } + }, + { + "name": "vulkan", + "hidden": true, + "cacheVariables": { + "GGML_VULKAN": "ON" + } + }, + { + "name": "x64-windows-llvm", + "hidden": true, + "cacheVariables": { + "CMAKE_TOOLCHAIN_FILE": "${sourceDir}/cmake/x64-windows-llvm.cmake" + } + }, + { + "name": "arm64-windows-llvm", + "hidden": true, + "architecture": { + "value": "arm64", + "strategy": "external" + }, + "toolset": { + "value": "host=x64", + "strategy": "external" + }, + "cacheVariables": { + "CMAKE_TOOLCHAIN_FILE": "${sourceDir}/cmake/arm64-windows-llvm.cmake" + } + }, + { + "name": "arm64-apple-clang", + "hidden": true, + "architecture": { + "value": "arm64", + "strategy": "external" + }, + "toolset": { + "value": "host=x64", + "strategy": "external" + }, + "cacheVariables": { + "CMAKE_TOOLCHAIN_FILE": "${sourceDir}/cmake/arm64-apple-clang.cmake" + } + }, + { + "name": "x64-linux-gcc", + "hidden": true, + "cacheVariables": { + "CMAKE_C_COMPILER": "gcc", + "CMAKE_CXX_COMPILER": "g++" + } + }, + { + "name": "x64-linux-gcc-debug", + "inherits": [ + "base", + "x64-linux-gcc", + "debug" + ] + }, + { + "name": "x64-linux-gcc-release", + "inherits": [ + "base", + "x64-linux-gcc", + "release" + ] + }, + { + "name": "x64-linux-gcc-reldbg", + "inherits": [ + "base", + "x64-linux-gcc", + "reldbg" + ] + }, + { + "name": "x64-linux-gcc+static-release", + "inherits": [ + "base", + "x64-linux-gcc", + "release", + "static" + ] + }, + { + "name": "arm64-windows-llvm-debug", + "inherits": [ + "base", 
+ "arm64-windows-llvm", + "debug" + ] + }, + { + "name": "arm64-windows-llvm-release", + "inherits": [ + "base", + "arm64-windows-llvm", + "reldbg" + ] + }, + { + "name": "arm64-windows-llvm+static-release", + "inherits": [ + "base", + "arm64-windows-llvm", + "reldbg", + "static" + ] + }, + { + "name": "arm64-apple-clang-debug", + "inherits": [ + "base", + "arm64-apple-clang", + "debug" + ] + }, + { + "name": "arm64-apple-clang-release", + "inherits": [ + "base", + "arm64-apple-clang", + "reldbg" + ] + }, + { + "name": "arm64-apple-clang+static-release", + "inherits": [ + "base", + "arm64-apple-clang", + "reldbg", + "static" + ] + }, + { + "name": "x64-windows-llvm-debug", + "inherits": [ + "base", + "x64-windows-llvm", + "debug" + ] + }, + { + "name": "x64-windows-llvm-release", + "inherits": [ + "base", + "x64-windows-llvm", + "release" + ] + }, + { + "name": "x64-windows-llvm-reldbg", + "inherits": [ + "base", + "x64-windows-llvm", + "reldbg" + ] + }, + { + "name": "x64-windows-llvm+static-release", + "inherits": [ + "base", + "x64-windows-llvm", + "reldbg", + "static" + ] + }, + { + "name": "x64-windows-msvc-debug", + "inherits": [ + "base", + "debug" + ] + }, + { + "name": "x64-windows-msvc-release", + "inherits": [ + "base", + "reldbg" + ] + }, + { + "name": "x64-windows-msvc+static-release", + "inherits": [ + "base", + "reldbg", + "static" + ] + }, + { + "name": "x64-windows-sycl-debug", + "inherits": [ + "sycl-base", + "debug" + ] + }, + { + "name": "x64-windows-sycl-debug-f16", + "inherits": [ + "sycl-base", + "debug", + "sycl_f16" + ] + }, + { + "name": "x64-windows-sycl-release", + "inherits": [ + "sycl-base", + "release" + ] + }, + { + "name": "x64-windows-sycl-release-f16", + "inherits": [ + "sycl-base", + "release", + "sycl_f16" + ] + }, + { + "name": "x64-windows-vulkan-debug", + "inherits": [ + "base", + "vulkan", + "debug" + ] + }, + { + "name": "x64-windows-vulkan-release", + "inherits": [ + "base", + "vulkan", + "release" + ] + }, + { + "name": 
"ilintar-release", + "hidden": false, + "description": "Release build", + "displayName": "Release build", + "binaryDir": "${sourceDir}/build", + "cacheVariables": { + "GGML_CUDA": "ON", + "GGML_CUDA_FORCE_CUBLAS": "OFF", + "GGML_CUDA_FORCE_MMQ": "OFF", + "GGML_CUDA_FA_ALL_QUANTS": "1", + "CMAKE_CUDA_ARCHITECTURES": "86;120", + "GGML_BLAS": "ON", + "GGML_BLAS_VENDOR": "OpenBLAS", + "GGML_CPU_ALL_VARIANTS": "ON", + "GGML_BACKEND_DL": "ON", + "CMAKE_CUDA_COMPILER": "nvcc" + }, + "inherits": [ + "base", + "release", + "x64-linux-gcc-release" + ] } - }, - { - "name": "sycl-base", - "hidden": true, - "generator": "Ninja", - "binaryDir": "${sourceDir}/build-${presetName}", - "cacheVariables": { - "CMAKE_EXPORT_COMPILE_COMMANDS": "ON", - "CMAKE_CXX_COMPILER": "icx", - "CMAKE_C_COMPILER": "cl", - "GGML_SYCL": "ON", - "CMAKE_INSTALL_RPATH": "$ORIGIN;$ORIGIN/.." + ], + "buildPresets": [ + { + "name": "parallel", + "description": "Parallel build", + "displayName": "Parallel build", + "configurePreset": "ilintar-release", + "jobs": 8 } - }, - { "name": "debug", "hidden": true, "cacheVariables": { "CMAKE_BUILD_TYPE": "Debug" } }, - { "name": "release", "hidden": true, "cacheVariables": { "CMAKE_BUILD_TYPE": "Release" } }, - { "name": "reldbg", "hidden": true, "cacheVariables": { "CMAKE_BUILD_TYPE": "RelWithDebInfo" } }, - { "name": "static", "hidden": true, "cacheVariables": { "GGML_STATIC": "ON" } }, - { "name": "sycl_f16", "hidden": true, "cacheVariables": { "GGML_SYCL_F16": "ON" } }, - { "name": "vulkan", "hidden": true, "cacheVariables": { "GGML_VULKAN": "ON" } }, - - { - "name": "x64-windows-llvm", "hidden": true, - "cacheVariables": { - "CMAKE_TOOLCHAIN_FILE": "${sourceDir}/cmake/x64-windows-llvm.cmake" - } - }, - - { - "name": "arm64-windows-llvm", "hidden": true, - "architecture": { "value": "arm64", "strategy": "external" }, - "toolset": { "value": "host=x64", "strategy": "external" }, - "cacheVariables": { - "CMAKE_TOOLCHAIN_FILE": 
"${sourceDir}/cmake/arm64-windows-llvm.cmake" - } - }, - - { - "name": "arm64-apple-clang", "hidden": true, - "architecture": { "value": "arm64", "strategy": "external" }, - "toolset": { "value": "host=x64", "strategy": "external" }, - "cacheVariables": { - "CMAKE_TOOLCHAIN_FILE": "${sourceDir}/cmake/arm64-apple-clang.cmake" - } - }, - { - "name": "x64-linux-gcc", "hidden": true, - "cacheVariables": { - "CMAKE_C_COMPILER": "gcc", - "CMAKE_CXX_COMPILER": "g++" - } - }, - { "name": "x64-linux-gcc-debug", "inherits": [ "base", "x64-linux-gcc", "debug" ] }, - { "name": "x64-linux-gcc-release", "inherits": [ "base", "x64-linux-gcc", "release" ] }, - { "name": "x64-linux-gcc-reldbg", "inherits": [ "base", "x64-linux-gcc", "reldbg" ] }, - { "name": "x64-linux-gcc+static-release", "inherits": [ "base", "x64-linux-gcc", "release", "static" ] }, - - { "name": "arm64-windows-llvm-debug", "inherits": [ "base", "arm64-windows-llvm", "debug" ] }, - { "name": "arm64-windows-llvm-release", "inherits": [ "base", "arm64-windows-llvm", "reldbg" ] }, - { "name": "arm64-windows-llvm+static-release", "inherits": [ "base", "arm64-windows-llvm", "reldbg", "static" ] }, - - { "name": "arm64-apple-clang-debug", "inherits": [ "base", "arm64-apple-clang", "debug" ] }, - { "name": "arm64-apple-clang-release", "inherits": [ "base", "arm64-apple-clang", "reldbg" ] }, - { "name": "arm64-apple-clang+static-release", "inherits": [ "base", "arm64-apple-clang", "reldbg", "static" ] }, - - { "name": "x64-windows-llvm-debug", "inherits": [ "base", "x64-windows-llvm", "debug" ] }, - { "name": "x64-windows-llvm-release", "inherits": [ "base", "x64-windows-llvm", "release" ] }, - { "name": "x64-windows-llvm-reldbg", "inherits": [ "base", "x64-windows-llvm", "reldbg" ] }, - { "name": "x64-windows-llvm+static-release", "inherits": [ "base", "x64-windows-llvm", "reldbg", "static" ] }, - - { "name": "x64-windows-msvc-debug", "inherits": [ "base", "debug" ] }, - { "name": "x64-windows-msvc-release", 
"inherits": [ "base", "reldbg" ] }, - { "name": "x64-windows-msvc+static-release", "inherits": [ "base", "reldbg", "static" ] }, - - { "name": "x64-windows-sycl-debug", "inherits": [ "sycl-base", "debug" ] }, - { "name": "x64-windows-sycl-debug-f16", "inherits": [ "sycl-base", "debug", "sycl_f16" ] }, - { "name": "x64-windows-sycl-release", "inherits": [ "sycl-base", "release" ] }, - { "name": "x64-windows-sycl-release-f16", "inherits": [ "sycl-base", "release", "sycl_f16" ] }, - - { "name": "x64-windows-vulkan-debug", "inherits": [ "base", "vulkan", "debug" ] }, - { "name": "x64-windows-vulkan-release", "inherits": [ "base", "vulkan", "release" ] } - ] -} + ] +} \ No newline at end of file diff --git a/common/CMakeLists.txt b/common/CMakeLists.txt index 689fd367da..41069a04ef 100644 --- a/common/CMakeLists.txt +++ b/common/CMakeLists.txt @@ -48,10 +48,11 @@ add_library(${TARGET} STATIC arg.cpp arg.h base64.hpp - chat-auto-parser-analyzer.cpp chat-auto-parser-generator.cpp chat-auto-parser-helpers.cpp chat-auto-parser.h + chat-diff-analyzer.cpp + chat-diff-analyzer.h chat-peg-parser.cpp chat-peg-parser.h chat.cpp diff --git a/common/chat-auto-parser-analyzer.cpp b/common/chat-auto-parser-analyzer.cpp deleted file mode 100644 index db6aa2c547..0000000000 --- a/common/chat-auto-parser-analyzer.cpp +++ /dev/null @@ -1,1461 +0,0 @@ -#include "chat-auto-parser-helpers.h" -#include "chat-auto-parser.h" -#include "chat.h" -#include "log.h" -#include "nlohmann/json.hpp" - -using json = nlohmann::ordered_json; - -template_analysis_result template_analyzer::analyze_template(const common_chat_template & tmpl) { - LOG_DBG("=== STARTING UNIFIED TEMPLATE ANALYSIS ===\n"); - - template_analysis_result result; - - // Phase 1: Analyze content and reasoning structure (no tools involved) - result.content = analyze_content_structure(tmpl); - - // Phase 2: Analyze tool call structure (layered on Phase 1) - result.tools = analyze_tool_structure(tmpl, result.content); - - // 
Post-processing: Extract reasoning markers from tool_section_start if Phase 1 didn't detect them - // Some templates (like Command-R7B) include reasoning markers in tool outputs but not in prompts - if (result.content.reasoning_start.empty() && !result.tools.tool_section_start.empty()) { - // Known reasoning end marker patterns that might be embedded in tool_section_start - std::vector> reasoning_patterns = { - { "<|START_THINKING|>", "<|END_THINKING|>" }, - { "<|START_THOUGHT|>", "<|END_THOUGHT|>" }, - { "<|START_REASON|>", "<|END_REASON|>" }, - { "", "" }, - { "", "" }, - }; - - for (const auto & [start_marker, end_marker] : reasoning_patterns) { - size_t end_pos = result.tools.tool_section_start.find(end_marker); - if (end_pos != std::string::npos) { - // Found reasoning end marker in tool_section_start - // Extract it and clean up tool_section_start - result.content.reasoning_start = start_marker; - result.content.reasoning_end = end_marker; - result.content.reasoning_mode = content_structure::REASONING_OPTIONAL; - - // Clean up tool_section_start: remove everything before and including the end marker - size_t after_end = end_pos + end_marker.length(); - if (after_end < result.tools.tool_section_start.length()) { - result.tools.tool_section_start = result.tools.tool_section_start.substr(after_end); - // Trim leading whitespace - size_t first_non_ws = result.tools.tool_section_start.find_first_not_of(" \t\n\r"); - if (first_non_ws != std::string::npos && first_non_ws > 0) { - result.tools.tool_section_start = result.tools.tool_section_start.substr(first_non_ws); - } - } - - LOG_DBG("Post-processing: Extracted reasoning markers from tool_section_start\n"); - LOG_DBG(" reasoning_start: '%s', reasoning_end: '%s'\n", result.content.reasoning_start.c_str(), - result.content.reasoning_end.c_str()); - LOG_DBG(" cleaned tool_section_start: '%s'\n", result.tools.tool_section_start.c_str()); - break; - } - } - } - - // Post-processing: Detect content markers for 
recipient-based format - // For recipient-based format, content is prefixed with tool_call_start_marker + recipient_name + \n - // (e.g., ">>>all\n"). We need to detect and extract this as the content_start marker. - if (result.tools.function_format == tool_call_structure::FUNC_RECIPIENT_BASED && - result.content.content_start.empty() && !result.tools.tool_section_start.empty()) { - // Render template with content only (no tools) to detect the content marker - templates_params inputs; - inputs.messages = { - { { "role", "user" }, { "content", "Hello" } }, - { { "role", "assistant" }, { "content", "ACTUAL_CONTENT_HERE" } } - }; - inputs.add_generation_prompt = true; - - std::string output; - try { - output = common_chat_template_direct_apply(tmpl, inputs); - } catch (...) { - output = ""; - } - - if (!output.empty()) { - // Find where the actual content starts - size_t content_pos = output.find("ACTUAL_CONTENT_HERE"); - - if (content_pos != std::string::npos) { - // For recipient-based format, find the last occurrence of tool_call_start_marker - // before the content. The marker is from that position to the content (including the newline). 
- size_t marker_pos = output.rfind(result.tools.tool_section_start, content_pos); - - if (marker_pos != std::string::npos && marker_pos < content_pos) { - // Find the newline after the marker - size_t newline_pos = output.find('\n', marker_pos); - - if (newline_pos != std::string::npos && newline_pos < content_pos) { - // Extract everything up to and including the newline after the marker - std::string detected_marker = output.substr(marker_pos, newline_pos - marker_pos + 1); - - // Verify the marker starts with tool_call_start_marker - if (detected_marker.find(result.tools.tool_section_start) == 0) { - result.content.content_start = detected_marker; - result.content.content_mode = content_structure::CONTENT_ALWAYS_WRAPPED; - LOG_DBG("Post-processing: Detected recipient-based content marker: '%s'\n", - result.content.content_start.c_str()); - } - } - } - } - } - } - - // Collect preserved tokens from both phases - collect_preserved_tokens(result); - - LOG_DBG("=== UNIFIED TEMPLATE ANALYSIS COMPLETE ===\n"); - LOG_DBG("Content structure:\n"); - LOG_DBG(" reasoning_mode: %d\n", static_cast(result.content.reasoning_mode)); - LOG_DBG(" reasoning_start: '%s'\n", result.content.reasoning_start.c_str()); - LOG_DBG(" reasoning_end: '%s'\n", result.content.reasoning_end.c_str()); - LOG_DBG(" content_mode: %d\n", static_cast(result.content.content_mode)); - LOG_DBG(" content_start: '%s'\n", result.content.content_start.c_str()); - LOG_DBG(" content_end: '%s'\n", result.content.content_end.c_str()); - LOG_DBG("Tool structure:\n"); - LOG_DBG(" supports_tools: %s\n", result.tools.supports_tools ? 
"true" : "false"); - LOG_DBG(" function_format: %d\n", static_cast(result.tools.function_format)); - LOG_DBG(" argument_format: %d\n", static_cast(result.tools.argument_format)); - LOG_DBG(" tool_section_start: '%s'\n", result.tools.tool_section_start.c_str()); - LOG_DBG(" tool_section_end: '%s'\n", result.tools.tool_section_end.c_str()); - - return result; -} - -content_structure template_analyzer::analyze_content_structure(const common_chat_template & tmpl) { - LOG_DBG("=== PHASE 1: ANALYZING CONTENT STRUCTURE ===\n"); - - content_structure cs; - - // Step 1: Detect reasoning markers by toggling enable_thinking - detect_reasoning_markers(tmpl, cs); - - // Step 2: Detect content wrapping markers - detect_content_markers(tmpl, cs); - - // Step 3: Determine reasoning mode (NONE, OPTIONAL, FORCED_OPEN) - templates_params inputs; - inputs.messages = { - { { "role", "user" }, { "content", "Hello" } } - }; - inputs.add_generation_prompt = true; - inputs.enable_thinking = true; - - std::string prompt; - try { - prompt = common_chat_template_direct_apply(tmpl, inputs); - } catch (...) 
{ - LOG_DBG("Failed to render template for reasoning mode detection\n"); - return cs; - } - - cs.reasoning_mode = detect_reasoning_mode(cs, prompt); - - LOG_DBG("Phase 1 complete: reasoning_mode=%d, content_mode=%d\n", static_cast(cs.reasoning_mode), - static_cast(cs.content_mode)); - - return cs; -} - -void template_analyzer::detect_reasoning_markers(const common_chat_template & tmpl, content_structure & cs) { - LOG_DBG("=== DETECTING REASONING MARKERS ===\n"); - - // Method 1: Compare outputs with reasoning_content field present vs absent - json reasoning_msg = { - { "role", "assistant" }, - { "content", "CONTENT_MARKER" }, - { "reasoning_content", "THOUGHT_MARKER" } - }; - - json base_msg = { - { "role", "assistant" }, - { "content", "CONTENT_MARKER" } - }; - - templates_params inputs; - - inputs.messages = { reasoning_msg }; - std::string reasoning_output; - try { - reasoning_output = common_chat_template_direct_apply(tmpl, inputs); - } catch (...) { - LOG_DBG("Failed to render template with reasoning_content\n"); - reasoning_output = ""; - } - - inputs.messages = { base_msg }; - std::string base_output; - try { - base_output = common_chat_template_direct_apply(tmpl, inputs); - } catch (...) 
{ - LOG_DBG("Failed to render base template\n"); - base_output = ""; - } - - // If outputs differ and we can find THOUGHT_MARKER, extract the reasoning markers - if (!reasoning_output.empty() && reasoning_output != base_output) { - size_t thought_pos = reasoning_output.find("THOUGHT_MARKER"); - size_t content_pos = reasoning_output.find("CONTENT_MARKER"); - - if (thought_pos != std::string::npos && content_pos != std::string::npos && content_pos > thought_pos) { - // Extract what's between THOUGHT_MARKER and CONTENT_MARKER as the end marker - size_t thought_end = thought_pos + strlen("THOUGHT_MARKER"); - cs.reasoning_end = reasoning_output.substr(thought_end, content_pos - thought_end); - - // Find what's before THOUGHT_MARKER by comparing with base_output - size_t diff_start = 0; - while (diff_start < base_output.length() && diff_start < reasoning_output.length() && - base_output[diff_start] == reasoning_output[diff_start]) { - diff_start++; - } - - // If diff_start is in the middle of a tag (previous char is '<'), back up to include it - // This handles cases like base="" vs reasoning="" where both share '<' - if (diff_start > 0 && diff_start < reasoning_output.length() && - reasoning_output[diff_start - 1] == '<') { - diff_start--; - } - - if (diff_start < thought_pos) { - cs.reasoning_start = reasoning_output.substr(diff_start, thought_pos - diff_start); - } - - trim_whitespace(cs.reasoning_start); - trim_whitespace(cs.reasoning_end); - - // If we found reasoning_end but not reasoning_start, try to derive it from reasoning_end - // For example: -> , -> <|START_THINKING|> - if (cs.reasoning_start.empty() && !cs.reasoning_end.empty()) { - // First, try to derive directly from the closing tag format - if (cs.reasoning_end.length() > 3 && cs.reasoning_end[0] == '<' && cs.reasoning_end[1] == '/') { - // Standard XML closing tag like -> - size_t tag_end_pos = cs.reasoning_end.find('>'); - if (tag_end_pos != std::string::npos) { - std::string tag_name = 
cs.reasoning_end.substr(2, tag_end_pos - 2); - cs.reasoning_start = "<" + tag_name + ">"; - LOG_DBG("Method 1: Derived reasoning_start from closing tag format\n"); - LOG_DBG(" start: '%s', end: '%s'\n", cs.reasoning_start.c_str(), cs.reasoning_end.c_str()); - } - } else if (cs.reasoning_end.find("<|END_") == 0 || cs.reasoning_end.find("<|/") == 0) { - // Special token format like <|END_THINKING|> -> <|START_THINKING|> - // or <|/think|> -> <|think|> - if (cs.reasoning_end.find("<|END_") == 0) { - std::string core = cs.reasoning_end.substr(6); // Remove "<|END_" - cs.reasoning_start = "<|START_" + core; - } else { - std::string core = cs.reasoning_end.substr(3); // Remove "<|/" - cs.reasoning_start = "<|" + core; - } - LOG_DBG("Method 1: Derived reasoning_start from special token format\n"); - LOG_DBG(" start: '%s', end: '%s'\n", cs.reasoning_start.c_str(), cs.reasoning_end.c_str()); - } - } - - if (!cs.reasoning_start.empty()) { - LOG_DBG("Method 1: Found reasoning markers via reasoning_content field\n"); - LOG_DBG(" start: '%s', end: '%s'\n", cs.reasoning_start.c_str(), cs.reasoning_end.c_str()); - } - } - } - - // Method 2: Compare prompts with enable_thinking true vs false - if (cs.reasoning_start.empty()) { - LOG_DBG("Method 1 failed, trying Method 2 (enable_thinking toggle)\n"); - - json user_msg = { - { "role", "user" }, - { "content", "Hello" } - }; - - templates_params inputs_prompt; - inputs_prompt.messages = { user_msg }; - inputs_prompt.add_generation_prompt = true; - inputs_prompt.enable_thinking = false; - std::string prompt_no_think; - try { - prompt_no_think = common_chat_template_direct_apply(tmpl, inputs_prompt); - } catch (...) { - prompt_no_think = ""; - } - - inputs_prompt.enable_thinking = true; - std::string prompt_think; - try { - prompt_think = common_chat_template_direct_apply(tmpl, inputs_prompt); - } catch (...) 
{ - prompt_think = ""; - } - - if (!prompt_think.empty() && prompt_think != prompt_no_think) { - // Find the difference - this should be the reasoning start marker - size_t diff_pos = 0; - while (diff_pos < prompt_no_think.length() && diff_pos < prompt_think.length() && - prompt_no_think[diff_pos] == prompt_think[diff_pos]) { - diff_pos++; - } - - // Check which direction has extra content - if (prompt_think.length() > prompt_no_think.length()) { - // Normal case: enable_thinking=true adds content (e.g., at the end) - std::string diff = prompt_think.substr(diff_pos); - - // Only use if it looks like a tag - if (diff.find('<') != std::string::npos || diff.find('[') != std::string::npos) { - cs.reasoning_start = diff; - cs.reasoning_end = create_closing_tag(diff); - trim_whitespace(cs.reasoning_start); - trim_whitespace(cs.reasoning_end); - - LOG_DBG("Method 2: Found reasoning markers via enable_thinking toggle\n"); - LOG_DBG(" start: '%s', end: '%s'\n", cs.reasoning_start.c_str(), cs.reasoning_end.c_str()); - } - } else { - // Reverse case: enable_thinking=false adds content (e.g., GLM-4.6 adds ) - // This means the template adds an empty thinking block when thinking is disabled - std::string diff = prompt_no_think.substr(diff_pos); - - // Look for adjacent opening and closing tags like - size_t open_start = diff.find('<'); - if (open_start != std::string::npos) { - size_t open_end = diff.find('>', open_start); - if (open_end != std::string::npos) { - std::string opening_tag = diff.substr(open_start, open_end - open_start + 1); - // Skip if it looks like a role marker - if (opening_tag.find("assistant") == std::string::npos && - opening_tag.find("user") == std::string::npos && - opening_tag.find("system") == std::string::npos) { - std::string expected_close = create_closing_tag(opening_tag); - // Check if the closing tag follows immediately (empty thinking block) - size_t close_pos = diff.find(expected_close, open_end + 1); - if (close_pos != std::string::npos) { - 
// Verify only whitespace between tags - std::string between = diff.substr(open_end + 1, close_pos - open_end - 1); - bool only_ws = true; - for (char c : between) { - if (!std::isspace(static_cast(c))) { - only_ws = false; - break; - } - } - if (only_ws) { - cs.reasoning_start = opening_tag; - cs.reasoning_end = expected_close; - trim_whitespace(cs.reasoning_start); - trim_whitespace(cs.reasoning_end); - - LOG_DBG("Method 2: Found reasoning markers via enable_thinking toggle (reverse)\n"); - LOG_DBG(" start: '%s', end: '%s'\n", cs.reasoning_start.c_str(), - cs.reasoning_end.c_str()); - } - } - } - } - } - } - } - } - - // Method 3: Check if the prompt ends with an unclosed reasoning tag - if (cs.reasoning_start.empty()) { - LOG_DBG("Method 2 failed, trying Method 3 (prompt ending with open tag)\n"); - - json user_msg = { - { "role", "user" }, - { "content", "Hello" } - }; - - templates_params inputs_prompt; - inputs_prompt.messages = { user_msg }; - inputs_prompt.add_generation_prompt = true; - inputs_prompt.enable_thinking = true; - - std::string prompt; - try { - prompt = common_chat_template_direct_apply(tmpl, inputs_prompt); - } catch (...) 
{ - prompt = ""; - } - - if (!prompt.empty()) { - // Save trailing whitespace before trimming - std::string trailing_ws; - size_t end_pos = prompt.length(); - while (end_pos > 0 && (prompt[end_pos - 1] == '\n' || prompt[end_pos - 1] == '\r')) { - trailing_ws = prompt[end_pos - 1] + trailing_ws; - end_pos--; - } - - trim_trailing_newlines(prompt); - - // Find the last tag in the prompt - size_t last_open_angle = prompt.rfind('<'); - size_t last_close_angle = prompt.rfind('>'); - - // Check for closed tags at the end - if (last_open_angle != std::string::npos && last_close_angle != std::string::npos && - last_close_angle == prompt.length() - 1 && last_close_angle > last_open_angle) { - std::string tag = prompt.substr(last_open_angle); - - // Check if this looks like a reasoning tag (not a role marker) - std::vector blacklisted_tags = { - "<|CHATBOT_TOKEN|>", "<|SYSTEM_TOKEN|>", "<|USER_TOKEN|>", "<|ASSISTANT_TOKEN|>", "<|im_start|>", - "<|im_end|>", "<|start_of_role|>", "<|end_of_role|>", "<|end_of_text|>", "<|end|>", - "<|assistant|>", "<|user|>", "<|system|>", "", "", - "" - }; - - bool is_blacklisted = false; - for (const auto & blacklisted : blacklisted_tags) { - if (tag == blacklisted) { - is_blacklisted = true; - break; - } - } - - // Check if it looks like a thinking/reasoning tag - std::string lower_tag = tag; - std::transform(lower_tag.begin(), lower_tag.end(), lower_tag.begin(), ::tolower); - bool looks_like_reasoning = lower_tag.find("think") != std::string::npos || - lower_tag.find("reason") != std::string::npos || - lower_tag.find("thought") != std::string::npos; - - if (!is_blacklisted && looks_like_reasoning) { - // Check if the detected tag is a close tag (starts with when thinking is disabled - bool is_close_tag = (tag.size() > 2 && tag[0] == '<' && tag[1] == '/'); - - if (is_close_tag) { - // The tag is a close tag (e.g., ) - // Derive the open tag by removing the '/' - std::string tag_name = extract_tag_name(tag); // Returns "/think" for - if 
(!tag_name.empty() && tag_name[0] == '/') { - tag_name = tag_name.substr(1); // Remove leading '/' - } - cs.reasoning_start = "<" + tag_name + ">"; - cs.reasoning_end = tag; - trim_whitespace(cs.reasoning_start); - trim_whitespace(cs.reasoning_end); - - LOG_DBG("Method 3: Found reasoning markers via prompt ending with CLOSE tag\n"); - LOG_DBG(" start: '%s', end: '%s'\n", cs.reasoning_start.c_str(), cs.reasoning_end.c_str()); - - // Note: The prompt ends with the close tag, meaning thinking is disabled. - // The reasoning_mode will be set in detect_reasoning_mode() which will - // correctly identify this as NOT forced open since the prompt ends with - // the end marker, not the start marker. - } else { - // Standard case: open tag at the end (e.g., ) - cs.reasoning_start = tag + trailing_ws; - cs.reasoning_end = create_closing_tag(tag) + trailing_ws; - trim_whitespace(cs.reasoning_start); - trim_whitespace(cs.reasoning_end); - - LOG_DBG("Method 3: Found reasoning markers via prompt ending with tag\n"); - LOG_DBG(" start: '%s', end: '%s'\n", cs.reasoning_start.c_str(), cs.reasoning_end.c_str()); - } - } - } - } - } - - // Method 4: Look for adjacent opening/closing tag pairs with common content in prompt - // This detects patterns like , <|START_THINKING|><|END_THINKING|>, [think][/think] - if (cs.reasoning_start.empty()) { - LOG_DBG("Method 3 failed, trying Method 4 (adjacent tag pairs with common content)\n"); - - json user_msg = { - { "role", "user" }, - { "content", "Hello" } - }; - - templates_params inputs_prompt; - inputs_prompt.messages = { user_msg }; - inputs_prompt.add_generation_prompt = true; - // Try with thinking disabled - templates may output empty thinking blocks - inputs_prompt.enable_thinking = false; - - std::string prompt; - try { - prompt = common_chat_template_direct_apply(tmpl, inputs_prompt); - } catch (...) { - prompt = ""; - } - - if (!prompt.empty()) { - // Look for patterns like or ... 
where tag1 and tag2 share a common word - // Common patterns: - // - // <|START_THINKING|><|END_THINKING|> - // [think][/think] - - // Find potential tag pairs by looking for closing tags that immediately follow opening tags - // Pattern: opening tag followed by closing tag with same keyword - std::vector> tag_patterns = { - // (opening pattern, closing pattern, keyword to match) - { "<|START_", "<|END_", "THINKING" }, - { "<|START_", "<|END_", "THOUGHT" }, - { "<|START_", "<|END_", "REASON" }, - { "", "", "" }, - { "", "", "" }, - { "", "", "" }, - { "[think]", "[/think]", "" }, - { "[THINK]", "[/THINK]", "" }, - { "", "", "" }, - { "", "", "" }, - { "<|think|>", "<|/think|>", "" }, - }; - - for (const auto & [open_prefix, close_prefix, keyword] : tag_patterns) { - size_t open_pos = prompt.find(open_prefix); - if (open_pos == std::string::npos) { - continue; - } - - std::string start_tag; - std::string end_tag; - - if (!keyword.empty()) { - // Pattern like <|START_THINKING|><|END_THINKING|> - std::string full_open = open_prefix + keyword; - size_t full_open_pos = prompt.find(full_open); - if (full_open_pos == std::string::npos) { - continue; - } - - // Find the end of this tag (look for |> or >) - size_t tag_end = prompt.find("|>", full_open_pos + full_open.length()); - if (tag_end == std::string::npos) { - tag_end = prompt.find('>', full_open_pos + full_open.length()); - } - if (tag_end == std::string::npos) { - continue; - } - - start_tag = - prompt.substr(full_open_pos, tag_end - full_open_pos + (prompt[tag_end] == '|' ? 
2 : 1)); - - // Look for the corresponding end tag - std::string expected_close = close_prefix + keyword; - size_t close_pos = prompt.find(expected_close, tag_end); - if (close_pos == std::string::npos) { - continue; - } - - // Find end of close tag - size_t close_end = prompt.find("|>", close_pos + expected_close.length()); - if (close_end == std::string::npos) { - close_end = prompt.find('>', close_pos + expected_close.length()); - } - if (close_end == std::string::npos) { - continue; - } - - end_tag = prompt.substr(close_pos, close_end - close_pos + (prompt[close_end] == '|' ? 2 : 1)); - } else { - // Simple pattern like - start_tag = open_prefix; - size_t close_pos = prompt.find(close_prefix, open_pos + start_tag.length()); - if (close_pos == std::string::npos) { - continue; - } - end_tag = close_prefix; - } - - // Verify the tags are adjacent or nearly adjacent (only whitespace between) - size_t start_end_pos = prompt.find(start_tag) + start_tag.length(); - size_t end_start_pos = prompt.find(end_tag, start_end_pos); - if (end_start_pos != std::string::npos) { - std::string between = prompt.substr(start_end_pos, end_start_pos - start_end_pos); - // Allow only whitespace between the tags (empty thinking block) - bool only_whitespace = true; - for (char c : between) { - if (!std::isspace(static_cast(c))) { - only_whitespace = false; - break; - } - } - - if (only_whitespace) { - cs.reasoning_start = start_tag; - cs.reasoning_end = end_tag; - LOG_DBG("Method 4: Found reasoning markers via adjacent tag pairs\n"); - LOG_DBG(" start: '%s', end: '%s'\n", cs.reasoning_start.c_str(), cs.reasoning_end.c_str()); - break; - } - } - } - } - } - - if (cs.reasoning_start.empty()) { - LOG_DBG("No reasoning markers detected\n"); - } -} - -void template_analyzer::detect_content_markers(const common_chat_template & tmpl, content_structure & cs) { - LOG_DBG("=== DETECTING CONTENT MARKERS ===\n"); - - // Render template with a unique content marker - json user_msg = { - { "role", 
"user" }, - { "content", "Hello" } - }; - json assistant_msg = { - { "role", "assistant" }, - { "content", "UNIQUE_CONTENT_12345" } - }; - - templates_params inputs; - inputs.messages = { user_msg, assistant_msg }; - // Try with thinking enabled first (some templates only wrap content when reasoning is present) - inputs.extra_context["thinking"] = true; - inputs.enable_thinking = true; - - std::string output_with_thinking; - try { - output_with_thinking = common_chat_template_direct_apply(tmpl, inputs); - } catch (...) { - output_with_thinking = ""; - } - - // Also render without thinking - inputs.extra_context["thinking"] = false; - inputs.enable_thinking = false; - - std::string output_no_thinking; - try { - output_no_thinking = common_chat_template_direct_apply(tmpl, inputs); - } catch (...) { - output_no_thinking = ""; - } - - // Check both outputs for content markers - auto find_content_markers = [&](const std::string & output) -> std::pair { - size_t marker_pos = output.find("UNIQUE_CONTENT_12345"); - if (marker_pos == std::string::npos) { - return { "", "" }; - } - - // Known content marker patterns - std::vector> patterns = { - { "<|START_RESPONSE|>", "<|END_RESPONSE|>" }, - { "<|response|>", "<|/response|>" }, - { "", "" }, - { "", "" }, - { "", "" }, - { "<|CHATBOT_TOKEN|>", "<|END_OF_TURN_TOKEN|>" }, - }; - - for (const auto & [start_pattern, end_pattern] : patterns) { - size_t start_pos = output.rfind(start_pattern, marker_pos); - if (start_pos != std::string::npos) { - // Check that there's only whitespace between the start pattern and our marker - std::string between = - output.substr(start_pos + start_pattern.length(), marker_pos - start_pos - start_pattern.length()); - size_t first_non_ws = between.find_first_not_of(" \t\n\r"); - if (first_non_ws == std::string::npos) { - // Found valid start marker, look for end marker - size_t marker_end = marker_pos + strlen("UNIQUE_CONTENT_12345"); - size_t end_pos = output.find(end_pattern, marker_end); - if 
(end_pos != std::string::npos) { - std::string after = output.substr(marker_end, end_pos - marker_end); - size_t first_non_ws_after = after.find_first_not_of(" \t\n\r"); - if (first_non_ws_after == std::string::npos) { - return { start_pattern, end_pattern }; - } - } - } - } - } - - return { "", "" }; - }; - - auto [start_with_thinking, end_with_thinking] = find_content_markers(output_with_thinking); - auto [start_no_thinking, end_no_thinking] = find_content_markers(output_no_thinking); - - if (!start_with_thinking.empty() && !start_no_thinking.empty()) { - // Content is always wrapped - cs.content_mode = content_structure::CONTENT_ALWAYS_WRAPPED; - cs.content_start = start_with_thinking; - cs.content_end = end_with_thinking; - LOG_DBG("Content markers found in both thinking modes (ALWAYS_WRAPPED)\n"); - } else if (!start_with_thinking.empty() && start_no_thinking.empty()) { - // Content is wrapped only when reasoning is present - cs.content_mode = content_structure::CONTENT_WRAPPED_WITH_REASONING; - cs.content_start = start_with_thinking; - cs.content_end = end_with_thinking; - LOG_DBG("Content markers found only with thinking enabled (WRAPPED_WITH_REASONING)\n"); - } else if (!start_no_thinking.empty()) { - // Unusual: content wrapped without thinking but not with? 
Use what we found - cs.content_mode = content_structure::CONTENT_ALWAYS_WRAPPED; - cs.content_start = start_no_thinking; - cs.content_end = end_no_thinking; - LOG_DBG("Content markers found only without thinking (treating as ALWAYS_WRAPPED)\n"); - } else { - cs.content_mode = content_structure::CONTENT_PLAIN; - LOG_DBG("No content markers detected (PLAIN)\n"); - } - - LOG_DBG("Content markers: start='%s', end='%s'\n", cs.content_start.c_str(), cs.content_end.c_str()); -} - -content_structure::reasoning_mode_type template_analyzer::detect_reasoning_mode(const content_structure & cs, - const std::string & prompt) { - LOG_DBG("=== DETECTING REASONING MODE ===\n"); - - // If both markers are empty, mode is NONE - if (cs.reasoning_start.empty() && cs.reasoning_end.empty()) { - LOG_DBG("No reasoning markers, mode=REASONING_NONE\n"); - return content_structure::REASONING_NONE; - } - - // Handle case with end marker but no start marker (implicit start) - if (cs.reasoning_start.empty() && !cs.reasoning_end.empty()) { - LOG_DBG("Reasoning end marker present but no start marker, mode=REASONING_FORCED_OPEN\n"); - return content_structure::REASONING_FORCED_OPEN; - } - - // Check if the prompt ends with the reasoning start marker (forced open) - std::string trimmed_prompt = prompt; - trim_trailing_newlines(trimmed_prompt); - - std::string trimmed_marker = cs.reasoning_start; - trim_whitespace(trimmed_marker); - - if (string_ends_with(trimmed_prompt, trimmed_marker)) { - LOG_DBG("Prompt ends with reasoning start marker, mode=REASONING_FORCED_OPEN\n"); - return content_structure::REASONING_FORCED_OPEN; - } - - // Otherwise, reasoning is optional - LOG_DBG("Reasoning markers present but not forced, mode=REASONING_OPTIONAL\n"); - return content_structure::REASONING_OPTIONAL; -} - -tool_call_structure template_analyzer::analyze_tool_structure(const common_chat_template & tmpl, - const content_structure & content) { - (void) content; // May be used in future for better tool detection 
- - LOG_DBG("=== PHASE 2: ANALYZING TOOL STRUCTURE ===\n"); - - tool_call_structure ts; - - // Use differential analysis to detect tool patterns - // This now includes a robust test that renders two payloads: - // 1. Tool definitions + content only - // 2. Tool definitions + content + tool calls - // If outputs are identical, the template doesn't support tool calls - auto discovered = analyze_by_differential(tmpl); - auto format = determine_format_from_patterns(discovered); - - // Strip EOS tokens from discovered patterns (handles both standard <|eos|> and fullwidth <|end▁of▁sentence|>) - if (!discovered.tool_call_closer.empty()) { - LOG_DBG("Before stripping: tool_call_closer='%s' (len=%zu)\n", discovered.tool_call_closer.c_str(), - discovered.tool_call_closer.length()); - discovered.tool_call_closer = strip_eos_token(discovered.tool_call_closer); - LOG_DBG("After stripping: tool_call_closer='%s'\n", discovered.tool_call_closer.c_str()); - } - if (!discovered.tool_call_end_marker.empty()) { - discovered.tool_call_end_marker = strip_eos_token(discovered.tool_call_end_marker); - } - - if (format == FORMAT_UNKNOWN) { - LOG_DBG("Template does not support tool calls (differential analysis returned no patterns)\n"); - ts.supports_tools = false; - return ts; - } - - // Propagate requires_nonnull_content flag from differential analysis - ts.requires_nonnull_content = discovered.requires_nonnull_content; - if (ts.requires_nonnull_content) { - LOG_DBG("Template requires non-null content (renders null as 'None')\n"); - } - - // Check if minja reports tool call support (for informational purposes) - auto caps = tmpl.original_caps(); - if (!caps.supports_tool_calls) { - LOG_DBG("Note: minja caps indicate no tool support, but differential analysis found patterns\n"); - } - - if (format == FORMAT_JSON_NATIVE) { - analyze_json_format(ts, discovered); - } else if (format == FORMAT_XML_CONSTRUCTED) { - analyze_xml_format(ts, discovered); - } else if (format == FORMAT_BRACKET_TAG) { 
- analyze_bracket_tag_format(ts, discovered); - } else if (format == FORMAT_RECIPIENT_BASED) { - analyze_recipient_based_format(ts, discovered); - } else if (format == FORMAT_MARKDOWN_CODE_BLOCK) { - analyze_markdown_code_block_format(ts, discovered); - } - - return ts; -} - -void template_analyzer::collect_preserved_tokens(template_analysis_result & result) { - LOG_DBG("=== COLLECTING PRESERVED TOKENS ===\n"); - - std::vector tokens; - - // Add reasoning markers - if (!result.content.reasoning_start.empty()) { - tokens.push_back(result.content.reasoning_start); - } - if (!result.content.reasoning_end.empty()) { - tokens.push_back(result.content.reasoning_end); - } - - // Add content markers - if (!result.content.content_start.empty()) { - tokens.push_back(result.content.content_start); - } - if (!result.content.content_end.empty()) { - tokens.push_back(result.content.content_end); - } - - // Add tool section markers - if (!result.tools.tool_section_start.empty()) { - tokens.push_back(result.tools.tool_section_start); - } - if (!result.tools.tool_section_end.empty()) { - tokens.push_back(result.tools.tool_section_end); - } - - // Add function markers for tag-based formats - if (result.tools.function_format == tool_call_structure::FUNC_TAG_WITH_NAME) { - if (!result.tools.function_prefix.empty()) { - tokens.push_back(result.tools.function_prefix); - } - if (!result.tools.function_close.empty()) { - tokens.push_back(result.tools.function_close); - } - } - - // Add markers for prefixed-indexed formats (e.g., Kimi-K2) - if (result.tools.function_format == tool_call_structure::FUNC_PREFIXED_INDEXED) { - if (!result.tools.per_call_start.empty()) { - tokens.push_back(result.tools.per_call_start); - } - if (!result.tools.args_marker.empty()) { - tokens.push_back(result.tools.args_marker); - } - if (!result.tools.per_call_end.empty()) { - tokens.push_back(result.tools.per_call_end); - } - } - - // Add argument markers for tagged formats - if (result.tools.argument_format == 
tool_call_structure::ARGS_TAGGED) { - if (!result.tools.arg_prefix.empty()) { - tokens.push_back(result.tools.arg_prefix); - } - if (!result.tools.arg_close.empty()) { - tokens.push_back(result.tools.arg_close); - } - } - - // Add markers for markdown code block format (Cohere Command-R Plus) - if (result.tools.function_format == tool_call_structure::FUNC_MARKDOWN_CODE_BLOCK) { - if (!result.tools.code_block_marker.empty()) { - tokens.push_back(result.tools.code_block_marker); - } - if (!result.tools.tool_section_end.empty()) { - tokens.push_back(result.tools.tool_section_end); // Closing code fence ``` - } - } - - result.preserved_tokens = tokens; - LOG_DBG("Collected %zu preserved tokens\n", tokens.size()); -} - -void template_analyzer::analyze_json_format(tool_call_structure & ts, const internal_discovered_pattern & discovered) { - ts.supports_tools = true; - ts.function_format = tool_call_structure::FUNC_JSON_OBJECT; - ts.argument_format = tool_call_structure::ARGS_JSON; - ts.tool_section_start = discovered.tool_call_start_marker; - ts.tool_section_end = discovered.tool_call_end_marker; - ts.name_field = discovered.tool_name_field; - ts.args_field = discovered.tool_args_field; - ts.id_field = discovered.tool_id_field; - - // Check for FUNC_NAME_AS_KEY format (e.g. 
Apertus: {"function_name": args}) - // This is characterized by the opener ending in {" and no explicit name field found yet - if (!discovered.tool_call_opener.empty() && discovered.tool_call_opener.length() >= 2 && - discovered.tool_call_opener.substr(discovered.tool_call_opener.length() - 2) == "{\"") { - LOG_DBG("Detected FUNC_NAME_AS_KEY format from tool_call_opener ending in '{\"' \n"); - ts.function_format = tool_call_structure::FUNC_NAME_AS_KEY; - } - - // For JSON_NATIVE format, clean up tool_section_end to only include the closing tag - // The differential analysis may include JSON closing braces (e.g., "}}\n") - // but the parser handles JSON separately, so we only need the tag marker - if (!ts.tool_section_end.empty()) { - size_t tag_start = ts.tool_section_end.find("', tag_start); - if (tag_end != std::string::npos) { - // Check if there is a closing bracket ']' before the tag - size_t bracket_pos = ts.tool_section_end.rfind(']', tag_start); - if (bracket_pos != std::string::npos) { - // Include the bracket - ts.tool_section_end = ts.tool_section_end.substr(bracket_pos, tag_end - bracket_pos + 1); - } else { - ts.tool_section_end = ts.tool_section_end.substr(tag_start, tag_end - tag_start + 1); - } - } - } else { - // Try other closing patterns like ]<|END_ACTION|> - tag_start = ts.tool_section_end.find("<|"); - if (tag_start != std::string::npos) { - size_t tag_end = ts.tool_section_end.find("|>", tag_start); - if (tag_end != std::string::npos) { - // Include the opening bracket if present - size_t bracket_pos = ts.tool_section_end.rfind(']', tag_start); - if (bracket_pos != std::string::npos && bracket_pos + 1 == tag_start) { - ts.tool_section_end = ts.tool_section_end.substr(bracket_pos, tag_end - bracket_pos + 2); - } else { - ts.tool_section_end = ts.tool_section_end.substr(tag_start, tag_end - tag_start + 2); - } - } - } - } - } -} - -void template_analyzer::analyze_xml_format(tool_call_structure & ts, const internal_discovered_pattern & 
discovered) { - ts.supports_tools = true; - ts.function_format = tool_call_structure::FUNC_TAG_WITH_NAME; - ts.tool_section_start = discovered.tool_call_start_marker; - ts.tool_section_end = discovered.tool_call_end_marker; - - // Extract function tag patterns - if (!discovered.function_opener.empty()) { - char first = discovered.function_opener[0]; - if (first != '<' && first != '{' && first != '[') { - // Non-XML/JSON prefix format (e.g., ">>>", "##", etc.) - // Function name follows prefix directly, ends with newline - ts.function_prefix = discovered.function_opener; - ts.function_suffix = "\n"; // Function name typically ends with newline - ts.function_close = ""; // No closing tag for prefix formats - } else { - size_t eq_pos = discovered.function_opener.find('='); - if (eq_pos != std::string::npos) { - // Check if there's a quote after the equals sign - if (eq_pos + 1 < discovered.function_opener.length() && - (discovered.function_opener[eq_pos + 1] == '"' || discovered.function_opener[eq_pos + 1] == '\'')) { - ts.function_prefix = discovered.function_opener.substr(0, eq_pos + 2); - } else { - ts.function_prefix = discovered.function_opener.substr(0, eq_pos + 1); - } - ts.function_suffix = discovered.function_name_suffix; - - // For formats like {args}, where function_prefix - // IS the section start (no separate wrapper), tool_section_end is the function close. - // But for nested formats like ..., - // the function_close is separate from tool_section_end. - // We detect the non-nested case when tool_section_start matches function_prefix - // (or tool_section_start was already cleared because it matched). 
- bool section_start_matches_prefix = ts.tool_section_start.empty() || - ts.tool_section_start.find(ts.function_prefix) == 0 || - ts.function_prefix.find(ts.tool_section_start) == 0; - if (section_start_matches_prefix && ts.function_prefix.find('<') == 0 && !ts.tool_section_end.empty() && - ts.tool_section_end.find("functions.name:0<|tool_call_argument_begin|> - size_t namespace_dot = discovered.function_opener.rfind('.'); - bool has_namespace = - (namespace_dot != std::string::npos && namespace_dot == discovered.function_opener.length() - 1); - - bool has_index = - (!discovered.function_name_suffix.empty() && discovered.function_name_suffix[0] == ':' && - discovered.function_name_suffix.length() > 1 && - std::isdigit(static_cast(discovered.function_name_suffix[1]))); - - if (has_namespace && has_index) { - LOG_DBG("Detected FUNC_PREFIXED_INDEXED format: namespace ends with '.', suffix has ':N' index\n"); - ts.function_format = tool_call_structure::FUNC_PREFIXED_INDEXED; - - // Split function_opener into per_call_start and function_namespace - // e.g., "<|tool_call_begin|>functions." -> "<|tool_call_begin|>" + "functions." - // Find where the namespace starts (after the last '>' before the '.') - size_t namespace_start = discovered.function_opener.rfind('>'); - if (namespace_start != std::string::npos && namespace_start < namespace_dot) { - ts.per_call_start = discovered.function_opener.substr(0, namespace_start + 1); - ts.function_namespace = discovered.function_opener.substr(namespace_start + 1); - } else { - // Fallback: namespace is just the part ending with '.' 
- ts.per_call_start = discovered.function_opener.substr(0, namespace_dot); - ts.function_namespace = "."; - } - - // Extract args_marker from function_name_suffix - // Format: ":0<|some_marker|>" -> index is ":0", args_marker is "<|some_marker|>" - size_t args_marker_start = discovered.function_name_suffix.find('<'); - if (args_marker_start != std::string::npos) { - size_t args_marker_end = discovered.function_name_suffix.find('>', args_marker_start); - if (args_marker_end != std::string::npos) { - ts.args_marker = discovered.function_name_suffix.substr( - args_marker_start, args_marker_end - args_marker_start + 1); - } - } - - // Derive per_call_end from tool_call_closer by finding corresponding end marker - // tool_call_closer contains per_call_end + tool_section_end - // We find per_call_end by looking for a marker that structurally matches per_call_start - if (!discovered.tool_call_closer.empty() && !ts.per_call_start.empty()) { - // Extract structural pattern from per_call_start - // e.g., "<|tool_call_begin|>" -> look for "<|tool_call_...|>" in closer - size_t start_marker_begin = ts.per_call_start.find("<|"); - size_t start_marker_end = ts.per_call_start.rfind("|>"); - if (start_marker_begin != std::string::npos && start_marker_end != std::string::npos) { - // Find the base pattern (e.g., "<|tool_call" from "<|tool_call_begin|>") - std::string start_content = ts.per_call_start.substr( - start_marker_begin + 2, start_marker_end - start_marker_begin - 2); - // Find a related marker in the closer - size_t closer_pos = discovered.tool_call_closer.find("<|"); - while (closer_pos != std::string::npos) { - size_t closer_end = discovered.tool_call_closer.find("|>", closer_pos); - if (closer_end != std::string::npos) { - std::string candidate = - discovered.tool_call_closer.substr(closer_pos, closer_end - closer_pos + 2); - // Check if this marker shares a common prefix with per_call_start - // (ignoring _begin vs _end suffix differences) - std::string 
candidate_content = candidate.substr(2, candidate.length() - 4); - // Find common prefix between start_content and candidate_content - size_t common_len = 0; - while (common_len < start_content.length() && - common_len < candidate_content.length() && - start_content[common_len] == candidate_content[common_len]) { - common_len++; - } - // If substantial overlap (>50%), this is likely the per_call_end - if (common_len > start_content.length() / 2 && - candidate_content.find("end") != std::string::npos) { - ts.per_call_end = candidate; - break; - } - } - closer_pos = discovered.tool_call_closer.find("<|", closer_pos + 1); - } - } - } - - // Derive tool_section_end from tool_section_start by finding matching end marker - // For FUNC_PREFIXED_INDEXED, we always derive this to get the correct marker - // (the default discovered.tool_call_end_marker may contain extra content) - if (!ts.tool_section_start.empty()) { - size_t start_marker_begin = ts.tool_section_start.find("<|"); - size_t start_marker_end = ts.tool_section_start.rfind("|>"); - if (start_marker_begin != std::string::npos && start_marker_end != std::string::npos) { - std::string start_content = ts.tool_section_start.substr( - start_marker_begin + 2, start_marker_end - start_marker_begin - 2); - size_t closer_pos = discovered.tool_call_closer.find("<|"); - while (closer_pos != std::string::npos) { - size_t closer_end = discovered.tool_call_closer.find("|>", closer_pos); - if (closer_end != std::string::npos) { - std::string candidate = - discovered.tool_call_closer.substr(closer_pos, closer_end - closer_pos + 2); - std::string candidate_content = candidate.substr(2, candidate.length() - 4); - size_t common_len = 0; - while (common_len < start_content.length() && - common_len < candidate_content.length() && - start_content[common_len] == candidate_content[common_len]) { - common_len++; - } - if (common_len > start_content.length() / 2 && - candidate_content.find("end") != std::string::npos) { - 
ts.tool_section_end = candidate; - break; - } - } - closer_pos = discovered.tool_call_closer.find("<|", closer_pos + 1); - } - } - } - - LOG_DBG( - "FUNC_PREFIXED_INDEXED: per_call_start='%s', namespace='%s', args_marker='%s', " - "per_call_end='%s'\n", - ts.per_call_start.c_str(), ts.function_namespace.c_str(), ts.args_marker.c_str(), - ts.per_call_end.c_str()); - } else { - // Other formats like <|tool_call_begin|>name (non-indexed) - // Use function_opener as default, but try to use full tool_call_opener if it contains more - ts.function_prefix = discovered.function_opener; - LOG_DBG("Initial function_prefix: '%s', tool_call_opener: '%s', tool_section_start: '%s'\n", - ts.function_prefix.c_str(), discovered.tool_call_opener.c_str(), - ts.tool_section_start.c_str()); - if (!ts.tool_section_start.empty() && - discovered.tool_call_opener.find(ts.tool_section_start) == 0) { - std::string remainder = discovered.tool_call_opener.substr(ts.tool_section_start.length()); - LOG_DBG("Derived remainder: '%s'\n", remainder.c_str()); - if (remainder.length() > ts.function_prefix.length()) { - ts.function_prefix = remainder; - } - } - ts.function_suffix = discovered.function_name_suffix; - ts.function_close = discovered.function_closer; - } - } - } - } - - // Fix for templates where tool_section_start matches function_prefix (double wrapping) - // e.g. Functionary: tool_section_start="<|tool▁call▁begin|>function - // We need to derive tool_section_end from the outer marker pattern - if (ts.function_suffix.find("```") != std::string::npos && !ts.tool_section_start.empty()) { - // Check if tool_section_start contains nested markers (both outer and per-call) - // Pattern: ... 
- // We look for "calls" pattern which indicates an outer container - size_t calls_pos = ts.tool_section_start.find("calls"); - if (calls_pos != std::string::npos && calls_pos < ts.tool_section_start.length()) { - // Find where the outer marker ends (after the first >) - size_t first_close = ts.tool_section_start.find('>', calls_pos); - if (first_close != std::string::npos && first_close < ts.tool_section_start.length() - 1) { - // Extract the outer marker (e.g., "<|tool▁calls▁begin|>") - std::string outer_start = ts.tool_section_start.substr(0, first_close + 1); - // Derive the outer end marker by replacing "begin" with "end" - size_t begin_pos = outer_start.find("begin"); - if (begin_pos != std::string::npos) { - std::string outer_end = - outer_start.substr(0, begin_pos) + "end" + outer_start.substr(begin_pos + 5); - ts.tool_section_end = outer_end; - - // Strip outer marker from function_prefix and function_opener if they were combined - if (ts.tool_section_start.find(outer_start) == 0) { - std::string remainder = ts.tool_section_start.substr(outer_start.length()); - // Trim leading whitespace from remainder - size_t first_non_ws = remainder.find_first_not_of(" \t\n\r"); - if (first_non_ws != std::string::npos && first_non_ws > 0) { - remainder = remainder.substr(first_non_ws); - } - - // Concatenate with existing function_prefix (e.g. 
separator tag) - // but avoid double-concatenation if already present - if (!remainder.empty() && ts.function_prefix.find(remainder) == std::string::npos) { - ts.function_prefix = remainder + ts.function_prefix; - } - } - - // Update tool_section_start to be just the outer marker - ts.tool_section_start = outer_start; - - // Check if there's a fence in tool_call_closer that should be in function_close - // (DeepSeek R1 wraps JSON in markdown blocks within the custom tags) - if (discovered.tool_call_closer.find("```") != std::string::npos) { - size_t fence_pos = discovered.tool_call_closer.find("```"); - // Include leading newlines if present before the fence - while (fence_pos > 0 && (discovered.tool_call_closer[fence_pos - 1] == '\n' || - discovered.tool_call_closer[fence_pos - 1] == '\r')) { - fence_pos--; - } - ts.function_close = discovered.tool_call_closer.substr(fence_pos); - - // Clip function_close to not include tool_section_end (if they were combined in differential analysis) - if (!ts.tool_section_end.empty()) { - size_t end_pos = ts.function_close.find(ts.tool_section_end); - if (end_pos != std::string::npos) { - ts.function_close = ts.function_close.substr(0, end_pos); - } - } - - // Further trim any trailing EOS or prompt garbage - ts.function_close = strip_eos_token(ts.function_close); - size_t prompt_garbage = ts.function_close.find("<|"); - if (prompt_garbage != std::string::npos && prompt_garbage > 0 && - ts.function_close.substr(prompt_garbage).find("Assistant") != std::string::npos) { - ts.function_close = ts.function_close.substr(0, prompt_garbage); - } - } - } - } - } - } - - // General cleanup for tool_section_end when tool_section_start uses token markers (<|...|> or <|...|>) - // If tool_section_start contains a token marker with "begin" and tool_section_end is messy (contains } - // or multiple markers), derive tool_section_end by finding matching end marker in tool_call_closer - if (!ts.tool_section_start.empty() && 
!discovered.tool_call_closer.empty()) { - // Check if tool_section_start contains a token marker - size_t start_opener_pos = find_token_opener(ts.tool_section_start, 0); - size_t start_closer_pos = find_token_closer(ts.tool_section_start, start_opener_pos); - if (start_opener_pos != std::string::npos && start_closer_pos != std::string::npos) { - size_t opener_len = get_token_opener_length(ts.tool_section_start, start_opener_pos); - // Extract the token content (between opener and closer) - std::string start_content = ts.tool_section_start.substr(start_opener_pos + opener_len, - start_closer_pos - start_opener_pos - opener_len); - - // Check if tool_section_end needs cleanup (starts with } or contains multiple markers) - bool needs_cleanup = false; - if (!ts.tool_section_end.empty() && ts.tool_section_end[0] == '}') { - needs_cleanup = true; - } - // Count tokens in tool_section_end - size_t token_count = 0; - size_t pos = 0; - while ((pos = find_token_opener(ts.tool_section_end, pos)) != std::string::npos) { - token_count++; - pos += get_token_opener_length(ts.tool_section_end, pos); - } - if (token_count > 1) { - needs_cleanup = true; - } - - if (needs_cleanup) { - // Find matching end marker in tool_call_closer - // Look for a token that has similar content but with "end" instead of "begin" - pos = 0; - while ((pos = find_token_opener(discovered.tool_call_closer, pos)) != std::string::npos) { - size_t end_closer_pos = find_token_closer(discovered.tool_call_closer, pos); - if (end_closer_pos != std::string::npos) { - size_t op_len = get_token_opener_length(discovered.tool_call_closer, pos); - size_t cl_len = get_token_closer_length(discovered.tool_call_closer, end_closer_pos); - std::string candidate = discovered.tool_call_closer.substr(pos, end_closer_pos + cl_len - pos); - std::string candidate_content = - discovered.tool_call_closer.substr(pos + op_len, end_closer_pos - pos - op_len); - - // Check if this candidate matches our start marker structure - // Start 
content might be "tool▁calls▁begin" and candidate might be "tool▁calls▁end" - size_t begin_in_start = start_content.find("begin"); - size_t end_in_candidate = candidate_content.find("end"); - if (begin_in_start != std::string::npos && end_in_candidate != std::string::npos) { - // Check if they share a common prefix (e.g., "tool▁calls▁") - std::string start_base = start_content.substr(0, begin_in_start); - std::string cand_base = candidate_content.substr(0, end_in_candidate); - if (start_base == cand_base) { - ts.tool_section_end = candidate; - LOG_DBG( - "Derived tool_section_end='%s' from tool_section_start='%s' using token matching\n", - ts.tool_section_end.c_str(), ts.tool_section_start.c_str()); - break; - } - } - } - pos += get_token_opener_length(discovered.tool_call_closer, pos); - } - } - } - } - - // Determine argument format - if (!discovered.parameter_key_prefix.empty() && discovered.parameter_key_prefix.find('<') != std::string::npos) { - ts.argument_format = tool_call_structure::ARGS_TAGGED; - ts.arg_prefix = discovered.parameter_key_prefix; - ts.arg_suffix = discovered.parameter_key_suffix; - ts.arg_close = discovered.parameter_closer; - ts.arg_separator = discovered.argument_separator; - - // Check for specific GLM-4 style key-value tags - // Format: key\nvalue - // Analyzer detects suffix as: \n - if (ts.arg_suffix.find("") != std::string::npos) { - ts.argument_format = tool_call_structure::ARGS_KEY_VALUE_TAGS; - - // Clean up suffix to be just the key closer - size_t val_opener = ts.arg_suffix.find(""); - if (val_opener != std::string::npos) { - // Extract just the part (trimming whitespace/newlines before ) - std::string key_closer = ts.arg_suffix.substr(0, val_opener); - // Trim trailing whitespace/newlines - while (!key_closer.empty() && - (key_closer.back() == '\n' || key_closer.back() == '\r' || key_closer.back() == ' ')) { - key_closer.pop_back(); - } - ts.arg_suffix = key_closer; - } - } - } else { - ts.argument_format = 
tool_call_structure::ARGS_JSON; - } - - LOG_DBG("%s: final markers: section_start='%s', section_end='%s', prefix='%s', close='%s'\n", __func__, - ts.tool_section_start.c_str(), ts.tool_section_end.c_str(), ts.function_prefix.c_str(), - ts.function_close.c_str()); -} - -void template_analyzer::analyze_bracket_tag_format(tool_call_structure & ts, - const internal_discovered_pattern & discovered) { - // Bracket-tag format: [TOOL_CALLS]name[CALL_ID]id[ARGS]{...} (Mistral Small 3.2) - ts.supports_tools = true; - ts.function_format = tool_call_structure::FUNC_BRACKET_TAG; - ts.argument_format = tool_call_structure::ARGS_JSON; - - // The function_opener contains the bracket tag before the function name (e.g., "[TOOL_CALLS]") - // Each tool call starts with this tag, so it's the per_call_start, not a section wrapper - // tool_section_start/end should be empty since there's no overall section wrapper - ts.tool_section_start = ""; - ts.tool_section_end = ""; - ts.per_call_start = discovered.function_opener; - - // Extract markers from function_name_suffix (e.g., "[CALL_ID]call_0001[ARGS]" or just "[ARGS]") - // Pattern: [ID_MARKER]...[ARGS_MARKER] or just [ARGS_MARKER] - if (!discovered.function_name_suffix.empty()) { - // Find all bracket tags in the suffix - std::vector tags; - size_t pos = 0; - while ((pos = discovered.function_name_suffix.find('[', pos)) != std::string::npos) { - size_t end = discovered.function_name_suffix.find(']', pos); - if (end != std::string::npos) { - tags.push_back(discovered.function_name_suffix.substr(pos, end - pos + 1)); - pos = end + 1; - } else { - break; - } - } - - // Classify tags: args marker contains "ARG", id marker contains "ID" or "CALL" - for (const auto & tag : tags) { - std::string upper_tag = tag; - for (auto & c : upper_tag) { - c = static_cast(std::toupper(static_cast(c))); - } - if (upper_tag.find("ARG") != std::string::npos) { - ts.args_marker = tag; - } else if (upper_tag.find("ID") != std::string::npos || 
upper_tag.find("CALL") != std::string::npos) { - ts.id_marker = tag; - } - } - } - - LOG_DBG("FUNC_BRACKET_TAG: per_call_start='%s', id_marker='%s', args_marker='%s'\n", ts.per_call_start.c_str(), - ts.id_marker.c_str(), ts.args_marker.c_str()); -} - -void template_analyzer::analyze_recipient_based_format(tool_call_structure & ts, - const internal_discovered_pattern & discovered) { - // Recipient-based format (Functionary v3.2): >>>recipient\n{content} - // where recipient is either "all" (for content) or a function name (for tools) - ts.supports_tools = true; - ts.function_format = tool_call_structure::FUNC_RECIPIENT_BASED; - ts.argument_format = tool_call_structure::ARGS_JSON; // Python dict format, parse as JSON - - // The tool_call_start_marker is used as the recipient delimiter - ts.tool_section_start = discovered.tool_call_start_marker; - ts.tool_section_end = ""; - - // For recipient-based format, content is wrapped in tool_call_start_marker + "all\n" - // This needs to be detected and stripped. We detect this by checking if the - // content_start marker (from phase 1 analysis) starts with tool_call_start_marker - // If not already detected, infer it from the pattern. 
- // Note: This is set on the ContentStructure result, not ToolCallStructure - // The caller (analyze_template) will have the ContentStructure to modify - - LOG_DBG("FUNC_RECIPIENT_BASED: delimiter='%s'\n", ts.tool_section_start.c_str()); -} - -void template_analyzer::analyze_markdown_code_block_format(tool_call_structure & ts, - const internal_discovered_pattern & discovered) { - // Markdown code block format (Cohere Command-R Plus): - // Action: - // ```json - // [ - // { - // "tool_name": "...", - // "parameters": {...} - // } - // ] - // ``` - ts.supports_tools = true; - ts.function_format = tool_call_structure::FUNC_MARKDOWN_CODE_BLOCK; - ts.argument_format = tool_call_structure::ARGS_JSON; - - // Extract the code block marker (e.g., "Action:") - // The tool_call_start_marker should contain "Action:" followed by newline - if (!discovered.tool_call_start_marker.empty()) { - // Extract just the marker text (e.g., "Action:") - // The marker may be followed by whitespace/newline in the template - size_t marker_end = discovered.tool_call_start_marker.find_first_of(" \n\r\t"); - if (marker_end != std::string::npos) { - ts.code_block_marker = discovered.tool_call_start_marker.substr(0, marker_end); - } else { - ts.code_block_marker = discovered.tool_call_start_marker; - } - } - - // Extract the code block language (e.g., "json") - // For Command-R Plus format: Action:\n```json\n[...] 
- // The code fence is in tool_call_opener (before the function name), not function_name_suffix - if (!discovered.function_name_suffix.empty() && discovered.function_name_suffix.find("```") != std::string::npos) { - // Format: ```json or ```json\n - size_t code_fence_pos = discovered.function_name_suffix.find("```"); - size_t lang_start = code_fence_pos + 3; - // Find the end of the language identifier (newline, space, or end of string) - size_t lang_end = discovered.function_name_suffix.find_first_of(" \n\r\t", lang_start); - if (lang_end != std::string::npos && lang_end > lang_start) { - ts.code_block_language = discovered.function_name_suffix.substr(lang_start, lang_end - lang_start); - } else { - // No language identifier after ```, will use "json" as default - ts.code_block_language = "json"; - } - } else if (!discovered.tool_call_opener.empty() && discovered.tool_call_opener.find("```") != std::string::npos) { - // Code fence is in tool_call_opener (before the function name) - // Format: Action:\n```json\n[... 
- size_t code_fence_pos = discovered.tool_call_opener.find("```"); - size_t lang_start = code_fence_pos + 3; - // Find the end of the language identifier (newline, space, or end of string) - size_t lang_end = discovered.tool_call_opener.find_first_of(" \n\r\t", lang_start); - if (lang_end != std::string::npos && lang_end > lang_start) { - ts.code_block_language = discovered.tool_call_opener.substr(lang_start, lang_end - lang_start); - } else { - // No language identifier after ```, will use "json" as default - ts.code_block_language = "json"; - } - } else { - // Default to "json" if no code fence found - ts.code_block_language = "json"; - } - - // The tool_section_end should be the closing code fence: ``` - if (!discovered.tool_call_closer.empty() && discovered.tool_call_closer.find("```") != std::string::npos) { - // Extract just the closing code fence (may have trailing content) - size_t fence_pos = discovered.tool_call_closer.find("```"); - size_t fence_end = fence_pos + 3; - // Include any non-newline characters after ``` (like language identifier if present) - while (fence_end < discovered.tool_call_closer.length() && discovered.tool_call_closer[fence_end] != '\n' && - discovered.tool_call_closer[fence_end] != '\r') { - fence_end++; - } - ts.tool_section_end = discovered.tool_call_closer.substr(fence_pos, fence_end - fence_pos); - } else { - // Default closing code fence - ts.tool_section_end = "```"; - } - - // JSON array format for function calls - ts.name_field = discovered.tool_name_field; - ts.args_field = discovered.tool_args_field; - ts.id_field = discovered.tool_id_field; - - LOG_DBG("FUNC_MARKDOWN_CODE_BLOCK: marker='%s', language='%s', section_end='%s'\n", ts.code_block_marker.c_str(), - ts.code_block_language.c_str(), ts.tool_section_end.c_str()); -} diff --git a/common/chat-auto-parser-generator.cpp b/common/chat-auto-parser-generator.cpp index 0f4d153d06..a721a30f1c 100644 --- a/common/chat-auto-parser-generator.cpp +++ 
b/common/chat-auto-parser-generator.cpp @@ -1,250 +1,361 @@ -#include "chat-auto-parser-helpers.h" #include "chat-auto-parser.h" +#include "chat-diff-analyzer.h" #include "chat-peg-parser.h" #include "chat.h" #include "json-schema-to-grammar.h" -#include "log.h" #include "nlohmann/json.hpp" +#include -#include using json = nlohmann::ordered_json; -common_chat_params universal_peg_generator::generate_parser(const template_analysis_result & analysis, - const common_chat_template & tmpl, - const struct templates_params & inputs) { +// Helper to iterate over tools/functions +static void foreach_function(const json & tools, const std::function & fn) { + for (const auto & tool : tools) { + if (!tool.contains("type") || tool.at("type") != "function" || !tool.contains("function")) { + continue; + } + fn(tool); + } +} + +common_chat_params universal_peg_generator::generate_parser(const common_chat_template & tmpl, + const struct templates_params & inputs) { + // Run differential analysis to extract template structure + auto analysis = differential_analyzer::analyze(tmpl); + return generate_parser(tmpl, inputs, analysis); +} + +common_chat_params universal_peg_generator::generate_parser(const common_chat_template & tmpl, + const struct templates_params & inputs, + const diff_analysis_result & analysis) { + // Check for thinking forced open + bool thinking_forced_open = (analysis.reasoning == reasoning_mode::FORCED_OPEN); + bool thinking_forced_closed = (analysis.reasoning == reasoning_mode::FORCED_CLOSED); + + // Build the parser using the analysis results + auto parser = build_parser(analysis, inputs, thinking_forced_open, thinking_forced_closed); + + // Create the result structure common_chat_params data; + data.prompt = common_chat_template_direct_apply(tmpl, inputs); + data.format = COMMON_CHAT_FORMAT_PEG_NATIVE; + data.preserved_tokens = analysis.preserved_tokens; + data.parser = parser.save(); - try { - LOG_DBG("%s\n", __func__); + // Build grammar if tools are present 
+ bool has_tools = inputs.tools.is_array() && !inputs.tools.empty(); + bool include_grammar = has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE; - // Patch messages if template requires non-null content - // Some templates (e.g., iquest) render null as "None" when concatenating strings - std::optional messages_override; - if (analysis.tools.requires_nonnull_content && !inputs.messages.empty()) { - LOG_DBG("Patching null content to empty string (template requires non-null content)\n"); - json patched_messages = inputs.messages; - for (auto & msg : patched_messages) { - if (msg.contains("content") && msg["content"].is_null()) { - msg["content"] = ""; - } - } - messages_override = patched_messages; - } + if (include_grammar) { + data.grammar_lazy = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_AUTO; - if (inputs.messages.empty()) { - // Some templates don't handle empty messages well - always leave something in - json message = { - { { "role", "user" }, { "content", "Hello" } } - }; - messages_override.emplace(message); - } - - // Calculate prompt first to detect forced thinking - data.prompt = common_chat_template_direct_apply(tmpl, inputs, messages_override); - - // Determine if thinking is forced open based on prompt ending - bool thinking_forced_open = false; - if (analysis.content.reasoning_mode == content_structure::REASONING_FORCED_OPEN) { - if (inputs.enable_thinking) { - thinking_forced_open = true; - LOG_DBG("Thinking forced open based on template analysis\n"); - } else { - // Template ends with reasoning start marker but thinking is disabled - // Append the end marker to close it - data.prompt += analysis.content.reasoning_end; - LOG_DBG("Appended reasoning end marker since thinking is disabled\n"); - } - } - data.thinking_forced_open = thinking_forced_open; - - // Build the unified parser - auto arena = build_parser(analysis, tmpl, inputs, thinking_forced_open); - data.parser = arena.save(); - - // Determine format - bool has_tools = - 
inputs.tools.is_array() && !inputs.tools.empty() && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE; - - if (has_tools && analysis.tools.supports_tools) { - // Unified format that handles both JSON and tagged tool calls - data.format = COMMON_CHAT_FORMAT_PEG_NATIVE; - LOG_DBG("Generated unified parser with tool support (format: PEG_NATIVE)\n"); - } else if (analysis.content.reasoning_mode != content_structure::REASONING_NONE) { - // Reasoning markers detected - use PEG parser to handle thinking blocks - data.format = COMMON_CHAT_FORMAT_PEG_NATIVE; - LOG_DBG("Generated unified parser for reasoning handling (format: PEG_NATIVE)\n"); - } else if (analysis.content.content_mode != content_structure::CONTENT_PLAIN) { - // Content markers detected - use PEG parser to strip them even without tools - data.format = COMMON_CHAT_FORMAT_PEG_NATIVE; - LOG_DBG("Generated unified parser for content marker stripping (format: PEG_NATIVE)\n"); - } else if (analysis.tools.function_format == tool_call_structure::FUNC_RECIPIENT_BASED) { - // Recipient-based format (e.g., Functionary v3.2): >>>recipient\n{content} - // Need PEG parser to handle recipient delimiter parsing - data.format = COMMON_CHAT_FORMAT_PEG_NATIVE; - LOG_DBG("Generated unified parser for recipient-based format (format: PEG_NATIVE)\n"); - } else if (analysis.tools.function_format == tool_call_structure::FUNC_TAG_WITH_NAME) { - // Tag-with-name format (e.g., func_name\n{args} for Functionary) - // Need PEG parser to handle function name parsing - data.format = COMMON_CHAT_FORMAT_PEG_NATIVE; - LOG_DBG("Generated unified parser for tag-with-name format (format: PEG_NATIVE)\n"); - } else if (analysis.tools.function_format == tool_call_structure::FUNC_BRACKET_TAG) { - // Bracket-tag format (e.g., [TOOL_CALLS]name[CALL_ID]id[ARGS]{...} for Mistral Small 3.2) - // Need PEG parser to handle bracket tag parsing - data.format = COMMON_CHAT_FORMAT_PEG_NATIVE; - LOG_DBG("Generated unified parser for bracket-tag format (format: 
PEG_NATIVE)\n"); - } else if (analysis.tools.function_format == tool_call_structure::FUNC_PREFIXED_INDEXED) { - // Prefixed-indexed format (e.g., Kimi-K2) - // Need PEG parser to handle namespace and indexed format - data.format = COMMON_CHAT_FORMAT_PEG_NATIVE; - LOG_DBG("Generated unified parser for prefixed-indexed format (format: PEG_NATIVE)\n"); - } else { - data.format = COMMON_CHAT_FORMAT_CONTENT_ONLY; - LOG_DBG("Generated unified parser without tools or content markers (format: CONTENT_ONLY)\n"); - } - - // Determine trigger word for lazy grammar - std::string trigger_word; - if (!analysis.tools.tool_section_start.empty() || - analysis.tools.function_format == tool_call_structure::FUNC_RECIPIENT_BASED) { - trigger_word = analysis.tools.tool_section_start; - } else if (analysis.tools.function_format == tool_call_structure::FUNC_TAG_WITH_NAME) { - trigger_word = analysis.tools.function_prefix; - } else if (analysis.tools.function_format == tool_call_structure::FUNC_BRACKET_TAG || - analysis.tools.function_format == tool_call_structure::FUNC_PREFIXED_INDEXED) { - // For formats with per-call markers, use per_call_start as trigger - trigger_word = analysis.tools.per_call_start; - } - - // Build grammar for tool calls - data.grammar_lazy = analysis.tools.supports_tools && has_tools; - - // For FUNC_TAG_WITH_NAME with empty prefix (Functionary), disable lazy grammar - // since there's no clear trigger word - constrain from the start - if (analysis.tools.function_format == tool_call_structure::FUNC_TAG_WITH_NAME && - analysis.tools.function_prefix.empty()) { - data.grammar_lazy = false; - } - - if (data.grammar_lazy) { - if (!trigger_word.empty()) { - data.grammar_triggers.push_back({ COMMON_GRAMMAR_TRIGGER_TYPE_WORD, trigger_word }); - } - } - - // Build grammar data.grammar = build_grammar([&](const common_grammar_builder & builder) { - if (inputs.tools.is_array()) { - for (const auto & tool : inputs.tools) { - if (!tool.contains("type") || tool.at("type") != 
"function" || !tool.contains("function")) { - continue; - } - const auto & function = tool.at("function"); - if (function.contains("parameters")) { - auto params = function.at("parameters"); - builder.resolve_refs(params); - } - } - } - arena.build_grammar(builder, data.grammar_lazy); + foreach_function(inputs.tools, [&](const json & tool) { + const auto & function = tool.at("function"); + auto schema = function.at("parameters"); + builder.resolve_refs(schema); + }); + parser.build_grammar(builder, data.grammar_lazy); }); - // Set preserved tokens from analysis - data.preserved_tokens = analysis.preserved_tokens; - - LOG_DBG("=== UNIFIED PEG PARSER GENERATION COMPLETED ===\n"); - - } catch (const std::exception & e) { - LOG_DBG("Unified parser generation failed: %s\n", e.what()); - throw; + // Set grammar triggers based on tool section markers (fall back to per-call markers) + std::string trigger_marker = !analysis.markers.tool_section_start.empty() + ? analysis.markers.tool_section_start + : analysis.markers.per_call_start; + if (!trigger_marker.empty()) { + data.grammar_triggers = { + { COMMON_GRAMMAR_TRIGGER_TYPE_WORD, trigger_marker } + }; + } } return data; } -common_peg_arena universal_peg_generator::build_parser(const template_analysis_result & analysis, - const common_chat_template & tmpl, - const struct templates_params & inputs, - bool thinking_forced_open) { - GGML_UNUSED(tmpl); +common_peg_arena universal_peg_generator::build_parser(const diff_analysis_result & analysis, + const struct templates_params & inputs, + bool thinking_forced_open, + bool thinking_forced_closed) { + return build_chat_peg_unified_parser([&](common_chat_peg_unified_builder & p) { + p.set_allow_python_dict_format(true); + const auto & m = analysis.markers; - auto parser = build_chat_peg_unified_parser([&](common_chat_peg_unified_builder & p) { - // Build reasoning block using ContentStructure - auto reasoning = p.build_reasoning_block(analysis.content, inputs.reasoning_format, 
thinking_forced_open); + common_peg_parser reasoning = p.eps(); + bool extract_reasoning = inputs.reasoning_format != COMMON_REASONING_FORMAT_NONE; + bool enable_thinking = inputs.enable_thinking; - // Build content block using ContentStructure - // Note: we don't pass tool_section_start here because content-before-tools handling - // is done inline in each branch below with p.content(p.until(marker)) - auto content = p.build_content_block(analysis.content, inputs.reasoning_format); - - // Build tool section using ToolCallStructure (if applicable) - bool has_tools = - inputs.tools.is_array() && !inputs.tools.empty() && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE; - - if (has_tools && analysis.tools.supports_tools) { - bool force_calls = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED; - auto tool_section = - p.build_tool_section(analysis.tools, inputs.tools, inputs.parallel_tool_calls, force_calls); - - // Compose: reasoning -> content before tools -> tool_section -> trailing content - // When thinking is forced open, the reasoning block expects . - // For tool-only messages (no thinking content), the model may output tools directly - // without the tag, so we need to make reasoning optional in that case. - // But if reasoning_format is NONE, the reasoning block is already eps() - don't wrap it - // in optional() as that would generate invalid grammar. - auto reasoning_for_tools = - (thinking_forced_open && inputs.reasoning_format != COMMON_REASONING_FORMAT_NONE) ? 
- p.optional(reasoning) : - reasoning; - - if (!analysis.tools.tool_section_start.empty()) { - // With section markers: look for start marker to delimit content - auto content_before_tools = p.content(p.until(analysis.tools.tool_section_start)); - return p.sequence({ reasoning_for_tools, p.space(), content_before_tools, p.space(), tool_section, - p.space(), p.optional(p.content(p.rest())), p.end() }); - } - if (analysis.tools.function_format == tool_call_structure::FUNC_TAG_WITH_NAME && - !analysis.tools.function_prefix.empty()) { - // Tag-with-name format (e.g., >>>func_name): content stops at function prefix - auto content_before_tools = p.content(p.until(analysis.tools.function_prefix)); - return p.sequence( - { reasoning_for_tools, p.space(), content_before_tools, p.space(), tool_section, p.end() }); - } - if (analysis.tools.function_format == tool_call_structure::FUNC_TAG_WITH_NAME) { - // Functionary-style format: tool call starts immediately (e.g., func_name\n{args}) - // No content before tools in this format - the entire output is the tool call - return p.sequence({ reasoning_for_tools, p.space(), tool_section, p.end() }); - } - if (analysis.tools.function_format == tool_call_structure::FUNC_BRACKET_TAG || - analysis.tools.function_format == tool_call_structure::FUNC_PREFIXED_INDEXED) { - // Bracket-tag (Mistral Small 3.2) or prefixed-indexed (Kimi-K2) format: - // Tool calls start with per_call_start marker (e.g., [TOOL_CALLS], <|tool_call_begin|>) - if (!analysis.tools.per_call_start.empty()) { - auto content_before_tools = p.content(p.until(analysis.tools.per_call_start)); - return p.sequence( - { reasoning_for_tools, p.space(), content_before_tools, p.space(), tool_section, p.end() }); + if (extract_reasoning && enable_thinking && analysis.reasoning != reasoning_mode::NONE) { + if (thinking_forced_open || thinking_forced_closed) { + // Thinking is forced open OR forced closed with enable_thinking=true + // In both cases, expect only the closing tag 
(opening was in template) + reasoning = p.reasoning(p.until(m.reasoning_end)) + m.reasoning_end; + } else if (analysis.reasoning == reasoning_mode::TAG_BASED || + analysis.reasoning == reasoning_mode::TOOLS_ONLY) { + // Standard tag-based reasoning OR tools-only mode (reasoning appears with tools) + // Both use the same tag-based pattern if markers are available + if (!m.reasoning_start.empty() && !m.reasoning_end.empty()) { + reasoning = p.optional(m.reasoning_start + p.reasoning(p.until(m.reasoning_end)) + m.reasoning_end); } - // Fallback: no content before tools - return p.sequence({ reasoning_for_tools, p.space(), tool_section, p.end() }); + } else if (analysis.reasoning == reasoning_mode::DELIMITER) { + reasoning = p.optional(p.reasoning(p.until(m.reasoning_end)) + m.reasoning_end); } - if (analysis.tools.function_format == tool_call_structure::FUNC_MARKDOWN_CODE_BLOCK && - !analysis.tools.code_block_marker.empty()) { - // Markdown code block format (Cohere Command-R Plus): - // Content stops at the code_block_marker (e.g., "Action:") - auto content_before_tools = p.content(p.until(analysis.tools.code_block_marker)); - return p.sequence( - { reasoning_for_tools, p.space(), content_before_tools, p.space(), tool_section, p.end() }); - } - // No section markers (raw JSON format): content must stop at JSON object start - // Tool calls start with "{", so use that as a delimiter - auto content_before_tools = p.content(p.until("{")); - return p.sequence( - { reasoning_for_tools, p.space(), content_before_tools, p.space(), tool_section, p.end() }); } - // No tools - just reasoning (if any) followed by content - return p.sequence({ reasoning, p.space(), content, p.end() }); - }); + bool has_tools = inputs.tools.is_array() && !inputs.tools.empty(); + bool has_response_format = inputs.json_schema.is_object() && !inputs.json_schema.empty(); - return parser; + if (has_response_format) { + return reasoning + p.space() + p.content(p.schema(p.json(), "response-format", 
inputs.json_schema)) + p.end(); + } + + if (has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE && analysis.supports_tools) { + return build_tool_parser(p, analysis, inputs, reasoning); + } + + if (analysis.content == content_mode::ALWAYS_WRAPPED && + !m.content_start.empty() && !m.content_end.empty()) { + + bool extracting_reasoning = extract_reasoning && enable_thinking && analysis.reasoning != reasoning_mode::NONE; + + if (extracting_reasoning) { + return reasoning + m.content_start + p.content(p.until(m.content_end)) + m.content_end + p.end(); + } + return p.content(p.until(m.content_start)) + m.content_start + + p.content(p.until(m.content_end)) + m.content_end + p.end(); + } + return reasoning + p.content(p.rest()) + p.end(); + }); } + +common_peg_parser universal_peg_generator::build_tool_parser( + common_chat_peg_unified_builder & p, + const diff_analysis_result & analysis, + const templates_params & inputs, + const common_peg_parser & reasoning) { + + const auto & m = analysis.markers; + + // Build tool choice parser based on format + common_peg_parser tool_choice = p.choice(); + + if (analysis.tools == tool_format::JSON_NATIVE) { + // Pure JSON format: use standard_json_tools helper + // Build effective field names with dot notation if function_field is set + std::string name_field = analysis.name_field; + std::string args_field = analysis.args_field; + + if (!analysis.function_field.empty() && + analysis.function_field != "function" && + name_field.find('.') == std::string::npos) { + name_field = analysis.function_field + "." + name_field; + args_field = analysis.function_field + "." 
+ args_field; + } + + auto tools_parser = p.standard_json_tools( + m.tool_section_start, + m.tool_section_end, + inputs.tools, + inputs.parallel_tool_calls, + inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED, + name_field, + args_field, + analysis.tools_array_wrapped, + analysis.fun_name_is_key, + analysis.id_field, + analysis.gen_id_field, + analysis.parameter_order + ); + + // Handle content wrappers if present + if (analysis.content == content_mode::ALWAYS_WRAPPED && + !m.content_start.empty() && !m.content_end.empty()) { + auto wrapped_content = p.optional(m.content_start + p.content(p.until(m.content_end)) + m.content_end); + return reasoning + wrapped_content + tools_parser + p.end(); + } + + auto content_before_tools = m.tool_section_start.empty() ? p.eps() : p.until(m.tool_section_start); + return reasoning + p.optional(p.content(content_before_tools)) + tools_parser + p.end(); + } + + if (analysis.tools == tool_format::TAG_WITH_JSON) { + // Tag-based with JSON args: {args} + // With optional call_id: [CALL_ID]id[ARGS]{args} + foreach_function(inputs.tools, [&](const json & tool) { + const auto & function = tool.at("function"); + std::string name = function.at("name"); + const auto & schema = function.at("parameters"); + + // Build call_id parser based on position (if supported) + common_peg_parser call_id_section = p.eps(); + if (analysis.call_id_pos == call_id_position::BETWEEN_FUNC_AND_ARGS && + !m.call_id_prefix.empty() && !m.call_id_suffix.empty()) { + // Optional call_id followed by required call_id_suffix (which is also args_start) + // Format: optional([CALL_ID] + call_id_value) + [ARGS] + call_id_section = p.optional(m.call_id_prefix + p.tool_id(p.until(m.call_id_suffix))) + m.call_id_suffix; + } + + auto func_parser = p.tool_open(m.func_name_prefix + p.tool_name(p.literal(name)) + m.func_name_suffix) + + call_id_section + + p.tool_args(p.schema(p.json(), "tool-" + name + "-schema", schema)); + + if (!m.func_close.empty()) { + func_parser = 
func_parser + m.func_close; + } + + tool_choice |= p.rule("tool-" + name, func_parser); + }); + + auto require_calls = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED; + + common_peg_parser tool_calls = p.eps(); + + if (!m.per_call_start.empty()) { + // Per-call wrapping: each call individually wrapped + auto wrapped_call = m.per_call_start + tool_choice + m.per_call_end; + if (inputs.parallel_tool_calls) { + tool_calls = p.trigger_rule("tool-call", + wrapped_call + p.zero_or_more(p.space() + wrapped_call)); + } else { + tool_calls = p.trigger_rule("tool-call", wrapped_call); + } + if (!m.tool_section_start.empty()) { + tool_calls = p.trigger_rule("tool-calls", p.literal(m.tool_section_start) + p.space() + + tool_calls + p.space() + (m.tool_section_end.empty() ? p.end() : p.literal(m.tool_section_end))); + } + } else { + std::string separator = m.call_separator; + if (separator.empty()) { + separator = ", "; // Default + } + + if (inputs.parallel_tool_calls) { + tool_calls = p.trigger_rule("tool-call", + m.tool_section_start + tool_choice + p.zero_or_more(separator + tool_choice) + m.tool_section_end); + } else { + tool_calls = p.trigger_rule("tool-call", + m.tool_section_start + tool_choice + m.tool_section_end); + } + } + + if (!require_calls) { + tool_calls = p.optional(tool_calls); + } + + std::string trigger_marker = !m.tool_section_start.empty() ? m.tool_section_start : m.per_call_start; + auto content_before_tools = trigger_marker.empty() ? 
p.eps() : p.until(trigger_marker); + return reasoning + p.optional(p.content(content_before_tools)) + tool_calls + p.end(); + } + + if (analysis.tools == tool_format::TAG_WITH_TAGGED) { + // Tag-based with tagged args: value + foreach_function(inputs.tools, [&](const json & tool) { + const auto & function = tool.at("function"); + std::string name = function.at("name"); + const auto & params = function.at("parameters"); + + if (!params.contains("properties") || !params.at("properties").is_object()) { + return; + } + + const auto & properties = params.at("properties"); + std::set required; + if (params.contains("required") && params.at("required").is_array()) { + params.at("required").get_to(required); + } + + // Build parser for each argument + std::vector arg_parsers; + for (const auto & [param_name, param_schema] : properties.items()) { + bool is_required = required.find(param_name) != required.end(); + auto type = param_schema.value("type", "object"); + + auto arg = p.tool_arg( + p.tool_arg_open(m.arg_name_prefix + p.tool_arg_name(p.literal(param_name)) + m.arg_name_suffix) + m.arg_value_prefix + + (type == "string" ? 
+ p.tool_arg_string_value(p.schema(p.until(m.arg_value_suffix), + "tool-" + name + "-arg-" + param_name + "-schema", param_schema, true)) : + p.tool_arg_json_value(p.schema(p.json(), + "tool-" + name + "-arg-" + param_name + "-schema", param_schema)) + p.space()) + + p.tool_arg_close(p.literal(m.arg_value_suffix)) + ); + + if (is_required) { + arg_parsers.push_back(p.rule("tool-" + name + "-arg-" + param_name, arg)); + } else { + arg_parsers.push_back(p.optional(p.rule("tool-" + name + "-arg-" + param_name, arg))); + } + } + + // Build arg sequence with space() between consecutive args + common_peg_parser args_seq = p.eps(); + for (size_t i = 0; i < arg_parsers.size(); i++) { + if (i > 0) { + args_seq = args_seq + p.space(); + } + args_seq = args_seq + arg_parsers[i]; + } + + // Build call_id parser based on position (if supported) + common_peg_parser call_id_section = p.eps(); + if (analysis.call_id_pos == call_id_position::BETWEEN_FUNC_AND_ARGS && + !m.call_id_prefix.empty() && !m.call_id_suffix.empty()) { + // Optional call_id followed by required call_id_suffix + call_id_section = p.optional(m.call_id_prefix + p.tool_id(p.until(m.call_id_suffix))) + m.call_id_suffix; + } + + auto func_parser = p.tool_open(m.func_name_prefix + p.tool_name(p.literal(name)) + m.func_name_suffix) + + call_id_section + + p.space() + args_seq; + + if (!m.func_close.empty()) { + func_parser = func_parser + p.space() + p.tool_close(p.literal(m.func_close)); + } else { + func_parser = func_parser + p.tool_close(p.space()); // force this to process tool closing callbacks in mapper + } + + tool_choice |= p.rule("tool-" + name, func_parser); + }); + + auto require_tools = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED; + + common_peg_parser tool_calls = p.eps(); + + if (!m.per_call_start.empty()) { + // Per-call wrapping: each call individually wrapped (e.g., ...) 
+ auto wrapped_call = m.per_call_start + p.space() + tool_choice + p.space() + m.per_call_end; + if (inputs.parallel_tool_calls) { + tool_calls = p.trigger_rule("tool-call", wrapped_call + p.zero_or_more(p.space() + wrapped_call)); + } else { + tool_calls = p.trigger_rule("tool-call", wrapped_call); + } + if (!m.tool_section_start.empty()) { + tool_calls = p.trigger_rule("tool-calls", p.literal(m.tool_section_start) + p.space() + + tool_calls + p.space() + (m.tool_section_end.empty() ? p.end() : p.literal(m.tool_section_end))); + } + } else { + std::string separator = m.call_separator; + if (separator.empty()) { + separator = ", "; // Default + } + + if (inputs.parallel_tool_calls) { + tool_calls = p.trigger_rule("tool-call", + m.tool_section_start + p.space() + tool_choice + p.zero_or_more(separator + tool_choice) + p.space() + m.tool_section_end); + } else { + tool_calls = p.trigger_rule("tool-call", + m.tool_section_start + p.space() + tool_choice + p.space() + m.tool_section_end); + } + } + + if (!require_tools) { + tool_calls = p.optional(tool_calls); + } + + std::string trigger_marker = !m.tool_section_start.empty() ? m.tool_section_start : m.per_call_start; + auto content_before_tools = trigger_marker.empty() ? 
p.eps() : p.until(trigger_marker); + return reasoning + p.optional(p.content(content_before_tools)) + tool_calls + p.end(); + } + + GGML_ABORT("Unable to create tool parser"); +} \ No newline at end of file diff --git a/common/chat-auto-parser-helpers.cpp b/common/chat-auto-parser-helpers.cpp index c63012c2a8..9c345d6f6e 100644 --- a/common/chat-auto-parser-helpers.cpp +++ b/common/chat-auto-parser-helpers.cpp @@ -1,1419 +1,376 @@ #include "chat-auto-parser-helpers.h" -#include "chat-auto-parser.h" -#include "chat.h" -#include "log.h" - +#include "chat-diff-analyzer.h" #include "nlohmann/json.hpp" +#include + using json = nlohmann::ordered_json; -bool string_ends_with(const std::string & str, const std::string & suffix) { - return str.size() >= suffix.size() && str.compare(str.size() - suffix.size(), suffix.size(), suffix) == 0; +std::string trim_whitespace(const std::string & str) { + size_t start = 0; + while (start < str.length() && std::isspace(static_cast(str[start]))) { + start++; + } + + if (start == str.length()) { + return ""; + } + + size_t end = str.length() - 1; + while (end > start && std::isspace(static_cast(str[end]))) { + end--; + } + + return str.substr(start, end - start + 1); } -void trim_whitespace(std::string & str) { +std::string trim_leading_whitespace(const std::string & str) { + size_t start = 0; + while (start < str.length() && std::isspace(static_cast(str[start]))) { + start++; + } + + return str.substr(start); +} + +std::string trim_trailing_whitespace(const std::string & str) { if (str.empty()) { - return; - } - size_t first = str.find_first_not_of(" \n\t\r"); - if (first == std::string::npos) { - str.clear(); - return; - } - size_t last = str.find_last_not_of(" \n\t\r"); - str = str.substr(first, (last - first + 1)); -} - -void trim_trailing_newlines(std::string & str) { - while (!str.empty() && (str.back() == '\n' || str.back() == '\r')) { - str.pop_back(); - } -} - -size_t count_non_whitespace(const std::string & str) { - size_t 
count = 0; - for (char c : str) { - if (c != ' ' && c != '\t' && c != '\n' && c != '\r') { - count++; - } - } - return count; -} - -size_t find_last_of_any(const std::string & str, const std::string & chars, size_t start_pos) { - size_t last_pos = std::string::npos; - for (char c : chars) { - size_t pos = str.rfind(c, start_pos); - if (pos != std::string::npos && (last_pos == std::string::npos || pos > last_pos)) { - last_pos = pos; - } - } - return last_pos; -} - -std::string extract_tag_name(const std::string & tag) { - if (tag.empty() || tag[0] != '<') { return ""; } - std::string tag_name = tag.substr(1); - size_t end_bracket = tag_name.find_first_of(" >"); - if (end_bracket != std::string::npos) { - tag_name = tag_name.substr(0, end_bracket); + + size_t end = str.length() - 1; + while (end > 0 && std::isspace(static_cast(str[end]))) { + end--; } - return tag_name; -} - -std::string create_closing_tag(const std::string & opening_tag) { - if (opening_tag.empty()) { + + // If first char is also whitespace, return empty string + if (end == 0 && std::isspace(static_cast(str[0]))) { return ""; } - if (opening_tag[0] == '<') { - std::string name = extract_tag_name(opening_tag); - return ""; - } - if (opening_tag.front() == '[' && opening_tag.back() == ']') { - std::string name = opening_tag.substr(1, opening_tag.length() - 2); - return "[/" + name + "]"; - } - return ""; + + return str.substr(0, end + 1); } -std::string find_common_prefix(const std::vector & strings) { - if (strings.empty()) { - return ""; - } - if (strings.size() == 1) { - return strings[0]; +std::string trim_trailing_newlines(const std::string & str) { + size_t end = str.length(); + while (end > 0 && str[end - 1] == '\n') { + end--; } - std::string common = strings[0]; - for (size_t i = 1; i < strings.size(); ++i) { - const std::string & current = strings[i]; - std::string temp_common; - for (size_t j = 0; j < common.length() && j < current.length(); ++j) { - if (common[j] == current[j]) { - 
temp_common += common[j]; - } else { - break; + return str.substr(0, end); +} + +// Helper to find unmatched bracket/tag in a string +// Finds an unmatched bracket in a string. +// search_backwards=true: finds unclosed opening bracket at end (returns bracket position) +// search_backwards=false: finds unopened closing bracket at start (returns position after bracket) +static size_t find_unmatched_bracket(const std::string & str, bool search_backwards) { + if (str.empty()) { + return std::string::npos; + } + + // Compute iteration bounds and bracket types based on direction + const char * primary_brackets = search_backwards ? "<[" : ">]"; + + for (size_t i = 0; i < str.length(); ++i) { + // Map iteration index to actual position based on direction + size_t pos = search_backwards ? (str.length() - 1 - i) : i; + char c = str[pos]; + + // Check if this is a primary bracket we're looking for + if (c == primary_brackets[0] || c == primary_brackets[1]) { + // Get the matching bracket: < matches >, [ matches ], and vice versa + char match_bracket = (c == '<' || c == '>') ? (c == '<' ? '>' : '<') : (c == '[' ? ']' : '['); + + // Search for matching bracket in the appropriate range + size_t inner_start = search_backwards ? (pos + 1) : 0; + size_t inner_end = search_backwards ? str.length() : pos; + bool found_match = false; + + for (size_t j = inner_start; j < inner_end; ++j) { + if (str[j] == match_bracket) { + found_match = true; + break; + } + } + + if (!found_match) { + return search_backwards ? 
pos : (pos + 1); } } - common = temp_common; } - return common; + + return std::string::npos; } -std::string find_common_suffix_generic(const std::vector & strings) { - if (strings.empty()) { - return ""; - } - if (strings.size() == 1) { - return strings[0]; +static size_t find_unclosed_bracket_at_end(const std::string & str) { + return find_unmatched_bracket(str, true); +} + +static size_t find_unopened_bracket_at_start(const std::string & str) { + return find_unmatched_bracket(str, false); +} + +// Returns true if `s` contains an unmatched bracket. +// search_backwards=true: looks for opening bracket without matching closing after it +// search_backwards=false: looks for closing bracket without matching opening before it +static bool contains_unmatched_bracket(const std::string & s, char opening, char closing, bool search_backwards) { + if (s.empty()) { + return false; } - std::string common = strings[0]; - for (size_t i = 1; i < strings.size(); ++i) { - const std::string & current = strings[i]; - std::string temp_common; - size_t min_len = std::min(common.length(), current.length()); - for (size_t j = 0; j < min_len; ++j) { - size_t pos_common = common.length() - j - 1; - size_t pos_current = current.length() - j - 1; - if (common[pos_common] == current[pos_current]) { - temp_common = common[pos_common] + temp_common; - } else { - break; + char primary = search_backwards ? opening : closing; + + for (size_t i = 0; i < s.length(); ++i) { + // Map iteration index to actual position based on direction + size_t pos = search_backwards ? (s.length() - 1 - i) : i; + + if (s[pos] == primary) { + // Search for matching bracket in the appropriate range + size_t inner_start = search_backwards ? (pos + 1) : 0; + size_t inner_end = search_backwards ? s.length() : pos; + char match_bracket = search_backwards ? 
closing : opening; + bool found_match = false; + + for (size_t j = inner_start; j < inner_end; ++j) { + if (s[j] == match_bracket) { + found_match = true; + break; + } + } + + if (!found_match) { + return true; } } - common = temp_common; } - return common; + return false; } -std::string find_common_substring_limited(const std::vector & strings, - size_t max_length, - const std::string & delimiters) { - std::string common = find_common_prefix(strings); - if (common.length() > max_length) { - size_t pos = find_last_of_any(common, delimiters, common.length() - 1); - if (pos != std::string::npos && pos > 0) { - return common.substr(0, pos + 1); +static bool contains_unopened_closing(const std::string & s, char opening, char closing) { + return contains_unmatched_bracket(s, opening, closing, false); +} + +static bool contains_unclosed_opening(const std::string & s, char opening, char closing) { + return contains_unmatched_bracket(s, opening, closing, true); +} + +// Moves incomplete tags from prefix/suffix into left/right parts +// Only moves tags when we detect the split pattern in BOTH left and right +static diff_split fix_tag_boundaries(diff_split result) { + // Check if prefix ends with an unclosed bracket/tag + // No fixed window: search the entire neighboring strings for matching brackets + size_t unclosed_pos = find_unclosed_bracket_at_end(result.prefix); + if (unclosed_pos != std::string::npos) { + char opening_bracket = result.prefix[unclosed_pos]; + char closing_bracket = (opening_bracket == '<') ? 
'>' : ']'; + + // Look for the specific closing bracket that matches our opening bracket + bool left_has_pattern = contains_unopened_closing(result.left, opening_bracket, closing_bracket); + bool right_has_pattern = contains_unopened_closing(result.right, opening_bracket, closing_bracket); + bool suffix_has_pattern = contains_unopened_closing(result.suffix, opening_bracket, closing_bracket); + + // Move the tag if both sides satisfy: has pattern OR is empty (and other has pattern) + // This handles cases like: left="" right="_begin|>..." or left="stuff>" right="stuff>" + bool left_satisfies = left_has_pattern || (result.left.empty() && suffix_has_pattern); + bool right_satisfies = right_has_pattern || (result.right.empty() && suffix_has_pattern); + + if (left_satisfies && right_satisfies) { + // Move the unclosed tag from prefix to left/right + std::string tag_part = result.prefix.substr(unclosed_pos); + result.prefix = result.prefix.substr(0, unclosed_pos); + result.left = tag_part + result.left; + result.right = tag_part + result.right; } - return common.substr(0, max_length); - } - return common; -} - -std::string apply_template(common_chat_template & tmpl, - const struct templates_params & inputs, - const std::optional & messages_override, - const std::optional & tools_override, - const std::optional & additional_context) { - struct templates_params final_inputs(inputs); - final_inputs.messages = messages_override ? *messages_override : inputs.messages; - if (tools_override) { - final_inputs.tools = *tools_override; - } else { - final_inputs.tools = inputs.tools.empty() ? 
json() : inputs.tools; - } - final_inputs.add_generation_prompt = inputs.add_generation_prompt; - final_inputs.extra_context = inputs.extra_context; - final_inputs.extra_context["enable_thinking"] = inputs.enable_thinking; - if (additional_context) { - final_inputs.extra_context.merge_patch(*additional_context); } - try { - return common_chat_template_direct_apply(tmpl, inputs); - } catch (const std::exception & e) { - LOG_ERR("Template application failed: %s\n", e.what()); - return ""; - } -} + // Check if suffix starts with an unopened bracket/tag + size_t unopened_end = find_unopened_bracket_at_start(result.suffix); + if (unopened_end != std::string::npos) { + char closing_bracket = + result.suffix[unopened_end - 1]; // -1 because unopened_end is position after the bracket + char opening_bracket = (closing_bracket == '>') ? '<' : '['; -std::string adjust_to_token_boundary(const std::string & str) { - if (str.empty()) { - return str; - } + // Check if BOTH left and right have the pattern of unclosed opening bracket at the end + bool left_has_pattern = contains_unclosed_opening(result.left, opening_bracket, closing_bracket); + bool right_has_pattern = contains_unclosed_opening(result.right, opening_bracket, closing_bracket); + bool prefix_has_pattern = contains_unclosed_opening(result.prefix, opening_bracket, closing_bracket); - // Check if the string ends in the middle of a <|...|> token - // Look for unmatched <| at the end + // Move the tag if both sides satisfy: has pattern OR is empty (and other has pattern) + bool left_satisfies = left_has_pattern || (result.left.empty() && prefix_has_pattern); + bool right_satisfies = right_has_pattern || (result.right.empty() && prefix_has_pattern); - // Find the last <| in the string - size_t last_open = str.rfind("<|"); - if (last_open == std::string::npos) { - return str; // No special tokens - } - - // Find if there's a |> after the last <| - size_t matching_close = str.find("|>", last_open + 2); - if (matching_close 
!= std::string::npos) { - // The token is complete, return as-is - return str; - } - - // The string is truncated mid-token - // Truncate to just before the incomplete token - std::string result = str.substr(0, last_open); - - // Trim any trailing whitespace - while (!result.empty() && (result.back() == ' ' || result.back() == '\t' || result.back() == '\n')) { - result.pop_back(); + if (left_satisfies && right_satisfies) { + // Move the unopened tag from suffix to left/right + std::string tag_part = result.suffix.substr(0, unopened_end); + result.suffix = result.suffix.substr(unopened_end); + result.left = result.left + tag_part; + result.right = result.right + tag_part; + } } return result; } -// Fullwidth vertical bar: | (U+FF5C) is 3 bytes in UTF-8: 0xEF 0xBD 0x9C -static const std::string FULLWIDTH_PIPE = "\xef\xbd\x9c"; // | -static const std::string TOKEN_OPENER_STD = "<|"; -static const std::string TOKEN_OPENER_FW = "<" + FULLWIDTH_PIPE; // <| -static const std::string TOKEN_CLOSER_STD = "|>"; -static const std::string TOKEN_CLOSER_FW = FULLWIDTH_PIPE + ">"; // |> +diff_split calculate_diff_split(const std::string & left, const std::string & right) { + diff_split result; -size_t find_token_opener(const std::string & str, size_t start_pos) { - size_t pos_std = str.find(TOKEN_OPENER_STD, start_pos); - size_t pos_fw = str.find(TOKEN_OPENER_FW, start_pos); - - if (pos_std == std::string::npos) { - return pos_fw; + // Find longest common prefix + size_t prefix_len = 0; + size_t min_len = std::min(left.length(), right.length()); + while (prefix_len < min_len && left[prefix_len] == right[prefix_len]) { + prefix_len++; } - if (pos_fw == std::string::npos) { - return pos_std; - } - return std::min(pos_std, pos_fw); -} + result.prefix = left.substr(0, prefix_len); -size_t find_token_closer(const std::string & str, size_t start_pos) { - size_t pos_std = str.find(TOKEN_CLOSER_STD, start_pos); - size_t pos_fw = str.find(TOKEN_CLOSER_FW, start_pos); + // Find longest 
common suffix, ending no later than the end of the longest common prefix + size_t suffix_len = 0; + while (suffix_len < min_len - prefix_len) { + size_t left_pos = left.length() - 1 - suffix_len; + size_t right_pos = right.length() - 1 - suffix_len; - if (pos_std == std::string::npos) { - return pos_fw; - } - if (pos_fw == std::string::npos) { - return pos_std; - } - return std::min(pos_std, pos_fw); -} - -size_t get_token_opener_length(const std::string & str, size_t pos) { - if (pos >= str.length()) { - return 0; - } - if (str.compare(pos, TOKEN_OPENER_FW.length(), TOKEN_OPENER_FW) == 0) { - return TOKEN_OPENER_FW.length(); // 4 bytes for <| - } - if (str.compare(pos, TOKEN_OPENER_STD.length(), TOKEN_OPENER_STD) == 0) { - return TOKEN_OPENER_STD.length(); // 2 bytes for <| - } - return 0; -} - -size_t get_token_closer_length(const std::string & str, size_t pos) { - if (pos >= str.length()) { - return 0; - } - if (str.compare(pos, TOKEN_CLOSER_FW.length(), TOKEN_CLOSER_FW) == 0) { - return TOKEN_CLOSER_FW.length(); // 4 bytes for |> - } - if (str.compare(pos, TOKEN_CLOSER_STD.length(), TOKEN_CLOSER_STD) == 0) { - return TOKEN_CLOSER_STD.length(); // 2 bytes for |> - } - return 0; -} - -std::string strip_eos_token(const std::string & str) { - if (str.empty()) { - return str; - } - - // Find the last token in the string - // We need to find a token that looks like an EOS marker - // Common patterns: - // - <|eot_id|>, <|eos|>, <|end|>, <|endoftext|> - // - <|end▁of▁sentence|> (DeepSeek fullwidth) - - size_t last_closer = std::string::npos; - size_t search_pos = str.length(); - - // Search backwards for the last token closer - while (search_pos > 0) { - // Check for fullwidth closer first (it's longer) - if (search_pos >= TOKEN_CLOSER_FW.length()) { - size_t check_pos = search_pos - TOKEN_CLOSER_FW.length(); - if (str.compare(check_pos, TOKEN_CLOSER_FW.length(), TOKEN_CLOSER_FW) == 0) { - last_closer = check_pos; - break; - } - } - // Check for standard closer - if 
(search_pos >= TOKEN_CLOSER_STD.length()) { - size_t check_pos = search_pos - TOKEN_CLOSER_STD.length(); - if (str.compare(check_pos, TOKEN_CLOSER_STD.length(), TOKEN_CLOSER_STD) == 0) { - last_closer = check_pos; - break; - } - } - search_pos--; - } - - if (last_closer == std::string::npos) { - return str; // No token closer found - } - - // Find the corresponding opener - size_t opener_search_start = (last_closer > 100) ? last_closer - 100 : 0; - size_t last_opener = std::string::npos; - size_t opener_len = 0; - - for (size_t pos = opener_search_start; pos < last_closer; pos++) { - size_t len = get_token_opener_length(str, pos); - if (len > 0) { - last_opener = pos; - opener_len = len; - } - } - - if (last_opener == std::string::npos) { - return str; // No matching opener found - } - - // Extract the token content to check if it's an EOS marker - size_t closer_len = get_token_closer_length(str, last_closer); - size_t content_start = last_opener + opener_len; - size_t content_length = last_closer - content_start; - - if (content_length == 0 || content_length > 50) { - return str; // Invalid or too long token content - } - - std::string token_content = str.substr(content_start, content_length); - - // Convert to lowercase for comparison (ASCII only, sufficient for EOS markers) - std::string lower_content; - for (char c : token_content) { - lower_content += (c >= 'A' && c <= 'Z') ? (c + 32) : c; - } - - // Check if this looks like an EOS token - // True EOS tokens: - // - <|eos|>, <|eot_id|>, <|end_of_text|>, <|endoftext|> - // - <|end▁of▁sentence|> (DeepSeek fullwidth) - // NOT EOS tokens (structural markers): - // - <|END_ACTION|>, <|TOOL_CALL_END|>, <|end_thinking|>, etc. 
- - bool is_eos = false; - - // Check for specific EOS patterns - if (lower_content == "eos" || lower_content == "eot_id" || lower_content == "eot" || - lower_content == "end_of_text" || lower_content == "endoftext") { - is_eos = true; - } - // DeepSeek's end_of_sentence uses fullwidth underscore (▁) which is preserved in lower_content - // The token content would be "end▁of▁sentence" (with ▁ = U+2581) - else if (token_content.find("sentence") != std::string::npos || - token_content.find("\xe2\x96\x81of\xe2\x96\x81sentence") != std::string::npos) { - is_eos = true; - } - - if (!is_eos) { - return str; // Not an EOS token - } - - // Strip the EOS token - std::string result = str.substr(0, last_opener); - - LOG_DBG("Stripped EOS token '%s' from string\n", - str.substr(last_opener, last_closer + closer_len - last_opener).c_str()); - - return result; -} - -std::string find_string_difference(const std::string & base, const std::string & extended) { - size_t common_prefix = 0; - while (common_prefix < base.length() && common_prefix < extended.length() && - base[common_prefix] == extended[common_prefix]) { - common_prefix++; - } - return extended.substr(common_prefix); -} - -std::string extract_json_field_name(const std::string & opener, - const std::string & default_name, - const std::vector & candidates) { - for (const auto & candidate : candidates) { - std::string pattern = "\"" + candidate + "\""; - if (opener.find(pattern) != std::string::npos) { - LOG_DBG("Found JSON field name '%s' in opener\n", candidate.c_str()); - return candidate; - } - } - return default_name; -} - -std::string find_closing_pattern(const std::string & diff, size_t func_pos) { - std::vector closers = { "", " " }; - - std::string best_pattern; - size_t best_pos = std::string::npos; - - for (const auto & pattern : closers) { - size_t pos = diff.find(pattern, func_pos); - if (pos != std::string::npos) { - if (pos < best_pos) { - if (pattern == "', pos); - if (end_pos != std::string::npos) { - 
best_pattern = diff.substr(pos, end_pos - pos + 1); - best_pos = pos; - } - } else { - best_pattern = pattern; - best_pos = pos; - } - } - } - } - return best_pattern; -} - -std::string find_tool_call_start(const std::string & diff) { - std::vector start_patterns = { "<", "[", "{", "call", "func", "tool", "TOOL" }; - for (const auto & pattern : start_patterns) { - size_t pos = diff.find(pattern); - if (pos < 5) { - if (pattern == "<") { - size_t end_pos = diff.find('>', pos); - if (end_pos != std::string::npos) { - return diff.substr(pos, end_pos - pos + 1); - } - } - if (pattern == "[" || pattern == "{") { - size_t chunk_len = std::min(diff.length() - pos, (size_t) 60); - return diff.substr(pos, chunk_len); - } - - size_t end_pos = diff.find_first_of(">]} \n", pos); - if (end_pos != std::string::npos) { - if (diff[end_pos] == '>' || diff[end_pos] == ']' || diff[end_pos] == '}') { - return diff.substr(pos, end_pos - pos + 1); - } - return diff.substr(pos, end_pos - pos); - } - return diff.substr(pos, pattern.length()); - } - } - return ""; -} - -std::string find_tool_call_end(const std::string & diff, size_t func_pos) { - char opener_char = 0; - std::string start_tag_name; - - std::string openers = "[{<"; - size_t last_opener_pos = std::string::npos; - for (char c : openers) { - size_t p = diff.rfind(c, func_pos); - if (p != std::string::npos) { - if (last_opener_pos == std::string::npos || p > last_opener_pos) { - last_opener_pos = p; - opener_char = c; - } - } - } - - size_t unclosed_bracket = diff.rfind('[', func_pos); - if (unclosed_bracket != std::string::npos) { - size_t closer = diff.find(']', unclosed_bracket); - if (closer == std::string::npos || closer > func_pos) { - opener_char = '['; - } - } - - if (opener_char == '<') { - size_t tag_start = diff.find('<', last_opener_pos); - if (tag_start != std::string::npos) { - // Include '=' in search to handle style tags - // where the closing tag is , not - size_t tag_end = diff.find_first_of(" >=\n", 
tag_start); - if (tag_end != std::string::npos) { - start_tag_name = diff.substr(tag_start + 1, tag_end - (tag_start + 1)); - } - } - } - - if (!start_tag_name.empty()) { - std::string expected_closer = ""; - size_t pos = diff.find(expected_closer, func_pos); - if (pos != std::string::npos) { - if (opener_char == '[') { - size_t bracket_pos = diff.rfind(']', pos); - if (bracket_pos != std::string::npos && bracket_pos > func_pos) { - return diff.substr(bracket_pos, (pos + expected_closer.length()) - bracket_pos); - } - } - return expected_closer; - } - } - - std::vector end_patterns = { "", "```", "\n", " " }; - std::string best_pattern; - size_t best_pos = std::string::npos; - - auto is_structural = [](const std::string & s) { - if (s.empty()) { - return false; - } - return s[0] == ']' || s[0] == '}' || s[0] == '>' || (s.size() >= 2 && s.substr(0, 2) == "= 3 && s.substr(0, 3) == "```"); - }; - - for (const auto & pattern : end_patterns) { - size_t pos = diff.find(pattern, func_pos); - if (pos == std::string::npos) { - continue; + // Ensure we're not going into the prefix region + if (left_pos < prefix_len || right_pos < prefix_len) { + break; } - bool current_is_struct = is_structural(pattern); - bool best_is_struct = is_structural(best_pattern); - - bool better = false; - if (best_pattern.empty()) { - better = true; - } else if (pos < best_pos) { - better = !(best_is_struct && !current_is_struct) && - !(opener_char == '[' && best_pattern[0] == ']' && pattern[0] == '}'); + if (left[left_pos] == right[right_pos]) { + suffix_len++; } else { - if (!best_is_struct && current_is_struct && pos < best_pos + 400) { - better = true; - } else if (best_is_struct && current_is_struct && opener_char == '[' && pattern[0] == ']' && - best_pattern[0] == '}') { - if (pos < best_pos + 100) { - better = true; - } - } - } - - if (better) { - best_pattern = pattern; - best_pos = pos; - - if (current_is_struct && (pattern == "]" || pattern == "}" || pattern == "```")) { - size_t 
tag_start = diff.find('<', best_pos + pattern.length()); - if (tag_start != std::string::npos && tag_start < best_pos + pattern.length() + 5) { - size_t tag_end = diff.find('>', tag_start); - if (tag_end != std::string::npos) { - best_pattern = diff.substr(best_pos, tag_end - best_pos + 1); - } - } - } + break; } } + result.suffix = left.substr(left.length() - suffix_len); - return best_pattern; -} + // Extract the remainders (the parts between prefix and suffix) + result.left = left.substr(prefix_len, left.length() - prefix_len - suffix_len); + result.right = right.substr(prefix_len, right.length() - prefix_len - suffix_len); -std::string infer_tool_call_opener(const std::string & diff1, const std::string & diff2, const std::string & diff3) { - std::vector differences = { diff1, diff2, diff3 }; - return find_common_prefix(differences); -} + // Fix tag boundaries by moving incomplete tags to left/right + // We iterate because: + // 1. fix_tag_boundaries may move content from prefix/suffix to left/right + // 2. After that, we find common suffix in left/right to extract + // 3. 
The extracted suffix might contain tag parts that need fixing + // We apply fix AFTER suffix extraction to ensure incomplete tags aren't left in suffix + diff_split prev_result; + do { + prev_result = result; -std::string infer_tool_call_closer(const std::string & diff1, const std::string & diff2, const std::string & diff3) { - std::vector differences = { diff1, diff2, diff3 }; - return find_common_suffix_generic(differences); -} - -internal_discovered_pattern extract_patterns_from_differences(const std::string & tool1_diff, - const std::string & tool2_diff, - const std::string & tool3_diff, - const std::string & tool1_full) { - LOG_DBG("%s\n", __func__); - - internal_discovered_pattern patterns; - - size_t func1_pos = tool1_diff.rfind("test_function_name"); - size_t func2_pos = tool2_diff.rfind("test_function_name"); - - if (func1_pos != std::string::npos && func2_pos != std::string::npos) { - patterns.tool_call_opener = tool1_diff.substr(0, func1_pos); - - if (tool1_full.length() >= tool1_diff.length()) { - size_t diff_start = tool1_full.length() - tool1_diff.length(); - - if (diff_start > 0 && tool1_full[diff_start - 1] == '<' && !patterns.tool_call_opener.empty() && - patterns.tool_call_opener[0] != '<') { - patterns.tool_call_opener = "<" + patterns.tool_call_opener; - } - } - - if (func1_pos == 0 && !tool1_full.empty()) { - size_t func_in_full = tool1_full.rfind("test_function_name"); - if (func_in_full != std::string::npos && func_in_full > 0) { - // Look backwards from function name to find prefix pattern - // Find where the prefix ends (skip whitespace immediately before function name) - size_t prefix_end = func_in_full; - while (prefix_end > 0 && (tool1_full[prefix_end - 1] == ' ' || tool1_full[prefix_end - 1] == '\t')) { - prefix_end--; - } - - // Find where the prefix starts by looking for newline or alphanumeric boundary - size_t prefix_start = prefix_end; - while (prefix_start > 0) { - char c = tool1_full[prefix_start - 1]; - // Stop at newline - if 
(c == '\n' || c == '\r') { - break; - } - // Stop if we hit alphanumeric (probably content, not a prefix delimiter) - if (std::isalnum(static_cast(c)) || c == '_') { - prefix_start = prefix_end; // Reset - no valid prefix found - break; - } - prefix_start--; - } - - // Extract the prefix if we found something meaningful - if (prefix_start < prefix_end) { - std::string prefix = tool1_full.substr(prefix_start, prefix_end - prefix_start); - // Validate: prefix should contain non-whitespace and be reasonable length - bool has_content = false; - for (char c : prefix) { - if (c != ' ' && c != '\t' && c != '\n' && c != '\r') { - has_content = true; - break; - } - } - if (has_content && prefix.length() >= 2 && prefix.length() <= 20) { - LOG_DBG("Found prefix pattern in full output: '%s'\n", prefix.c_str()); - patterns.function_opener = prefix; - patterns.tool_call_start_marker = prefix; - } - } - } - } - - patterns.tool_name_field = extract_json_field_name(patterns.tool_call_opener, "name", - { "tool_name", "name", "function_name", "function" }); - - patterns.tool_args_field = - extract_json_field_name(patterns.tool_call_opener + tool1_diff.substr(func1_pos), "arguments", - { "parameters", "arguments", "args", "params", "input" }); - - patterns.tool_id_field = - extract_json_field_name(tool1_diff, "", { "tool_call_id", "tool_id", "id", "call_id" }); - - size_t param1_pos = tool2_diff.find("\"param1\""); - bool param_has_quotes = (param1_pos != std::string::npos); - size_t param2_pos = tool2_diff.find("\"param2\""); - size_t value1_pos = tool2_diff.find("\"value1\""); - - if (param1_pos == std::string::npos) { - param1_pos = tool2_diff.find("param1"); - } - if (param_has_quotes && param1_pos != std::string::npos) { - param1_pos++; - } - if (param2_pos == std::string::npos) { - param2_pos = tool2_diff.find("param2"); - } - if (param_has_quotes && param2_pos != std::string::npos) { - param2_pos++; - } - if (value1_pos == std::string::npos) { - value1_pos = 
tool2_diff.find("value1"); - } - // Only skip quote if value was actually found quoted - bool value_has_quotes = (value1_pos != std::string::npos && tool2_diff[value1_pos] == '"'); - if (value_has_quotes) { - value1_pos++; - } - - if (param1_pos != std::string::npos && value1_pos != std::string::npos) { - size_t search_start = (param1_pos > 20) ? param1_pos - 20 : 0; - std::string pre_param = tool2_diff.substr(search_start, param1_pos - search_start); - - size_t delim_pos = pre_param.find_last_of('\n'); - if (delim_pos == std::string::npos) { - delim_pos = pre_param.find_last_of('>'); - } - - if (delim_pos != std::string::npos) { - patterns.parameter_key_prefix = pre_param.substr(delim_pos + 1); - - // If prefix is empty after '>', check for GLM-style key-value tags - // Pattern: param1value1 - // In this case, the '>' ends the opening tag, and we should include the whole tag - if (patterns.parameter_key_prefix.empty() && delim_pos > 0) { - // Look for matching '<' before the '>' - size_t open_bracket = pre_param.rfind('<', delim_pos); - if (open_bracket != std::string::npos) { - // Extract the whole tag as the prefix - patterns.parameter_key_prefix = pre_param.substr(open_bracket); - } - } + // First, find and extract any common suffix from left/right + size_t suffix_len = 0; + size_t min_len = std::min(result.left.length(), result.right.length()); + while (suffix_len < min_len) { + size_t left_pos = result.left.length() - 1 - suffix_len; + size_t right_pos = result.right.length() - 1 - suffix_len; + if (result.left[left_pos] == result.right[right_pos]) { + suffix_len++; } else { - size_t start_marker = pre_param.find_last_of("<{[ \""); - if (start_marker != std::string::npos) { - patterns.parameter_key_prefix = pre_param.substr(start_marker); - } else { - patterns.parameter_key_prefix = pre_param; - } - } - - trim_whitespace(patterns.parameter_key_prefix); - - size_t key_end = param1_pos + std::string("param1").length(); - if (value1_pos > key_end) { - 
patterns.parameter_key_suffix = tool2_diff.substr(key_end, value1_pos - key_end); - } - - size_t value1_end = value1_pos + std::string("value1").length(); - if (value1_end < tool2_diff.length()) { - // Try to find XML-style closing tag like - size_t close_start = tool2_diff.find("', close_start); - if (close_end != std::string::npos) { - patterns.parameter_closer = tool2_diff.substr(close_start, close_end - close_start + 1); - } - } + break; } } - const std::string & func_context = tool1_diff; - size_t open_pos = func_context.rfind('<', func1_pos); - if (open_pos != std::string::npos && open_pos < func1_pos) { - size_t close_pos = func_context.find('>', open_pos); - if (close_pos != std::string::npos && close_pos < func1_pos) { - bool is_adjacent = true; - for (size_t k = close_pos + 1; k < func1_pos; ++k) { - char c = func_context[k]; - if (c != ' ' && c != '\t' && c != '\n' && c != '\r') { - is_adjacent = false; - break; - } - } - if (is_adjacent) { - patterns.function_opener = func_context.substr(open_pos, close_pos - open_pos + 1); - } - } else { - patterns.function_opener = func_context.substr(open_pos, func1_pos - open_pos); - } + if (suffix_len > 0) { + std::string common_suffix = result.left.substr(result.left.length() - suffix_len); + result.suffix = common_suffix + result.suffix; + result.left = result.left.substr(0, result.left.length() - suffix_len); + result.right = result.right.substr(0, result.right.length() - suffix_len); } - if (func1_pos > 0 && patterns.function_opener.empty()) { - size_t prefix_end = func1_pos; - // Skip whitespace immediately before function name - while (prefix_end > 0 && (func_context[prefix_end - 1] == ' ' || func_context[prefix_end - 1] == '\t')) { - prefix_end--; - } + // Then apply fix_tag_boundaries to move incomplete tags from prefix/suffix to left/right + result = fix_tag_boundaries(result); - // Find prefix start - look for newline or alphanumeric boundary - size_t prefix_start = prefix_end; - while (prefix_start > 0) 
{ - char c = func_context[prefix_start - 1]; - if (c == '\n' || c == '\r') { - break; - } - if (std::isalnum(static_cast(c)) || c == '_') { - prefix_start = prefix_end; // Reset - no valid prefix - break; - } - prefix_start--; - } + } while (!(result == prev_result) && result.left != left && result.right != right); - if (prefix_start < prefix_end) { - // ... - } - } - - // Fallback: look for standard delimiters - if (patterns.function_opener.empty()) { - for (int i = (int) func1_pos - 1; i >= 0; i--) { - if (func_context[i] == '{' || func_context[i] == '[' || func_context[i] == '(' || - func_context[i] == '<') { - patterns.function_opener = func_context.substr(i, func1_pos - i); - break; - } - } - } - - size_t func_name_end = func1_pos + std::string("test_function_name").length(); - if (func_name_end < func_context.length()) { - char next_char = func_context[func_name_end]; - if (next_char == '>' || next_char == ']' || next_char == '}') { - patterns.function_name_suffix = std::string(1, next_char); - } else if (next_char == '"') { - if (func_name_end + 1 < func_context.length() && func_context[func_name_end + 1] == '>') { - patterns.function_name_suffix = "\">"; - } else { - patterns.function_name_suffix = "\""; - } - } else if (next_char == '<') { - // Check if it's an XML-like tag suffix (e.g. 
<|tool_call_argument_begin|>) - // But NOT if it's a closing tag (e.g., ) - that should be function_closer - if (func_name_end + 1 < func_context.length() && func_context[func_name_end + 1] == '/') { - // This is a closing tag like , not a suffix - // Leave function_name_suffix empty; function_closer will capture this - } else { - size_t tag_close = func_context.find('>', func_name_end); - if (tag_close != std::string::npos) { - // It seems to be a tag, use it as suffix - patterns.function_name_suffix = func_context.substr(func_name_end, tag_close - func_name_end + 1); - } - } - } else if (next_char == '[') { - // Bracket-tag format: [CALL_ID]id[ARGS] (Mistral Small 3.2 style) - // Find where the JSON arguments start (at '{') - size_t json_start = func_context.find('{', func_name_end); - if (json_start != std::string::npos) { - patterns.function_name_suffix = func_context.substr(func_name_end, json_start - func_name_end); - LOG_DBG("Found bracket-tag suffix: '%s'\n", patterns.function_name_suffix.c_str()); - } - } else if (next_char == ':') { - // Indexed format: function_name:0<|marker|> or function_name:0{args} - // Find where the suffix ends - either at a tag marker or at the JSON args start - size_t suffix_end = func_name_end + 1; - // Skip the index digits - while (suffix_end < func_context.length() && - std::isdigit(static_cast(func_context[suffix_end]))) { - suffix_end++; - } - if (suffix_end < func_context.length()) { - char after_index = func_context[suffix_end]; - if (after_index == '<') { - // There's a marker after the index (e.g., :0<|tool_call_argument_begin|>) - size_t tag_close = func_context.find('>', suffix_end); - if (tag_close != std::string::npos) { - patterns.function_name_suffix = - func_context.substr(func_name_end, tag_close - func_name_end + 1); - } else { - patterns.function_name_suffix = - func_context.substr(func_name_end, suffix_end - func_name_end); - } - } else { - // Just the index part (e.g., :0) - patterns.function_name_suffix = 
func_context.substr(func_name_end, suffix_end - func_name_end); - } - } - } else if (next_char == '\n' || next_char == '\r') { - // Check for markdown code block pattern (e.g., DeepSeek R1): \n```json\n{...}\n``` - size_t code_block_start = func_context.find("```", func_name_end); - if (code_block_start != std::string::npos && code_block_start < func_name_end + 10) { - // Found code block start after function name - // Skip the optional language tag (e.g., "json") - size_t newline_after_lang = func_context.find('\n', code_block_start + 3); - if (newline_after_lang != std::string::npos) { - // function_name_suffix should include everything up to (and including) the newline after language tag - patterns.function_name_suffix = - func_context.substr(func_name_end, newline_after_lang - func_name_end + 1); - LOG_DBG("Found markdown code block suffix: '%s'\n", patterns.function_name_suffix.c_str()); - } - } - } - } - - // Function closer - size_t search_start = func_name_end; - if (!patterns.function_name_suffix.empty()) { - search_start += patterns.function_name_suffix.length(); - } - patterns.function_closer = find_closing_pattern(func_context, search_start); - - // Fix for XML-style tag formats where function_closer was detected as "}" (JSON closing) - // but should be the actual tag closer (e.g., <|tool_call_end|> or <|tool▁call▁end|>) - if (patterns.function_closer == "}" && !patterns.function_opener.empty() && - patterns.function_opener[0] == '<') { - // This is an XML-style tag format, so the closer should be a tag, not just "}" - // Find the next tag marker after the search position - size_t next_tag = func_context.find('<', search_start); - if (next_tag != std::string::npos) { - // Handle both standard <|...|> and fullwidth <|...|> formats - size_t closer_pos = find_token_closer(func_context, next_tag); - if (closer_pos != std::string::npos) { - size_t closer_len = get_token_closer_length(func_context, closer_pos); - patterns.function_closer = 
func_context.substr(next_tag, closer_pos - next_tag + closer_len); - LOG_DBG("Adjusted function_closer from '}' to tag '%s' for XML-style format\n", - patterns.function_closer.c_str()); - } - } - } - - if (patterns.function_closer == "}" && !patterns.function_name_suffix.empty() && - patterns.function_name_suffix.find("```") != std::string::npos) { - // function_name_suffix contains a code block opener, look for the closing code block - size_t code_block_end = func_context.find("```", search_start); - if (code_block_end != std::string::npos) { - // Found closing code block, extract everything from ``` to end of tool call - // The closer should be \n``` (everything from ``` to the end marker) - size_t after_block = code_block_end + 3; - // Find the next tag marker (e.g., <|tool_call_end|>) - size_t next_tag = func_context.find('<', after_block); - if (next_tag != std::string::npos) { - size_t tag_end = func_context.find('>', next_tag); - if (tag_end != std::string::npos) { - // Don't include leading newline - the JSON args parser consumes trailing whitespace - // So start exactly at the ``` (code_block_end) - patterns.function_closer = func_context.substr(code_block_end, tag_end - code_block_end + 1); - LOG_DBG("Detected markdown code block args, adjusted function_closer to: '%s'\n", - patterns.function_closer.c_str()); - } - } - } - } - - // Tool call start marker - if (patterns.function_opener.length() > 0 && - patterns.tool_call_opener.length() > patterns.function_opener.length()) { - size_t opener_start = patterns.tool_call_opener.length() - patterns.function_opener.length(); - if (opener_start > 0) { - std::string before_func = patterns.tool_call_opener.substr(0, opener_start); - size_t last_bracket = before_func.find_last_of('['); - size_t tool_obj_brace = std::string::npos; - if (last_bracket != std::string::npos && last_bracket + 1 < before_func.length()) { - tool_obj_brace = before_func.find('{', last_bracket + 1); - } - - if (tool_obj_brace != 
std::string::npos) { - patterns.tool_call_start_marker = before_func.substr(0, tool_obj_brace); - } else if (last_bracket != std::string::npos) { - patterns.tool_call_start_marker = before_func.substr(0, last_bracket + 1); - } else { - patterns.tool_call_start_marker = before_func; - } - } - } else if (patterns.tool_call_start_marker.empty()) { - // Only search if not already set (e.g., by >>> prefix detection) - patterns.tool_call_start_marker = find_tool_call_start(tool1_diff); - } - - if (patterns.tool_call_opener.empty()) { - patterns.tool_call_opener = infer_tool_call_opener(tool1_diff, tool2_diff, tool3_diff); - if (func1_pos != std::string::npos && patterns.tool_call_opener.length() > func1_pos) { - patterns.tool_call_opener = patterns.tool_call_opener.substr(0, func1_pos); - } - } - if (patterns.tool_call_closer.empty()) { - patterns.tool_call_closer = infer_tool_call_closer(tool1_diff, tool2_diff, tool3_diff); - } - - patterns.tool_call_end_marker = find_tool_call_end(func_context, func1_pos); - - if (!patterns.tool_call_end_marker.empty() && patterns.tool_call_end_marker.length() > 1) { - size_t eos_pos = patterns.tool_call_end_marker.find("<|"); - if (eos_pos == 1) { - // Check if there's a bracket/brace before the token - char first_char = patterns.tool_call_end_marker[0]; - if (first_char == ']' || first_char == '}') { - // Check if this is an actual EOS token (contains "eot_id" or "eos") - std::string token_content = patterns.tool_call_end_marker.substr(eos_pos); - if (token_content.find("eot_id") != std::string::npos || - token_content.find("eos") != std::string::npos) { - // This is an EOS token, strip it - patterns.tool_call_end_marker = patterns.tool_call_end_marker.substr(0, 1); - } - } - } - } - - // Trim whitespace - if (!patterns.tool_call_end_marker.empty()) { - size_t first = patterns.tool_call_end_marker.find_first_not_of(" \n\t"); - size_t last = patterns.tool_call_end_marker.find_last_not_of(" \n\t"); - if (first != std::string::npos && 
last != std::string::npos) { - patterns.tool_call_end_marker = patterns.tool_call_end_marker.substr(first, (last - first + 1)); - } - } - - // If tool_call_end_marker matches function_closer, it found the wrong tag. - // Use tool_call_closer instead which is derived from common suffix of diffs. - if (!patterns.function_closer.empty() && patterns.tool_call_end_marker == patterns.function_closer) { - if (!patterns.tool_call_closer.empty()) { - // Try to extract a proper closing tag from tool_call_closer - // Use rfind to get the LAST closing tag (e.g., not ) - size_t close_start = patterns.tool_call_closer.rfind("', close_start); - if (close_end != std::string::npos) { - patterns.tool_call_end_marker = - patterns.tool_call_closer.substr(close_start, close_end - close_start + 1); - } - } - } - } else if (patterns.tool_call_end_marker == ">" && !patterns.tool_call_closer.empty() && - patterns.tool_call_closer.length() > 3) { - // If the specific end marker is just ">", but the common suffix (tool_call_closer) is substantial (e.g. 
<|tool_calls_section_end|>) - // then prefer the common suffix, as finding ">" might just be hitting the end of the last function call - if (patterns.tool_call_closer.find(patterns.tool_call_end_marker) != std::string::npos) { - patterns.tool_call_end_marker = patterns.tool_call_closer; - } - } - - if (patterns.tool_call_start_marker.empty()) { - std::vector diffs = { tool1_diff, tool2_diff, tool3_diff }; - patterns.tool_call_start_marker = find_common_substring_limited(diffs, 20, " \n\t<[{"); - } - - // Truncate if needed, but skip if func_pos is 0 (marker found via full output) - if (func1_pos != std::string::npos && func1_pos > 0 && patterns.tool_call_start_marker.length() > func1_pos) { - std::string candidate = patterns.tool_call_start_marker.substr(0, func1_pos); - size_t last_opener = candidate.find_last_of("{["); - if (last_opener != std::string::npos) { - patterns.tool_call_start_marker = candidate.substr(0, last_opener); - } else { - patterns.tool_call_start_marker = candidate; - } - } - - // Ensure we don't truncate in the middle of <|...|> tokens - patterns.tool_call_start_marker = adjust_to_token_boundary(patterns.tool_call_start_marker); - patterns.tool_call_end_marker = adjust_to_token_boundary(patterns.tool_call_end_marker); - - // Final trim - if (!patterns.tool_call_start_marker.empty()) { - size_t first = patterns.tool_call_start_marker.find_first_not_of(" \n\t\r"); - size_t last = patterns.tool_call_start_marker.find_last_not_of(" \n\t\r"); - if (first != std::string::npos && last != std::string::npos) { - patterns.tool_call_start_marker = patterns.tool_call_start_marker.substr(first, (last - first + 1)); - } - } - } - - return patterns; + return result; } -internal_tool_format determine_format_from_patterns(const internal_discovered_pattern & patterns) { - LOG_DBG("%s\n", __func__); - - if (patterns.tool_call_opener.empty() && patterns.tool_call_closer.empty() && patterns.function_opener.empty() && - patterns.function_closer.empty() && 
patterns.parameter_opener.empty() && patterns.parameter_closer.empty() && - patterns.argument_separator.empty() && patterns.tool_call_start_marker.empty() && - patterns.tool_call_end_marker.empty()) { - LOG_DBG("All patterns are empty - template doesn't support tool calls\n"); - return FORMAT_UNKNOWN; +// Returns the prefix of `full` up until the first occurrence of the common prefix of `left` and `right` +std::string until_common_prefix(const std::string & full, const std::string & left, const std::string & right) { + // Find the common prefix of left and right + size_t common_prefix_len = 0; + size_t min_len = std::min(left.length(), right.length()); + while (common_prefix_len < min_len && left[common_prefix_len] == right[common_prefix_len]) { + common_prefix_len++; } - // Check for markdown code block format (Cohere Command-R Plus) - // STRUCTURAL PATTERN: Action:\n```json\n[...]\n``` - // Key indicators: - // 1. tool_call_start_marker contains "Action:" or similar plain text marker - // 2. function_name_suffix or tool_call_closer contains "```" (markdown code fence) - // 3. 
tool_call_opener starts with "[" indicating JSON array - bool has_code_fence = false; - if (!patterns.function_name_suffix.empty() && patterns.function_name_suffix.find("```") != std::string::npos) { - has_code_fence = true; - } - if (!patterns.tool_call_closer.empty() && patterns.tool_call_closer.find("```") != std::string::npos) { - has_code_fence = true; - } - bool has_action_marker = false; - if (!patterns.tool_call_start_marker.empty()) { - std::string marker_lower = patterns.tool_call_start_marker; - std::transform(marker_lower.begin(), marker_lower.end(), marker_lower.begin(), ::tolower); - if (marker_lower.find("action") != std::string::npos) { - has_action_marker = true; - } - } - if (has_code_fence && has_action_marker) { - LOG_DBG("Detected MARKDOWN_CODE_BLOCK format (Action: + ```json code fence)\n"); - return FORMAT_MARKDOWN_CODE_BLOCK; + // If there's no common prefix, return empty string + if (common_prefix_len == 0) { + return ""; } - // Check for recipient-based routing format (e.g., Functionary v3.2) - // STRUCTURAL PATTERN: The same marker is used for both content routing and tool routing - // Key indicators: - // 1. tool_call_start_marker == function_opener (same marker used for both) - // 2. No parameter markers (arguments are plain dict/JSON, not wrapped in tags) - // 3. No XML-style tags (differentiates from FUNC_TAG_WITH_NAME) - // 4. 
function_opener doesn't start with structural chars like {, [, < (differentiates from other formats) - if (!patterns.tool_call_start_marker.empty() && !patterns.function_opener.empty() && - patterns.tool_call_start_marker == patterns.function_opener) { - // Check this isn't an XML-tagged format (opener would start with '<') - if (patterns.function_opener[0] != '<' && patterns.function_opener[0] != '{' && - patterns.function_opener[0] != '[') { - // Check there are no parameter markers - if (patterns.parameter_opener.empty() && patterns.parameter_closer.empty()) { - LOG_DBG("Detected RECIPIENT_BASED format (tool_call_start_marker == function_opener = '%s')\n", - patterns.tool_call_start_marker.c_str()); - return FORMAT_RECIPIENT_BASED; - } - } + // Find the common prefix in the full string + std::string common_prefix = left.substr(0, common_prefix_len); + size_t pos = full.find(common_prefix); + + // If not found, return empty string + if (pos == std::string::npos) { + return ""; } - if (!patterns.tool_call_opener.empty()) { - if (patterns.tool_call_opener.find("{\"name\":") != std::string::npos || - patterns.tool_call_opener.find("{"name":") != std::string::npos) { - LOG_DBG("Detected JSON_NATIVE format from tool_call_opener JSON structure\n"); - return FORMAT_JSON_NATIVE; - } - } - - if (!patterns.function_opener.empty() && patterns.function_opener.find('<') == 0) { - bool has_substantial_param_markers = false; - if (!patterns.parameter_opener.empty()) { - has_substantial_param_markers = (count_non_whitespace(patterns.parameter_opener) > 1); - } - if (!has_substantial_param_markers && !patterns.parameter_closer.empty()) { - has_substantial_param_markers = (count_non_whitespace(patterns.parameter_closer) > 1); - } - - if (!has_substantial_param_markers) { - if ((!patterns.tool_call_opener.empty() && (patterns.tool_call_opener.find('[') != std::string::npos || - patterns.tool_call_opener.find('{') != std::string::npos)) || - (!patterns.tool_call_start_marker.empty() 
&& - (patterns.tool_call_start_marker.find('[') != std::string::npos || - patterns.tool_call_start_marker.find('{') != std::string::npos))) { - LOG_DBG("Detected JSON_NATIVE format (XML markers but JSON structure)\n"); - return FORMAT_JSON_NATIVE; - } - } - - LOG_DBG("Detected XML_CONSTRUCTED format from function_opener\n"); - return FORMAT_XML_CONSTRUCTED; - } - - if (!patterns.function_opener.empty() && patterns.function_opener.find('{') == 0) { - LOG_DBG("Detected JSON_NATIVE format from function_opener\n"); - return FORMAT_JSON_NATIVE; - } - - // Check for bracket-tag format: [TOOL_CALLS]name[CALL_ID]id[ARGS]{...} - // Detected when function_name_suffix contains bracket tags like [CALL_ID]...[ARGS] - if (!patterns.function_name_suffix.empty() && patterns.function_name_suffix.find('[') != std::string::npos && - patterns.function_name_suffix.find(']') != std::string::npos) { - LOG_DBG("Detected BRACKET_TAG format from function_name_suffix containing bracket tags\n"); - return FORMAT_BRACKET_TAG; - } - - if (!patterns.tool_call_start_marker.empty() && - (patterns.tool_call_start_marker.find('<') == 0 || patterns.tool_call_start_marker.find('[') == 0)) { - bool is_prefix_marker = - patterns.tool_call_start_marker.find("<|") == 0 || patterns.tool_call_start_marker.find("[|") == 0; - // Check for bracket-tag format: [TAG] style without | (e.g., [TOOL_CALLS]) - bool is_bracket_tag = patterns.tool_call_start_marker.find('[') == 0 && - patterns.tool_call_start_marker.find("[|") != 0 && - patterns.tool_call_start_marker.find(']') != std::string::npos; - if (is_bracket_tag) { - LOG_DBG("Detected BRACKET_TAG format from tool_call_start_marker\n"); - return FORMAT_BRACKET_TAG; - } - if (is_prefix_marker) { - LOG_DBG("Detected JSON_NATIVE format from tool_call_start_marker (instruction-based)\n"); - return FORMAT_JSON_NATIVE; - } - - LOG_DBG("Detected XML_CONSTRUCTED format from tool_call_start_marker\n"); - return FORMAT_XML_CONSTRUCTED; - } - - if 
(!patterns.tool_call_start_marker.empty() && patterns.tool_call_start_marker.find('{') == 0) { - LOG_DBG("Detected JSON_NATIVE format from tool_call_start_marker\n"); - return FORMAT_JSON_NATIVE; - } - - if (!patterns.tool_call_end_marker.empty() && patterns.tool_call_end_marker.find('>') == 0) { - LOG_DBG("Detected XML_CONSTRUCTED format from tool_call_end_marker\n"); - return FORMAT_XML_CONSTRUCTED; - } - - if (!patterns.tool_call_end_marker.empty() && patterns.tool_call_end_marker.find('}') == 0) { - LOG_DBG("Detected JSON_NATIVE format from tool_call_end_marker\n"); - return FORMAT_JSON_NATIVE; - } - - LOG_DBG("Format could not be determined from patterns\n"); - return FORMAT_UNKNOWN; + // Return everything before the common prefix + return full.substr(0, pos); } -internal_discovered_pattern analyze_by_differential(const common_chat_template & tmpl) { - internal_discovered_pattern patterns; - - try { - LOG_DBG("%s\n", __func__); - - auto caps = tmpl.original_caps(); - bool minja_supports_tool_calls = caps.supports_tool_calls; - if (!minja_supports_tool_calls) { - LOG_DBG("Template doesn't support standard tool calls (per minja caps detection)\n"); - } - - // Define tools for testing - json tools = { - { { "type", "function" }, - { "function", - { { "name", "test_function_name" }, - { "description", "A test function" }, - { "parameters", - { { "type", "object" }, - { "properties", - { { "param1", { { "type", "string" }, { "description", "First parameter" } } }, - { "param2", { { "type", "string" }, { "description", "Second parameter" } } } } }, - { "required", json::array({ "param1", "param2" }) } } } } } }, - { { "type", "function" }, - { "function", - { { "name", "another_test_function" }, - { "description", "Another test function" }, - { "parameters", - { { "type", "object" }, - { "properties", - { { "param1", { { "type", "string" }, { "description", "First parameter" } } } } }, - { "required", json::array({ "param1" }) } } } } } } - }; - - // Test payload 1: 
Tool definitions + user + assistant with content only (no tool calls) - json user_msg = { - { "role", "user" }, - { "content", "Please help me with a task." } - }; - - json assistant_content_only = { - { "role", "assistant" }, - { "content", "I'll help you with that task right away." } - }; - - // Test payload 2: Tool definitions + user + assistant with content + tool calls - json assistant_content_with_tool = { - { "role", "assistant" }, - { "content", "I'll help you with that task right away." }, - { "tool_calls", - json::array( - { { { "id", "call_0001" }, - { "type", "function" }, - { "function", - { { "name", "test_function_name" }, - { "arguments", json::object({ { "param1", "value1" }, { "param2", "value2" } }) } } } } }) } - }; - - // Also test with content = null + tool calls (some templates check for this) - json assistant_null_content_with_tool = { - { "role", "assistant" }, - { "content", nullptr }, - { "tool_calls", - json::array( - { { { "id", "call_0001" }, - { "type", "function" }, - { "function", - { { "name", "test_function_name" }, - { "arguments", json::object({ { "param1", "value1" }, { "param2", "value2" } }) } } } } }) } - }; - - struct templates_params inputs; - inputs.tools = tools; - inputs.add_generation_prompt = false; - - // Helper function to safely render template, handling null content issues - auto safe_render = [&](const json & messages) -> std::string { - try { - // First try with the original messages - inputs.messages = messages; - return common_chat_template_direct_apply(tmpl, inputs); - } catch (const std::exception & e) { - // If it fails, try replacing null content with empty string - json fixed_messages = messages; - for (auto & msg : fixed_messages) { - if (msg.contains("content") && msg["content"].is_null()) { - msg["content"] = ""; - } - } - inputs.messages = fixed_messages; - try { - return common_chat_template_direct_apply(tmpl, inputs); - } catch (...) 
{ - return ""; - } - } - }; - - // Render payload 1: content only - std::string output_content_only = safe_render({ user_msg, assistant_content_only }); - - // Render payload 2: content + tool calls - std::string output_content_with_tool = safe_render({ user_msg, assistant_content_with_tool }); - - // Render payload 3: null content + tool calls - std::string output_null_content_with_tool = safe_render({ user_msg, assistant_null_content_with_tool }); - - LOG_DBG("Output 1 (content only): %s\n", output_content_only.c_str()); - LOG_DBG("Output 2 (content + tools): %s\n", output_content_with_tool.c_str()); - LOG_DBG("Output 3 (null + tools): %s\n", output_null_content_with_tool.c_str()); - - // Check if the template renders tool calls in any scenario - // Test 1: content vs content+tool_calls (for templates that render both) - // Test 2: content vs null+tool_calls (for templates that only render tools when content is null) - bool renders_tool_calls_with_content = (output_content_only != output_content_with_tool); - bool renders_tool_calls_without_content = (output_content_only != output_null_content_with_tool); - - if (!renders_tool_calls_with_content && !renders_tool_calls_without_content) { - LOG_DBG("Template does NOT render tool calls in any scenario\n"); - // Return empty patterns to indicate no tool support - return patterns; - } - - LOG_DBG("Template renders tool calls, proceeding with differential analysis\n"); - - // If we get here, the template does support tool calls - // Use the original differential analysis approach but now we know it's valid - json base_msg = { - { "role", "assistant" }, - { "content", "MARKER" } - }; - - // Use nullptr for content to trigger tool_calls branch in templates that check "content is none" - // Include "id" field as some templates (e.g., Mistral Nemo) require it - json tool_msg1 = { - { "role", "assistant" }, - { "content", nullptr }, - { "tool_calls", - json::array( - { { { "id", "call_0001" }, - { "type", "function" }, - { 
"function", { { "name", "test_function_name" }, { "arguments", json::object() } } } } }) } - }; - - json tool_msg2 = { - { "role", "assistant" }, - { "content", nullptr }, - { "tool_calls", - json::array( - { { { "id", "call_0001" }, - { "type", "function" }, - { "function", - { { "name", "test_function_name" }, - { "arguments", json::object({ { "param1", "value1" }, { "param2", "value2" } }) } } } } }) } - }; - - json tool_msg3 = { - { "role", "assistant" }, - { "content", nullptr }, - { "tool_calls", - json::array( - { { { "id", "call_0001" }, - { "type", "function" }, - { "function", { { "name", "test_function_name" }, { "arguments", json::object() } } } }, - { { "id", "call_0002" }, - { "type", "function" }, - { "function", { { "name", "another_test_function" }, { "arguments", json::object() } } } } }) } - }; - - inputs.messages = { user_msg, base_msg }; - auto base_output = safe_render({ user_msg, base_msg }); - - inputs.messages = { user_msg, tool_msg1 }; - auto tool1_output = safe_render({ user_msg, tool_msg1 }); - - // Detect if template renders null content as "None" (Python/Jinja string representation) - // This happens when templates concatenate content without null checks, e.g.: - // {{ '<|im_start|>' + message.role + '\n' + content }} - // Check if "None" appears in the tool output where it shouldn't - if (tool1_output.find("None") != std::string::npos) { - // Verify this is actually from null content by checking if it goes away with empty string - json tool_msg1_empty_content = tool_msg1; - tool_msg1_empty_content["content"] = ""; - auto tool1_output_empty = safe_render({ user_msg, tool_msg1_empty_content }); - if (tool1_output_empty.find("None") == std::string::npos) { - LOG_DBG("Template renders null content as 'None', switching to empty string\n"); - patterns.requires_nonnull_content = true; - tool1_output = tool1_output_empty; - - // Update tool messages to use empty string instead of null - tool_msg1["content"] = ""; - tool_msg2["content"] = ""; 
- tool_msg3["content"] = ""; - } - } - - inputs.messages = { user_msg, tool_msg2 }; - auto tool2_output = safe_render({ user_msg, tool_msg2 }); - - inputs.messages = { user_msg, tool_msg3 }; - auto tool3_output = safe_render({ user_msg, tool_msg3 }); - - std::string tool1_diff = find_string_difference(base_output, tool1_output); - std::string tool2_diff = find_string_difference(base_output, tool2_output); - std::string tool3_diff = find_string_difference(base_output, tool3_output); - - LOG_DBG("Tool1 diff length: %zu\n", tool1_diff.length()); - LOG_DBG("Tool2 diff length: %zu\n", tool2_diff.length()); - LOG_DBG("Tool3 diff length: %zu\n", tool3_diff.length()); - - if (tool1_diff.empty() && tool2_diff.empty() && tool3_diff.empty()) { - LOG_DBG("All diffs are empty - trying without add_generation_prompt\n"); - // Try with add_generation_prompt variations - json alternative_base_msg = { - { "role", "assistant" }, - { "content", "MARKER" } - }; - - templates_params alt_inputs; - alt_inputs.tools = tools; - alt_inputs.messages = { user_msg, alternative_base_msg }; - alt_inputs.add_generation_prompt = false; - auto alt_base = common_chat_template_direct_apply(tmpl, alt_inputs); - - alt_inputs.messages = { user_msg, tool_msg1 }; - auto alt_tool1 = common_chat_template_direct_apply(tmpl, alt_inputs); - - tool1_diff = find_string_difference(alt_base, alt_tool1); - if (!tool1_diff.empty()) { - // If we found a diff using the alternative approach, we must use the corresponding - // full output for pattern extraction (otherwise diff indices will be invalid) - tool1_output = alt_tool1; - - alt_inputs.messages = { user_msg, tool_msg2 }; - tool2_diff = find_string_difference(alt_base, common_chat_template_direct_apply(tmpl, inputs)); - alt_inputs.messages = { user_msg, tool_msg3 }; - tool3_diff = find_string_difference(alt_base, common_chat_template_direct_apply(tmpl, inputs)); - } - } - - patterns = extract_patterns_from_differences(tool1_diff, tool2_diff, tool3_diff, 
tool1_output); - - LOG_DBG("=== ENDING TEMPLATE DIFFERENTIAL ANALYSIS ===\n"); - - } catch (const std::exception & e) { - LOG_DBG("Template differential analysis failed: %s\n", e.what()); +// Returns the suffix of `full` after the last occurrence of the common suffix of `left` and `right` +std::string after_common_suffix(const std::string & full, const std::string & left, const std::string & right) { + // Find the common suffix of left and right (compare from the end) + size_t common_suffix_len = 0; + size_t min_len = std::min(left.length(), right.length()); + while (common_suffix_len < min_len && + left[left.length() - 1 - common_suffix_len] == right[right.length() - 1 - common_suffix_len]) { + common_suffix_len++; } - return patterns; + // If there's no common suffix, return empty string + if (common_suffix_len == 0) { + return ""; + } + + // Extract the common suffix + std::string common_suffix = left.substr(left.length() - common_suffix_len); + + // Find the last occurrence of the common suffix in the full string + size_t pos = full.rfind(common_suffix); + + // If not found, return empty string + if (pos == std::string::npos) { + return ""; + } + + // Return everything after the common suffix + return full.substr(pos + common_suffix_len); } + +std::vector segmentize_markers(const std::string & text) { + std::vector retval; + bool in_marker = false; + char marker_opener = '\0'; + + auto is_marker_opener = [](char c) -> bool { return c == '<' || c == '['; }; + auto is_marker_closer = [](char op, char c) -> bool { return (op == '<' && c == '>') || (op == '[' && c == ']'); }; + + size_t last_border = 0; + + for (size_t cur_pos = 0; cur_pos < text.length(); cur_pos++) { + if (!in_marker && is_marker_opener(text[cur_pos])) { + if (last_border < cur_pos) { + retval.push_back(segment(segment_type::TEXT, text.substr(last_border, cur_pos - last_border))); + } + last_border = cur_pos; + in_marker = true; + marker_opener = text[cur_pos]; + } else if (in_marker && 
is_marker_closer(marker_opener, text[cur_pos])) { + // no need to check because last_border will always be smaller + retval.push_back(segment(segment_type::MARKER, text.substr(last_border, cur_pos - last_border + 1))); + last_border = cur_pos + 1; + in_marker = false; + marker_opener = '\0'; + } + } + if (last_border < text.length()) { + retval.push_back(segment(segment_type::TEXT, text.substr(last_border))); + } + return retval; +} + diff --git a/common/chat-auto-parser-helpers.h b/common/chat-auto-parser-helpers.h index 5162b09fbe..e9534d6715 100644 --- a/common/chat-auto-parser-helpers.h +++ b/common/chat-auto-parser-helpers.h @@ -1,133 +1,22 @@ #pragma once -#include +#include "chat-diff-analyzer.h" #include -#include -#include "chat.h" -#include "nlohmann/json.hpp" +std::string trim_whitespace(const std::string & str); +std::string trim_leading_whitespace(const std::string & str); +std::string trim_trailing_whitespace(const std::string & str); +std::string trim_trailing_newlines(const std::string & str); -using json = nlohmann::ordered_json; +// calculate a diff split (longest common prefix, longest common suffix excluding prefix, +// mismatched part on the left, mismatched part on the right) between two strings +diff_split calculate_diff_split(const std::string & left, const std::string & right); -namespace minja { -class chat_template; -} +// Returns the prefix of `full` up until the first occurrence of the common prefix of `left` and `right` +std::string until_common_prefix(const std::string & full, const std::string & left, const std::string & right); -void trim_whitespace(std::string & str); -void trim_trailing_newlines(std::string & str); -size_t count_non_whitespace(const std::string & str); -size_t find_last_of_any(const std::string & str, const std::string & chars, size_t start_pos); +// Returns the suffix of `full` after the last occurrence of the common suffix of `left` and `right` +std::string after_common_suffix(const std::string & full, const 
std::string & left, const std::string & right); -std::string extract_tag_name(const std::string & tag); -std::string create_closing_tag(const std::string & opening_tag); - -std::string find_common_prefix(const std::vector & strings); -std::string find_common_suffix_generic(const std::vector & strings); -std::string find_common_substring_limited(const std::vector & strings, - size_t max_length, - const std::string & delimiters); - -bool string_ends_with(const std::string & str, const std::string & suffix); -std::string apply_template(common_chat_template & tmpl, - const struct templates_params & inputs, - const std::optional & messages_override = std::nullopt, - const std::optional & tools_override = std::nullopt, - const std::optional & additional_context = std::nullopt); - -// Adjust a marker string to ensure it ends at a complete <|...|> token boundary -// This prevents truncation mid-token -std::string adjust_to_token_boundary(const std::string & str); - -// Find the position of a token opener (<| or <|) in a string -// Returns std::string::npos if not found -size_t find_token_opener(const std::string & str, size_t start_pos = 0); - -// Find the position of a token closer (|> or |>) in a string -// Returns std::string::npos if not found -size_t find_token_closer(const std::string & str, size_t start_pos = 0); - -// Get the length of the token opener at the given position (2 for <| or 4 for <|) -// Returns 0 if no valid opener at position -size_t get_token_opener_length(const std::string & str, size_t pos); - -// Get the length of the token closer at the given position (2 for |> or 4 for |>) -// Returns 0 if no valid closer at position -size_t get_token_closer_length(const std::string & str, size_t pos); - -// Strip EOS/end-of-sentence tokens from the end of a string -// Handles both standard (<|eos|>, <|eot_id|>) and fullwidth (<|end▁of▁sentence|>) formats -std::string strip_eos_token(const std::string & str); - -// Internal structure for differential analysis 
(used during pattern extraction) -struct internal_discovered_pattern { - std::string tool_call_opener; - std::string tool_call_closer; - std::string function_opener; - std::string function_closer; - std::string function_name_suffix; - std::string parameter_opener; - std::string parameter_closer; - std::string argument_separator; - std::string parameter_key_prefix; - std::string parameter_key_suffix; - std::string tool_call_start_marker; - std::string tool_call_end_marker; - std::string reasoning_start_marker; - std::string reasoning_end_marker; - std::string content_start_marker; - std::string content_end_marker; - std::string tool_name_field = "name"; - std::string tool_args_field = "arguments"; - std::string tool_id_field; - // For markdown code block format (Cohere Command-R Plus) - std::string code_block_marker; // e.g., "Action:" - std::string code_block_language; // e.g., "json" - // Flag: template renders null content as "None" string, requires empty string instead - bool requires_nonnull_content = false; -}; - -// Internal enum for format classification -enum internal_tool_format { - FORMAT_JSON_NATIVE, - FORMAT_XML_CONSTRUCTED, - FORMAT_BRACKET_TAG, // [TOOL_CALLS]name[CALL_ID]id[ARGS]{...} (Mistral Small 3.2) - FORMAT_RECIPIENT_BASED, // >>>recipient\n{content} (Functionary v3.2) - FORMAT_MARKDOWN_CODE_BLOCK, // Action:\n```json\n[...]\n``` (Cohere Command-R Plus) - FORMAT_CONTENT_ONLY, - FORMAT_UNKNOWN -}; - -// Find the suffix that differentiates an extended string from a base string -std::string find_string_difference(const std::string & base, const std::string & extended); - -// Extract JSON field name from an opener string -std::string extract_json_field_name(const std::string & opener, - const std::string & default_name, - const std::vector & candidates); - -// Find a closing pattern in a string starting from a given position -std::string find_closing_pattern(const std::string & diff, size_t func_pos); - -// Find the tool call start marker in a 
difference string -std::string find_tool_call_start(const std::string & diff); - -// Find the tool call end marker in a difference string -std::string find_tool_call_end(const std::string & diff, size_t func_pos); - -// Infer the tool call opener from multiple difference strings -std::string infer_tool_call_opener(const std::string & diff1, const std::string & diff2, const std::string & diff3); - -// Infer the tool call closer from multiple difference strings -std::string infer_tool_call_closer(const std::string & diff1, const std::string & diff2, const std::string & diff3); - -// Extract patterns from differences between tool calls -internal_discovered_pattern extract_patterns_from_differences(const std::string & tool1_diff, - const std::string & tool2_diff, - const std::string & tool3_diff, - const std::string & tool1_full = ""); - -// Determine the format classification from discovered patterns -internal_tool_format determine_format_from_patterns(const internal_discovered_pattern & patterns); - -// Analyze template using differential analysis (internal use) -internal_discovered_pattern analyze_by_differential(const common_chat_template & tmpl); +// Segmentize text into markers and non-marker fragments +std::vector segmentize_markers(const std::string & text); \ No newline at end of file diff --git a/common/chat-auto-parser.h b/common/chat-auto-parser.h index 6062f4d37a..c6587667d1 100644 --- a/common/chat-auto-parser.h +++ b/common/chat-auto-parser.h @@ -1,183 +1,54 @@ #pragma once +#include "chat-diff-analyzer.h" #include "chat.h" +#include "chat-peg-parser.h" #include "common.h" -#include "jinja/runtime.h" #include #include -#include using json = nlohmann::ordered_json; -// Phase 1 result: Content and reasoning structure (analyzed without tools) -struct content_structure { - // Reasoning handling mode - enum reasoning_mode_type { - REASONING_NONE, // No reasoning markers detected - REASONING_OPTIONAL, // ... 
may appear before content - REASONING_FORCED_OPEN, // Template ends with open reasoning tag (thinking_forced_open) - }; - - reasoning_mode_type reasoning_mode = REASONING_NONE; - std::string reasoning_start; // e.g., "", "<|START_THINKING|>" - std::string reasoning_end; // e.g., "", "<|END_THINKING|>" - - // Content wrapping mode - enum content_mode_type { - CONTENT_PLAIN, // No content markers - CONTENT_ALWAYS_WRAPPED, // ... always present - CONTENT_WRAPPED_WITH_REASONING, // Content wrapped only when reasoning present - }; - - content_mode_type content_mode = CONTENT_PLAIN; - std::string content_start; // e.g., "", "<|START_RESPONSE|>" - std::string content_end; // e.g., "", "<|END_RESPONSE|>" -}; - -// Phase 2 result: Tool call structure (layered on Phase 1) -struct tool_call_structure { - bool supports_tools = false; - - // Container markers (what wraps all tool calls) - std::string tool_section_start; // e.g., "", "[TOOL_CALLS]", "", "" - std::string tool_section_end; // e.g., "", "]", "", "" - - // Function format (how individual functions are structured) - enum function_format { - FUNC_JSON_OBJECT, // {"name": "X", "arguments": {...}} - FUNC_TAG_WITH_NAME, // {...} - FUNC_TAG_NAME_ONLY, // ... 
where X is function name (rare) - FUNC_PREFIXED_INDEXED, // <|tool_call_begin|>functions.X:0<|tool_call_argument_begin|>{...}<|tool_call_end|> - FUNC_NAME_AS_KEY, // [{"function_name": {...arguments...}}] (Apertus-style) - FUNC_BRACKET_TAG, // [TOOL_CALLS]X[CALL_ID]id[ARGS]{...} (Mistral Small 3.2 style) - FUNC_RECIPIENT_BASED, // >>>recipient\n{content} where recipient is "all" (content) or function name (tools) - FUNC_MARKDOWN_CODE_BLOCK, // Action:\n```json\n[...]\n``` (Cohere Command-R Plus style) - }; - - function_format function_format = FUNC_JSON_OBJECT; - - // For FUNC_JSON_OBJECT format - field names (may vary between templates) - std::string name_field = "name"; // Could be "tool_name", "function" - std::string args_field = "arguments"; // Could be "parameters", "params", "input" - std::string id_field; // Optional: "id", "tool_call_id", "" - - // For FUNC_TAG_WITH_NAME format - std::string function_prefix; // e.g., "" - std::string function_close; // e.g., "" - - // For FUNC_PREFIXED_INDEXED format (e.g., Kimi-K2) - std::string per_call_start; // e.g., "<|tool_call_begin|>" - std::string function_namespace; // e.g., "functions." 
(prefix before function name) - std::string args_marker; // e.g., "<|tool_call_argument_begin|>" - std::string per_call_end; // e.g., "<|tool_call_end|>" - - // For FUNC_BRACKET_TAG format (e.g., Mistral Small 3.2) - std::string id_marker; // e.g., "[CALL_ID]" - marker before tool call ID - - // For FUNC_MARKDOWN_CODE_BLOCK format (e.g., Cohere Command-R Plus) - std::string code_block_marker; // e.g., "Action:" - text marker before code block - std::string code_block_language; // e.g., "json" - language identifier in code fence - - // Argument format (how arguments are structured within a function) - enum argument_format { - ARGS_JSON, // Standard JSON object: {"key": "value", ...} - ARGS_TAGGED, // XML-style: value - ARGS_KEY_VALUE_TAGS, // keyvalue (GLM-4.6) - }; - - argument_format argument_format = ARGS_JSON; - - // For ARGS_TAGGED format - std::string arg_prefix; // e.g., "" - std::string arg_close; // e.g., "", "" - std::string arg_separator; // e.g., "", "\n" - - // Flag: template renders null content as "None" string, requires empty string instead - bool requires_nonnull_content = false; -}; - -// Combined result of unified template analysis -struct template_analysis_result { - content_structure content; - tool_call_structure tools; - - // Preserved tokens for tokenizer (union of all markers) - std::vector preserved_tokens; -}; - -// Template analyzer that uses two-phase differential analysis -class template_analyzer { - public: - // Main entry point: Unified two-phase analysis - static template_analysis_result analyze_template(const common_chat_template & tmpl); - - // Phase 1 - Analyze content and reasoning structure (no tools) - static content_structure analyze_content_structure(const common_chat_template & tmpl); - - // Phase 2 - Analyze tool call structure (layered on Phase 1) - static tool_call_structure analyze_tool_structure(const common_chat_template & tmpl, - const content_structure & content); - - private: - // Phase 1 detection helpers - static 
void detect_reasoning_markers(const common_chat_template & tmpl, content_structure & cs); - static void detect_content_markers(const common_chat_template & tmpl, content_structure & cs); - static content_structure::reasoning_mode_type detect_reasoning_mode(const content_structure & cs, - const std::string & prompt); - - // Phase 2 detection helpers - static void detect_tool_markers(const common_chat_template & tmpl, tool_call_structure & ts); - static void detect_function_format(const common_chat_template & tmpl, tool_call_structure & ts); - static void detect_argument_format(const common_chat_template & tmpl, tool_call_structure & ts); - - // Phase 2 helper methods - static void analyze_json_format(tool_call_structure & ts, const struct internal_discovered_pattern & discovered); - static void analyze_xml_format(tool_call_structure & ts, const struct internal_discovered_pattern & discovered); - static void analyze_bracket_tag_format(tool_call_structure & ts, - const struct internal_discovered_pattern & discovered); - static void analyze_recipient_based_format(tool_call_structure & ts, - const struct internal_discovered_pattern & discovered); - static void analyze_markdown_code_block_format(tool_call_structure & ts, - const struct internal_discovered_pattern & discovered); - - // Helper to collect preserved tokens from analysis result - static void collect_preserved_tokens(template_analysis_result & result); -}; - struct templates_params { json messages; json tools; common_chat_tool_choice tool_choice = COMMON_CHAT_TOOL_CHOICE_AUTO; json json_schema; bool parallel_tool_calls = true; - common_reasoning_format reasoning_format = COMMON_REASONING_FORMAT_AUTO; - bool stream = true; + common_reasoning_format reasoning_format = COMMON_REASONING_FORMAT_AUTO; + bool stream = true; std::string grammar; bool add_generation_prompt = false; - bool enable_thinking = true; - std::chrono::system_clock::time_point now = std::chrono::system_clock::now(); + bool enable_thinking = 
true; + std::chrono::system_clock::time_point now = std::chrono::system_clock::now(); json extra_context; - bool add_bos = false; - bool add_eos = false; - bool is_inference = true; + bool add_bos = false; + bool add_eos = false; + bool is_inference = true; bool add_inference = false; - bool mark_input = true; // whether to mark input strings in the jinja context + bool mark_input = true; // whether to mark input strings in the jinja context }; class universal_peg_generator { public: - // Generate parser from analysis result - static common_chat_params generate_parser(const template_analysis_result & analysis, - const common_chat_template & tmpl, - const struct templates_params & inputs); + static common_chat_params generate_parser(const common_chat_template & tmpl, + const struct templates_params & inputs); + + static common_chat_params generate_parser(const common_chat_template & tmpl, + const struct templates_params & inputs, + const diff_analysis_result & analysis); private: // Build unified parser (single code path for all formats) - static common_peg_arena build_parser(const template_analysis_result & analysis, - const common_chat_template & tmpl, - const struct templates_params & inputs, - bool thinking_forced_open); + static common_peg_arena build_parser(const diff_analysis_result & analysis, + const struct templates_params & inputs, + bool thinking_forced_open, + bool thinking_forced_closed = false); + + // Build tool calling parser based on detected format + static common_peg_parser build_tool_parser(common_chat_peg_unified_builder & p, + const diff_analysis_result & analysis, + const templates_params & inputs, + const common_peg_parser & reasoning); }; diff --git a/common/chat-diff-analyzer.cpp b/common/chat-diff-analyzer.cpp new file mode 100644 index 0000000000..6afb9342c2 --- /dev/null +++ b/common/chat-diff-analyzer.cpp @@ -0,0 +1,1670 @@ +#include "chat-diff-analyzer.h" + +#include "chat-auto-parser-helpers.h" +#include "chat-auto-parser.h" 
+#include "chat.h"
+#include "log.h"
+#include "nlohmann/json.hpp"
+
+#include <algorithm>
+#include <cctype>
+
+#define ANSI_RESET "\033[0m"
+#define ANSI_PURPLE "\033[1m\x1b[38;5;126m"
+#define ANSI_ORANGE "\033[1m\x1b[38;5;214m"
+#define ANSI_RED "\033[1m\x1b[38;5;196m"
+
+using json = nlohmann::ordered_json;
+
+static std::vector<std::function<void(const common_chat_template &, diff_analysis_result &)>> workarounds(
+    { // Old reasoning Qwen templates - they don't really display reasoning content, but we still want to
+      // support reasoning on them
+      [](const common_chat_template & tmpl, diff_analysis_result & analysis) -> void {
+          if (tmpl.src.find("content.split('</think>')") != std::string::npos &&
+              analysis.reasoning == reasoning_mode::NONE) {
+              analysis.reasoning = reasoning_mode::FORCED_OPEN;
+              analysis.markers.reasoning_start = "<think>";
+              analysis.markers.reasoning_end = "</think>";
+              analysis.preserved_tokens.push_back("<think>");
+              analysis.preserved_tokens.push_back("</think>");
+              LOG_DBG(ANSI_ORANGE "[Patch: old Qwen/Deepseek thinking template]\n" ANSI_RESET);
+          }
+      },
+      // Granite 3.3, with separate reasoning and content markers
+      [](const common_chat_template & tmpl, diff_analysis_result & analysis) -> void {
+          if (tmpl.src.find("Write your thoughts between <think></think> and write your response between "
+                            "<response></response>") != std::string::npos) {
+              analysis.reasoning = reasoning_mode::TAG_BASED;
+              analysis.markers.reasoning_start = "<think>";
+              analysis.markers.reasoning_end = "</think>";
+              analysis.preserved_tokens.push_back("<think>");
+              analysis.preserved_tokens.push_back("</think>");
+              analysis.content = content_mode::WRAPPED_WITH_REASONING;
+              analysis.markers.content_start = "<response>";
+              analysis.markers.content_end = "</response>";
+              analysis.preserved_tokens.push_back("<response>");
+              analysis.preserved_tokens.push_back("</response>");
+              LOG_DBG(ANSI_ORANGE "[Patch: Granite 3.3]\n" ANSI_RESET);
+          }
+      },
+      // Cohere Command R+ - content wrapped in <|CHATBOT_TOKEN|>...<|END_OF_TURN_TOKEN|>
+      [](const common_chat_template & tmpl, diff_analysis_result & analysis) -> void {
+          if (tmpl.src.find("<|CHATBOT_TOKEN|>") != std::string::npos &&
+              tmpl.src.find("<|END_OF_TURN_TOKEN|>") != std::string::npos && analysis.markers.content_start.empty()) {
+              analysis.content = content_mode::ALWAYS_WRAPPED;
+              analysis.markers.content_start = "<|CHATBOT_TOKEN|>";
+              analysis.markers.content_end = "<|END_OF_TURN_TOKEN|>";
+              analysis.preserved_tokens.push_back("<|CHATBOT_TOKEN|>");
+              analysis.preserved_tokens.push_back("<|END_OF_TURN_TOKEN|>");
+              LOG_DBG(ANSI_ORANGE "[Patch: Cohere Command R+]\n" ANSI_RESET);
+          }
+      },
+      // Functionary - no tool call section delimiter
+      [](const common_chat_template & tmpl, diff_analysis_result & analysis) -> void {
+          if (tmpl.src.find("set has_code_interpreter = tools | selectattr(\"type\", \"equalto\", "
+                            "\"code_interpreter\") | list | length > 0") != std::string::npos) {
+              analysis.content = content_mode::PLAIN;
+              analysis.markers.content_end = "";
+              analysis.markers.func_name_prefix = "";
+              analysis.markers.tool_section_start = "";
+              analysis.markers.tool_section_end = "";
+              analysis.markers.per_call_start = "<function=";
+              analysis.markers.func_close = ">";
+              analysis.markers.per_call_end = "</function>";
+              analysis.preserved_tokens.push_back("<|eom_id|>");
+              analysis.preserved_tokens.push_back("<function=");
+              analysis.preserved_tokens.push_back("</function>");
+              LOG_DBG(ANSI_ORANGE "[Patch: Functionary 3.1]\n" ANSI_RESET);
+          }
+      },
+      // DeepSeek-R1-Distill-Qwen
+      [](const common_chat_template & tmpl, diff_analysis_result & analysis) -> void {
+          if (tmpl.src.find(
+                  "{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>'") !=
+              std::string::npos) {
+              analysis.markers.tool_section_start = "<|tool▁calls▁begin|>";
+              analysis.markers.tool_section_end = "<|tool▁calls▁end|>";
+              analysis.markers.per_call_start = "<|tool▁call▁begin|>function";
+              analysis.markers.func_name_prefix = "<|tool▁sep|>";
+              analysis.markers.per_call_end = "<|tool▁call▁end|>";
+              analysis.markers.func_close = "```";
+          }
+      } });
+
+// Common JSON structures
+static json params_schema = {
+    { "type", "object" },
+    { "properties",
+      { { "first", { { "type", "string" }, { "description", "First 
argument" } } }, + { "second", { { "type", "string" }, { "description", "Second argument" } } } } }, + { "required", json::array({}) } +}; + +static json tools = json::array({ + { { "type", "function" }, + { "function", + json{ { "name", "foofoo" }, { "description", "Test function foo" }, { "parameters", params_schema } } } }, + { { "type", "function" }, + { "function", + json{ { "name", "barbar" }, { "description", "Test function bar" }, { "parameters", params_schema } } } } +}); + +static json user_msg = json{ + { "role", "user" }, + { "content", "Hello" } +}; + +static json build_tool_call(const std::string & name, const json & args, const std::string & id = "call00001") { + return json{ + { "id", id }, + { "type", "function" }, + { "function", json{ { "name", name }, { "arguments", args } } } + }; +} + +static json first_tool_call_zero_args = build_tool_call("foofoo", json::object(), "call00001"); +static json first_tool_call_one_arg = build_tool_call("foofoo", + json{ + { "first", "XXXX" } +}, + "call00001"); +static json first_tool_call_one_arg_other_val = build_tool_call("foofoo", + json{ + { "first", "YYYY" } +}, + "call00001"); +static json first_tool_call_other_arg = build_tool_call("foofoo", + json{ + { "second", "YYYY" } +}, + "call00001"); +static json first_tool_call = build_tool_call("foofoo", + json{ + { "first", "XXXX" }, + { "second", "YYYY" } +}, + "call00001"); +static json second_tool_call = build_tool_call("barbar", + json{ + { "first", "XXXX" }, + { "second", "YYYY" } +}, + "call00002"); +// Tool call variants with different IDs for call_id detection +static json first_tool_call_alt_id = build_tool_call("foofoo", + json{ + { "first", "XXXX" }, + { "second", "YYYY" } +}, + "call99999"); + +std::string differential_analyzer::apply_template(const common_chat_template & tmpl, const template_params & params) { + templates_params tmpl_params; + tmpl_params.messages = params.messages; + tmpl_params.tools = params.tools; + 
tmpl_params.add_generation_prompt = params.add_generation_prompt; + tmpl_params.enable_thinking = params.enable_thinking; + + if (params.extra_context) { + tmpl_params.extra_context = *params.extra_context; + } + tmpl_params.extra_context["enable_thinking"] = params.enable_thinking; + + try { + return common_chat_template_direct_apply(tmpl, tmpl_params); + } catch (const std::exception & e) { + LOG_DBG("Template application failed: %s\n", e.what()); + return ""; + } +} + +std::optional differential_analyzer::compare_variants( + const common_chat_template & tmpl, + const template_params & params_A, + const std::function & params_modifier) { + // Create variant B by copying A + template_params params_B = params_A; + + // Apply modifier to create variant B + if (params_modifier) { + params_modifier(params_B); + } + + // Apply template to both variants + std::string output_A = apply_template(tmpl, params_A); + std::string output_B = apply_template(tmpl, params_B); + + // Check for template application failures + if (output_A.empty() || output_B.empty()) { + return std::nullopt; + } + + // Calculate diff and return result with both outputs + compare_variants_result result; + result.diff = calculate_diff_split(output_A, output_B); + result.output_A = output_A; + result.output_B = output_B; + + return result; +} + +diff_analysis_result differential_analyzer::analyze(const common_chat_template & tmpl) { + diff_analysis_result result; + + LOG_DBG(ANSI_PURPLE "=== Starting differential analysis ===\n" ANSI_RESET); + + auto caps = tmpl.original_caps(); + result.supports_tools = caps.supports_tools || caps.supports_tool_calls; + result.supports_parallel_calls = caps.supports_parallel_tool_calls; + + analyze_reasoning(tmpl, result); + analyze_content(tmpl, result); + if (result.supports_tools) { + analyze_tools(tmpl, result); + } + collect_preserved_tokens(result); + + for (auto & workaround : workarounds) { + workaround(tmpl, result); + } + + LOG_DBG(ANSI_PURPLE "=== 
Differential analysis complete ===\n" ANSI_RESET); + + return result; +} + +void differential_analyzer::analyze_reasoning(const common_chat_template & tmpl, diff_analysis_result & result) { + LOG_DBG(ANSI_ORANGE "Phase 1: Reasoning analysis\n" ANSI_RESET); + + compare_reasoning_presence(tmpl, result); + compare_thinking_enabled(tmpl, result); + if (result.supports_tools) { + compare_reasoning_scope(tmpl, result); + } +} + +void differential_analyzer::compare_reasoning_presence(const common_chat_template & tmpl, + diff_analysis_result & result) { + json user_msg = json{ + { "role", "user" }, + { "content", "Hello" } + }; + + json assistant_no_reasoning = json{ + { "role", "assistant" }, + { "content", "I can help." } + }; + + json assistant_with_reasoning = json{ + { "role", "assistant" }, + { "content", "I can help." }, + { "reasoning_content", "Let me think about this." } + }; + + template_params params; + params.messages = json::array({ user_msg, assistant_no_reasoning }); + params.add_generation_prompt = false; + params.enable_thinking = true; + + auto comparison = compare_variants( + tmpl, params, [&](template_params & p) { p.messages = json::array({ user_msg, assistant_with_reasoning }); }); + + if (!comparison) { + LOG_DBG(ANSI_ORANGE "R1: Template application failed, skipping reasoning detection\n" ANSI_RESET); + return; + } + + const auto & diff = comparison->diff; + + LOG_DBG(ANSI_ORANGE "R1 diff - suffix: " ANSI_RESET "'%s', " ANSI_ORANGE "left: " ANSI_RESET "'%s', " ANSI_ORANGE + "right: " ANSI_ORANGE "'%s'\n" ANSI_RESET, + diff.suffix.c_str(), diff.left.c_str(), diff.right.c_str()); + + const std::string reasoning_content = "Let me think about this."; + + if (!diff.right.empty() && diff.right.find(reasoning_content) != std::string::npos) { + auto seg = segmentize_markers(diff.right); + if (seg.size() >= 3 && trim_whitespace(seg[1].value) == reasoning_content) { + // easy one: opening marker - reasoning - closing marker (possibly with trailing 
whitespace) + result.reasoning = reasoning_mode::TAG_BASED; + result.markers.reasoning_start = trim_whitespace(seg[0].value); + result.markers.reasoning_end = trim_leading_whitespace(seg[2].value); + for (size_t i = 3; i < seg.size(); i++) { + result.markers.reasoning_end += seg[i].value; + } + // we always truncate because this doesn't really influence correctness but model might not always generate newline + result.markers.reasoning_end = trim_whitespace(result.markers.reasoning_end); + } else if (seg.size() >= 2 && trim_whitespace(seg[0].value) == reasoning_content) { + // delimited + result.reasoning = reasoning_mode::DELIMITER; + result.markers.reasoning_end = trim_leading_whitespace(seg[1].value); + for (size_t i = 2; i < seg.size(); i++) { + result.markers.reasoning_end += seg[i].value; + } + result.markers.reasoning_end = trim_whitespace(result.markers.reasoning_end); + } else if (seg.size() == 1 && trim_whitespace(seg[0].value) == reasoning_content) { + // the marker might be in the prefix actually, let's check for case of + // left: empty + // right: reasoning_content + // suffix: content + // prefix: ... 
+ auto suf_seg = segmentize_markers(diff.suffix); + if (trim_whitespace(diff.left).empty() && suf_seg.size() >= 2 && suf_seg[0].type == segment_type::MARKER && + trim_whitespace(suf_seg[1].value).substr(0, 11) == "I can help.") { + auto pre_seg = segmentize_markers(diff.prefix); + if (pre_seg[pre_seg.size() - 1].type == segment_type::MARKER || + (pre_seg.size() > 1 && trim_whitespace(pre_seg[pre_seg.size() - 1].value).empty() && + pre_seg[pre_seg.size() - 2].type == segment_type::MARKER)) { + auto marker_seg = pre_seg[pre_seg.size() - 1]; + if (marker_seg.type == segment_type::TEXT) { + marker_seg = pre_seg[pre_seg.size() - 2]; + } + result.reasoning = reasoning_mode::FORCED_CLOSED; + result.markers.reasoning_start = trim_whitespace(marker_seg.value); + result.markers.reasoning_end = trim_whitespace(suf_seg[0].value); + } + } + } + } +} + +void differential_analyzer::compare_thinking_enabled(const common_chat_template & tmpl, diff_analysis_result & result) { + json user_msg = json{ + { "role", "user" }, + { "content", "Hello" } + }; + + template_params params; + params.messages = json::array({ user_msg }); + params.add_generation_prompt = true; + params.enable_thinking = false; + + auto comparison = compare_variants(tmpl, params, [&](template_params & p) { p.enable_thinking = true; }); + + if (!comparison) { + LOG_DBG("R2: Template application failed\n"); + return; + } + + const auto & diff = comparison->diff; + + LOG_DBG("R2 diff - suffix: '%s', left: '%s', right: '%s'\n", diff.suffix.c_str(), diff.left.c_str(), + diff.right.c_str()); + + std::string left_trimmed = diff.left; + trim_whitespace(left_trimmed); + + if (left_trimmed.empty() && !diff.right.empty()) { + std::string right_trimmed = diff.right; + trim_whitespace(right_trimmed); + + if (!right_trimmed.empty() && string_ends_with(comparison->output_B, right_trimmed)) { + if (result.markers.reasoning_start.empty()) { + result.markers.reasoning_start = right_trimmed; + result.reasoning = 
reasoning_mode::FORCED_OPEN; + LOG_DBG("R2: Detected forced-open reasoning with start marker: '%s'\n", right_trimmed.c_str()); + } + } + } + + if (result.markers.reasoning_start.empty() && !result.markers.reasoning_end.empty()) { + result.reasoning = reasoning_mode::DELIMITER; + LOG_DBG("R2: Delimiter-based reasoning detected (empty start, end: '%s')\n", + result.markers.reasoning_end.c_str()); + } + + // Check for FORCED_CLOSED: when enable_thinking=false produces both start and end markers, + // but enable_thinking=true produces only the start marker + if (!comparison->output_A.empty() && !comparison->output_B.empty()) { + std::string output_A = comparison->output_A; // enable_thinking=false + std::string output_B = comparison->output_B; // enable_thinking=true + + // Both should end with the assistant role marker + // Check if output_A has both reasoning_start and reasoning_end markers + // while output_B has only reasoning_start + if (!result.markers.reasoning_start.empty()) { + // Check if output_A contains both start and end markers + bool A_has_start = output_A.find(result.markers.reasoning_start) != std::string::npos; + bool A_has_end = !result.markers.reasoning_end.empty() && + output_A.find(result.markers.reasoning_end) != std::string::npos; + + // Check if output_B contains only the start marker (and not the end marker) + bool B_has_start = output_B.find(result.markers.reasoning_start) != std::string::npos; + bool B_has_end = !result.markers.reasoning_end.empty() && + output_B.find(result.markers.reasoning_end) != std::string::npos; + + // For FORCED_CLOSED: A should have both, B should have only start + if (A_has_start && A_has_end && B_has_start && !B_has_end) { + result.reasoning = reasoning_mode::FORCED_CLOSED; + LOG_DBG("R2: Detected forced-closed reasoning\n"); + } + } else if (!result.markers.reasoning_end.empty()) { + // We might not have detected the reasoning open marker until now, + // but this is another chance to do so + auto diff = 
comparison->diff; + auto diff_rt = trim_whitespace(diff.right); + auto diff_lt = trim_whitespace(diff.left); + if (diff_rt.empty() && diff_lt == result.markers.reasoning_end) { + auto seg = segmentize_markers(trim_whitespace(diff.prefix)); + if (!seg.empty() && seg[seg.size() - 1].type == MARKER) { // this is FORCED_CLOSED + result.markers.reasoning_start = seg[seg.size() - 1].value; + result.reasoning = reasoning_mode::FORCED_CLOSED; + } + } + } + } + + // Check for slash-in-tag pattern: vs + // diff shows: suffix="think>", left="/", right="" (or vice versa) + if (result.markers.reasoning_start.empty() && result.markers.reasoning_end.empty()) { + if (diff.right.empty() && trim_whitespace(diff.left) == "/") { + auto seg_A = segmentize_markers(trim_trailing_whitespace(comparison->output_A)); + auto seg_B = segmentize_markers(trim_trailing_whitespace(comparison->output_B)); + if (!seg_A.empty() && !seg_B.empty() && seg_A[seg_A.size() - 1].type == segment_type::MARKER && + seg_B[seg_B.size() - 1].type == segment_type::MARKER) { + result.reasoning = reasoning_mode::FORCED_CLOSED; + result.markers.reasoning_start = seg_B[seg_B.size() - 1].value; + result.markers.reasoning_end = seg_A[seg_A.size() - 1].value; + } + } + } +} + +void differential_analyzer::compare_reasoning_scope(const common_chat_template & tmpl, diff_analysis_result & result) { + json assistant_reasoning_content = json{ + { "role", "assistant" }, + { "content", "Here is my response." }, + { "reasoning_content", "Let me think." } + }; + + json assistant_reasoning_tools = json{ + { "role", "assistant" }, + { "content", nullptr }, + { "reasoning_content", "Let me think." 
}, + { "tool_calls", + json::array({ build_tool_call("foofoo", json{ { "first", "VVVV" }, { "second", "XXXX" } }) }) } + }; + + template_params params; + params.messages = json::array({ user_msg, assistant_reasoning_content }); + params.tools = tools; + params.add_generation_prompt = false; + params.enable_thinking = true; + + auto comparison = compare_variants( + tmpl, params, [&](template_params & p) { p.messages = json::array({ user_msg, assistant_reasoning_tools }); }); + + if (!comparison) { + LOG_DBG("R3: Template application failed\n"); + return; + } + + const auto & diff = comparison->diff; + + std::string reasoning_content = "Let me think."; + + LOG_DBG("R3 diff - prefix: '%s', suffix: '%s', left: '%s', right: '%s'\n", diff.prefix.c_str(), diff.suffix.c_str(), + diff.left.c_str(), diff.right.c_str()); + + // Check if reasoning only appears in variant B (with tools) + bool reasoning_in_A = comparison->output_A.find(reasoning_content) != std::string::npos; + bool reasoning_in_B = comparison->output_B.find(reasoning_content) != std::string::npos; + + if (!reasoning_in_A && reasoning_in_B) { + result.reasoning = reasoning_mode::TOOLS_ONLY; + LOG_DBG("R3: Detected TOOLS_ONLY reasoning mode\n"); + + // Extract reasoning markers from output_B + // The reasoning_content is "Let me think." 
+ size_t reasoning_pos = comparison->output_B.find(reasoning_content); + if (reasoning_pos != std::string::npos) { + // Find start marker before reasoning_content + std::string before_reasoning = comparison->output_B.substr(0, reasoning_pos); + before_reasoning = trim_trailing_whitespace(before_reasoning); + auto segments_before = segmentize_markers(before_reasoning); + std::reverse(segments_before.begin(), segments_before.end()); + + for (auto & segment : segments_before) { + if (segment.type == segment_type::MARKER) { + result.markers.reasoning_start = segment.value; + LOG_DBG("R3: Found reasoning_start: '%s'\n", result.markers.reasoning_start.c_str()); + break; + } + } + + // Find end marker after reasoning_content + size_t reasoning_end = reasoning_pos + reasoning_content.length(); + std::string after_reasoning = comparison->output_B.substr(reasoning_end); + after_reasoning = trim_leading_whitespace(after_reasoning); + + if (!after_reasoning.empty()) { + // Try to find matching end marker + if (!result.markers.reasoning_start.empty()) { + auto segments = segmentize_markers(after_reasoning); + for (auto & segment : segments) { + if (segment.type == segment_type::MARKER) { + result.markers.reasoning_end = segment.value; + break; + } + } + if (!result.markers.reasoning_end.empty()) { + LOG_DBG("R3: Found reasoning_end (matched): '%s'\n", result.markers.reasoning_end.c_str()); + } + } + } + } + } +} + +void differential_analyzer::analyze_content(const common_chat_template & tmpl, diff_analysis_result & result) { + LOG_DBG(ANSI_ORANGE "Phase 2: Content analysis\n" ANSI_RESET); + + compare_content_values(tmpl, result); +} + +void differential_analyzer::compare_content_values(const common_chat_template & tmpl, diff_analysis_result & result) { + json assistant_content_only = json{ + { "role", "assistant" }, + { "content", "Response text" } + }; + + json assistant_with_tools = json{ + { "role", "assistant" }, + { "content", "" }, + { "tool_calls", json::array({ 
build_tool_call("test_func", json{ { "arg1", "value1" } }) }) } + }; + + json assistant_with_reasoning = json{ + { "role", "assistant" }, + { "content", "" }, + { "reasoning_content", "Need to think" } + }; + + template_params params_content_only; + params_content_only.messages = json::array({ user_msg, assistant_content_only }); + params_content_only.add_generation_prompt = false; + params_content_only.enable_thinking = true; + params_content_only.tools = tools; + + auto comparison_with_tools = compare_variants(tmpl, params_content_only, [&](template_params & p) { + p.messages = json::array({ user_msg, assistant_with_tools }); + }); + + auto comparison_with_reasoning = compare_variants(tmpl, params_content_only, [&](template_params & p) { + p.messages = json::array({ user_msg, assistant_with_reasoning }); + }); + + if (!comparison_with_tools || !comparison_with_reasoning) { + LOG_DBG("C1: Template application failed\n"); + return; + } + + const auto & diff_tools = comparison_with_tools->diff; + const auto & diff_reasoning = comparison_with_reasoning->diff; + + std::string response = "Response text"; + + bool found_plain_content = false; + if (trim_whitespace(diff_tools.left) == response) { + auto segments = segmentize_markers(diff_reasoning.left); + if (trim_whitespace(diff_reasoning.left) == response || + (segments.size() == 2 && trim_whitespace(segments[0].value) == response)) { + // We only have the content text in the diff (possibly with a stray EOG marker), so no markers + LOG_DBG("C1: No content markers\n"); + result.content = content_mode::PLAIN; + found_plain_content = true; + } else if (result.reasoning == reasoning_mode::FORCED_CLOSED && + diff_reasoning.left.find(result.markers.reasoning_end) != std::string::npos) { + std::string post_closed_reasoning = diff_reasoning.left.substr( + diff_reasoning.left.find(result.markers.reasoning_end) + result.markers.reasoning_end.length()); + if (trim_whitespace(post_closed_reasoning) == "Response text") { + 
LOG_DBG("C1: No content markers after stripping reasoning close marker\n"); + result.content = content_mode::PLAIN; + found_plain_content = true; + } + } + } + if (!found_plain_content) { + std::string rdiff = diff_reasoning.left; + if (!result.markers.reasoning_end.empty() && rdiff.find(result.markers.reasoning_end) != std::string::npos) { + rdiff = rdiff.substr(rdiff.find(result.markers.reasoning_end) + result.markers.reasoning_end.length()); + } + // Take the more promising diff + std::string pure_content = rdiff.length() > diff_tools.left.length() ? rdiff : diff_tools.left; + size_t pos = pure_content.find("Response text"); + if (pos == std::string::npos) { + LOG_DBG("C1: Error: response text not found - improper template application?"); + return; + } + result.markers.content_start = trim_leading_whitespace(pure_content.substr(0, pos)); + result.markers.content_end = + trim_leading_whitespace(pure_content.substr(pos + 13)); // 13 - len of "Response text" + // TODO: WRAPPED_WITH_REASONING + } + + // Determine content mode + if (!result.markers.content_start.empty() || !result.markers.content_end.empty()) { + result.content = content_mode::ALWAYS_WRAPPED; + LOG_DBG("C1: Content is ALWAYS_WRAPPED\n"); + // TODO: END_DELIMITED content mode - delimited at end but not at start? 
+ } +} + +void differential_analyzer::analyze_tool_call_format(const std::string & haystack, + const std::string & fun_name_needle, + const std::string & arg_name_needle, + diff_analysis_result & result) { + if (fun_name_needle.empty() || arg_name_needle.empty() || haystack.empty()) { + return; + } + + auto in_json_haystack = [&haystack](const std::string & needle) -> bool { + // Find the needle in the haystack + size_t needle_pos = haystack.find(needle); + if (needle_pos == std::string::npos) { + return false; + } + if (needle_pos < 2) { + return false; // not enough space for a JSON structure + } + if (haystack[needle_pos - 1] == '\'' || haystack[needle_pos - 1] == '"') { + int cur = needle_pos - 2; + for (; cur >= 0 && std::isspace(haystack[cur]); cur--) { + } + if (haystack[cur] == ':' || haystack[cur] == '{') { + return true; + } + } + return false; + }; + + if (in_json_haystack(fun_name_needle)) { + // no need to check further, we're in JSON land + result.tools = tool_format::JSON_NATIVE; + } else if (in_json_haystack(arg_name_needle)) { + result.tools = tool_format::TAG_WITH_JSON; + } else { + result.tools = tool_format::TAG_WITH_TAGGED; + } + + // first, remove any reasoning markers + std::string clean_haystack = haystack; + if (!result.markers.reasoning_start.empty()) { + auto pos = haystack.find(result.markers.reasoning_start); + if (pos != std::string::npos) { + clean_haystack = haystack.substr(0, pos) + haystack.substr(pos + result.markers.reasoning_start.length()); + } + } + if (!result.markers.reasoning_end.empty()) { + auto pos = clean_haystack.find(result.markers.reasoning_end); + if (pos != std::string::npos) { + clean_haystack = + clean_haystack.substr(0, pos) + clean_haystack.substr(pos + result.markers.reasoning_end.length()); + } + } + + if (result.tools == tool_format::JSON_NATIVE) { + analyze_tool_call_format_json_native(clean_haystack, fun_name_needle, arg_name_needle, result); + } else { + analyze_tool_call_format_non_json(clean_haystack, 
fun_name_needle, result); + } + // always relax whitespace requirements on ending markers since they don't influence content + result.markers.tool_section_end = trim_whitespace(result.markers.tool_section_end); + result.markers.per_call_end = trim_whitespace(result.markers.per_call_end); +} + +void differential_analyzer::analyze_tool_call_format_json_native(const std::string & clean_haystack, + const std::string & fun_name_needle, + const std::string & arg_name_needle, + diff_analysis_result & result) { + // we might not have the typical OpenAI tool calling structure + int json_start = clean_haystack.find_first_of('{'); + int json_end = clean_haystack.find_last_of('}'); + json call_struct = json::parse(clean_haystack.substr(json_start, json_end - json_start + 1)); + auto register_field = [&](const std::string & prefix, + const nlohmann::detail::iteration_proxy_value & subel) { + if (subel.value().is_string() && std::string(subel.value()).find("call0000") != std::string::npos) { + result.id_field = !prefix.empty() ? prefix + "." + subel.key() : subel.key(); + } else if (subel.value().is_string() && std::string(subel.value()) == fun_name_needle) { + result.name_field = !prefix.empty() ? prefix + "." + subel.key() : subel.key(); + } else if (subel.value().dump().find(arg_name_needle) != + std::string::npos) { // handle both string and JSON obj variants + result.args_field = !prefix.empty() ? prefix + "." + subel.key() : subel.key(); + } else if (subel.key().find("id") != std::string::npos) { + // heuristics for generated id field + result.gen_id_field = !prefix.empty() ? prefix + "." 
+ subel.key() : subel.key(); + } + }; + for (const auto & el : call_struct.items()) { + if (el.key() == fun_name_needle) { + result.fun_name_is_key = true; + // When function name is the key, there's no name field and args are direct + result.name_field.clear(); + result.args_field.clear(); + // Don't register this element - the function name IS the key, not a field + } else { + if (el.value().is_object() && + el.value().dump().find(arg_name_needle) == std::string::npos) { // not the args object + result.function_field = el.key(); + for (const auto & subel : el.value().items()) { + register_field(el.key(), subel); + } + } + // Register this element as a potential field + register_field("", el); + } + } + // TODO: support for generated (not provided) tool call IDs + auto space_or_bracket = [](bool opening, char c) -> bool { + return std::isspace(c) || (opening ? c == '[' : c == ']'); + }; + // now let's check if we're in an array construction, mark it if so and get out of it + if (json_start > 0 && space_or_bracket(true, clean_haystack[json_start - 1])) { + for (--json_start; space_or_bracket(true, clean_haystack[json_start]) && json_start >= 0; json_start--) { + if (clean_haystack[json_start] == '[') { + result.tools_array_wrapped = true; + break; + } + } + if (!result.tools_array_wrapped) { + json_start++; // we ate into the last pre-json character + } + } + if (json_end < (int) clean_haystack.length() - 1 && space_or_bracket(false, clean_haystack[json_end + 1])) { + for (++json_end; + space_or_bracket(false, clean_haystack[json_end]) && json_end < (int) clean_haystack.length() - 1; + json_end++) { + } + } + + std::vector> located_params; + if (!result.name_field.empty()) { + located_params.push_back({ clean_haystack.find(result.name_field), result.name_field }); + } + if (!result.args_field.empty()) { + located_params.push_back({ clean_haystack.find(result.args_field), result.args_field }); + } + if (!result.id_field.empty()) { + located_params.push_back({ 
clean_haystack.find(result.id_field), result.id_field }); + } + if (!result.gen_id_field.empty()) { + located_params.push_back({ clean_haystack.find(result.gen_id_field), result.gen_id_field }); + } + std::sort(located_params.begin(), located_params.end()); + for (auto & pair : located_params) { + result.parameter_order.push_back(pair.second); + } + // we can immediately extract tool calling markers too + result.markers.tool_section_start = trim_leading_whitespace(clean_haystack.substr(0, json_start)); + result.markers.tool_section_end = trim_whitespace(clean_haystack.substr(json_end)); + // When tools_array_wrapped is true, the closing bracket is part of the array structure, + // not a separate section end marker. Clear tool_section_end to avoid duplicate brackets. + if (result.tools_array_wrapped && result.markers.tool_section_end == "]") { + result.markers.tool_section_end.clear(); + } +} + +void differential_analyzer::analyze_tool_call_format_non_json(const std::string & clean_haystack, + const std::string & fun_name_needle, + diff_analysis_result & result) { + // we need to split by markers... 
+ auto haystack_split = segmentize_markers(trim_leading_whitespace(clean_haystack)); + int where_is_nemo = 0; + int i = 0; + for (auto & segment : haystack_split) { + if (segment.value.find(fun_name_needle) != std::string::npos) { + where_is_nemo = i; + break; + } + i++; + } + + // basically the rule here is: + // - we append everything adjacent to a marker to the marker (treat it as part of the marker) + // - we assume symmetry (as many opening as closing markers) + // - we count the number of opening markers and then try to move backwards from the end until we've + // eaten as many closing markers as there were opening markers + if (where_is_nemo > 1) { // we might have more than one marker set here + std::vector preceding_markers; + for (int seg = where_is_nemo - 1; seg >= 0; seg--) { + if (haystack_split[seg].type == MARKER) { + preceding_markers.push_back(haystack_split[seg]); + } + } + size_t how_many_markers = preceding_markers.size(); + if (how_many_markers > 1) { + bool had_marker = false; + for (int seg = where_is_nemo - 1; seg >= 0; seg--) { + if (haystack_split[seg].type == MARKER) { + if (!had_marker) { + had_marker = true; + result.markers.per_call_start = haystack_split[seg].value + result.markers.per_call_start; + } else { + result.markers.tool_section_start = + haystack_split[seg].value + result.markers.tool_section_start; + } + } else { + if (had_marker) { + result.markers.tool_section_start = + haystack_split[seg].value + result.markers.tool_section_start; + } else { + result.markers.per_call_start = haystack_split[seg].value + result.markers.per_call_start; + } + } + } + had_marker = false; + size_t backtracked_so_far = 0; + for (size_t seg = haystack_split.size() - 1; seg > (size_t) where_is_nemo; seg--) { + if (haystack_split[seg].type == MARKER) { + backtracked_so_far++; + if (!had_marker) { + had_marker = true; + result.markers.tool_section_end = haystack_split[seg].value + result.markers.tool_section_end; + } else { + 
result.markers.per_call_end = haystack_split[seg].value + result.markers.per_call_end; + } + } else { + if (had_marker) { + result.markers.per_call_end = haystack_split[seg].value + result.markers.per_call_end; + } else { + result.markers.tool_section_end = haystack_split[seg].value + result.markers.tool_section_end; + } + } + if (backtracked_so_far >= how_many_markers) { + break; + } + } + } else { + for (int seg = 0; seg < where_is_nemo; seg++) { + result.markers.tool_section_start += haystack_split[seg].value; + } + for (size_t seg = haystack_split.size() - 1; seg > (size_t) where_is_nemo; seg--) { + result.markers.tool_section_end = haystack_split[seg].value + result.markers.tool_section_end; + if (haystack_split[seg].type == segment_type::MARKER) { + break; + } + } + } + } else { + result.markers.tool_section_start += haystack_split[0].value; + for (size_t seg = haystack_split.size() - 1; seg > (size_t) where_is_nemo; seg--) { + result.markers.tool_section_end = haystack_split[seg].value + result.markers.tool_section_end; + if (haystack_split[seg].type == segment_type::MARKER) { + break; + } + } + } +} + +void differential_analyzer::analyze_tools(const common_chat_template & tmpl, diff_analysis_result & result) { + LOG_DBG(ANSI_ORANGE "Phase 3: Tool call analysis\n" ANSI_RESET); + analyze_tool_calls(tmpl, result); + + if (result.tools == tool_format::NONE) { + LOG_DBG("T1: No tool support found\n"); + // Continue anyway - we may still have useful markers + } else if (result.tools != tool_format::JSON_NATIVE) { + if (result.supports_parallel_calls) { + check_per_call_markers(tmpl, result); + } + extract_function_markers(tmpl, result); + extract_argument_separator(tmpl, result); + extract_args_markers(tmpl, result); + extract_call_id_markers(tmpl, result); + if (result.tools == tool_format::TAG_WITH_TAGGED) { + analyze_arguments(tmpl, result); + } + } +} + +void differential_analyzer::check_per_call_markers(const common_chat_template & tmpl, diff_analysis_result 
& result) { + json assistant_one_tool = json{ + { "role", "assistant" }, + { "content", "" }, + { "tool_calls", json::array({ first_tool_call }) } + }; + + json assistant_two_tools = json{ + { "role", "assistant" }, + { "content", "" }, + { "tool_calls", json::array({ first_tool_call, second_tool_call }) } + }; + + template_params params; + params.messages = json::array({ user_msg, assistant_one_tool }); + params.tools = tools; + params.add_generation_prompt = false; + params.enable_thinking = true; + + auto one_vs_two = compare_variants( + tmpl, params, [&](template_params & p) { p.messages = json::array({ user_msg, assistant_two_tools }); }); + + if (!one_vs_two) { + LOG_DBG("T2: Generating double tool call comparison failed\n"); + return; + } + + std::string second_tool_content = trim_leading_whitespace(one_vs_two->diff.right); + if (!result.markers.tool_section_start.empty() && + second_tool_content.find(result.markers.tool_section_start) == 0) { + result.markers.per_call_start = result.markers.tool_section_start; + result.markers.per_call_end = result.markers.tool_section_end; + result.markers.tool_section_start.clear(); + result.markers.tool_section_end.clear(); + } +} + +void differential_analyzer::analyze_tool_calls(const common_chat_template & tmpl, diff_analysis_result & result) { + json assistant_no_tools = json{ + { "role", "assistant" }, + { "content", "Response." 
} + }; + + json assistant_with_tools = json{ + { "role", "assistant" }, + { "content", "" }, + { "tool_calls", json::array({ first_tool_call }) } + }; + + template_params params; + params.messages = json::array({ user_msg, assistant_no_tools }); + params.tools = tools; + params.add_generation_prompt = false; + params.enable_thinking = true; + + auto comparison = compare_variants( + tmpl, params, [&](template_params & p) { p.messages = json::array({ user_msg, assistant_with_tools }); }); + + if (!comparison) { + LOG_DBG("T1: Template application failed\n"); + return; + } + + const auto & diff = comparison->diff; + LOG_DBG("T1 diff - prefix: '%s', suffix: '%s'\n", diff.prefix.c_str(), diff.suffix.c_str()); + LOG_DBG("T1 diff - left: '%s', right: '%s'\n", diff.left.c_str(), diff.right.c_str()); + + std::string tool_section = diff.right; + + if (tool_section.empty()) { + return; + } + + analyze_tool_call_format(tool_section, "foofoo", "first", result); + + LOG_DBG("T1: tool_section_start='%s', tool_section_end='%s'\n", result.markers.tool_section_start.c_str(), + result.markers.tool_section_end.c_str()); +} + +void differential_analyzer::extract_call_separator(const common_chat_template & tmpl, + diff_analysis_result & result, + std::string & second_call_content) { + json assistant_one_call = json{ + { "role", "assistant" }, + { "content", "" }, + { "tool_calls", json::array({ first_tool_call }) } + }; + + json assistant_two_calls = json{ + { "role", "assistant" }, + { "content", "" }, + { "tool_calls", json::array({ first_tool_call, second_tool_call }) } + }; + + template_params params; + params.messages = json::array({ user_msg, assistant_one_call }); + params.tools = tools; + params.add_generation_prompt = false; + params.enable_thinking = true; + + auto comparison = compare_variants( + tmpl, params, [&](template_params & p) { p.messages = json::array({ user_msg, assistant_two_calls }); }); + + if (!comparison) { + LOG_DBG("T2: Template application failed\n"); + 
return; + } + + const auto & diff = comparison->diff; + LOG_DBG("T2 diff - prefix: '%s', suffix: '%s'\n", diff.prefix.c_str(), diff.suffix.c_str()); + LOG_DBG("T2 diff - left: '%s', right: '%s'\n", diff.left.c_str(), diff.right.c_str()); + + if (!diff.right.empty()) { + std::string first_func_name = "foofoo"; + std::string second_func_name = "barbar"; + + std::string separator = until_common_prefix(diff.right, first_func_name, second_func_name); + result.markers.call_separator = trim_whitespace(separator); + + LOG_DBG("T2: call_separator='%s'\n", result.markers.call_separator.c_str()); + + result.supports_parallel_calls = true; + second_call_content = diff.right; + + LOG_DBG("T2: second_call_content='%s', supports_parallel_calls=true\n", second_call_content.c_str()); + } +} + +void differential_analyzer::extract_function_markers(const common_chat_template & tmpl, diff_analysis_result & result) { + json assistant_nocall = json{ + { "role", "assistant" }, + { "content", "BBBB" }, + }; + + json assistant_foofoo = json{ + { "role", "assistant" }, + { "content", "" }, + { "tool_calls", json::array({ first_tool_call }) } + }; + + json assistant_barbar = json{ + { "role", "assistant" }, + { "content", "" }, + { "tool_calls", json::array({ second_tool_call }) } + }; + + template_params params; + params.messages = json::array({ user_msg, assistant_foofoo }); + params.tools = tools; + params.add_generation_prompt = false; + params.enable_thinking = true; + + auto comparison = compare_variants( + tmpl, params, [&](template_params & p) { p.messages = json::array({ user_msg, assistant_barbar }); }); + + if (!comparison) { + LOG_DBG("T3: Template application failed\n"); + return; + } + + const auto & diff = comparison->diff; + LOG_DBG("T3 diff - suffix: '%s'\n", diff.suffix.c_str()); + LOG_DBG("T3 diff - left: '%s', right: '%s'\n", diff.left.c_str(), diff.right.c_str()); + + if (diff.left.find("foofoo") != std::string::npos && diff.right.find("barbar") != std::string::npos) { + 
std::string prefix_marker; + if (!result.markers.per_call_start.empty()) { + prefix_marker = result.markers.per_call_start; + } else { + prefix_marker = result.markers.tool_section_start; + } + if (!prefix_marker.empty() && diff.prefix.rfind(prefix_marker) != std::string::npos) { + result.markers.func_name_prefix = + diff.prefix.substr(diff.prefix.rfind(prefix_marker) + prefix_marker.size()); + } + + auto seg = segmentize_markers(diff.left); + for (const auto & s : seg) { + if (s.value.find("foofoo") == std::string::npos) { + result.markers.func_name_prefix += s.value; + } else { + size_t pos = s.value.find("foofoo"); + std::string pre = s.value.substr(0, pos); + std::string post = s.value.substr(pos + 6); // 6 = len("foofoo") + result.markers.func_name_prefix += pre; + result.markers.func_name_suffix += post; + break; + } + } + + auto seg_suf = segmentize_markers(diff.suffix); + size_t stop = 0; + size_t stop_internal_pos = 0; + for (const auto & ss : seg_suf) { + bool has_needle = false; + if (result.tools == tool_format::TAG_WITH_JSON) { + has_needle = (ss.type == segment_type::TEXT && ss.value.find_first_of("{[") != std::string::npos); + if (has_needle) { + stop_internal_pos = ss.value.find_first_of("{["); + break; + } + } else { + has_needle = ss.value.find("first") != std::string::npos; + if (has_needle) { + stop_internal_pos = ss.value.find("first"); + break; + } + } + stop++; + } + if (stop < seg_suf.size() - 1) { + if (result.tools == tool_format::TAG_WITH_TAGGED) { + size_t how_far = 0; + if (stop > 0) { + if (seg_suf[stop].type == segment_type::MARKER) { + how_far = stop; + } else { + how_far = stop - 1; + } + for (size_t i = 0; i < how_far; i++) { + result.markers.func_name_suffix += seg_suf[i].value; + } + } + } else { + for (size_t i = 0; i < stop; i++) { + result.markers.func_name_suffix += seg_suf[i].value; + } + const std::string & stopper = seg_suf[stop].value; + result.markers.func_name_suffix += stopper.substr(0, stop_internal_pos); + } + } + + 
// now just to find the closer
+        std::string suffix_marker;
+        if (!result.markers.per_call_end.empty()) {
+            suffix_marker = result.markers.per_call_end;
+        } else {
+            suffix_marker = result.markers.tool_section_end;
+        }
+        std::string closer_suffix;
+        if (suffix_marker.empty()) {
+            // we'll have to rely on an extra diff with no-calls version
+            auto notool_comp = compare_variants(
+                tmpl, params, [&](template_params & p) { p.messages = json::array({ user_msg, assistant_nocall }); });
+            // guard against a failed template application (every sibling call site checks this)
+            // and against "YYYY" being absent, where npos + 4 would index garbage
+            size_t yyyy_pos = notool_comp ? notool_comp->diff.left.find("YYYY") : std::string::npos;
+            closer_suffix = yyyy_pos != std::string::npos ? notool_comp->diff.left.substr(yyyy_pos + 4) : "";
+        } else {
+            closer_suffix = diff.suffix.substr(0, diff.suffix.find(suffix_marker));
+        }
+        if (!closer_suffix.empty()) {
+            auto closer_seg = segmentize_markers(closer_suffix);
+            bool need_to_eat_arg_marker = (result.tools == tool_format::TAG_WITH_TAGGED);
+            size_t last_arg_seg = closer_seg.size() - 1;
+            for (int i = (int) closer_seg.size() - 1; i >= 0; i--) {
+                if (closer_seg[i].value.find("YYYY") != std::string::npos) {
+                    last_arg_seg = i;
+                }
+            }
+            if (result.tools == tool_format::TAG_WITH_JSON) {
+                const auto & entire_seg = closer_seg[last_arg_seg].value;
+                size_t pos = entire_seg.find_last_of("}]");
+                if (pos != std::string::npos && pos < entire_seg.size() - 1) {
+                    result.markers.func_close = trim_leading_whitespace(entire_seg.substr(pos + 1));
+                }
+            }
+            for (size_t i = last_arg_seg + 1; i < closer_seg.size(); i++) {
+                if (closer_seg[i].type == segment_type::MARKER) {
+                    if (need_to_eat_arg_marker) {
+                        need_to_eat_arg_marker = false;
+                    } else {
+                        result.markers.func_close += closer_seg[i].value;
+                    }
+                } else if (!need_to_eat_arg_marker) {
+                    result.markers.func_close += closer_seg[i].value;
+                }
+            }
+        }
+        result.markers.func_close = trim_leading_whitespace(result.markers.func_close);
+
+        LOG_DBG("T3: func_name_prefix='%s', func_name_suffix='%s', func_close='%s'\n",
+                result.markers.func_name_prefix.c_str(), result.markers.func_name_suffix.c_str(),
+
result.markers.func_close.c_str()); + } +} + +void differential_analyzer::extract_argument_separator(const common_chat_template & tmpl, + diff_analysis_result & result) { + json assistant_one_arg = json{ + { "role", "assistant" }, + { "content", "" }, + { "tool_calls", json::array({ first_tool_call_one_arg }) } + }; + + json assistant_two_args = json{ + { "role", "assistant" }, + { "content", "" }, + { "tool_calls", json::array({ first_tool_call }) } + }; + + template_params params; + params.messages = json::array({ user_msg, assistant_one_arg }); + params.tools = tools; + params.add_generation_prompt = false; + params.enable_thinking = true; + + auto comparison = compare_variants( + tmpl, params, [&](template_params & p) { p.messages = json::array({ user_msg, assistant_two_args }); }); + + if (!comparison) { + LOG_DBG("T4: Template application failed\n"); + return; + } + + const auto & diff = comparison->diff; + LOG_DBG("T4 diff - suffix: '%s'\n", diff.suffix.c_str()); + LOG_DBG("T4 diff - left: '%s', right: '%s'\n", diff.left.c_str(), diff.right.c_str()); + + if (!diff.right.empty()) { + std::string separator = until_common_prefix(diff.right, "first", "second"); + result.markers.arg_separator = separator; + LOG_DBG("T4: arg_separator='%s'\n", result.markers.arg_separator.c_str()); + } +} + +void differential_analyzer::extract_args_markers(const common_chat_template & tmpl, diff_analysis_result & result) { + json assistant_no_args = json{ + { "role", "assistant" }, + { "content", "" }, + { "tool_calls", json::array({ first_tool_call_zero_args }) } + }; + + json assistant_with_args = json{ + { "role", "assistant" }, + { "content", "" }, + { "tool_calls", json::array({ first_tool_call_one_arg }) } + }; + + template_params params; + params.messages = json::array({ user_msg, assistant_no_args }); + params.tools = tools; + params.add_generation_prompt = false; + params.enable_thinking = true; + + auto comparison = compare_variants( + tmpl, params, [&](template_params & 
p) { p.messages = json::array({ user_msg, assistant_with_args }); }); + + if (!comparison) { + LOG_DBG("T5: Template application failed\n"); + return; + } + + const auto & diff = comparison->diff; + LOG_DBG("T5 diff - suffix: '%s'\n", diff.suffix.c_str()); + LOG_DBG("T5 diff - left: '%s', right: '%s'\n", diff.left.c_str(), diff.right.c_str()); + + if (result.markers.args_start.empty() && result.tools != tool_format::JSON_NATIVE) { + std::string prefix_marker = !result.markers.tool_section_start.empty() ? result.markers.tool_section_start : + result.markers.per_call_start; + std::string suffix_marker = + !result.markers.tool_section_end.empty() ? result.markers.tool_section_end : result.markers.per_call_end; + // these might happen earlier in the tools section as an example or somewhere else, so we need to find the closest ones + size_t prefix_pos = prefix_marker.empty() ? 0 : diff.prefix.rfind(prefix_marker); + size_t suffix_pos = suffix_marker.empty() ? diff.suffix.size() : diff.suffix.find(suffix_marker); + if (prefix_pos == std::string::npos) { + prefix_pos = 0; + } + if (suffix_pos == std::string::npos) { + suffix_pos = diff.suffix.size(); + } + std::string prefix_cut = diff.prefix.substr(prefix_pos + prefix_marker.size()); + std::string suffix_cut = diff.suffix.substr(0, suffix_pos); + std::string args_start = until_common_prefix(prefix_cut, "{}", "{\"first\":"); + std::string args_end = after_common_suffix(suffix_cut, "{}", "\"XXXX\"}"); + + if (!args_start.empty() || !args_end.empty()) { + result.markers.args_start = args_start; + result.markers.args_end = args_end; + LOG_DBG("T5: Custom argument container detected: start='%s', end='%s'\n", args_start.c_str(), + args_end.c_str()); + } + } +} + +void differential_analyzer::extract_call_id_markers(const common_chat_template & tmpl, diff_analysis_result & result) { + json assistant_id1 = json{ + { "role", "assistant" }, + { "content", "" }, + { "tool_calls", json::array({ first_tool_call }) } + }; + + json 
assistant_id2 = json{ + { "role", "assistant" }, + { "content", "" }, + { "tool_calls", json::array({ first_tool_call_alt_id }) } + }; + + template_params params; + params.messages = json::array({ user_msg, assistant_id1 }); + params.tools = tools; + params.add_generation_prompt = false; + params.enable_thinking = true; + + auto comparison = compare_variants( + tmpl, params, [&](template_params & p) { p.messages = json::array({ user_msg, assistant_id2 }); }); + + if (!comparison) { + LOG_DBG("T6: Template application failed for call_id detection\n"); + return; + } + + const auto & diff = comparison->diff; + LOG_DBG("T6 diff (call_id) - prefix: '%s', suffix: '%s'\n", diff.prefix.c_str(), diff.suffix.c_str()); + LOG_DBG("T6 diff (call_id) - left: '%s', right: '%s'\n", diff.left.c_str(), diff.right.c_str()); + + if (diff.left.empty() && diff.right.empty()) { + LOG_DBG("T6: No call_id difference detected\n"); + return; + } + + std::string id_value_1 = "call00001"; + std::string id_value_2 = "call99999"; + + size_t common_id_prefix_len = 0; + for (size_t i = 0; i < std::min(id_value_1.length(), id_value_2.length()); i++) { + if (id_value_1[i] == id_value_2[i]) { + common_id_prefix_len++; + } else { + break; + } + } + std::string common_id_part = id_value_1.substr(0, common_id_prefix_len); + + // Check if the function name is in the prefix (normal case: BETWEEN_FUNC_AND_ARGS or POST_ARGS) + // or in the suffix (call_id is PRE_FUNC_NAME) + std::string func_name = "foofoo"; + size_t func_name_in_prefix = diff.prefix.rfind(func_name); + size_t func_name_in_suffix = diff.suffix.find(func_name); + + if (func_name_in_prefix != std::string::npos && func_name_in_suffix == std::string::npos) { + // Function name is only in prefix - call_id is BETWEEN_FUNC_AND_ARGS or POST_ARGS + // Check if args indicator "{" is in prefix or suffix + size_t args_in_prefix = diff.prefix.find('{', func_name_in_prefix); + size_t args_in_suffix = diff.suffix.find('{'); + + if (args_in_suffix != 
std::string::npos && + (args_in_prefix == std::string::npos || args_in_prefix > diff.prefix.length())) { + // Args are in suffix, so call_id is BETWEEN_FUNC_AND_ARGS + result.call_id_pos = call_id_position::BETWEEN_FUNC_AND_ARGS; + LOG_DBG("T6: Detected BETWEEN_FUNC_AND_ARGS position\n"); + + // The prefix ends with: ... + // Segmentize to find the call_id_prefix marker + std::string after_func = diff.prefix.substr(func_name_in_prefix + func_name.length()); + auto segments = segmentize_markers(after_func); + + std::string marker_before_id; + for (size_t i = 0; i < segments.size(); i++) { + if (segments[i].type == segment_type::MARKER) { + // Check if the next segment (if any) contains the common_id_part + if (i + 1 < segments.size() && segments[i + 1].value.find(common_id_part) != std::string::npos) { + marker_before_id = segments[i].value; + break; + } + // Or if this is the last marker and the text after contains common_id_part + if (i == segments.size() - 1 || + (i + 1 < segments.size() && segments[i + 1].type == segment_type::TEXT && + segments[i + 1].value.find(common_id_part) != std::string::npos)) { + marker_before_id = segments[i].value; + } + } + } + + if (!marker_before_id.empty()) { + result.markers.call_id_prefix = marker_before_id; + LOG_DBG("T6: call_id_prefix='%s'\n", result.markers.call_id_prefix.c_str()); + } else { + // Fallback: look for the last marker in after_func + for (int i = (int) segments.size() - 1; i >= 0; i--) { + if (segments[i].type == segment_type::MARKER) { + result.markers.call_id_prefix = segments[i].value; + LOG_DBG("T6: call_id_prefix (fallback)='%s'\n", result.markers.call_id_prefix.c_str()); + break; + } + } + } + + // Extract call_id_suffix: the first marker in the suffix before args + auto suffix_segments = segmentize_markers(diff.suffix); + for (size_t i = 0; i < suffix_segments.size(); i++) { + if (suffix_segments[i].type == segment_type::MARKER) { + result.markers.call_id_suffix = suffix_segments[i].value; + LOG_DBG("T6: 
call_id_suffix='%s'\n", result.markers.call_id_suffix.c_str()); + break; + } + // Stop if we hit the args + if (suffix_segments[i].value.find('{') != std::string::npos) { + break; + } + } + } else if (args_in_prefix != std::string::npos) { + // Args are in prefix, so call_id is POST_ARGS + result.call_id_pos = call_id_position::POST_ARGS; + LOG_DBG("T6: POST_ARGS call_id position detected\n"); + + // Extract markers from between args and the ID + std::string after_args = diff.prefix.substr(args_in_prefix); + size_t closing_brace = after_args.rfind('}'); + if (closing_brace != std::string::npos) { + std::string between_args_and_id = after_args.substr(closing_brace + 1); + auto segments = segmentize_markers(between_args_and_id); + for (int i = (int) segments.size() - 1; i >= 0; i--) { + if (segments[i].type == segment_type::MARKER) { + result.markers.call_id_prefix = segments[i].value; + LOG_DBG("T6: call_id_prefix='%s'\n", result.markers.call_id_prefix.c_str()); + break; + } + } + } + + // call_id_suffix would be in the suffix (first marker) + auto suffix_segments = segmentize_markers(diff.suffix); + for (const auto & seg : suffix_segments) { + if (seg.type == segment_type::MARKER) { + result.markers.call_id_suffix = seg.value; + LOG_DBG("T6: call_id_suffix='%s'\n", result.markers.call_id_suffix.c_str()); + break; + } + } + } + } else if (func_name_in_suffix != std::string::npos && func_name_in_prefix == std::string::npos) { + // Function name is only in suffix - call_id is PRE_FUNC_NAME + result.call_id_pos = call_id_position::PRE_FUNC_NAME; + LOG_DBG("T6: PRE_FUNC_NAME call_id position detected\n"); + + // Extract call_id_prefix from prefix (last marker before the common_id_part) + auto prefix_segments = segmentize_markers(diff.prefix); + for (int i = (int) prefix_segments.size() - 1; i >= 0; i--) { + if (prefix_segments[i].type == segment_type::MARKER) { + result.markers.call_id_prefix = prefix_segments[i].value; + LOG_DBG("T6: call_id_prefix='%s'\n", 
result.markers.call_id_prefix.c_str()); + break; + } + } + + // Extract call_id_suffix from suffix (first marker before func_name) + std::string before_func = diff.suffix.substr(0, func_name_in_suffix); + auto suffix_segments = segmentize_markers(before_func); + for (const auto & seg : suffix_segments) { + if (seg.type == segment_type::MARKER) { + result.markers.call_id_suffix = seg.value; + LOG_DBG("T6: call_id_suffix='%s'\n", result.markers.call_id_suffix.c_str()); + break; + } + } + } else { + LOG_DBG("T6: Unable to determine call_id position\n"); + } + + // When call_id is detected, per_call_end may have been incorrectly set to include + // the call_id_suffix and sample args. Clear it if it starts with call_id_suffix. + if (result.call_id_pos != call_id_position::NONE && !result.markers.call_id_suffix.empty() && + result.markers.per_call_end.find(result.markers.call_id_suffix) == 0) { + result.markers.per_call_end.clear(); + LOG_DBG("T6: Cleared per_call_end (was incorrectly including call_id_suffix)\n"); + } +} + +void differential_analyzer::analyze_arguments(const common_chat_template & tmpl, diff_analysis_result & result) { + LOG_DBG(ANSI_ORANGE "Phase 4: Argument analysis\n" ANSI_RESET); + + extract_argument_name_markers(tmpl, result); + extract_argument_value_markers(tmpl, result); +} + +void differential_analyzer::extract_argument_name_markers(const common_chat_template & tmpl, + diff_analysis_result & result) { + json assistant_first_arg = json{ + { "role", "assistant" }, + { "content", "" }, + { "tool_calls", json::array({ first_tool_call_one_arg }) } + }; + + json assistant_second_arg = json{ + { "role", "assistant" }, + { "content", "" }, + { "tool_calls", json::array({ first_tool_call_other_arg }) } + }; + + template_params params; + params.messages = json::array({ user_msg, assistant_first_arg }); + params.tools = tools; + params.add_generation_prompt = false; + params.enable_thinking = true; + + auto comparison = compare_variants( + tmpl, params, 
[&](template_params & p) { p.messages = json::array({ user_msg, assistant_second_arg }); }); + + if (!comparison) { + LOG_DBG("A1: Template application failed\n"); + return; + } + + const auto & diff = comparison->diff; + LOG_DBG("A1 diff - suffix: '%s', left: '%s', right: '%s'\n", diff.suffix.c_str(), diff.left.c_str(), + diff.right.c_str()); + + if (!diff.left.empty() && !diff.right.empty()) { + size_t common_len = 0; + size_t min_len = std::min(diff.left.length(), diff.right.length()); + while (common_len < min_len && diff.left[common_len] == diff.right[common_len]) { + common_len++; + } + + if (common_len > 0) { // we have a marker structure with the name *inside* the marker + std::string common_prefix = diff.left.substr(0, common_len); + std::string left_remainder = diff.left.substr(common_len); + std::string right_remainder = diff.right.substr(common_len); + size_t left_close = + left_remainder.find_first_of("\"X"); // because arg-val is XXXX, can be quoted or unquoted + size_t right_close = right_remainder.find_first_of("\"Y"); // here arg-val is YYYY + + if (left_close != std::string::npos && right_close != std::string::npos) { + std::string left_name = left_remainder.substr(0, 5); // 5 = len("first") + std::string right_name = right_remainder.substr(0, 6); // 6 = len("second") + + if (left_name == "first" && right_name == "second") { + result.markers.arg_name_prefix = trim_whitespace(common_prefix); + std::string suffix_left = left_remainder.substr(5, left_close - 5); + std::string suffix_right = right_remainder.substr(6, right_close - 6); + if (suffix_left == suffix_right) { + result.markers.arg_name_suffix = trim_leading_whitespace(suffix_left); + } + LOG_DBG("A1: arg_name_prefix='%s', arg_name_suffix='%s'\n", result.markers.arg_name_prefix.c_str(), + result.markers.arg_name_suffix.c_str()); + } + } + } else if (diff.left.substr(0, 5) == "first" && diff.right.substr(0, 6) == "second") { + // we most likely have actual markers for argument names + auto 
pre_seg = segmentize_markers(diff.prefix); + for (int i = pre_seg.size() - 1; i >= 0; i--) { + result.markers.arg_name_prefix = result.markers.arg_name_prefix + pre_seg[i].value; + if (pre_seg[i].type == segment_type::MARKER) { + break; + } + } + auto left_seg = segmentize_markers(diff.left); + if (left_seg.size() == 1) { // only the name + maybe extra whitespace / normal chars in differing part + result.markers.arg_name_suffix = diff.left.substr(5); + auto suf_seg = segmentize_markers(diff.suffix); + for (size_t i = 0; i < suf_seg.size(); i++) { + result.markers.arg_name_suffix += suf_seg[i].value; + if (suf_seg[i].type == segment_type::MARKER) { + if (i < suf_seg.size() - 2 && suf_seg[i + 1].type == segment_type::TEXT && + trim_whitespace(suf_seg[i + 1].value).empty()) { + // we need to include post-marker whitespace/newlines as well + result.markers.arg_name_suffix += suf_seg[i + 1].value; + } + break; + } + } + } else { + for (size_t i = 0; i < left_seg.size(); i++) { + std::string to_add; + if (i == 0) { + to_add = left_seg[i].value.substr(5); + } else { + to_add = left_seg[i].value; + } + result.markers.arg_name_suffix += to_add; + if (left_seg[i].type == segment_type::MARKER) { + if (i < left_seg.size() - 2 && left_seg[i + 1].type == segment_type::TEXT && + trim_whitespace(left_seg[i + 1].value).empty()) { + // we need to include post-marker whitespace/newlines as well + result.markers.arg_name_suffix += left_seg[i + 1].value; + } + break; + } + } + } + } + } +} + +void differential_analyzer::extract_argument_value_markers(const common_chat_template & tmpl, + diff_analysis_result & result) { + json assistant_val_X = json{ + { "role", "assistant" }, + { "content", "" }, + { "tool_calls", json::array({ first_tool_call_one_arg }) } + }; + + json assistant_val_Y = json{ + { "role", "assistant" }, + { "content", "" }, + { "tool_calls", json::array({ first_tool_call_one_arg_other_val }) } + }; + + template_params params; + params.messages = json::array({ user_msg, 
assistant_val_X }); + params.tools = tools; + params.add_generation_prompt = false; + params.enable_thinking = true; + + auto comparison = compare_variants( + tmpl, params, [&](template_params & p) { p.messages = json::array({ user_msg, assistant_val_Y }); }); + + if (!comparison) { + LOG_DBG("A2: Template application failed\n"); + return; + } + + const auto & diff = comparison->diff; + LOG_DBG("A2 diff - suffix: '%s'\n", diff.suffix.c_str()); + LOG_DBG("A2 diff - left: '%s', right: '%s'\n", diff.left.c_str(), diff.right.c_str()); + + if (diff.left == "XXXX" && diff.right == "YYYY") { + std::string arg_name_ending = "first" + result.markers.arg_name_suffix; + std::string prefix = diff.prefix; + if (prefix.rfind(arg_name_ending) != std::string::npos) { + prefix = prefix.substr(prefix.rfind(arg_name_ending) + arg_name_ending.size()); + } + if (!prefix.empty()) { + auto seg_pre = segmentize_markers(prefix); + for (int i = seg_pre.size() - 1; i >= 0; i--) { + result.markers.arg_value_prefix = seg_pre[i].value + result.markers.arg_value_prefix; + if (seg_pre[i].type == segment_type::MARKER) { + break; + } + } + } + + std::string value_suffix = diff.suffix; + if (!result.markers.func_close.empty()) { + size_t func_close_pos = value_suffix.find(result.markers.func_close); + if (func_close_pos != std::string::npos) { + value_suffix = value_suffix.substr(0, func_close_pos); + } + } else if (!result.markers.per_call_end.empty() || !result.markers.tool_section_end.empty()) { + std::string end_marker = + !result.markers.per_call_end.empty() ? 
result.markers.per_call_end : result.markers.tool_section_end; + size_t end_marker_pos = value_suffix.find(end_marker); + if (end_marker_pos != std::string::npos) { + value_suffix = value_suffix.substr(0, end_marker_pos); + } + } + value_suffix = trim_leading_whitespace(value_suffix); + if (!value_suffix.empty()) { + result.markers.arg_value_suffix = value_suffix; + } + + LOG_DBG("A2: arg_value_prefix='%s', arg_value_suffix='%s'\n", result.markers.arg_value_prefix.c_str(), + result.markers.arg_value_suffix.c_str()); + } +} + +void differential_analyzer::collect_preserved_tokens(diff_analysis_result & result) { + auto & tokens = result.preserved_tokens; + + auto add_token = [&tokens](const std::string & org_token) { + std::string token = trim_whitespace(org_token); + if (!token.empty()) { + // Avoid duplicates + if (std::find(tokens.begin(), tokens.end(), token) == tokens.end()) { + tokens.push_back(token); + } + } + }; + + add_token(result.markers.reasoning_start); + add_token(result.markers.reasoning_end); + add_token(result.markers.content_start); + add_token(result.markers.content_end); + add_token(result.markers.tool_section_start); + add_token(result.markers.tool_section_end); + add_token(result.markers.per_call_start); + add_token(result.markers.per_call_end); + add_token(result.markers.func_name_prefix); + add_token(result.markers.func_name_suffix); + add_token(result.markers.func_close); + add_token(result.markers.arg_name_prefix); + add_token(result.markers.arg_name_suffix); + add_token(result.markers.arg_separator); + add_token(result.markers.arg_value_prefix); + add_token(result.markers.arg_value_suffix); + add_token(result.markers.call_id_prefix); + add_token(result.markers.call_id_suffix); + add_token(result.markers.code_block_marker); +} diff --git a/common/chat-diff-analyzer.h b/common/chat-diff-analyzer.h new file mode 100644 index 0000000000..b1c601181e --- /dev/null +++ b/common/chat-diff-analyzer.h @@ -0,0 +1,347 @@ +#pragma once + +#include 
"chat.h"
+#include "nlohmann/json.hpp"
+
+#include <cstdint>
+#include <optional>
+#include <ostream>
+#include <string>
+#include <vector>
+
+using json = nlohmann::ordered_json;
+
+// ============================================================================
+// Parameters for template application
+// ============================================================================
+struct template_params {
+    json messages;
+    json tools;
+    bool add_generation_prompt = false;
+    bool enable_thinking = true;
+    std::optional<json> extra_context = std::nullopt;
+};
+
+struct diff_split {
+    std::string prefix;
+    std::string suffix;
+    std::string left;
+    std::string right;
+
+    bool operator==(const diff_split & other) const {
+        return prefix == other.prefix && suffix == other.suffix && left == other.left && right == other.right;
+    }
+};
+
+// Result of compare_variants containing diff and original outputs
+struct compare_variants_result {
+    diff_split diff;
+    std::string output_A;
+    std::string output_B;
+};
+
+// ============================================================================
+// Marker Registry: All markers extracted via differential analysis
+// ============================================================================
+
+// Markers extracted from differential analysis of template outputs
+// Each marker is derived from a specific comparison in the analysis matrix
+struct marker_registry {
+    // === Reasoning markers (from Phase 1: R1-R3) ===
+    std::string reasoning_start; // e.g., "", "[THINK]", "<|START_THINKING|>", ""
+    std::string reasoning_end;   // e.g., "", "[BEGIN FINAL RESPONSE]", "<|END_THINKING|>"
+
+    // === Content markers (from Phase 2: C1-C2) ===
+    std::string content_start; // e.g., "", ">>>all\n", ""
+    std::string content_end;   // e.g., "", ""
+
+    // === Tool section markers (from Phase 3: T1-T2) ===
+    std::string tool_section_start; // e.g., "", "[TOOL_CALLS]", ""
+    std::string tool_section_end;   // e.g., "", ""
+    std::string per_call_start;     // e.g., "<|tool_call_begin|>", "" (for multi-call
templates) + std::string per_call_end; // e.g., "<|tool_call_end|>", "" + std::string call_separator; // e.g., ",", "\n", "" (between multiple calls) + + // === Function markers (from Phase 3: T3-T5) === + std::string func_name_prefix; // e.g., "", "\"", ":0" + std::string func_close; // e.g., "", "" (for tag-based) + std::string args_start; // e.g., "{", "<|tool_call_argument_begin|>" + std::string args_end; // e.g., "}", "" + + // === Argument markers (from Phase 4: A1-A3, for tagged args format) === + std::string arg_name_prefix; // e.g., "", "\"" + std::string arg_name_suffix; // e.g., ">", "", "\":" + std::string arg_value_prefix; // e.g., "", "", "" + std::string arg_value_suffix; // e.g., "", "", "" + std::string arg_separator; // e.g., "", "\n", "," + + // === Call ID markers (for non-JSON formats with tool call IDs) === + std::string call_id_prefix; // e.g., "[CALL_ID]" (marker before call ID value) + std::string call_id_suffix; // e.g., "" (marker after call ID value, before next section) + + // === Special markers === + std::string code_block_marker; // e.g., "Action:" (for markdown code block format) + std::string code_block_language; // e.g., "json" + std::string function_namespace; // e.g., "functions." (for prefixed-indexed format) +}; + + +// ============================================================================ +// Analysis Result Enums +// ============================================================================ + +// Reasoning handling mode (derived from R1-R3 comparisons) +enum class reasoning_mode { + NONE, // No reasoning markers detected + TAG_BASED, // Standard tag-based: ... 
+ DELIMITER, // Delimiter-based: [BEGIN FINAL RESPONSE] (reasoning ends at delimiter) + FORCED_OPEN, // Template ends with open reasoning tag (empty start, non-empty end) + FORCED_CLOSED,// Template ends with open reasoning tag on enabled thinking but + // with both opened and closed tag for disabled thinking + TOOLS_ONLY // Only reason on tool calls, not on normal content +}; + +inline std::ostream & operator<<(std::ostream & os, const reasoning_mode & mode) { + switch (mode) { + case reasoning_mode::NONE: + return os << "NONE"; + case reasoning_mode::TAG_BASED: + return os << "TAG_BASED"; + case reasoning_mode::DELIMITER: + return os << "DELIMITER"; + case reasoning_mode::FORCED_OPEN: + return os << "FORCED_OPEN"; + case reasoning_mode::FORCED_CLOSED: + return os << "FORCED_CLOSED"; + case reasoning_mode::TOOLS_ONLY: + return os << "TOOLS_ONLY"; + default: + return os << "UNKNOWN"; + } +} + +// Content wrapping mode (derived from C1 comparison) +enum class content_mode { + PLAIN, // No content markers + ALWAYS_WRAPPED, // Content always wrapped with markers + WRAPPED_WITH_REASONING, // Content wrapped only when reasoning present +}; + +inline std::ostream & operator<<(std::ostream & os, const content_mode & mode) { + switch (mode) { + case content_mode::PLAIN: + return os << "PLAIN"; + case content_mode::ALWAYS_WRAPPED: + return os << "ALWAYS_WRAPPED"; + case content_mode::WRAPPED_WITH_REASONING: + return os << "WRAPPED_WITH_REASONING"; + default: + return os << "UNKNOWN"; + } +} + +// Call ID position in tool calls (for non-JSON formats) +enum class call_id_position { + NONE, // No call ID support detected + PRE_FUNC_NAME, // Call ID before function name: [CALL_ID]id[FUNC]name{args} + BETWEEN_FUNC_AND_ARGS, // Call ID between function and args: [FUNC]name[CALL_ID]id{args} + POST_ARGS, // Call ID after arguments: [FUNC]name{args}[CALL_ID]id +}; + +inline std::ostream & operator<<(std::ostream & os, const call_id_position & pos) { + switch (pos) { + case 
call_id_position::NONE: + return os << "NONE"; + case call_id_position::PRE_FUNC_NAME: + return os << "PRE_FUNC_NAME"; + case call_id_position::BETWEEN_FUNC_AND_ARGS: + return os << "BETWEEN_FUNC_AND_ARGS"; + case call_id_position::POST_ARGS: + return os << "POST_ARGS"; + default: + return os << "UNKNOWN"; + } +} + +// Tool call format classification (derived from T1-T5, A1-A3 comparisons) +enum class tool_format { + NONE, // No tool support detected + JSON_NATIVE, // Pure JSON: {"name": "X", "arguments": {...}} + TAG_WITH_JSON, // Tag-based with JSON args: {...} + BRACKET_TAG, // Bracket-tag: [TOOL_CALLS]name[CALL_ID]id[ARGS]{...} + PREFIXED_INDEXED, // Prefixed-indexed: functions.X:0{...} + RECIPIENT_BASED, // Recipient routing: >>>func_name\n{...} + TAG_WITH_TAGGED, // Tag-based with tagged args: value + MARKDOWN_BLOCK, // Markdown code block: Action:\n```json\n[...]\n``` +}; + +inline std::ostream & operator<<(std::ostream & os, const tool_format & format) { + switch (format) { + case tool_format::NONE: + return os << "NONE"; + case tool_format::JSON_NATIVE: + return os << "JSON_NATIVE"; + case tool_format::TAG_WITH_JSON: + return os << "TAG_WITH_JSON"; + case tool_format::BRACKET_TAG: + return os << "BRACKET_TAG"; + case tool_format::PREFIXED_INDEXED: + return os << "PREFIXED_INDEXED"; + case tool_format::RECIPIENT_BASED: + return os << "RECIPIENT_BASED"; + case tool_format::TAG_WITH_TAGGED: + return os << "TAG_WITH_TAGGED"; + case tool_format::MARKDOWN_BLOCK: + return os << "MARKDOWN_BLOCK"; + default: + return os << "UNKNOWN"; + } +} + +// Complete result of differential analysis +struct diff_analysis_result { + // Classification results + reasoning_mode reasoning = reasoning_mode::NONE; + content_mode content = content_mode::PLAIN; + tool_format tools = tool_format::NONE; + + // All extracted markers + marker_registry markers; + + // JSON field names (for JSON-based formats) + bool fun_name_is_key = false; + std::string function_field = "function"; + 
std::string name_field = "name"; + std::string args_field = "arguments"; + std::string id_field; + std::string gen_id_field; + std::vector parameter_order; + + // Call ID position (for non-JSON formats) + call_id_position call_id_pos = call_id_position::NONE; + + // Flags + bool supports_tools = false; + bool supports_parallel_calls = false; + bool requires_nonnull_content = false; + bool tools_array_wrapped = false; // Tool calls wrapped in JSON array [...] + + // Preserved tokens for tokenizer (union of all non-empty markers) + std::vector preserved_tokens; +}; + +// Performs systematic differential analysis on chat templates +// Uses comparison matrix to extract markers without heuristics +class differential_analyzer { + public: + // Main entry point: Run full differential analysis on a template + static diff_analysis_result analyze(const common_chat_template & tmpl); + + // Phase-specific analysis (can be called individually for testing) + static void analyze_reasoning(const common_chat_template & tmpl, diff_analysis_result & result); + static void analyze_content(const common_chat_template & tmpl, diff_analysis_result & result); + static void analyze_tools(const common_chat_template & tmpl, diff_analysis_result & result); + static void analyze_arguments(const common_chat_template & tmpl, diff_analysis_result & result); + + // Factorized differential comparison function (public for testing) + // Takes base params and a single modifier lambda to create variant B + // Returns compare_variants_result containing diff and both outputs, or std::nullopt on failure + static std::optional compare_variants( + const common_chat_template & tmpl, + const template_params & params_A, + const std::function & params_modifier); + + private: + // Comparison helpers (implement the comparison matrix from the plan) + + // R1: Extract reasoning markers by comparing with/without reasoning_content + static void compare_reasoning_presence(const common_chat_template & tmpl, 
diff_analysis_result & result); + + // R2: Detect forced-open reasoning by comparing enable_thinking=false vs true + static void compare_thinking_enabled(const common_chat_template & tmpl, diff_analysis_result & result); + + // R3: Detect reasoning scope (content-only vs with tools) + static void compare_reasoning_scope(const common_chat_template & tmpl, diff_analysis_result & result); + + // C1: Extract content markers by comparing different content values + static void compare_content_values(const common_chat_template & tmpl, diff_analysis_result & result); + + // T1: Analyze the tool calls + static void analyze_tool_calls(const common_chat_template & tmpl, diff_analysis_result & result); + + // Analyzes a tool call section to determine the format used (pure JSON, function name markers, or full markers) + static void analyze_tool_call_format(const std::string & haystack, + const std::string & fun_name_needle, + const std::string & arg_name_needle, + diff_analysis_result & result); + + // Helper functions to handle the two branches of analyze_tool_call_format + static void analyze_tool_call_format_json_native(const std::string & clean_haystack, + const std::string & fun_name_needle, + const std::string & arg_name_needle, + diff_analysis_result & result); + + static void analyze_tool_call_format_non_json(const std::string & clean_haystack, + const std::string & fun_name_needle, + diff_analysis_result & result); + + // T2: Check if markers are per call or per section + static void check_per_call_markers(const common_chat_template & tmpl, diff_analysis_result & result); + + // T3: Extract call separator; also outputs second_call_content for per-call detection + static void extract_call_separator(const common_chat_template & tmpl, diff_analysis_result & result, + std::string & second_call_content); + + // T4: Analyze function name format and extract markers + static void extract_function_markers(const common_chat_template & tmpl, + diff_analysis_result & result); + + 
// T5: Extract argument separator + static void extract_argument_separator(const common_chat_template & tmpl, diff_analysis_result & result); + + // T6: Extract args container markers + static void extract_args_markers(const common_chat_template & tmpl, diff_analysis_result & result); + + // A1: Extract argument name markers + static void extract_argument_name_markers(const common_chat_template & tmpl, diff_analysis_result & result); + + // A2: Extract argument value markers + static void extract_argument_value_markers(const common_chat_template & tmpl, diff_analysis_result & result); + + // T7: Extract call ID markers (for non-JSON formats) + static void extract_call_id_markers(const common_chat_template & tmpl, diff_analysis_result & result); + + // Classify tool format based on extracted markers + static void classify_tool_format(diff_analysis_result & result); + + // Classification helpers + static void collect_preserved_tokens(diff_analysis_result & result); + + // Utility: Apply template with given parameters + static std::string apply_template(const common_chat_template & tmpl, + const template_params & params); +}; + +enum segment_type { + TEXT, + MARKER +}; + +inline std::ostream & operator<<(std::ostream & os, const segment_type & type) { + switch (type) { + case segment_type::TEXT: + return os << "TEXT"; + case segment_type::MARKER: + return os << "MARKER"; + default: + return os << "UNKNOWN"; + } +} + +struct segment { + segment_type type; + std::string value; + + segment(segment_type type, std::string value) : type(type), value(std::move(value)) {} +}; \ No newline at end of file diff --git a/common/chat-peg-parser.cpp b/common/chat-peg-parser.cpp index ba49ecf29b..f72bece7b0 100644 --- a/common/chat-peg-parser.cpp +++ b/common/chat-peg-parser.cpp @@ -148,585 +148,6 @@ common_peg_parser common_chat_peg_builder::tag_with_safe_content(const std::stri return zero_or_more(choice({ p, content_chunk })); } -common_peg_parser 
common_chat_peg_unified_builder::build_reasoning_block(const content_structure & cs, - common_reasoning_format reasoning_format, - bool thinking_forced_open) { - // If reasoning is explicitly disabled, return empty - if (reasoning_format == COMMON_REASONING_FORMAT_NONE) { - return eps(); - } - - // Get reasoning markers - use from content_structure or fallback for DEEPSEEK format - std::string reason_start = cs.reasoning_start; - std::string reason_end = cs.reasoning_end; - - // If DEEPSEEK format is specified but markers weren't detected, use fallback markers - if ((reasoning_format == COMMON_REASONING_FORMAT_DEEPSEEK || - reasoning_format == COMMON_REASONING_FORMAT_DEEPSEEK_LEGACY) && - (reason_start.empty() || reason_end.empty())) { - // Try standard DeepSeek markers - if (reason_start.empty()) { - reason_start = ""; - } - if (reason_end.empty()) { - reason_end = ""; - } - } - - // If still no markers, return empty - // But allow empty start marker if thinking is forced open (implicit start) - if ((reason_start.empty() && !thinking_forced_open) || reason_end.empty()) { - return eps(); - } - - if (thinking_forced_open) { - // Mandatory reasoning: parse from current position to end marker - auto parser = reasoning(until(reason_end)) + literal(reason_end); - return rule("reasoning", reasoning_block(parser)); - } - // Optional reasoning: may or may not appear - // Also try <|START_THINKING|> style markers if standard markers don't match - auto standard_reasoning = - reasoning_block(literal(reason_start) + reasoning(until(reason_end)) + literal(reason_end)); - - // For templates that use <|START_THINKING|> style markers - if (reason_start == "" && reason_end == "") { - auto alt_reasoning = reasoning_block(literal("<|START_THINKING|>") + reasoning(until("<|END_THINKING|>")) + - literal("<|END_THINKING|>")); - return optional(rule("reasoning", choice({ standard_reasoning, alt_reasoning }))); - } - - return optional(rule("reasoning", standard_reasoning)); -} - 
-common_peg_parser common_chat_peg_unified_builder::build_content_block(const content_structure & cs, - common_reasoning_format reasoning_format, - const std::string & tool_section_start) { - GGML_UNUSED(tool_section_start); // leaving for now just in case - std::string content_start = cs.content_start; - std::string content_end = cs.content_end; - - // Add fallback content markers for DEEPSEEK format if not detected - // Some templates use tags for content when reasoning is enabled - if ((reasoning_format == COMMON_REASONING_FORMAT_DEEPSEEK || - reasoning_format == COMMON_REASONING_FORMAT_DEEPSEEK_LEGACY) && - (content_start.empty() || content_end.empty())) { - content_start = ""; - content_end = ""; - } - - // Handle content markers with both start and end - if (cs.content_mode != content_structure::CONTENT_PLAIN && !cs.content_start.empty() && !cs.content_end.empty()) { - // Content is wrapped in markers - if (reasoning_format == COMMON_REASONING_FORMAT_NONE) { - // When reasoning_format=NONE, preserve any content before the content start marker - // (this may include reasoning/thinking markers that the model generates). - // This applies even if reasoning markers weren't detected by the analyzer. 
- auto with_markers = content(until(cs.content_start)) + literal(cs.content_start) + - content(until(cs.content_end)) + literal(cs.content_end); - // Fallback: content wrapped in end marker only (start marker might be in prompt) - auto implicit_markers = content(until(cs.content_end)) + literal(cs.content_end); - auto without_markers = content(rest()); - return choice({ with_markers, implicit_markers, without_markers }); - } // When reasoning is parsed separately, content starts directly after reasoning block - auto with_markers = literal(cs.content_start) + content(until(cs.content_end)) + literal(cs.content_end); - auto implicit_markers = content(until(cs.content_end)) + literal(cs.content_end); - auto without_markers = content(rest()); - return choice({ with_markers, implicit_markers, without_markers }); - } - - // Handle content with only start marker (no end marker) - // This is for formats like recipient-based (Functionary v3.2) where content is prefixed with - // a marker but has no explicit closing marker - content ends at end of message or before tool calls - if (cs.content_mode != content_structure::CONTENT_PLAIN && !cs.content_start.empty() && cs.content_end.empty()) { - if (reasoning_format == COMMON_REASONING_FORMAT_NONE) { - // Preserve any content before the start marker, then consume the marker and capture rest - auto with_start_marker = content(until(cs.content_start)) + literal(cs.content_start) + content(rest()); - auto without_markers = content(rest()); - return choice({ with_start_marker, without_markers }); - } // Content starts directly after reasoning block - auto with_start_marker = literal(cs.content_start) + content(rest()); - auto without_markers = content(rest()); - return choice({ with_start_marker, without_markers }); - } - - // For DEEPSEEK format, try fallback content markers even if not detected - if (!content_start.empty() && !content_end.empty()) { - auto with_markers = literal(content_start) + content(until(content_end)) + 
literal(content_end); - auto without_markers = content(rest()); - return choice({ with_markers, without_markers }); - } - - // Plain content - capture rest - return content(rest()); -} - -common_peg_parser common_chat_peg_unified_builder::build_tool_section(const tool_call_structure & ts, - const nlohmann::json & tools, - bool parallel_tool_calls, - bool force_tool_calls) { - if (!ts.supports_tools || !tools.is_array() || tools.empty()) { - return eps(); - } - - // Build tool choices based on function format - auto tool_choices = choice(); - - for (const auto & tool_def : tools) { - if (!tool_def.contains("function")) { - continue; - } - const auto & function = tool_def.at("function"); - std::string name = function.at("name"); - nlohmann::json params = function.contains("parameters") ? function.at("parameters") : nlohmann::json::object(); - - tool_choices |= rule("tool-" + name, build_function(ts, name, params)); - } - - // Build the section with or without markers - auto build_section = [&]() -> common_peg_parser { - // Markdown code block format (Cohere Command-R Plus): - // Action:\n```json\n[{...}]\n``` - if (ts.function_format == tool_call_structure::FUNC_MARKDOWN_CODE_BLOCK) { - // Build the opening: "Action:\n```json" - std::string code_fence_open = "```"; - if (!ts.code_block_language.empty()) { - code_fence_open += ts.code_block_language; - } - - auto opening = literal(ts.code_block_marker) + literal("\n") + literal(code_fence_open) + literal("\n"); - auto closing = literal("\n") + literal(ts.tool_section_end); // "\n```" - - // Build the JSON array of tool calls - // Don't use trigger_rule here since we're nested inside a sequence - auto tools_array = literal("[") + space(); - if (parallel_tool_calls) { - tools_array = tools_array + tool_choices; - tools_array = tools_array + zero_or_more(space() + literal(",") + space() + tool_choices); - } else { - tools_array = tools_array + optional(tool_choices); - } - tools_array = tools_array + space() + 
literal("]"); - - // Full section: Action:\n```json\n[{...}]\n``` - return trigger_rule("tool-call", opening + tools_array + closing); - } - - // Recipient-based format (Functionary v3.2): >>>function_name\n{arguments} - // Uses tool_section_start as delimiter, but no array wrapper or section markers - if (ts.function_format == tool_call_structure::FUNC_RECIPIENT_BASED) { - auto tool_call = trigger_rule("tool-call", tool_choices); - if (parallel_tool_calls) { - // Multiple tool calls: each starts with >>> - return one_or_more(tool_call + space()); - } - return tool_call; - } - - if (!ts.tool_section_start.empty() && !ts.tool_section_end.empty()) { - // Check if this format has SEPARATE section markers and per-call markers. - // This happens when: - // - Section markers wrap the ENTIRE section (e.g., ...) - // - Function prefix contains its own per-call marker (e.g., ...) - // Example: DeepSeek R1 with section and call markers, Kimi-K2 with prefixed-indexed format - // We detect this by checking if function_prefix contains a per-call START marker - // (indicated by words like "call_begin", "call_start", or similar patterns) - bool has_separate_section_and_call_markers = false; - - // FUNC_PREFIXED_INDEXED and FUNC_BRACKET_TAG always have separate section and per-call markers - if (ts.function_format == tool_call_structure::FUNC_PREFIXED_INDEXED || - ts.function_format == tool_call_structure::FUNC_BRACKET_TAG) { - has_separate_section_and_call_markers = true; - } else if (ts.function_format == tool_call_structure::FUNC_NAME_AS_KEY) { - // FUNC_NAME_AS_KEY uses comma-separated JSON objects in an array - // Format: [{"func1": args}, {"func2": args}] - // The brackets are included in section markers - auto tool_call = trigger_rule("tool-call", tool_choices); - auto tool_calls = tool_call; - if (parallel_tool_calls) { - tool_calls = tool_call + zero_or_more(space() + literal(",") + space() + tool_call); - } - return literal(ts.tool_section_start) + space() + tool_calls + 
space() + literal(ts.tool_section_end); - } else if (ts.function_format == tool_call_structure::FUNC_TAG_WITH_NAME && !ts.function_prefix.empty()) { - // Check if function_prefix contains a per-call marker like "" - // This differentiates DeepSeek R1 (where function_prefix has its own call marker) - // from Nemotron (where function_prefix is just " ... - auto tool_call = trigger_rule("tool-call", tool_choices); - auto tool_calls = parallel_tool_calls ? one_or_more(tool_call + space()) : tool_call; - return literal(ts.tool_section_start) + space() + tool_calls + space() + literal(ts.tool_section_end); - } // Each tool call has its own wrapper: tool - auto single_tool_section = - trigger_rule("tool-call", literal(ts.tool_section_start) + space() + tool_choices + space() + - literal(ts.tool_section_end)); - if (parallel_tool_calls) { - // Multiple wrapped tool calls - return one_or_more(single_tool_section + space()); - } - return single_tool_section; - } - if (!ts.tool_section_start.empty()) { - // Start marker only (no end marker) - e.g., <|tool_call|>[...] 
- // Wrap all tool calls in an array after the start marker - auto tools_array = literal("[") + space(); - if (parallel_tool_calls) { - tools_array = tools_array + tool_choices; - tools_array = tools_array + zero_or_more(space() + literal(",") + space() + tool_choices); - } else { - tools_array = tools_array + optional(tool_choices); - } - tools_array = tools_array + space() + literal("]"); - - return trigger_rule("tool-call", literal(ts.tool_section_start) + tools_array); - } // No section markers (raw JSON format, e.g., Llama 3.1) - // Use trigger rule since tool calls are identified by regex trigger on the grammar - if (parallel_tool_calls) { - return trigger_rule("tool-call", one_or_more(tool_choices + space())); - } - return trigger_rule("tool-call", tool_choices); - }; - - auto section = build_section(); - if (!force_tool_calls) { - section = optional(section); - } - - return section; -} - -common_peg_parser common_chat_peg_unified_builder::build_function(const tool_call_structure & ts, - const std::string & name, - const nlohmann::json & schema) { - auto args = build_arguments(ts, schema); - - switch (ts.function_format) { - case tool_call_structure::FUNC_JSON_OBJECT: - { - // Build JSON object parser that accepts id field in either position: - // - Before name: {"id": "...", "name": "X", "arguments": {...}} (R7B style) - // - After args: {"name": "X", "arguments": {...}, "id": "..."} (Mistral style) - auto tool_name_ = json_member(ts.name_field, "\"" + tool_name(literal(name)) + "\""); - auto tool_args_ = json_member(ts.args_field, tool_args(args)); - - // id can appear before name or after args - auto id_member = json_member(ts.id_field, tool_id(json_string())); - auto id_before = ts.id_field.empty() ? eps() : optional(id_member << space() << "," << space()); - auto id_after = ts.id_field.empty() ? 
eps() : optional(space() << "," << space() << id_member); - - return tool(tool_open(literal("{")) << space() << id_before // optional id before name (R7B style) - << tool_name_ << space() << "," << space() << tool_args_ - << id_after // optional id after args (Mistral style) - << zero_or_more(space() << "," << space() << json_string() - << space() << ":" << space() << json()) - << space() << "}"); - } - - case tool_call_structure::FUNC_TAG_WITH_NAME: - { - // Build tag parser: {...} - // Combine prefix + name + suffix into tool_open to ensure the tool is only created - // when the FULL opening tag is confirmed. This prevents partial name matches during - // incremental parsing (e.g., matching "special_function" when input is "special_function_") - auto opening = literal(ts.function_prefix) + tool_name(literal(name)) + literal(ts.function_suffix); - // Note: No space() before tool_close because function_close may start with newline - // (e.g., "\n```") and space() would consume it, preventing the literal match - return tool(tool_open(opening) + space() + tool_args(args) + tool_close(literal(ts.function_close))); - } - - case tool_call_structure::FUNC_TAG_NAME_ONLY: - { - // Build tag parser: ... 
- // Combine < + name + > into tool_open to prevent partial matches - auto opening = literal("<") + tool_name(literal(name)) + literal(">"); - return tool(tool_open(opening) + space() + tool_args(args) + space() + - tool_close(literal(""))); - } - - case tool_call_structure::FUNC_PREFIXED_INDEXED: - { - // Build prefixed-indexed parser (e.g., Kimi-K2): - // <|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>{...}<|tool_call_end|> - // The index number after : is ignored (we use zero_or_more(digit) to skip it) - auto opening = literal(ts.per_call_start) + literal(ts.function_namespace) + tool_name(literal(name)) + - literal(":") + zero_or_more(chars("0-9", 1, 1)) + // Skip the index - literal(ts.args_marker); - return tool(tool_open(opening) + space() + tool_args(args) + space() + - tool_close(literal(ts.per_call_end))); - } - - case tool_call_structure::FUNC_NAME_AS_KEY: - { - // Build name-as-key parser (e.g., Apertus): - // {"function_name": {...arguments...}} - // The function name IS the JSON key, and arguments are the value directly - auto opening = literal("{\"") + tool_name(literal(name)) + literal("\":"); - return tool(tool_open(opening) + space() + tool_args(args) + space() + literal("}")); - } - - case tool_call_structure::FUNC_BRACKET_TAG: - { - // Build bracket-tag parser (e.g., Mistral Small 3.2): - // [TOOL_CALLS]function_name[CALL_ID]call_id[ARGS]{...} - // per_call_start = "[TOOL_CALLS]" - // id_marker = "[CALL_ID]" - // args_marker = "[ARGS]" - auto opening = literal(ts.per_call_start) + tool_name(literal(name)); - if (!ts.id_marker.empty()) { - // Add id_marker + id value (captured as tool_id) - opening = opening + literal(ts.id_marker) + tool_id(until(ts.args_marker)); - } - if (!ts.args_marker.empty()) { - opening = opening + literal(ts.args_marker); - } - // No explicit closer for this format (EOS terminates) - return tool(tool_open(opening) + space() + tool_args(args)); - } - - case 
tool_call_structure::FUNC_RECIPIENT_BASED: - { - // Build recipient-based parser (e.g., Functionary v3.2): - // >>>function_name - // {'param1': 'value1', 'param2': 'value2'} - // tool_section_start = ">>>" - // Function name directly follows ">>>" with newline, arguments are Python dict (parse as JSON) - auto opening = literal(ts.tool_section_start) + tool_name(literal(name)); - // No explicit closer (newline + arguments, then EOS or next >>>) - return tool(tool_open(opening) + space() + tool_args(args)); - } - - case tool_call_structure::FUNC_MARKDOWN_CODE_BLOCK: - { - // Build markdown code block parser (e.g., Cohere Command-R Plus): - // Action: - // ```json - // [ - // { - // "tool_name": "function_name", - // "parameters": {...} - // } - // ] - // ``` - // The individual function is a JSON object within the array - auto tool_name_ = json_member(ts.name_field, "\"" + tool_name(literal(name)) + "\""); - auto tool_args_ = json_member(ts.args_field, tool_args(args)); - - // Build the JSON object: {"tool_name": "...", "parameters": {...}} - // Use same pattern as FUNC_JSON_OBJECT: tool_open with atomic wrapper - return tool(tool_open(literal("{")) << space() << tool_name_ << space() << "," << space() << tool_args_ - << zero_or_more(space() << "," << space() << json_string() - << space() << ":" << space() << json()) - << space() << "}"); - } - } - - return eps(); -} - -common_peg_parser common_chat_peg_unified_builder::build_arguments(const tool_call_structure & ts, - const nlohmann::json & params) { - switch (ts.argument_format) { - case tool_call_structure::ARGS_JSON: - { - // Standard JSON object arguments - if (params.is_object()) { - return schema(json(), "args", params); - } - return json(); - } - - case tool_call_structure::ARGS_TAGGED: - { - // Tagged arguments: value - if (!params.contains("properties") || params.at("properties").empty()) { - return eps(); - } - - auto arg_choice = choice(); - for (const auto & el : params.at("properties").items()) { - 
const std::string & prop_name = el.key(); - const auto & prop_schema = el.value(); - - // Check if the schema declares this as a string type - bool is_string_type = prop_schema.contains("type") && prop_schema.at("type") == "string"; - - auto arg_name_parser = choice( - { literal(prop_name), literal("\"" + prop_name + "\""), literal("'" + prop_name + "'") }); - - // Use tool_arg_string_value for string types to prevent treating "[..." as JSON array - auto value_parser = is_string_type ? tool_arg_string_value(until(ts.arg_close)) - : tool_arg_value(until(ts.arg_close)); - - auto arg_rule = tool_arg(tool_arg_open(literal(ts.arg_prefix)) + tool_arg_name(arg_name_parser) + - literal(ts.arg_suffix) + value_parser + - tool_arg_close(literal(ts.arg_close)) + - (ts.arg_separator.empty() ? eps() : optional(literal(ts.arg_separator)))); - arg_choice |= arg_rule; - } - return zero_or_more(arg_choice + space()); - } - - case tool_call_structure::ARGS_KEY_VALUE_TAGS: - { - // Key-value tag arguments (GLM-4.6 style): - // key - // value - if (!params.contains("properties") || params.at("properties").empty()) { - return eps(); - } - - auto arg_choice = choice(); - for (const auto & el : params.at("properties").items()) { - const std::string & prop_name = el.key(); - const auto & prop_schema = el.value(); - - // Check if the schema declares this as a string type - bool is_string_type = prop_schema.contains("type") && prop_schema.at("type") == "string"; - - // Parse: key\nvalue - // ts.arg_prefix = "", ts.arg_suffix = "", ts.arg_close = "" - // Use tool_arg_string_value for string types to prevent treating "[..." as JSON array - auto value_parser = is_string_type ? 
tool_arg_string_value(until(ts.arg_close)) - : tool_arg_value(until(ts.arg_close)); - - auto arg_rule = tool_arg(tool_arg_open(literal(ts.arg_prefix)) + tool_arg_name(literal(prop_name)) + - literal(ts.arg_suffix) + // - space() + literal("") + value_parser + - tool_arg_close(literal(ts.arg_close))); - arg_choice |= arg_rule; - } - return zero_or_more(arg_choice + space()); - } - } - - return eps(); -} - -common_peg_parser common_chat_peg_unified_builder::standard_json_tools(const std::string & section_start, - const std::string & section_end, - const nlohmann::json & tools, - bool parallel_tool_calls, - bool force_tool_calls) { - if (!tools.is_array() || tools.empty()) { - return eps(); - } - - // Build tool choices for JSON format - auto tool_choices = choice(); - - for (const auto & tool_def : tools) { - if (!tool_def.contains("function")) { - continue; - } - const auto & function = tool_def.at("function"); - std::string name = function.at("name"); - nlohmann::json params = function.contains("parameters") ? function.at("parameters") : nlohmann::json::object(); - - // Build JSON object parser: {"name": "X", "arguments": {...}} - auto tool_name_ = json_member("name", "\"" + tool_name(literal(name)) + "\""); - auto tool_args_ = json_member("arguments", tool_args(schema(json(), "tool-" + name + "-schema", params))); - - auto tool_parser = - tool(tool_open(literal("{")) << space() << tool_name_ << space() << "," << space() << tool_args_ - << zero_or_more(space() << "," << space() << json_string() << space() << ":" - << space() << json()) - << space() << "}"); - - tool_choices |= rule("tool-" + name, tool_parser); - } - - // Build the section with markers - auto tool_calls = tool_choices; - if (parallel_tool_calls) { - tool_calls = tool_calls + zero_or_more(space() + literal(",") + space() + tool_choices); - } - - auto section = - trigger_rule("tool-call", literal(section_start) + space() + tool_calls + space() + literal(section_end)); - - return force_tool_calls ? 
section : optional(section); -} - -common_peg_parser common_chat_peg_unified_builder::standard_constructed_tools( - const std::map & markers, - const nlohmann::json & tools, - bool parallel_tool_calls, - bool force_tool_calls) { - if (!tools.is_array() || tools.empty()) { - return eps(); - } - - // Extract markers with defaults - auto get_marker = [&markers](const std::string & key, const std::string & default_val = "") -> std::string { - auto it = markers.find(key); - return it != markers.end() ? it->second : default_val; - }; - - std::string section_start = get_marker("tool_call_start_marker", ""); - std::string section_end = get_marker("tool_call_end_marker", ""); - std::string func_opener = get_marker("function_opener", ""); - std::string func_closer = get_marker("function_closer", ""); - std::string param_key_prefix = get_marker("parameter_key_prefix", ""); - std::string param_closer = get_marker("parameter_closer", ""); - - // Build tool choices for tagged format - auto tool_choices = choice(); - - for (const auto & tool_def : tools) { - if (!tool_def.contains("function")) { - continue; - } - const auto & function = tool_def.at("function"); - std::string name = function.at("name"); - nlohmann::json params = function.contains("parameters") ? 
function.at("parameters") : nlohmann::json::object(); - - // Build argument parsers - auto args = eps(); - if (params.contains("properties") && !params["properties"].empty()) { - auto arg_choice = choice(); - for (const auto & el : params["properties"].items()) { - const std::string & prop_name = el.key(); - - auto arg_name_parser = - choice({ literal(prop_name), literal("\"" + prop_name + "\""), literal("'" + prop_name + "'") }); - - auto arg_rule = tool_arg(tool_arg_open(literal(param_key_prefix)) + tool_arg_name(arg_name_parser) + - literal(param_key_suffix) + tool_arg_value(until(param_closer)) + - tool_arg_close(literal(param_closer))); - arg_choice |= arg_rule; - } - args = zero_or_more(arg_choice + space()); - } - - // Build function parser: args - auto tool_parser = tool(tool_open(literal(func_opener) + tool_name(literal(name)) + literal(func_name_suffix)) + - space() + tool_args(args) + space() + tool_close(literal(func_closer))); - - tool_choices |= rule("tool-" + name, tool_parser); - } - - // Build the section with markers - auto section = - parallel_tool_calls ? - trigger_rule("tool-call", literal(section_start) + space() + one_or_more(tool_choices + space()) + - literal(section_end)) : - trigger_rule("tool-call", literal(section_start) + space() + tool_choices + space() + literal(section_end)); - - return force_tool_calls ? 
section : optional(section); -} - void common_chat_peg_unified_mapper::from_ast(const common_peg_ast_arena & arena, const common_peg_parse_result & parse_result_arg) { // Call base class to visit all nodes @@ -734,7 +155,7 @@ void common_chat_peg_unified_mapper::from_ast(const common_peg_ast_arena & ar // Flush any pending tool call that was started but never got a name // This happens during partial parsing when the tool call is incomplete - if (pending_tool_call.has_value()) { + if (pending_tool_call.has_value() && !pending_tool_call->name.empty()) { // Transfer any buffered arguments if (!args_buffer.empty()) { pending_tool_call->arguments = args_buffer; @@ -954,7 +375,6 @@ void common_chat_peg_unified_mapper::map(const common_peg_ast_node & node) { current_tool->arguments += "\""; needs_closing_quote = false; } - // Close the arguments object if using tagged format if (!current_tool->arguments.empty() && current_tool->arguments.back() != '}') { current_tool->arguments += "}"; } @@ -982,3 +402,352 @@ void common_chat_peg_unified_mapper::map(const common_peg_ast_node & node) { } } } + +common_peg_parser common_chat_peg_unified_builder::standard_constructed_tools( + const std::map & markers, + const nlohmann::json & tools, + bool parallel_tool_calls, + bool force_tool_calls) { + if (!tools.is_array() || tools.empty()) { + return eps(); + } + + // Extract markers with defaults + auto get_marker = [&markers](const std::string & key, const std::string & default_val = "") -> std::string { + auto it = markers.find(key); + return it != markers.end() ? 
it->second : default_val; + }; + + std::string section_start = get_marker("tool_call_start_marker", ""); + std::string section_end = get_marker("tool_call_end_marker", ""); + std::string func_opener = get_marker("function_opener", ""); + std::string func_closer = get_marker("function_closer", ""); + std::string param_key_prefix = get_marker("parameter_key_prefix", ""); + std::string param_closer = get_marker("parameter_closer", ""); + + // Build tool choices for tagged format + auto tool_choices = choice(); + + for (const auto & tool_def : tools) { + if (!tool_def.contains("function")) { + continue; + } + const auto & function = tool_def.at("function"); + std::string name = function.at("name"); + nlohmann::json params = function.contains("parameters") ? function.at("parameters") : nlohmann::json::object(); + + // Build argument parsers + auto args = eps(); + if (params.contains("properties") && !params["properties"].empty()) { + auto arg_choice = choice(); + for (const auto & el : params["properties"].items()) { + const std::string & prop_name = el.key(); + + auto arg_name_parser = + choice({ literal(prop_name), literal("\"" + prop_name + "\""), literal("'" + prop_name + "'") }); + + auto arg_rule = tool_arg(tool_arg_open(literal(param_key_prefix)) + tool_arg_name(arg_name_parser) + + literal(param_key_suffix) + tool_arg_value(until(param_closer)) + + tool_arg_close(literal(param_closer))); + arg_choice |= arg_rule; + } + args = zero_or_more(arg_choice + space()); + } + + // Build function parser: args + auto tool_parser = tool(tool_open(literal(func_opener) + tool_name(literal(name)) + literal(func_name_suffix)) + + space() + tool_args(args) + space() + tool_close(literal(func_closer))); + + tool_choices |= rule("tool-" + name, tool_parser); + } + + // Build the section with markers + auto section = + parallel_tool_calls ? 
+ trigger_rule("tool-call", literal(section_start) + space() + one_or_more(tool_choices + space()) + + literal(section_end)) : + trigger_rule("tool-call", literal(section_start) + space() + tool_choices + space() + literal(section_end)); + + return force_tool_calls ? section : optional(section); +} + +// Helper: Parse dot notation key into prefix and field name +static std::pair parse_key_spec(const std::string & key) { + auto dot_pos = key.find('.'); + if (dot_pos == std::string::npos) { + return {"", key}; // Top-level field + } + return {key.substr(0, dot_pos), key.substr(dot_pos + 1)}; +} + +common_peg_parser common_chat_peg_unified_builder::standard_json_tools( + const std::string & section_start, + const std::string & section_end, + const nlohmann::json & tools, + bool parallel_tool_calls, + bool force_tool_calls, + const std::string & name_key, + const std::string & args_key, + bool array_wrapped, + bool function_is_key, + const std::string & call_id_key, + const std::string & gen_call_id_key, + const std::vector & parameters_order) { + if (!tools.is_array() || tools.empty()) { + return eps(); + } + + // Build tool choices for JSON format + auto tool_choices = choice(); + // auto other_member = json_string() + space() + literal(":") + space() + json(); + + // Determine effective field names + std::string effective_name_key = name_key.empty() ? "name" : name_key; + std::string effective_args_key = args_key.empty() ? 
"arguments" : args_key; + + // Check if we have nested keys (dot notation) + auto name_spec = parse_key_spec(effective_name_key); + auto args_spec = parse_key_spec(effective_args_key); + bool has_nested_keys = !name_spec.first.empty() || !args_spec.first.empty(); + + // Mode 1: function_is_key - parse {"function_name": {...}} + if (function_is_key) { + for (const auto & tool_def : tools) { + if (!tool_def.contains("function")) { + continue; + } + const auto & function = tool_def.at("function"); + std::string name = function.at("name"); + nlohmann::json params = function.contains("parameters") ? function.at("parameters") : nlohmann::json::object(); + + // Build inner object fields + std::vector inner_fields; + + // Add optional string ID field + if (!call_id_key.empty()) { + auto id_parser = atomic( + literal("\"" + call_id_key + "\"") + space() + literal(":") + space() + + literal("\"") + tool_id(json_string_content()) + literal("\"") + ); + inner_fields.push_back(optional(id_parser + space() + optional(literal(",") + space()))); + } + + // Add optional generated integer ID field + if (!gen_call_id_key.empty()) { + auto gen_id_parser = atomic( + literal("\"" + gen_call_id_key + "\"") + space() + literal(":") + space() + + choice({ + literal("\"") + tool_id(json_string_content()) + literal("\""), + tool_id(json_number()) + }) + ); + inner_fields.push_back(optional(gen_id_parser + space() + optional(literal(",") + space()))); + } + + // Add arguments - either wrapped in args_key or parsed directly + common_peg_parser args_parser = eps(); + if (args_key.empty()) { + // Arguments are directly the inner object value: {"func_name": {"arg1": "val"}} + args_parser = tool_args(schema(json(), "tool-" + name + "-schema", params)); + } else { + // Arguments are wrapped in a key: {"func_name": {"arguments": {"arg1": "val"}}} + args_parser = literal("\"" + effective_args_key + "\"") + space() + literal(":") + space() + + tool_args(schema(json(), "tool-" + name + "-schema", 
params)); + } + inner_fields.push_back(args_parser); + + // Build inner object parser - no greedy other_member skipping to avoid consuming ID + common_peg_parser inner_object = eps(); + if (args_key.empty() && inner_fields.size() == 1) { + // Direct arguments: {"func_name": {"arg1": "val"}} + // The args_parser is already the full object schema + inner_object = inner_fields[0]; + } else { + // Wrapped arguments: {"func_name": {"arguments": {"arg1": "val"}}} + inner_object = literal("{") + space(); + for (size_t i = 0; i < inner_fields.size(); i++) { + inner_object = inner_object + inner_fields[i]; + if (i < inner_fields.size() - 1) { + inner_object = inner_object + space(); + } + } + inner_object = inner_object + space() + literal("}"); + } + + // Tool call format: { "function_name": { inner_object } } + auto tool_parser = tool( + tool_open(literal("{")) + space() + + literal("\"") + tool_name(literal(name)) + literal("\"") + + space() + literal(":") + space() + + inner_object + + space() + tool_close(literal("}")) + ); + + tool_choices |= rule("tool-" + name, tool_parser); + } + } + // Mode 2: Nested keys (dot notation like "function.name") + else if (has_nested_keys) { + // Group fields by prefix + std::string nested_prefix = !name_spec.first.empty() ? name_spec.first : args_spec.first; + std::string nested_name_field = !name_spec.first.empty() ? name_spec.second : effective_name_key; + std::string nested_args_field = !args_spec.first.empty() ? args_spec.second : effective_args_key; + + for (const auto & tool_def : tools) { + if (!tool_def.contains("function")) { + continue; + } + const auto & function = tool_def.at("function"); + std::string name = function.at("name"); + nlohmann::json params = function.contains("parameters") ? 
function.at("parameters") : nlohmann::json::object(); + + // Build nested object with name and arguments + auto nested_name = literal("\"" + nested_name_field + "\"") + space() + literal(":") + space() + + literal("\"") + tool_name(literal(name)) + literal("\""); + auto nested_args = literal("\"" + nested_args_field + "\"") + space() + literal(":") + space() + + tool_args(schema(json(), "tool-" + name + "-schema", params)); + + auto nested_object = literal("{") + space() + + nested_name + space() + literal(",") + space() + + nested_args + + space() + literal("}"); + + // Build top-level parser - simpler structure without greedy other_member skipping + // Format: { id?, "function": {...} } + auto tool_parser_body = tool_open(literal("{")) + space(); + + // Add optional string ID field at top level + if (!call_id_key.empty()) { + auto id_spec = parse_key_spec(call_id_key); + if (id_spec.first.empty()) { // Top-level ID field + auto id_parser = atomic( + literal("\"" + call_id_key + "\"") + space() + literal(":") + space() + + literal("\"") + tool_id(json_string_content()) + literal("\"") + ); + tool_parser_body = tool_parser_body + optional(id_parser + space() + literal(",") + space()); + } + } + + // Add optional generated integer ID field at top level + if (!gen_call_id_key.empty()) { + auto gen_id_spec = parse_key_spec(gen_call_id_key); + if (gen_id_spec.first.empty()) { // Top-level gen ID field + auto gen_id_parser = atomic( + literal("\"" + gen_call_id_key + "\"") + space() + literal(":") + space() + + choice({ + literal("\"") + tool_id(json_string_content()) + literal("\""), + tool_id(json_number()) + }) + ); + tool_parser_body = tool_parser_body + optional(gen_id_parser + space() + literal(",") + space()); + } + } + + // Add the nested object field + auto nested_field = literal("\"" + nested_prefix + "\"") + space() + literal(":") + space() + nested_object; + tool_parser_body = tool_parser_body + nested_field + space() + tool_close(literal("}")); + + 
tool_choices |= rule("tool-" + name, tool(tool_parser_body)); + } + } + // Mode 3: Flat keys (enhanced with ID fields and parameter ordering) + else { + auto name_key_parser = literal("\"" + name_key + "\""); + auto args_key_parser = literal("\"" + args_key + "\""); + + for (const auto & tool_def : tools) { + if (!tool_def.contains("function")) { + continue; + } + const auto & function = tool_def.at("function"); + std::string name = function.at("name"); + nlohmann::json params = function.contains("parameters") ? function.at("parameters") : nlohmann::json::object(); + + auto tool_name_ = name_key_parser + space() + literal(":") + space() + + literal("\"") + tool_name(literal(name)) + literal("\""); + auto tool_args_ = args_key_parser + space() + literal(":") + space() + + tool_args(schema(json(), "tool-" + name + "-schema", params)); + + // Build ID parsers if keys are provided + common_peg_parser id_parser = eps(); + if (!call_id_key.empty()) { + id_parser = atomic( + literal("\"" + call_id_key + "\"") + space() + literal(":") + space() + + choice({ + literal("\"") + tool_id(json_string_content()) + literal("\""), + tool_id(json_number()) + }) + ); + } + + common_peg_parser gen_id_parser = eps(); + if (!gen_call_id_key.empty()) { + gen_id_parser = atomic( + literal("\"" + gen_call_id_key + "\"") + space() + literal(":") + space() + + choice({ + literal("\"") + tool_id(json_string_content()) + literal("\""), + tool_id(json_number()) + }) + ); + } + + common_peg_parser tool_parser = eps(); + + // Use parameter ordering if provided - parse fields in specified order without greedy skipping + if (!parameters_order.empty()) { + } + // Build parser using parameter ordering (works with or without explicit parameters_order) + // Create list of (parser, key) pairs for all fields + std::vector> parser_pairs; + parser_pairs.emplace_back(tool_name_, effective_name_key); + parser_pairs.emplace_back(tool_args_, effective_args_key); + if (!call_id_key.empty()) { + 
parser_pairs.emplace_back(optional(id_parser), call_id_key); + } + if (!gen_call_id_key.empty()) { + parser_pairs.emplace_back(optional(gen_id_parser), gen_call_id_key); + } + + // Sort by position in parameters_order (or at end if not present) + std::sort(parser_pairs.begin(), parser_pairs.end(), + [¶meters_order](const auto & a, const auto & b) { + auto pos_a = std::find(parameters_order.begin(), parameters_order.end(), a.second); + auto pos_b = std::find(parameters_order.begin(), parameters_order.end(), b.second); + size_t idx_a = (pos_a == parameters_order.end()) ? parameters_order.size() : std::distance(parameters_order.begin(), pos_a); + size_t idx_b = (pos_b == parameters_order.end()) ? parameters_order.size() : std::distance(parameters_order.begin(), pos_b); + return idx_a < idx_b; + }); + + // Build ordered parser + auto ordered_body = tool_open(literal("{")) + space(); + for (size_t i = 0; i < parser_pairs.size(); i++) { + ordered_body = ordered_body + parser_pairs[i].first; + if (i < parser_pairs.size() - 1) { + ordered_body = ordered_body + space() + literal(",") + space(); + } + } + ordered_body = ordered_body + space() + tool_close(literal("}")); + tool_parser = tool(ordered_body); + + tool_choices |= rule("tool-" + name, tool_parser); + } + } + + // Build the section with markers + auto tool_calls = tool_choices; + if (parallel_tool_calls) { + tool_calls = tool_calls + zero_or_more(space() + literal(",") + space() + tool_choices); + } + + // Optionally wrap in array brackets + if (array_wrapped) { + tool_calls = literal("[") + space() + tool_calls + space() + literal("]"); + } + + auto section = + trigger_rule("tool-call", literal(section_start) + space() + tool_calls + space() + literal(section_end)); + + return force_tool_calls ? 
section : optional(section); +} diff --git a/common/chat-peg-parser.h b/common/chat-peg-parser.h index 920d5cffd4..7304ca7e61 100644 --- a/common/chat-peg-parser.h +++ b/common/chat-peg-parser.h @@ -5,6 +5,7 @@ #include #include +#include class common_chat_peg_builder : public common_peg_parser_builder { public: @@ -63,65 +64,43 @@ class common_chat_peg_unified_builder : public common_chat_peg_builder { // Low-level tag methods common_peg_parser tool(const common_peg_parser & p) { return tag(TOOL, p); } - common_peg_parser tool_open(const common_peg_parser & p) { return atomic(tag(TOOL_OPEN, p)); } - common_peg_parser tool_close(const common_peg_parser & p) { return atomic(tag(TOOL_CLOSE, p)); } - common_peg_parser tool_id(const common_peg_parser & p) { return atomic(tag(TOOL_ID, p)); } - common_peg_parser tool_name(const common_peg_parser & p) { return atomic(tag(TOOL_NAME, p)); } - common_peg_parser tool_args(const common_peg_parser & p) { return tag(TOOL_ARGS, p); } - common_peg_parser tool_arg(const common_peg_parser & p) { return tag(TOOL_ARG, p); } - common_peg_parser tool_arg_open(const common_peg_parser & p) { return atomic(tag(TOOL_ARG_OPEN, p)); } - common_peg_parser tool_arg_close(const common_peg_parser & p) { return atomic(tag(TOOL_ARG_CLOSE, p)); } - common_peg_parser tool_arg_name(const common_peg_parser & p) { return atomic(tag(TOOL_ARG_NAME, p)); } - common_peg_parser tool_arg_value(const common_peg_parser & p) { return tag(TOOL_ARG_VALUE, p); } // Use for schema-declared string types - won't be treated as potential JSON container common_peg_parser tool_arg_string_value(const common_peg_parser & p) { return tag(TOOL_ARG_STRING_VALUE, p); } - common_peg_parser tool_arg_json_value(const common_peg_parser & p) { return tag(TOOL_ARG_VALUE, p); } - // High-level building methods - - // Build reasoning block based on ContentStructure - common_peg_parser build_reasoning_block(const content_structure & cs, - common_reasoning_format reasoning_format, - bool 
thinking_forced_open); - - // Build content block based on ContentStructure - common_peg_parser build_content_block(const content_structure & cs, - common_reasoning_format reasoning_format, - const std::string & tool_section_start = ""); - - // Build complete tool section based on ToolCallStructure - common_peg_parser build_tool_section(const tool_call_structure & ts, - const nlohmann::json & tools, - bool parallel_tool_calls, - bool force_tool_calls); - - // Build single function parser based on ToolCallStructure - common_peg_parser build_function(const tool_call_structure & ts, - const std::string & name, - const nlohmann::json & schema); - - // Build arguments parser based on ToolCallStructure - common_peg_parser build_arguments(const tool_call_structure & ts, const nlohmann::json & params); - // Legacy-compatible helper for building standard JSON tool calls // Used by tests and manual parsers - common_peg_parser standard_json_tools(const std::string & section_start, - const std::string & section_end, - const nlohmann::json & tools, - bool parallel_tool_calls, - bool force_tool_calls); + // name_key/args_key: JSON key names for function name and arguments + // Empty or "name"/"arguments" will accept both common variations + // Supports dot notation for nested objects (e.g., "function.name") + // array_wrapped: if true, tool calls are wrapped in JSON array [...] 
+ // function_is_key: if true, function name is the JSON key (e.g., {"func_name": {...}}) + // call_id_key: JSON key for string call ID (e.g., "id") + // gen_call_id_key: JSON key for generated integer call ID (e.g., "tool_call_id") + // parameters_order: order in which JSON fields should be parsed + common_peg_parser standard_json_tools(const std::string & section_start, + const std::string & section_end, + const nlohmann::json & tools, + bool parallel_tool_calls, + bool force_tool_calls, + const std::string & name_key = "", + const std::string & args_key = "", + bool array_wrapped = false, + bool function_is_key = false, + const std::string & call_id_key = "", + const std::string & gen_call_id_key = "", + const std::vector & parameters_order = {}); // Legacy-compatible helper for building XML/tagged style tool calls // Used by tests and manual parsers diff --git a/common/chat.cpp b/common/chat.cpp index 0662e61732..edd98347b3 100644 --- a/common/chat.cpp +++ b/common/chat.cpp @@ -1,15 +1,12 @@ #include "chat.h" -#include "chat-auto-parser-helpers.h" #include "chat-auto-parser.h" #include "chat-peg-parser.h" #include "common.h" #include "ggml.h" #include "json-schema-to-grammar.h" #include "log.h" -#include "regex-partial.h" -#include "jinja/parser.h" #include "jinja/value.h" #include "jinja/runtime.h" #include "jinja/caps.h" @@ -1057,6 +1054,114 @@ static common_chat_params common_chat_params_init_gpt_oss(const common_chat_temp return data; } +// Functionary v3.2 - uses recipient-based format: >>>recipient\n{content} +static common_chat_params common_chat_params_init_functionary_v3_2(const common_chat_template & tmpl, + const struct templates_params & inputs) { + common_chat_params data; + + data.prompt = common_chat_template_direct_apply(tmpl, inputs); + data.format = COMMON_CHAT_FORMAT_PEG_NATIVE; + data.preserved_tokens = { + ">>>all", + }; + + auto has_tools = inputs.tools.is_array() && !inputs.tools.empty(); + auto include_grammar = has_tools && 
inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE; + + auto parser = build_chat_peg_unified_parser([&](common_chat_peg_unified_builder & p) { + // Functionary v3.2 format: + // - Normal content: >>>all\n{content} + // - Tool calls: >>>function_name\n{json_args} + // Generation prompt ends with ">>>" so model outputs recipient immediately + + // Build content parser for >>>all\n{content} + // When tools are present, content stops before the next ">>>" (tool call) + // When no tools, content goes until end + auto content_until_tool = p.literal(">>>all\n") + p.content(p.until(">>>")); + auto content_until_end = p.literal(">>>all\n") + p.content(p.rest()); + + // If no tools or tool_choice is NONE, just parse content + if (!has_tools || inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_NONE) { + // When no tools, just match the prefix and capture everything after + return content_until_end + p.end(); + } + + // Build tool call parsers for each available function + auto tool_choice = p.choice(); + foreach_function(inputs.tools, [&](const json & tool) { + const auto & function = tool.at("function"); + std::string name = function.at("name"); + const auto & schema = function.at("parameters"); + + // Tool format: >>>function_name\n{json_args} + auto tool_parser = p.tool( + p.tool_open(p.literal(">>>") + p.tool_name(p.literal(name)) + p.literal("\n")) + + p.tool_args(p.schema(p.json(), "tool-" + name + "-schema", schema)) + ); + + tool_choice |= p.rule("tool-" + name, tool_parser); + }); + + // The model can output: + // 1. Just content: >>>all\n{content} + // 2. Just tool call(s): >>>function_name\n{json_args} + // 3. 
Both: >>>all\n{content}>>>function_name\n{json_args} + + // Option 1: Content only (no following tool call) + auto content_only = content_until_end; + + // Option 2: Content followed by tool call(s) + auto content_and_tools = content_until_tool + p.one_or_more(tool_choice); + + // Option 3: Just tool call(s) (no content) + auto tools_only = p.one_or_more(tool_choice); + + if (inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED) { + // Must have at least one tool call + if (inputs.parallel_tool_calls) { + // Multiple tool calls allowed + return p.choice({ content_and_tools, tools_only }) + p.end(); + } else { + // Single tool call only + return p.choice({ content_until_tool + tool_choice, tools_only }) + p.end(); + } + } else { + // Tool calls are optional (auto mode) + if (inputs.parallel_tool_calls) { + // Multiple tool calls allowed + return p.choice({ content_and_tools, content_only, tools_only }) + p.end(); + } else { + // Single tool call at most + auto content_and_tool = content_until_tool + tool_choice; + return p.choice({ content_and_tool, content_only, tool_choice }) + p.end(); + } + } + }); + + data.parser = parser.save(); + + if (include_grammar) { + data.grammar_lazy = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_AUTO; + + data.grammar = build_grammar([&](const common_grammar_builder & builder) { + foreach_function(inputs.tools, [&](const json & tool) { + const auto & function = tool.at("function"); + auto schema = function.at("parameters"); + builder.resolve_refs(schema); + }); + parser.build_grammar(builder, data.grammar_lazy); + }); + + // Grammar trigger for when the model starts outputting a tool call + // (after the initial ">>>" in the generation prompt) + data.grammar_triggers = { + { COMMON_GRAMMAR_TRIGGER_TYPE_WORD, ">>>" } + }; + } + + return data; +} + namespace workaround { // if first message is system and template does not support it, merge it with next message @@ -1105,6 +1210,8 @@ static void func_args_not_string(json & messages) { 
} } +} + static common_chat_params common_chat_templates_apply_jinja(const struct common_chat_templates * tmpls, const struct common_chat_templates_inputs & inputs) { templates_params params; @@ -1128,7 +1235,10 @@ static common_chat_params common_chat_templates_apply_jinja(const struct common_ workaround::system_message_not_supported(params.messages); } - if (!tmpl.original_caps().requires_non_null_content) { + if (tmpl.original_caps().supports_tool_calls) { + // some templates will require the content field in tool call messages + // to still be non-null, this puts an empty string everywhere where the + // content field is null workaround::requires_non_null_content(params.messages); } @@ -1163,20 +1273,26 @@ static common_chat_params common_chat_templates_apply_jinja(const struct common_ // Note: Mistral Small 3.2 uses [CALL_ID] which Ministral doesn't have, so we can distinguish them if (src.find("[SYSTEM_PROMPT]") != std::string::npos && src.find("[TOOL_CALLS]") != std::string::npos && src.find("[ARGS]") != std::string::npos && src.find("[CALL_ID]") == std::string::npos) { - LOG_INF("Using specialized template: Ministral/Magistral Large 3\n"); + LOG_DBG("Using specialized template: Ministral/Magistral Large 3\n"); return common_chat_params_init_ministral_3(tmpl, params); } // GPT-OSS - has unique channel-based structure that needs dedicated handler if (src.find("<|channel|>") != std::string::npos) { - LOG_INF("Using specialized template: GPT-OSS\n"); + LOG_DBG("Using specialized template: GPT-OSS\n"); return common_chat_params_init_gpt_oss(tmpl, params); } + // Functionary v3.2 - uses recipient-based format with >>>recipient\n{content} + // Detection: template has ">>>all" for content and ">>>" prefix for tool calls + if (src.find(">>>all") != std::string::npos && src.find(">>>${recipient}") != std::string::npos) { + LOG_DBG("Using specialized template: Functionary v3.2\n"); + return common_chat_params_init_functionary_v3_2(tmpl, params); + } + try { - 
LOG_INF("Using autoparser for template analysis\n"); - template_analysis_result analysis = template_analyzer::analyze_template(tmpl); - auto auto_params = universal_peg_generator::generate_parser(analysis, tmpl, params); + LOG_DBG("Using differential autoparser\n"); + auto auto_params = universal_peg_generator::generate_parser(tmpl, params); return auto_params; } catch (const std::exception & e) { LOG_WRN("Automatic parser generation failed: %s\n", e.what()); @@ -1258,22 +1374,24 @@ common_chat_params common_chat_templates_apply(const struct common_chat_template common_chat_templates_apply_legacy(tmpls, inputs); } -common_chat_msg common_chat_parse(const std::string & input, bool is_partial, const common_chat_syntax & syntax) { - return common_chat_peg_parse(syntax.parser, input, is_partial, syntax); +common_chat_msg common_chat_parse(const std::string & input, + bool is_partial, + const common_chat_parser_params & params) { + return common_chat_peg_parse(params.parser, input, is_partial, params); } -common_chat_msg common_chat_peg_parse(const common_peg_arena & parser, - const std::string & input, - bool is_partial, - const common_chat_syntax & syntax) { +common_chat_msg common_chat_peg_parse(const common_peg_arena & parser, + const std::string & input, + bool is_partial, + const common_chat_parser_params & params) { if (parser.empty()) { throw std::runtime_error("Failed to parse due to missing parser definition."); } - LOG_DBG("Parsing PEG input with format %s: %s\n", common_chat_format_name(syntax.format), input.c_str()); + LOG_DBG("Parsing PEG input with format %s: %s\n", common_chat_format_name(params.format), input.c_str()); common_peg_parse_context ctx(input, is_partial); - ctx.debug = syntax.debug; + ctx.debug = params.debug; auto result = parser.parse(ctx); if (result.fail()) { @@ -1283,13 +1401,9 @@ common_chat_msg common_chat_peg_parse(const common_peg_arena & parser, // Try to extract any partial results from what was successfully parsed common_chat_msg 
msg; msg.role = "assistant"; - if (syntax.format == COMMON_CHAT_FORMAT_PEG_NATIVE) { - auto mapper = common_chat_peg_unified_mapper(msg); - mapper.from_ast(ctx.ast, result); - } else { - auto mapper = common_chat_peg_mapper(msg); - mapper.from_ast(ctx.ast, result); - } + auto mapper = common_chat_peg_unified_mapper(msg); + mapper.from_ast(ctx.ast, result); + if (ctx.debug) { fprintf(stderr, "\nAST for partial parse (fail):\n%s\n", ctx.ast.dump().c_str()); fflush(stderr); @@ -1303,21 +1417,16 @@ common_chat_msg common_chat_peg_parse(const common_peg_arena & parser, common_chat_msg msg; msg.role = "assistant"; - if (syntax.format == COMMON_CHAT_FORMAT_PEG_NATIVE) { - auto mapper = common_chat_peg_unified_mapper(msg); - mapper.from_ast(ctx.ast, result); - } else { - // Generic mapper - auto mapper = common_chat_peg_mapper(msg); - mapper.from_ast(ctx.ast, result); - } + auto mapper = common_chat_peg_unified_mapper(msg); + mapper.from_ast(ctx.ast, result); + if (ctx.debug) { fprintf(stderr, "\nAST for %s parse:\n%s\n", is_partial ? 
"partial" : "full", ctx.ast.dump().c_str()); fflush(stderr); } if (!is_partial) { - LOG_DBG("Parsed message: %s\n", common_chat_msgs_to_json_oaicompat({ msg }).at(0).dump().c_str()); + LOG_DBG("Parsed message: %s\n", common_chat_msgs_to_json_oaicompat({ msg }).at(0).dump().c_str()); } return msg; } @@ -1327,3 +1436,4 @@ std::map common_chat_templates_get_caps(const common_chat_tem GGML_ASSERT(chat_templates->template_default != nullptr); return chat_templates->template_default->caps.to_map(); } + diff --git a/common/chat.h b/common/chat.h index 4fec39c74f..00f8eb62b6 100644 --- a/common/chat.h +++ b/common/chat.h @@ -267,17 +267,12 @@ std::string common_chat_format_example(const struct common_chat_templates * const std::map & chat_template_kwargs); const char * common_chat_format_name(common_chat_format format); -common_chat_msg common_chat_parse(const std::string & input, bool is_partial, const common_chat_parser_params & syntax); -common_chat_msg common_chat_peg_parse(const common_peg_arena & parser, const std::string & input, bool is_partial, const common_chat_parser_params & syntax); +common_chat_msg common_chat_parse(const std::string & input, bool is_partial, const common_chat_parser_params & params); +common_chat_msg common_chat_peg_parse(const common_peg_arena & parser, const std::string & input, bool is_partial, const common_chat_parser_params & params); // used by arg and server const char * common_reasoning_format_name(common_reasoning_format format); common_reasoning_format common_reasoning_format_from_name(const std::string & format); -common_chat_msg common_chat_parse(const std::string & input, bool is_partial, const common_chat_syntax & syntax); -common_chat_msg common_chat_peg_parse(const common_peg_arena & parser, - const std::string & input, - bool is_partial, - const common_chat_syntax & syntax); common_chat_tool_choice common_chat_tool_choice_parse_oaicompat(const std::string & tool_choice); diff --git a/common/jinja/caps.cpp 
b/common/jinja/caps.cpp index 745f17d50f..c6eef6b464 100644 --- a/common/jinja/caps.cpp +++ b/common/jinja/caps.cpp @@ -1,3 +1,4 @@ +#include "log.h" #include "value.h" #include "runtime.h" #include "caps.h" @@ -16,7 +17,7 @@ using json = nlohmann::ordered_json; namespace jinja { using caps_json_fn = std::function; -using caps_analyze_fn = std::function; +using caps_analyze_fn = std::function; static void caps_try_execute(jinja::program & prog, const caps_json_fn & messages_fn, @@ -36,16 +37,20 @@ static void caps_try_execute(jinja::program & prog, auto tools = ctx.get_val("tools"); bool success = false; + std::string result; try { jinja::runtime runtime(ctx); - runtime.execute(prog); + auto results = runtime.execute(prog); + auto parts = jinja::runtime::gather_string_parts(results); + result = parts->as_string().str(); success = true; } catch (const std::exception & e) { JJ_DEBUG("Exception during execution: %s", e.what()); + result = ""; // ignore exceptions during capability analysis } - analyze_fn(success, messages, tools); + analyze_fn(success, messages, tools, result); } // for debugging only @@ -141,7 +146,7 @@ caps caps_get(jinja::program & prog) { // tools return json::array(); }, - [&](bool, value & messages, value &) { + [&](bool, value & messages, value &, const std::string &) { auto & content = messages->at(0)->at("content"); caps_print_stats(content, "messages[0].content"); if (!content->stats.used) { @@ -186,6 +191,15 @@ caps caps_get(jinja::program & prog) { } })} }, + { + {"role", "tool"}, + {"content", "Tool response"}, + {"tool_call_id", "call00001"} + }, + { + {"role", "assistant"}, + {"content", "The tool response was 'tool response'"} + }, { {"role", "user"}, {"content", "User message"}, @@ -215,7 +229,7 @@ caps caps_get(jinja::program & prog) { }, }); }, - [&](bool success, value & messages, value & tools) { + [&](bool success, value & messages, value & tools, const std::string & res) { if (!success) { result.supports_tool_calls =
false; result.supports_tools = false; @@ -224,8 +238,11 @@ caps caps_get(jinja::program & prog) { auto & tool_name = tools->at(0)->at("function")->at("name"); caps_print_stats(tool_name, "tools[0].function.name"); + caps_print_stats(tools, "tools"); if (!tool_name->stats.used) { - result.supports_tools = false; + if (!tools->stats.used && res.find(tool_name->as_string().str()) == std::string::npos) { + result.supports_tools = false; + } } auto & tool_calls = messages->at(1)->at("tool_calls");; @@ -243,83 +260,6 @@ caps caps_get(jinja::program & prog) { } ); - // case: requires non-null content in tool calls - if (result.supports_tool_calls) { - caps_try_execute( - prog, - [&]() { - // messages - return json::array({ - { - { "role", "user" }, - { "content", "User message" }, - }, - { - { "role", "assistant" }, - { "tool_calls", - json::array({ - { - { "id", "call00001" }, - { "type", "function" }, - { "function", - { - { "name", "tool1" }, - { "arguments", - { - { "arg", "value" } - } - } - } - } - }, - }) - } - }, - { - { "role", "user" }, - { "content", "User message" }, - }, - }); - }, - [&]() { - // tools - return json::array({ - { - { "name", "tool" }, - { "type", "function" }, - { "function", - { - { "name", "tool1" }, - { "description", "Tool description" }, - { "parameters", - { - { "type", "object" }, - { "properties", - { - { "arg", - { - { "type", "string" }, - { "description", "Arg description" }, - } - }, - } - }, - { "required", json::array({ "arg" }) }, - } - }, - } - }, - }, - }); - }, - [&](bool success, value & /* messages */, value & /* tools */) { - if (!success) { - result.requires_non_null_content = true; - } - } - ); - } - // case: preserve reasoning content in chat history caps_try_execute( prog, @@ -345,7 +285,7 @@ caps caps_get(jinja::program & prog) { // tools return json::array(); }, - [&](bool, value & messages, value &) { + [&](bool, value & messages, value &, const std::string &) { auto & content = 
messages->at(1)->at("reasoning_content"); caps_print_stats(content, "messages[1].reasoning_content"); if (content->stats.used) { diff --git a/common/json-schema-to-grammar.cpp b/common/json-schema-to-grammar.cpp index 2f67c74d79..efd2c8ef95 100644 --- a/common/json-schema-to-grammar.cpp +++ b/common/json-schema-to-grammar.cpp @@ -27,11 +27,11 @@ static std::string build_repetition(const std::string & item_rule, int min_items if (separator_rule.empty()) { if (min_items == 1 && !has_max) { return item_rule + "+"; - } else if (min_items == 0 && !has_max) { + } + if (min_items == 0 && !has_max) { return item_rule + "*"; - } else { - return item_rule + "{" + std::to_string(min_items) + "," + (has_max ? std::to_string(max_items) : "") + "}"; - } + } + return item_rule + "{" + std::to_string(min_items) + "," + (has_max ? std::to_string(max_items) : "") + "}"; } auto result = item_rule + " " + build_repetition("(" + separator_rule + " " + item_rule + ")", min_items == 0 ? 0 : min_items - 1, has_max ? 
max_items - 1 : max_items); @@ -41,7 +41,7 @@ static std::string build_repetition(const std::string & item_rule, int min_items return result; } -static void _build_min_max_int(int64_t min_value, int64_t max_value, std::stringstream & out, int decimals_left = 16, bool top_level = true) { +static void build_min_max_int(int64_t min_value, int64_t max_value, std::stringstream & out, int decimals_left = 16, bool top_level = true) { auto has_min = min_value != std::numeric_limits::min(); auto has_max = max_value != std::numeric_limits::max(); @@ -128,14 +128,14 @@ static void _build_min_max_int(int64_t min_value, int64_t max_value, std::string if (has_min && has_max) { if (min_value < 0 && max_value < 0) { out << "\"-\" ("; - _build_min_max_int(-max_value, -min_value, out, decimals_left, /* top_level= */ true); + build_min_max_int(-max_value, -min_value, out, decimals_left, /* top_level= */ true); out << ")"; return; } if (min_value < 0) { out << "\"-\" ("; - _build_min_max_int(0, -min_value, out, decimals_left, /* top_level= */ true); + build_min_max_int(0, -min_value, out, decimals_left, /* top_level= */ true); out << ") | "; min_value = 0; } @@ -159,7 +159,7 @@ static void _build_min_max_int(int64_t min_value, int64_t max_value, std::string if (has_min) { if (min_value < 0) { out << "\"-\" ("; - _build_min_max_int(std::numeric_limits::min(), -min_value, out, decimals_left, /* top_level= */ false); + build_min_max_int(std::numeric_limits::min(), -min_value, out, decimals_left, /* top_level= */ false); out << ") | [0] | [1-9] "; more_digits(0, decimals_left - 1); } else if (min_value == 0) { @@ -194,7 +194,7 @@ static void _build_min_max_int(int64_t min_value, int64_t max_value, std::string } digit_range(c, c); out << " ("; - _build_min_max_int(std::stoll(min_s.substr(1)), std::numeric_limits::max(), out, less_decimals, /* top_level= */ false); + build_min_max_int(std::stoll(min_s.substr(1)), std::numeric_limits::max(), out, less_decimals, /* top_level= */ false); out 
<< ")"; if (c < '9') { out << " | "; @@ -213,10 +213,10 @@ static void _build_min_max_int(int64_t min_value, int64_t max_value, std::string more_digits(0, less_decimals); out << " | "; } - _build_min_max_int(0, max_value, out, decimals_left, /* top_level= */ true); + build_min_max_int(0, max_value, out, decimals_left, /* top_level= */ true); } else { out << "\"-\" ("; - _build_min_max_int(-max_value, std::numeric_limits::max(), out, decimals_left, /* top_level= */ false); + build_min_max_int(-max_value, std::numeric_limits::max(), out, decimals_left, /* top_level= */ false); out << ")"; } return; @@ -232,7 +232,7 @@ struct BuiltinRule { std::vector deps; }; -std::unordered_map PRIMITIVE_RULES = { +static std::unordered_map PRIMITIVE_RULES = { {"boolean", {"(\"true\" | \"false\") space", {}}}, {"decimal-part", {"[0-9]{1,16}", {}}}, {"integral-part", {"[0] | [1-9] [0-9]{0,15}", {}}}, @@ -247,7 +247,7 @@ std::unordered_map PRIMITIVE_RULES = { {"null", {"\"null\" space", {}}}, }; -std::unordered_map STRING_FORMAT_RULES = { +static std::unordered_map STRING_FORMAT_RULES = { {"date", {"[0-9]{4} \"-\" ( \"0\" [1-9] | \"1\" [0-2] ) \"-\" ( \"0\" [1-9] | [1-2] [0-9] | \"3\" [0-1] )", {}}}, {"time", {"([01] [0-9] | \"2\" [0-3]) \":\" [0-5] [0-9] \":\" [0-5] [0-9] ( \".\" [0-9]{3} )? 
( \"Z\" | ( \"+\" | \"-\" ) ( [01] [0-9] | \"2\" [0-3] ) \":\" [0-5] [0-9] )", {}}}, {"date-time", {"date \"T\" time", {"date", "time"}}}, @@ -260,22 +260,26 @@ static bool is_reserved_name(const std::string & name) { static const std::unordered_set RESERVED_NAMES = [] { std::unordered_set s; s.insert("root"); - for (const auto & p : PRIMITIVE_RULES) s.insert(p.first); - for (const auto & p : STRING_FORMAT_RULES) s.insert(p.first); + for (const auto & p : PRIMITIVE_RULES) { + s.insert(p.first); + } + for (const auto & p : STRING_FORMAT_RULES) { + s.insert(p.first); + } return s; }(); return RESERVED_NAMES.find(name) != RESERVED_NAMES.end(); } -std::regex INVALID_RULE_CHARS_RE("[^a-zA-Z0-9-]+"); -std::regex GRAMMAR_LITERAL_ESCAPE_RE("[\r\n\"\\\\]"); -std::regex GRAMMAR_RANGE_LITERAL_ESCAPE_RE("[\r\n\"\\]\\-\\\\]"); -std::unordered_map GRAMMAR_LITERAL_ESCAPES = { +static std::regex INVALID_RULE_CHARS_RE("[^a-zA-Z0-9-]+"); +static std::regex GRAMMAR_LITERAL_ESCAPE_RE("[\r\n\"\\\\]"); +static std::regex GRAMMAR_RANGE_LITERAL_ESCAPE_RE("[\r\n\"\\]\\-\\\\]"); +static std::unordered_map GRAMMAR_LITERAL_ESCAPES = { {'\r', "\\r"}, {'\n', "\\n"}, {'"', "\\\""}, {'-', "\\-"}, {']', "\\]"}, {'\\', "\\\\"} }; -std::unordered_set NON_LITERAL_SET = {'|', '.', '(', ')', '[', ']', '{', '}', '*', '+', '?'}; -std::unordered_set ESCAPED_IN_REGEXPS_BUT_NOT_IN_LITERALS = {'^', '$', '.', '[', ']', '(', ')', '|', '{', '}', '*', '+', '?'}; +static std::unordered_set NON_LITERAL_SET = {'|', '.', '(', ')', '[', ']', '{', '}', '*', '+', '?'}; +static std::unordered_set ESCAPED_IN_REGEXPS_BUT_NOT_IN_LITERALS = {'^', '$', '.', '[', ']', '(', ')', '|', '{', '}', '*', '+', '?'}; static std::string replacePattern(const std::string & input, const std::regex & regex, const std::function & replacement) { std::smatch match; @@ -322,19 +326,19 @@ private: if (_rules.find(esc_name) == _rules.end() || _rules[esc_name] == rule) { _rules[esc_name] = rule; return esc_name; - } else { - int i = 0; - while 
(_rules.find(esc_name + std::to_string(i)) != _rules.end() && _rules[esc_name + std::to_string(i)] != rule) { - i++; - } - std::string key = esc_name + std::to_string(i); - _rules[key] = rule; - return key; } + int i = 0; + while (_rules.find(esc_name + std::to_string(i)) != _rules.end() && _rules[esc_name + std::to_string(i)] != rule) { + i++; + } + std::string key = esc_name + std::to_string(i); + _rules[key] = rule; + return key; } std::string _generate_union_rule(const std::string & name, const std::vector & alt_schemas) { std::vector rules; + rules.reserve(alt_schemas.size()); for (size_t i = 0; i < alt_schemas.size(); i++) { rules.push_back(visit(alt_schemas[i], name + (name.empty() ? "alternative-" : "-") + std::to_string(i))); } @@ -398,6 +402,7 @@ private: flush_literal(); std::vector results; + results.reserve(ret.size()); for (const auto & item : ret) { results.push_back(to_rule(item)); } @@ -551,7 +556,7 @@ private: TrieNode() : is_end_of_string(false) {} void insert(const std::string & string) { - auto node = this; + auto *node = this; for (char c : string) { node = &node->children[c]; } @@ -676,7 +681,7 @@ private: if (ks.empty()) { return res; } - std::string k = ks[0]; + const std::string& k = ks[0]; std::string kv_rule_name = prop_kv_rule_names[k]; std::string comma_ref = "( \",\" space " + kv_rule_name + " )"; if (first_is_optional) { @@ -779,7 +784,7 @@ public: std::string pointer = ref.substr(ref.find('#') + 1); std::vector tokens = string_split(pointer, "/"); for (size_t i = 1; i < tokens.size(); ++i) { - std::string sel = tokens[i]; + const std::string& sel = tokens[i]; if (target.is_object() && target.contains(sel)) { target = target[sel]; } else if (target.is_array()) { @@ -802,7 +807,7 @@ public: _refs[ref] = target; } } else { - for (auto & kv : n.items()) { + for (const auto & kv : n.items()) { visit_refs(kv.value()); } } @@ -812,7 +817,7 @@ public: visit_refs(schema); } - std::string _generate_constant_rule(const json & value) { + static 
std::string _generate_constant_rule(const json & value) { return format_literal(value.dump()); } @@ -823,10 +828,12 @@ public: if (schema.contains("$ref")) { return _add_rule(rule_name, _resolve_ref(schema["$ref"])); - } else if (schema.contains("oneOf") || schema.contains("anyOf")) { + } + if (schema.contains("oneOf") || schema.contains("anyOf")) { std::vector alt_schemas = schema.contains("oneOf") ? schema["oneOf"].get>() : schema["anyOf"].get>(); return _add_rule(rule_name, _generate_union_rule(name, alt_schemas)); - } else if (schema_type.is_array()) { + } + if (schema_type.is_array()) { std::vector schema_types; for (const auto & t : schema_type) { json schema_copy(schema); @@ -834,15 +841,18 @@ public: schema_types.push_back(schema_copy); } return _add_rule(rule_name, _generate_union_rule(name, schema_types)); - } else if (schema.contains("const")) { + } + if (schema.contains("const")) { return _add_rule(rule_name, _generate_constant_rule(schema["const"]) + " space"); - } else if (schema.contains("enum")) { + } + if (schema.contains("enum")) { std::vector enum_values; for (const auto & v : schema["enum"]) { enum_values.push_back(_generate_constant_rule(v)); } return _add_rule(rule_name, "(" + string_join(enum_values, " | ") + ") space"); - } else if ((schema_type.is_null() || schema_type == "object") + } + if ((schema_type.is_null() || schema_type == "object") && (schema.contains("properties") || (schema.contains("additionalProperties") && schema["additionalProperties"] != true))) { std::unordered_set required; @@ -863,11 +873,12 @@ public: _build_object_rule( properties, required, name, schema.contains("additionalProperties") ? 
schema["additionalProperties"] : json())); - } else if ((schema_type.is_null() || schema_type == "object" || schema_type == "string") && schema.contains("allOf")) { + } + if ((schema_type.is_null() || schema_type == "object" || schema_type == "string") && schema.contains("allOf")) { std::unordered_set required; std::vector> properties; std::map enum_values; - std::string hybrid_name = name; + const std::string& hybrid_name = name; std::function add_component = [&](const json & comp_schema, bool is_required) { if (comp_schema.contains("$ref")) { add_component(_refs[comp_schema["$ref"]], is_required); @@ -890,9 +901,9 @@ public: // todo warning } }; - for (auto & t : schema["allOf"]) { + for (const auto & t : schema["allOf"]) { if (t.contains("anyOf")) { - for (auto & tt : t["anyOf"]) { + for (const auto & tt : t["anyOf"]) { add_component(tt, false); } } else { @@ -911,7 +922,8 @@ public: } } return _add_rule(rule_name, _build_object_rule(properties, required, hybrid_name, json())); - } else if ((schema_type.is_null() || schema_type == "array") && (schema.contains("items") || schema.contains("prefixItems"))) { + } + if ((schema_type.is_null() || schema_type == "array") && (schema.contains("items") || schema.contains("prefixItems"))) { json items = schema.contains("items") ? schema["items"] : schema["prefixItems"]; if (items.is_array()) { std::string rule = "\"[\" space "; @@ -923,27 +935,31 @@ public: } rule += " \"]\" space"; return _add_rule(rule_name, rule); - } else { - std::string item_rule_name = visit(items, name + (name.empty() ? "" : "-") + "item"); - int min_items = schema.contains("minItems") ? schema["minItems"].get() : 0; - json max_items_json = schema.contains("maxItems") ? schema["maxItems"] : json(); - int max_items = max_items_json.is_number_integer() ? 
max_items_json.get() : std::numeric_limits::max(); - - return _add_rule(rule_name, "\"[\" space " + build_repetition(item_rule_name, min_items, max_items, "\",\" space") + " \"]\" space"); } - } else if ((schema_type.is_null() || schema_type == "string") && schema.contains("pattern")) { + std::string item_rule_name = visit(items, name + (name.empty() ? "" : "-") + "item"); + int min_items = schema.contains("minItems") ? schema["minItems"].get() : 0; + json max_items_json = schema.contains("maxItems") ? schema["maxItems"] : json(); + int max_items = max_items_json.is_number_integer() ? max_items_json.get() : std::numeric_limits::max(); + + return _add_rule(rule_name, "\"[\" space " + build_repetition(item_rule_name, min_items, max_items, "\",\" space") + " \"]\" space"); + } + if ((schema_type.is_null() || schema_type == "string") && schema.contains("pattern")) { return _visit_pattern(schema["pattern"], rule_name); - } else if ((schema_type.is_null() || schema_type == "string") && std::regex_match(schema_format, std::regex("^uuid[1-5]?$"))) { + } + if ((schema_type.is_null() || schema_type == "string") && std::regex_match(schema_format, std::regex("^uuid[1-5]?$"))) { return _add_primitive(rule_name == "root" ? 
"root" : schema_format, PRIMITIVE_RULES.at("uuid")); - } else if ((schema_type.is_null() || schema_type == "string") && STRING_FORMAT_RULES.find(schema_format + "-string") != STRING_FORMAT_RULES.end()) { + } + if ((schema_type.is_null() || schema_type == "string") && STRING_FORMAT_RULES.find(schema_format + "-string") != STRING_FORMAT_RULES.end()) { auto prim_name = schema_format + "-string"; return _add_rule(rule_name, _add_primitive(prim_name, STRING_FORMAT_RULES.at(prim_name))); - } else if (schema_type == "string" && (schema.contains("minLength") || schema.contains("maxLength"))) { + } + if (schema_type == "string" && (schema.contains("minLength") || schema.contains("maxLength"))) { std::string char_rule = _add_primitive("char", PRIMITIVE_RULES.at("char")); int min_len = schema.contains("minLength") ? schema["minLength"].get() : 0; int max_len = schema.contains("maxLength") ? schema["maxLength"].get() : std::numeric_limits::max(); return _add_rule(rule_name, "\"\\\"\" " + build_repetition(char_rule, min_len, max_len) + " \"\\\"\" space"); - } else if (schema_type == "integer" && (schema.contains("minimum") || schema.contains("exclusiveMinimum") || schema.contains("maximum") || schema.contains("exclusiveMaximum"))) { + } + if (schema_type == "integer" && (schema.contains("minimum") || schema.contains("exclusiveMinimum") || schema.contains("maximum") || schema.contains("exclusiveMaximum"))) { int64_t min_value = std::numeric_limits::min(); int64_t max_value = std::numeric_limits::max(); if (schema.contains("minimum")) { @@ -958,19 +974,19 @@ public: } std::stringstream out; out << "("; - _build_min_max_int(min_value, max_value, out); + build_min_max_int(min_value, max_value, out); out << ") space"; return _add_rule(rule_name, out.str()); - } else if (schema.empty() || schema_type == "object") { + } + if (schema.empty() || schema_type == "object") { return _add_rule(rule_name, _add_primitive("object", PRIMITIVE_RULES.at("object"))); - } else { - if 
(!schema_type.is_string() || PRIMITIVE_RULES.find(schema_type.get()) == PRIMITIVE_RULES.end()) { - _errors.push_back("Unrecognized schema: " + schema.dump()); - return ""; - } - // TODO: support minimum, maximum, exclusiveMinimum, exclusiveMaximum at least for zero - return _add_primitive(rule_name == "root" ? "root" : schema_type.get(), PRIMITIVE_RULES.at(schema_type.get())); + } + if (!schema_type.is_string() || PRIMITIVE_RULES.find(schema_type.get()) == PRIMITIVE_RULES.end()) { + _errors.push_back("Unrecognized schema: " + schema.dump()); + return ""; } + // TODO: support minimum, maximum, exclusiveMinimum, exclusiveMaximum at least for zero + return _add_primitive(rule_name == "root" ? "root" : schema_type.get(), PRIMITIVE_RULES.at(schema_type.get())); } void check_errors() { @@ -985,7 +1001,7 @@ public: std::string format_grammar() { std::stringstream ss; for (const auto & kv : _rules) { - ss << kv.first << " ::= " << kv.second << std::endl; + ss << kv.first << " ::= " << kv.second << '\n'; } return ss.str(); } diff --git a/common/peg-parser.cpp b/common/peg-parser.cpp index 80dd105246..7a4c1cc398 100644 --- a/common/peg-parser.cpp +++ b/common/peg-parser.cpp @@ -692,6 +692,7 @@ struct parser_executor { switch (ctx.input[pos]) { case '"': + case '\'': case '\\': case '/': case 'b': @@ -768,6 +769,48 @@ struct parser_executor { return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_NEED_MORE_INPUT, start_pos, pos); } + common_peg_parse_result operator()(const common_peg_python_dict_string_parser & /* p */) { + auto pos = start_pos; + + // Parse string content (without quotes) + while (pos < ctx.input.size()) { + char c = ctx.input[pos]; + + if (c == '\'') { + // Found closing quote - success (don't consume it) + return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_SUCCESS, start_pos, pos); + } + + if (c == '\\') { + auto result = handle_escape_sequence(ctx, start_pos, pos); + if (!result.success()) { + return result; + } + } else { + auto utf8_result = 
parse_utf8_codepoint(ctx.input, pos); + + if (utf8_result.status == utf8_parse_result::INCOMPLETE) { + if (!ctx.is_partial) { + return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_FAIL, start_pos); + } + return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_NEED_MORE_INPUT, start_pos, pos); + } + + if (utf8_result.status == utf8_parse_result::INVALID) { + return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_FAIL, start_pos); + } + + pos += utf8_result.bytes_consumed; + } + } + + // Reached end without finding closing quote + if (!ctx.is_partial) { + return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_FAIL, start_pos, pos); + } + return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_NEED_MORE_INPUT, start_pos, pos); + } + common_peg_parse_result operator()(const common_peg_until_parser & p) const { trie matcher(p.delimiters); @@ -955,6 +998,7 @@ void common_peg_arena::resolve_refs() { std::is_same_v || std::is_same_v || std::is_same_v || + std::is_same_v || std::is_same_v || std::is_same_v || std::is_same_v) { @@ -1036,6 +1080,8 @@ std::string common_peg_arena::dump_impl(common_peg_parser_id std::to_string(p.max_count) + ")"; } else if constexpr (std::is_same_v) { return "JsonString()"; + } else if constexpr (std::is_same_v) { + return "PythonDictString()"; } else if constexpr (std::is_same_v) { return "Until(" + string_join(p.delimiters, " | ") + ")"; } else if constexpr (std::is_same_v) { @@ -1266,10 +1312,28 @@ common_peg_parser common_peg_parser_builder::json_number() { } common_peg_parser common_peg_parser_builder::json_string() { + // When allow_python_dict_format is true, accept both single and double quotes + if (allow_python_dict_format_) { + return rule("json-string-flex", [this]() { + auto json_str = sequence({ literal("\""), json_string_content(), literal("\""), space() }); + auto python_str = sequence({ literal("'"), python_dict_string_content(), literal("'"), space() }); + return choice({ json_str, python_str }); + }); + } + // Standard JSON 
strings with double quotes only return rule("json-string", [this]() { return sequence({ literal("\""), json_string_content(), literal("\""), space() }); }); } +common_peg_parser common_peg_parser_builder::flexible_string() { + // Always returns a choice of both quote styles regardless of flag + return rule("flexible-string", [this]() { + auto json_str = sequence({ literal("\""), json_string_content(), literal("\""), space() }); + auto python_str = sequence({ literal("'"), python_dict_string_content(), literal("'"), space() }); + return choice({ json_str, python_str }); + }); +} + common_peg_parser common_peg_parser_builder::json_bool() { return rule("json-bool", [this]() { return sequence({ choice({ literal("true"), literal("false") }), space() }); }); } @@ -1305,6 +1369,57 @@ common_peg_parser common_peg_parser_builder::json_string_content() { return wrap(arena_.add_parser(common_peg_json_string_parser{})); } +common_peg_parser common_peg_parser_builder::python_dict_string_content() { + return wrap(arena_.add_parser(common_peg_python_dict_string_parser{})); +} + +common_peg_parser common_peg_parser_builder::python_dict_string() { + return rule("python-dict-string", + [this]() { return sequence({ literal("'"), python_dict_string_content(), literal("'"), space() }); }); +} + +common_peg_parser common_peg_parser_builder::python_dict_number() { + // Same as JSON number + return json_number(); +} + +common_peg_parser common_peg_parser_builder::python_dict_bool() { + // Same as JSON bool + return json_bool(); +} + +common_peg_parser common_peg_parser_builder::python_dict_null() { + // Same as JSON null + return json_null(); +} + +common_peg_parser common_peg_parser_builder::python_dict_object() { + return rule("python-dict-object", [this]() { + auto ws = space(); + auto member = sequence({ python_dict_string(), ws, literal(":"), ws, python_dict() }); + auto members = sequence({ member, zero_or_more(sequence({ ws, literal(","), ws, member })) }); + return sequence({ 
literal("{"), ws, choice({ literal("}"), sequence({ members, ws, literal("}") }) }) }); + }); +} + +common_peg_parser common_peg_parser_builder::python_dict_array() { + return rule("python-dict-array", [this]() { + auto ws = space(); + auto elements = sequence({ python_dict(), zero_or_more(sequence({ literal(","), ws, python_dict() })) }); + return sequence({ literal("["), ws, choice({ literal("]"), sequence({ elements, ws, literal("]") }) }) }); + }); +} + +common_peg_parser common_peg_parser_builder::python_dict() { + return rule("python-dict-value", [this]() { + std::vector parsers = { + python_dict_object(), python_dict_array(), python_dict_string(), python_dict_number(), + python_dict_bool(), python_dict_null() + }; + return choice(parsers); + }); +} + common_peg_parser common_peg_parser_builder::json_member(const std::string & key, const common_peg_parser & p) { auto ws = space(); return sequence({ @@ -1435,7 +1550,8 @@ static std::unordered_set collect_reachable_rules(const common_peg_ std::is_same_v || std::is_same_v || std::is_same_v || std::is_same_v || - std::is_same_v) { + std::is_same_v || + std::is_same_v) { // These parsers do not have any children } else if constexpr (std::is_same_v) { for (auto child : p.children) { @@ -1579,6 +1695,8 @@ void common_peg_arena::build_grammar(const common_grammar_builder & builder, boo return result + "{" + std::to_string(p.min_count) + "," + std::to_string(p.max_count) + "}"; } else if constexpr (std::is_same_v) { return R"(( [^"\\] | "\\" ( ["\\/ bfnrt] | "u" [0-9a-fA-F]{4} ) )*)"; + } else if constexpr (std::is_same_v) { + return R"(( [^'\\] | "\\" ( ['"\\/ bfnrt] | "u" [0-9a-fA-F]{4} ) )*)"; } else if constexpr (std::is_same_v) { if (p.delimiters.empty()) { return ".*"; @@ -1743,6 +1861,10 @@ static nlohmann::json serialize_parser_variant(const common_peg_parser_variant & return json{ { "type", "json_string" } }; + } else if constexpr (std::is_same_v) { + return json{ + { "type", "python_dict_string" } + }; } 
else if constexpr (std::is_same_v) { return json{ { "type", "until" }, @@ -1876,6 +1998,9 @@ static common_peg_parser_variant deserialize_parser_variant(const nlohmann::json if (type == "json_string") { return common_peg_json_string_parser{}; } + if (type == "python_dict_string") { + return common_peg_python_dict_string_parser{}; + } if (type == "until") { if (!j.contains("delimiters") || !j["delimiters"].is_array()) { throw std::runtime_error("until parser missing or invalid 'delimiters' field"); diff --git a/common/peg-parser.h b/common/peg-parser.h index 5d08cf6d47..9bd5e05838 100644 --- a/common/peg-parser.h +++ b/common/peg-parser.h @@ -211,6 +211,7 @@ struct common_peg_chars_parser { }; struct common_peg_json_string_parser {}; +struct common_peg_python_dict_string_parser {}; struct common_peg_until_parser { std::vector delimiters; @@ -259,6 +260,7 @@ using common_peg_parser_variant = std::variant< common_peg_space_parser, common_peg_chars_parser, common_peg_json_string_parser, + common_peg_python_dict_string_parser, common_peg_until_parser, common_peg_schema_parser, common_peg_rule_parser, @@ -316,9 +318,16 @@ class common_peg_parser_builder { common_peg_parser wrap(common_peg_parser_id id) { return common_peg_parser(id, *this); } common_peg_parser add(const common_peg_parser_variant & p) { return wrap(arena_.add_parser(p)); } + + bool allow_python_dict_format_ = false; public: common_peg_parser_builder(); + + // Enable/disable Python dict format support (single-quoted strings). + // When enabled, JSON parsers will also accept Python dict-style single-quoted strings. + void set_allow_python_dict_format(bool allow) { allow_python_dict_format_ = allow; } + bool get_allow_python_dict_format() const { return allow_python_dict_format_; } // Match nothing, always succeed. // S -> ε @@ -424,10 +433,29 @@ class common_peg_parser_builder { // Useful for extracting content within a JSON string. 
common_peg_parser json_string_content(); + // Matches a string that accepts both JSON double-quoted and Python dict single-quoted styles. + // This is useful when you explicitly want to accept both formats regardless of the allow_python_dict_format flag. + common_peg_parser flexible_string(); + + // Matches a Python dict-style single-quoted string content without the surrounding quotes. + // Useful for extracting content within a Python dict string. + common_peg_parser python_dict_string_content(); + // Matches a JSON object member with a key and associated parser as the // value. common_peg_parser json_member(const std::string & key, const common_peg_parser & p); + // Creates a complete Python dict format parser supporting objects, arrays, single-quoted strings, + // numbers, booleans, and null. Similar to JSON but uses single quotes for strings. + // value -> object | array | string | number | true | false | null + common_peg_parser python_dict(); + common_peg_parser python_dict_object(); + common_peg_parser python_dict_string(); + common_peg_parser python_dict_array(); + common_peg_parser python_dict_number(); + common_peg_parser python_dict_bool(); + common_peg_parser python_dict_null(); + // Wraps a parser with JSON schema metadata for grammar generation. // Used internally to convert JSON schemas to GBNF grammar rules. 
common_peg_parser schema(const common_peg_parser & p, const std::string & name, const nlohmann::ordered_json & schema, bool raw = false); diff --git a/docs/autoparser.md b/docs/autoparser.md index 3c77c4d304..4b48cceb76 100644 --- a/docs/autoparser.md +++ b/docs/autoparser.md @@ -4,7 +4,15 @@ The auto-parser automatically analyzes chat templates to determine how to parse ## Overview -The unified auto-parser uses a two-phase incremental analysis approach: +The unified auto-parser uses a **pure differential, compositional approach** to analyze chat templates: + +**Core Philosophy**: + +- **Zero Hardcoded Patterns**: All markers extracted through template comparison (the **only heuristic** is JSON detection) +- **Compositional Architecture**: Separate parsers for reasoning, content, and tools that compose cleanly +- **Variant Types**: Structural descriptions (strings) instead of forced enum classification + +**Two-Phase Analysis**: 1. **Phase 1: Content & Reasoning Analysis** - Analyzes how the template handles basic content and reasoning, without considering tools 2. 
**Phase 2: Tool Call Analysis** - Analyzes tool calling patterns, layered on top of Phase 1 @@ -40,73 +48,210 @@ struct content_structure { }; ``` -### tool_call_structure (Phase 2 Result) +### diff_analysis_result (Analysis Result) -Describes how the template formats tool calls: +The result of differential analysis contains all extracted markers and format classifications: ```cpp -struct tool_call_structure { - bool supports_tools = false; +struct diff_analysis_result { + // Classification results + reasoning_mode reasoning = reasoning_mode::NONE; + content_mode content = content_mode::PLAIN; + tool_format tools = tool_format::NONE; + argument_format args = argument_format::JSON; - // Container markers (what wraps all tool calls) - std::string tool_section_start; // e.g., "", "[TOOL_CALLS]", "", "" - std::string tool_section_end; // e.g., "", "]", "", "" + // All extracted markers (see marker_registry below) + marker_registry markers; - // Function format (how individual functions are structured) - enum function_format { - FUNC_JSON_OBJECT, // {"name": "X", "arguments": {...}} - FUNC_TAG_WITH_NAME, // {...} - FUNC_TAG_NAME_ONLY, // ... 
where X is function name (rare) - FUNC_PREFIXED_INDEXED, // <|tool_call_begin|>functions.X:0<|tool_call_argument_begin|>{...}<|tool_call_end|> - FUNC_NAME_AS_KEY, // [{"function_name": {...arguments...}}] (Apertus-style) - FUNC_BRACKET_TAG, // [TOOL_CALLS]X[CALL_ID]id[ARGS]{...} (Mistral Small 3.2 style) - FUNC_RECIPIENT_BASED, // >>>recipient\n{content} where recipient is "all" (content) or function name (tools) - FUNC_MARKDOWN_CODE_BLOCK, // Action:\n```json\n[{"tool_name": "X", ...}]\n``` (Cohere Command-R Plus) - }; - function_format function_format = FUNC_JSON_OBJECT; + // JSON field names (for JSON-based formats) + std::string name_field = "name"; + std::string args_field = "arguments"; + std::string id_field; - // For FUNC_JSON_OBJECT format - field names (may vary between templates) - std::string name_field = "name"; // Could be "tool_name", "function" - std::string args_field = "arguments"; // Could be "parameters", "params", "input" - std::string id_field; // Optional: "id", "tool_call_id", "" - - // For FUNC_TAG_WITH_NAME format - std::string function_prefix; // e.g., "" - std::string function_close; // e.g., "" - - // For FUNC_PREFIXED_INDEXED format (e.g., Kimi-K2) - std::string per_call_start; // e.g., "<|tool_call_begin|>" - std::string function_namespace; // e.g., "functions." 
(prefix before function name) - std::string args_marker; // e.g., "<|tool_call_argument_begin|>" - std::string per_call_end; // e.g., "<|tool_call_end|>" - - // For FUNC_BRACKET_TAG format (e.g., Mistral Small 3.2) - std::string id_marker; // e.g., "[CALL_ID]" - marker before tool call ID - - // For FUNC_MARKDOWN_CODE_BLOCK format (Cohere Command-R Plus) - std::string code_block_marker; // e.g., "Action:" - text marker before code block - std::string code_block_language; // e.g., "json" - language identifier in code fence - - // Argument format (how arguments are structured within a function) - enum argument_format { - ARGS_JSON, // Standard JSON object: {"key": "value", ...} - ARGS_TAGGED, // XML-style: value - ARGS_KEY_VALUE_TAGS, // keyvalue (GLM-4.6) - }; - argument_format argument_format = ARGS_JSON; - - // For ARGS_TAGGED format - std::string arg_prefix; // e.g., "" - std::string arg_close; // e.g., "", "" - std::string arg_separator; // e.g., "", "\n" - - // Flag: template renders null content as "None" string, requires empty string instead + // Flags + bool supports_tools = false; + bool supports_parallel_calls = false; bool requires_nonnull_content = false; + + // Preserved tokens for tokenizer + std::vector preserved_tokens; }; ``` +### marker_registry (Extracted Markers) + +All markers are extracted via differential analysis without hardcoded patterns: + +```cpp +struct marker_registry { + // === Reasoning markers === + std::string reasoning_start; // e.g., "", "[THINK]", "<|START_THINKING|>" + std::string reasoning_end; // e.g., "", "[/THINK]", "<|END_THINKING|>" + + // === Content markers === + std::string content_start; // e.g., "", ">>>all\n" + std::string content_end; // e.g., "" + + // === Tool section markers === + std::string tool_section_start; // e.g., "", "[TOOL_CALLS]" + std::string tool_section_end; // e.g., "", "]" + std::string per_call_start; // e.g., "\u2985" (for multi-call templates) + std::string per_call_end; // e.g., " \u2985" + 
std::string call_separator; // e.g., ",", "\n" + + // === Function markers === + std::string func_name_prefix; // e.g., "", "\"" + std::string func_close; // e.g., "" + std::string args_start; // e.g., "{", " \u300b" + std::string args_end; // e.g., "}", "" + + // === Argument markers (for tagged args format) === + std::string arg_name_prefix; // e.g., "" + std::string arg_name_suffix; // e.g., ">", "" + std::string arg_value_prefix; // e.g., "", "" + std::string arg_value_suffix; // e.g., "", "" + std::string arg_separator; + + // === Special markers === + std::string code_block_marker; // e.g., "Action:" (markdown code block format) + std::string id_marker; // e.g., "[CALL_ID]" (bracket-tag format) + std::string function_namespace; // e.g., "functions." (prefixed-indexed format) +}; +``` + +## Tool Calling Formats + +The auto-parser recognizes three primary tool calling formats. Other formats may be deprecated in future versions. + +### JSON_NATIVE + +**Structure**: The entire tool call (function name, arguments, and values) is in JSON format. There may be enclosing tags around the tool calling section. + +**Characteristics**: +- Function name is a JSON field: `"name": "function_name"` +- Arguments are a JSON object: `"arguments": {"key": "value"}` +- May be wrapped in section markers like `...` or `[TOOL_CALLS]...]` + +**Examples**: + +Standard OpenAI-style: +```json + +{"name": "get_weather", "arguments": {"location": "Paris", "unit": "celsius"}} + +``` + +Mistral Nemo with array wrapper: +```json +[TOOL_CALLS] +[{"name": "calculate", "arguments": {"expr": "2+2"}}] +``` + +Hermes-style with tool_calls wrapper: +```json + +{"name": "search", "arguments": {"query": "llama.cpp"}} + +``` + +**Detection**: `args_start == "{"`, `args_end == "}"`, no function name prefix markers + +--- + +### TAG_WITH_JSON + +**Structure**: The function name is outside the JSON structure, typically within quasi-XML markers. Arguments are still provided as a JSON object. 
+ +**Characteristics**: +- Function name appears in tag attributes: `` or `` +- Arguments are a JSON object following the tag +- Has closing tags: `` or `` +- Arguments remain valid JSON + +**Examples**: + +Nemotron-style: +```xml +get_weather{"location": "Paris"} +``` + +Functionary v3.1: +```xml +{"location": "Paris", "unit": "celsius"} +``` + +ByteDance Seed-OSS: +```xml + +get_weather +{"location": "Paris"} + +``` + +MiniMax: +```xml + +calculate +{"expr": "2+2"} + +``` + +**Detection**: `func_name_prefix` starts with `<`, `args_start == "{"`, arguments are JSON + +--- + +### TAG_WITH_TAGGED + +**Structure**: Both the function name AND argument names are in XML-style tags. Argument values may be JSON or unquoted primitives depending on schema type. + +**Characteristics**: +- Function name in tag: `` or `` +- Each argument has its own tag: `value` +- String values are **unquoted** (raw text content of the tag) +- Non-string values (objects, arrays, numbers, booleans) are still JSON-formatted +- Supports streaming: partial arguments can be parsed incrementally + +**Examples**: + +Qwen/Hermes XML format: +```xml + +Paris +celsius + +``` + +Note how string values (`Paris`, `celsius`) are unquoted inside the tags. 
+ +Mixed types example: +```xml + +2+2 +2 +{"round": true} + +``` + +Here: +- `expr` and `precision` are strings (unquoted) +- `options` is an object (JSON-formatted inside the tag) + +**Detection**: `arg_name_prefix` is non-empty, arguments use tagged format rather than JSON object + +--- + +### Other Formats (To Be Deprecated) + +The following formats are currently supported but will likely be deprecated: + +| Format | Description | Example | +|--------|-------------|---------| +| `BRACKET_TAG` | Bracket-based markers | `[TOOL_CALLS]func[ARGS]{...}` | +| `PREFIXED_INDEXED` | Namespace prefix with index | `functions.name:0{...}` | +| `RECIPIENT_BASED` | Recipient routing | `>>>recipient\n{content}` | +| `MARKDOWN_BLOCK` | Markdown code blocks | `Action:\n\`\`\`json\n[...]` | + ## Analysis Flow ```console @@ -129,13 +274,13 @@ Phase 2: analyze_tool_structure() |-- Classify argument format (JSON vs tagged) | v -tool_call_structure +diff_analysis_result | v -generate_parser(content_structure, tool_call_structure) - |-- build_reasoning_block(content_structure) - |-- build_content_block(content_structure) - |-- build_tool_section(tool_call_structure, tools) +generate_parser(diff_analysis_result) + |-- build_reasoning_block(diff_analysis_result) + |-- build_content_block(diff_analysis_result) + |-- build_tool_section(diff_analysis_result, tools) |-- Compose into final parser | v @@ -148,14 +293,13 @@ The mechanism starts in `common/chat.cpp`, in `common_chat_templates_apply_jinja ```cpp // 1. Analyze the template (two-phase) -template_analysis_result analysis = template_analyzer::analyze_template(tmpl); +auto analysis = differential_analyzer::analyze(tmpl); // 2. Generate the parser and grammar -auto auto_params = universal_peg_generator::generate_parser(analysis, tmpl, params); +auto auto_params = universal_peg_generator::generate_parser(tmpl, params); // 3. 
Use if it provides more than basic content handling if (auto_params.format != COMMON_CHAT_FORMAT_CONTENT_ONLY || - auto_params.thinking_forced_open || !auto_params.parser.empty()) { return auto_params; } @@ -165,32 +309,32 @@ if (auto_params.format != COMMON_CHAT_FORMAT_CONTENT_ONLY || The unified builder (`common_chat_peg_unified_builder`) provides high-level methods: -- `build_reasoning_block(cs, reasoning_format, thinking_forced_open)` - Build reasoning parser -- `build_content_block(cs, reasoning_format)` - Build content parser -- `build_tool_section(ts, tools, parallel_tool_calls, force_tool_calls)` - Build tool section -- `build_function(ts, name, schema)` - Build single function parser -- `build_arguments(ts, schema)` - Build arguments parser +- `build_reasoning_block(analysis, reasoning_format, thinking_forced_open)` - Build reasoning parser +- `build_content_block(analysis, reasoning_format)` - Build content parser +- `build_tool_section(analysis, tools, parallel_tool_calls, force_tool_calls)` - Build tool section +- `build_function(analysis, name, schema)` - Build single function parser +- `build_arguments(analysis, schema)` - Build arguments parser ## Key Templates Supported - **Granite** - `` + `` with tool calls - **Nemotron** - JSON tools with `` wrapper -- **Qwen/Hermes** - XML-style `` format +- **Qwen/Hermes** - XML-style `` format (TAG_WITH_TAGGED) - **Command-R7B** - `<|START_THINKING|>`/`<|START_RESPONSE|>` + `<|START_ACTION|>` tools - **DeepSeek R1** - Forced thinking + complex tools -- **Mistral Nemo** - `[TOOL_CALLS]` wrapper -- **MiniMax** - `` wrapper with XML tools +- **Mistral Nemo** - `[TOOL_CALLS]` wrapper (JSON_NATIVE) +- **MiniMax** - `` wrapper with JSON args (TAG_WITH_JSON) - **GLM-4.6** - `` + `name\n......` format -- **Kimi-K2** - `FUNC_PREFIXED_INDEXED` format with namespace and indices -- **Mistral Small 3.2** - `FUNC_BRACKET_TAG` format with `[TOOL_CALLS]` markers -- **Functionary v3.2** - `FUNC_RECIPIENT_BASED` format with 
`>>>` routing +- **Kimi-K2** - `PREFIXED_INDEXED` format with namespace and indices +- **Mistral Small 3.2** - `BRACKET_TAG` format with `[TOOL_CALLS]` markers +- **Functionary v3.2** - `RECIPIENT_BASED` format with `>>>` routing ## Files | File | Purpose | |------|---------| | `common/chat-auto-parser.h` | Data structures and API declarations | -| `common/chat-auto-parser-analyzer.cpp` | Phase 1 and Phase 2 analysis implementation | +| `common/chat-diff-analyzer.h/cpp` | Differential analysis implementation | | `common/chat-auto-parser-generator.cpp` | PEG parser generator | | `common/chat-auto-parser-helpers.h/cpp` | Shared helper functions | | `common/chat-peg-parser.h/cpp` | Unified builder and mapper classes | @@ -205,7 +349,7 @@ The unified builder (`common_chat_peg_unified_builder`) provides high-level meth **Method 1: Differential Reasoning Content Analysis** - Render template with `reasoning_content` field present vs absent -- Compare outputs to find markers between `THOUGHT_MARKER` and `CONTENT_MARKER` +- Compare outputs to find markers between reasoning and content - If only closing tag found, derive opening tag using patterns: - XML: `` → `` - Special tokens: `<|END_X|>` → `<|START_X|>`, `<|/X|>` → `<|X|>` @@ -260,85 +404,121 @@ The unified builder (`common_chat_peg_unified_builder`) provides high-level meth ### Phase 2: Tool Call Structure Analysis -#### Differential Analysis Algorithm +#### Pure Differential Analysis Algorithm -**Test Payload Strategy**: +**Key Principle**: All patterns are extracted through template comparison. The **only heuristic** is detecting JSON vs marker-based structures (via JSON parse attempt). No hardcoded pattern lists. -1. **Base**: User + Assistant with content only (no tools) -2. **Tool 1**: User + Assistant with tool_calls (empty args) -3. **Tool 2**: User + Assistant with tool_calls (with args) -4. 
**Tool 3**: User + Assistant with multiple tool calls +**Comparison Matrix**: -**Pattern Extraction Process**: +| Comparison | Purpose | What's Extracted | +|------------|---------|------------------| +| **T1**: No tools vs tools | Tool section markers | `tool_section_start`, `tool_section_end` | +| **T2**: 1 call vs 2 calls | Call separators | `per_call_start`, `call_separator` | +| **T3**: func_alpha vs func_beta | Function boundaries | `func_name_prefix`, `func_name_suffix` | +| **T4**: 1 arg vs 2 args | Argument separator | `arg_separator` | +| **T5**: No args vs args | Args container | `args_start`, `args_end` | +| **A1**: key1 vs key2 | Arg name boundaries | `arg_name_prefix`, `arg_name_suffix` | +| **A2**: value A vs B | Arg value boundaries | `arg_value_prefix`, `arg_value_suffix` | +| **A3**: number vs string | Quoting behavior | Value type handling | -1. Compute string differences between base and tool outputs -2. Use `test_function_name` as reliable search anchor (using `rfind` for last occurrence) -3. 
Extract structural elements: - - `tool_call_opener`: Common prefix before function name - - `tool_call_closer`: Common suffix after function calls - - `function_opener`: Tag immediately before function name - - `function_closer`: Tag after function content - - `parameter_key_prefix/suffix`: Argument wrapping patterns +**Structural Extraction Helpers**: -#### Format Classification Logic +```cpp +// Extract last structural marker from string (finds last <, [, {, or ") +std::string extract_structural_suffix(const std::string & str); -**FORMAT_JSON_NATIVE**: +// Extract first structural marker from string (finds first >, ], }, or ") +std::string extract_structural_prefix(const std::string & str); -- Detected by `{"name":` pattern in `tool_call_opener` -- Or XML markers with JSON structure +// The only heuristic: detect if content is valid JSON +bool is_json_based(const std::string & content); +``` -**FORMAT_XML_CONSTRUCTED**: +**Pattern Extraction Process** (Example - T1: Tool Section Markers): -- `function_opener` starts with `<` -- No substantial parameter markers +1. Render template with/without tool calls +2. Compute diff: `calculate_diff_split(output_no_tools, output_with_tools)` +3. Use controlled function name (`func_alpha`) as anchor in `diff.right` +4. Extract structural prefix before function name → `tool_section_start` +5. Extract structural suffix after tool content → `tool_section_end` -**FORMAT_RECIPIENT_BASED**: +**No Pattern Lists**: Unlike the old approach, there are no hardcoded lists like `["", "[TOOL_CALLS]", ...]`. All markers are discovered through differential comparison. 
-- `tool_call_start_marker == function_opener` -- No parameter markers -- Opener doesn't start with structural chars +#### Variant Detection Logic -**FORMAT_BRACKET_TAG**: +Instead of forcing patterns into enum types, the analyzer detects **variant types** as strings that describe the structural characteristics: -- `function_name_suffix` contains bracket tags like `[CALL_ID]...[ARGS]` -- `tool_call_start_marker` matches `[TOOL_CALLS]` pattern +**Variant Types**: -**FORMAT_PREFIXED_INDEXED**: +- `"json-native"`: Pure JSON tool calls (Llama, Mistral Nemo) +- `"tagged-json"`: Function name in markers, args in JSON (Functionary v3.1, Nemotron) +- `"tagged-args"`: Full XML-style with tagged arguments (Qwen, Hermes, MiniMax) +- `"bracket-tag"`: Bracket markers (Mistral Small 3.2: `[TOOL_CALLS]func[ARGS]{...}`) +- `"recipient-based"`: Recipient routing (Functionary v3.2: `>>>func_name`) +- `"markdown-block"`: Markdown code blocks (Cohere Command-R Plus) +- `"prefixed-indexed"`: Namespace prefix with indices (Kimi-K2: `functions.name:0`) -- `function_opener` ends with `.` (namespace separator) -- `function_name_suffix` starts with `:` followed by digit -- Example: `functions.name:0<|tool_call_argument_begin|>` +**Detection Strategy** (from most to least distinctive): -#### Specialized Format Handling +```cpp +void detect_tool_variant(diff_analysis_result & result) { + // 1. 
Check for unique markers (most distinctive) + if (!result.markers.id_marker.empty()) + → "bracket-tag" -**FUNC_PREFIXED_INDEXED (Kimi-K2)**: + if (markers contain ">>>") + → "recipient-based" -- Splits `function_opener` at last `>` to get `per_call_start` + `function_namespace` -- Extracts `args_marker` from `function_name_suffix` -- Derives `per_call_end` by matching structural patterns in `tool_call_closer` + if (code_block_marker present) + → "markdown-block" -**FUNC_TAG_WITH_NAME (Functionary/Nemotron)**: + if (function_namespace or suffix contains ':') + → "prefixed-indexed" -- Detects nested vs non-nested formats -- Uses overlap detection between `tool_section_start` and `function_prefix` -- Handles double-wrapping prevention + // 2. Check argument structure (JSON variants) + if (arg_name_prefix starts with '<') + → "tagged-args" -**ARGS_KEY_VALUE_TAGS (GLM-4.6)**: + if (func_name_prefix starts with '<') + → "tagged-json" -- Detects `keyvalue` pattern -- Cleans up suffix to extract just the key closer + // 3. 
Default + → "json-native" +} +``` -**FUNC_RECIPIENT_BASED (Functionary v3.2)**: +#### Compositional Parser Building -- Detects `>>>` recipient delimiter format -- Routes to "all" for content, function name for tools -- Uses same delimiter for both content and tool routing +The analyzer builds separate, composable parsers for each component: -**FUNC_BRACKET_TAG (Mistral Small 3.2/Devstral)**: +**Reasoning Parser**: -- Detects `[TOOL_CALLS]function_name[ARGS]{...}` pattern -- Optional `[CALL_ID]id` marker for tool call identification -- No section wrapper - each call starts independently +- Built from `reasoning_start` and `reasoning_end` markers +- Supports tag-based, delimiter, and forced-open modes + +**Content Parser**: + +- Built from `content_start` and `content_end` markers +- Supports plain, always-wrapped, and conditionally-wrapped modes + +**Tool Parser** (variant-specific): + +- Built based on `variant_type` detection +- Each variant has its own builder that uses the extracted markers +- No enum forcing - structure preserved as discovered + +**Final Composition**: + +```cpp +sequence({ + reasoning_parser, + space(), + content_parser, + space(), + tool_parser, + end() +}) +``` ### Generator Algorithms @@ -386,13 +566,13 @@ The test suite covers: **Tool Call Formats**: -- JSON: Llama 3.x, Mistral Nemo, Hermes, MiMo-VL -- XML: Nemotron, Qwen3-Coder, MiniMax -- Tagged: GLM-4.6 (key-value tags) -- Bracket-tag: Mistral Small 3.2, Devstral -- Prefixed-indexed: Kimi-K2 variants -- Name-as-key: Apertus-8B -- Recipient-based: Functionary v3.2 +- JSON_NATIVE: Llama 3.x, Mistral Nemo, Hermes, MiMo-VL +- TAG_WITH_JSON: Nemotron, Qwen3-Coder, MiniMax +- TAG_WITH_TAGGED: Qwen, Hermes (XML), ByteDance Seed-OSS +- BRACKET_TAG: Mistral Small 3.2, Devstral +- PREFIXED_INDEXED: Kimi-K2 variants +- RECIPIENT_BASED: Functionary v3.2 +- MARKDOWN_BLOCK: Cohere Command-R Plus **Edge Cases**: @@ -433,11 +613,11 @@ tst.test("input") To support a new template format: -1. 
**If it follows standard patterns** - The auto-parser should detect it automatically -2. **If it has unique markers** - Add the markers to the detection patterns in: - - `detect_reasoning_markers()` for reasoning tags - - `detect_content_markers()` for content wrappers - - `extract_patterns_from_differences()` for tool call patterns +1. **If it follows standard patterns** - The auto-parser should detect it automatically using the three main formats (JSON_NATIVE, TAG_WITH_JSON, TAG_WITH_TAGGED) +2. **If it has unique markers** - Add differential analysis patterns in: + - `compare_reasoning_presence()` for reasoning tags + - `compare_content_values()` for content wrappers + - `extract_tool_section()` for tool call patterns 3. **If it needs special handling** - Add a dedicated handler in `chat.cpp` before the auto-parser block ## Edge Cases and Quirks @@ -458,28 +638,28 @@ The following templates have active tests in `tests/test-chat.cpp`: | Template | Format | Notes | |----------|--------|-------| -| DeepSeek V3.1 | `FUNC_JSON_OBJECT` | Forced thinking mode | +| DeepSeek V3.1 | `JSON_NATIVE` | Forced thinking mode | | DeepSeek R1 Distill (Llama/Qwen) | Reasoning only | Forced-open thinking | | llama-cpp-deepseek-r1 | Reasoning only | Forced-open thinking | -| GLM-4.6 | `ARGS_KEY_VALUE_TAGS` | `name\n......` format | -| Kimi-K2 / Kimi-K2-Instruct / Kimi-K2-Thinking | `FUNC_PREFIXED_INDEXED` | `functions.name:0` with special markers | -| Apertus-8B-Instruct | `FUNC_NAME_AS_KEY` | `{"function_name": {...}}` format | -| MiniMax-M2 | `FUNC_TAG_WITH_NAME` | XML invoke with parameter tags | -| NVIDIA-Nemotron-Nano-v2 | `FUNC_JSON_OBJECT` | `` wrapper (nested) | -| Mistral-Nemo-Instruct-2407 | `FUNC_JSON_OBJECT` | `[TOOL_CALLS]` wrapper with id field | -| Functionary v3.1 | `FUNC_TAG_WITH_NAME` | `` non-nested format | -| Functionary v3.2 | `FUNC_RECIPIENT_BASED` | `>>>` recipient delimiter format | -| MiMo-VL / Hermes 3 / Qwen 2.5 | `FUNC_JSON_OBJECT` | `` wrapper | -| 
Apriel 1.5 | `FUNC_JSON_OBJECT` | `` wrapper with JSON array | +| GLM-4.6 | `TAGGED` | `name\n......` format | +| Kimi-K2 / Kimi-K2-Instruct / Kimi-K2-Thinking | `PREFIXED_INDEXED` | `functions.name:0` with special markers | +| Apertus-8B-Instruct | `NAME_AS_KEY` | `{"function_name": {...}}` format | +| MiniMax-M2 | `TAG_WITH_JSON` | XML invoke with parameter tags | +| NVIDIA-Nemotron-Nano-v2 | `JSON_NATIVE` | `` wrapper (nested) | +| Mistral-Nemo-Instruct-2407 | `JSON_NATIVE` | `[TOOL_CALLS]` wrapper with id field | +| Functionary v3.1 | `TAG_WITH_JSON` | `` non-nested format | +| Functionary v3.2 | `RECIPIENT_BASED` | `>>>` recipient delimiter format | +| MiMo-VL / Hermes 3 / Qwen 2.5 | `JSON_NATIVE` | `` wrapper | +| Apriel 1.5 | `JSON_NATIVE` | `` wrapper with JSON array | | Apriel 1.6 Thinker | Reasoning only | Implicit reasoning start | -| Cohere Command-R7B | `FUNC_JSON_OBJECT` | `START_RESPONSE/ACTION/THINKING` markers | -| Mistral Small 3.2 | `FUNC_BRACKET_TAG` | `[TOOL_CALLS]func[ARGS]{...}` with ID | -| Devstral | `FUNC_BRACKET_TAG` | `[TOOL_CALLS]func[ARGS]{...}` without ID | +| Cohere Command-R7B | `JSON_NATIVE` | START_RESPONSE/ACTION/THINKING markers | +| Mistral Small 3.2 | `BRACKET_TAG` | `[TOOL_CALLS]func[ARGS]{...}` with ID | +| Devstral | `BRACKET_TAG` | `[TOOL_CALLS]func[ARGS]{...}` without ID | | Ministral-3-14B-Reasoning | Custom reasoning | `[THINK]...[/THINK]` tags | -| IBM Granite | `FUNC_JSON_OBJECT` | `` + `` | -| ByteDance Seed-OSS | `FUNC_TAG_WITH_NAME` | Custom `` and `` tags | -| Qwen3-Coder | `FUNC_TAG_WITH_NAME` | XML-style tool format | -| Cohere Command-R Plus | `FUNC_MARKDOWN_CODE_BLOCK` | `Action:\n\`\`\`json\n[...]\n\`\`\`` format | +| IBM Granite | `JSON_NATIVE` | `` + `` | +| ByteDance Seed-OSS | `TAG_WITH_TAGGED` | Custom `` and `` tags | +| Qwen3-Coder | `TAG_WITH_TAGGED` | XML-style tool format | +| Cohere Command-R Plus | `MARKDOWN_BLOCK` | `Action:\n`\`\`\`json\n[...]\n`\`\`` format | ### Currently Unsupported Templates 
@@ -496,18 +676,25 @@ Some templates genuinely don't support tool calls (this is not a detection bug): ### TODO / Roadmap -- [ ] **Fix OpenAI GPT-OSS**: Add `FUNC_CHANNEL_BASED` format for channel marker structure. -- [x] **~~Fix Cohere Command-R Plus~~**: Added `FUNC_MARKDOWN_CODE_BLOCK` format for `Action:\n\`\`\`json` structure. +- [ ] **Fix OpenAI GPT-OSS**: Add handling for channel marker structure. +- [x] **~~Fix Cohere Command-R Plus~~**: Added `MARKDOWN_BLOCK` format for `Action:\n\`\`\`json` structure. ### Recent Additions (Dec 2025 - Jan 2026) -- **FUNC_RECIPIENT_BASED**: Support for Functionary v3.2's `>>>` recipient delimiter format -- **FUNC_BRACKET_TAG**: Support for Mistral Small 3.2 and Devstral's `[TOOL_CALLS]...` format +- **RECIPIENT_BASED**: Support for Functionary v3.2's `>>>` recipient delimiter format +- **BRACKET_TAG**: Support for Mistral Small 3.2 and Devstral's `[TOOL_CALLS]...` format - **Enhanced Content Detection**: Better handling of custom reasoning tags and content wrappers - **Improved Streaming Support**: Better handling of partial parsing for all supported formats - **Custom Tag Support**: Support for non-standard reasoning tags like `` (ByteDance) - **Multi-line Tool Arguments**: Better parsing of complex tool arguments with code blocks -- **FUNC_MARKDOWN_CODE_BLOCK**: Support for Cohere Command-R Plus markdown code block format +- **MARKDOWN_BLOCK**: Support for Cohere Command-R Plus markdown code block format - **Implicit Reasoning Support**: Support for templates where reasoning starts implicitly without a start marker.
+- **Pure Differential Refactoring (Jan 2026)**: Complete refactoring to eliminate hardcoded patterns: + - Removed all hardcoded pattern lists (previously had `["", "[TOOL_CALLS]", ...]`) + - Added structural extraction helpers (`extract_structural_suffix`, `extract_structural_prefix`) + - Replaced enum-based classification with string-based variant types + - Only remaining heuristic: JSON detection via parse attempt + - All markers now discovered through differential template comparison +- **Three Primary Tool Formats**: Consolidated tool calling formats to JSON_NATIVE, TAG_WITH_JSON, and TAG_WITH_TAGGED for clarity and maintainability The auto-parser now successfully handles 25+ different template formats across reasoning-only, tool-calling, and hybrid models, with comprehensive test coverage ensuring robust parsing across streaming and non-streaming scenarios. diff --git a/models/templates/CohereForAI-c4ai-command-r7b-12-2024-tool_use.jinja b/models/templates/CohereForAI-c4ai-command-r7b-12-2024-tool_use.jinja index 078e9f5458..e144cfcf69 100644 --- a/models/templates/CohereForAI-c4ai-command-r7b-12-2024-tool_use.jinja +++ b/models/templates/CohereForAI-c4ai-command-r7b-12-2024-tool_use.jinja @@ -132,7 +132,7 @@ The following instructions take precedence over instructions in the default prea {%- elif message.role|lower == 'user' %} <|START_OF_TURN_TOKEN|><|USER_TOKEN|>{{ message.content }}<|END_OF_TURN_TOKEN|>{%- if documents and not sent_documents.value %}{%- set sent_documents.value = true %}{% set tool_idx.value = tool_idx.value + 1 %}{{ document_turn(documents) }}{% endif %} {%- elif message.role|lower == 'assistant' or message.role|lower == 'chatbot' %} -<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>{% if message.tool_calls %}<|START_THINKING|>{{message.tool_plan}}<|END_THINKING|><|START_ACTION|>[ +<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>{% if message.tool_calls %}<|START_THINKING|>{{message.reasoning_content}}<|END_THINKING|><|START_ACTION|>[ {% for tc in 
message.tool_calls %} {"tool_call_id": "{{ tool_idx.value }}", "tool_name": "{{ tc['function']['name'] }}", "parameters": {{ tc['function']['arguments']|tojson }}}{% if not loop.last %},{% endif %} diff --git a/models/templates/deepseek-ai-DeepSeek-R1-Distill-Qwen-32B.jinja b/models/templates/deepseek-ai-DeepSeek-R1-Distill-Qwen-32B.jinja index fff2b755e2..0c8d81e107 100644 --- a/models/templates/deepseek-ai-DeepSeek-R1-Distill-Qwen-32B.jinja +++ b/models/templates/deepseek-ai-DeepSeek-R1-Distill-Qwen-32B.jinja @@ -11,7 +11,7 @@ {%- if message['role'] == 'user' -%} {%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}} {%- endif -%} - {%- if message['role'] == 'assistant' and message['content'] is none -%} + {%- if message['role'] == 'assistant' and message['tool_calls'] -%} {%- set ns.is_tool = false -%} {%- for tool in message['tool_calls']-%} {%- if not ns.is_first -%} diff --git a/template.ans b/template.ans new file mode 100644 index 0000000000..da602b1a07 --- /dev/null +++ b/template.ans @@ -0,0 +1,7774 @@ + +================================================================================ + TEMPLATE ANALYSIS TOOL +================================================================================ +Analyzing 37 template(s) + +================================================================================ + ANALYZING TEMPLATE: models/templates/Apertus-8B-Instruct.jinja +================================================================================ + +=== Template Capabilities (from jinja::caps) === +supports_tools: false +supports_tool_calls: true +supports_system_role: true +supports_parallel_tool_calls: true +requires_typed_content: false + +=== Diff: With vs Without Tools (single user message) === +Common Prefix: '<|system_start|>You are Apertus, a helpful assistant created by the SwissAI initiative. 
+Knowledge cutoff: 2024-04 +Current date: 2026-01-26<|system_end|><|developer_start|>Deliberation: enabled +Tool Capabilities:' +Common Suffix: '<|developer_end|><|user_start|>Hello, please help me.<|user_end|>' +Left (difference): ' disabled' +Right (difference): ' +// +type = () => any;' + +=== Diff: With vs Without add_generation_prompt (single user message) === +Common Prefix: '<|system_start|>You are Apertus, a helpful assistant created by the SwissAI initiative. +Knowledge cutoff: 2024-04 +Current date: 2026-01-26<|system_end|><|developer_start|>Deliberation: enabled +Tool Capabilities: disabled<|developer_end|><|user_start|>Hello, please help me.<|user_end|>' +Common Suffix: '' +Left (difference): '' +Right (difference): '<|assistant_start|>' + +=== Diff: With vs Without reasoning_content (user, assistant) === +Common Prefix: '<|system_start|>You are Apertus, a helpful assistant created by the SwissAI initiative. +Knowledge cutoff: 2024-04 +Current date: 2026-01-26<|system_end|><|developer_start|>Deliberation: enabled +Tool Capabilities: disabled<|developer_end|><|user_start|>Hello, please help me.<|user_end|><|assistant_start|>I can help you with that.' +Common Suffix: '' +Left (difference): '' +Right (difference): '' + +=== Diff: With vs Without reasoning_content (user, assistant, user) === +Common Prefix: '<|system_start|>You are Apertus, a helpful assistant created by the SwissAI initiative. +Knowledge cutoff: 2024-04 +Current date: 2026-01-26<|system_end|><|developer_start|>Deliberation: enabled +Tool Capabilities: disabled<|developer_end|><|user_start|>Hello, please help me.<|user_end|><|assistant_start|>I can help you with that.<|assistant_end|><|user_start|>Thank you.<|user_end|>' +Common Suffix: '' +Left (difference): '' +Right (difference): '' + +=== Diff: With vs Without tool call (user, assistant) === +Common Prefix: '<|system_start|>You are Apertus, a helpful assistant created by the SwissAI initiative. 
+Knowledge cutoff: 2024-04 +Current date: 2026-01-26<|system_end|><|developer_start|>Deliberation: enabled +Tool Capabilities: +// +type = () => any;<|developer_end|><|user_start|>Hello, please help me.<|user_end|><|assistant_start|>' +Common Suffix: '' +Left (difference): 'Let me help you.' +Right (difference): '<|tools_prefix|>[{"test_function_name": {"param1":0x6414d8ab7770, "param2":0x6414d8b07d80}}]<|tools_suffix|>' + +=== Diff: With vs Without tool call (user, assistant, user) === +Common Prefix: '<|system_start|>You are Apertus, a helpful assistant created by the SwissAI initiative. +Knowledge cutoff: 2024-04 +Current date: 2026-01-26<|system_end|><|developer_start|>Deliberation: enabled +Tool Capabilities: +// +type = () => any;<|developer_end|><|user_start|>Hello, please help me.<|user_end|><|assistant_start|>' +Common Suffix: '<|assistant_end|><|user_start|>Continue.<|user_end|>' +Left (difference): 'Let me help you.' +Right (difference): '<|tools_prefix|>[{"test_function_name": {"param1":0x6414d8b0a5f0, "param2":0x6414d8b03350}}]<|tools_suffix|>' + +=== Diff: One vs Two tool calls (user, assistant) === +Common Prefix: '<|system_start|>You are Apertus, a helpful assistant created by the SwissAI initiative. +Knowledge cutoff: 2024-04 +Current date: 2026-01-26<|system_end|><|developer_start|>Deliberation: enabled +Tool Capabilities: +// +type = () => any;<|developer_end|><|user_start|>Hello, please help me.<|user_end|><|assistant_start|><|tools_prefix|>[{"test_function_name": {"param1":0x6414d8ab' +Common Suffix: '0}}]<|tools_suffix|>' +Left (difference): '8210, "param2":0x6414d8b1315' +Right (difference): '7220, "param2":0x6414d8b06690}}, {"test_function_name": {"param1":0x6414d8ae81e0, "param2":0x6414d8b0d43' + +=== Diff: One vs Two tool calls (user, assistant, user) === +Common Prefix: '<|system_start|>You are Apertus, a helpful assistant created by the SwissAI initiative. 
+Knowledge cutoff: 2024-04 +Current date: 2026-01-26<|system_end|><|developer_start|>Deliberation: enabled +Tool Capabilities: +// +type = () => any;<|developer_end|><|user_start|>Hello, please help me.<|user_end|><|assistant_start|><|tools_prefix|>[{"test_function_name": {"param1":0x6414d8b0' +Common Suffix: '0}}]<|tools_suffix|><|assistant_end|><|user_start|>Continue.<|user_end|>' +Left (difference): '6690, "param2":0x6414d8b0e31' +Right (difference): 'd430, "param2":0x6414d8b06540}}, {"test_function_name": {"param1":0x6414d8b04530, "param2":0x6414d8b05b8' + +=== Diff: Tool call with vs without reasoning_content (user, assistant) === +Common Prefix: '<|system_start|>You are Apertus, a helpful assistant created by the SwissAI initiative. +Knowledge cutoff: 2024-04 +Current date: 2026-01-26<|system_end|><|developer_start|>Deliberation: enabled +Tool Capabilities: +// +type = () => any;<|developer_end|><|user_start|>Hello, please help me.<|user_end|><|assistant_start|><|tools_prefix|>[{"test_function_name": {"param1":0x6414d8' +Common Suffix: '0}}]<|tools_suffix|>' +Left (difference): 'b05b80, "param2":0x6414d8b0a1c' +Right (difference): 'ae81e0, "param2":0x6414d8b0d43' + +=== Checking Reasoning Variables === +No reasoning/thinking-related variables were queried by the template + +================================================================================ + ANALYZING TEMPLATE: models/templates/Apriel-1.6-15b-Thinker-fixed.jinja +================================================================================ + +=== Template Capabilities (from jinja::caps) === +supports_tools: false +supports_tool_calls: true +supports_system_role: true +supports_parallel_tool_calls: true +requires_typed_content: false + +=== Diff: With vs Without Tools (single user message) === +Common Prefix: '<|begin_system|> +You are a thoughtful, systematic AI assistant from ServiceNow Language Models (SLAM) lab. 
+ Analyze each question carefully, present your reasoning step-by-step, then provide the final + response after the marker [BEGIN FINAL RESPONSE]. +' +Common Suffix: '<|begin_user|> +Hello, please help me. +<|begin_assistant|> +Here are my reasoning steps: +' +Left (difference): '' +Right (difference): 'You are provided with function signatures within XML tags. + You may call one or more functions to assist with the user query. + Don't make assumptions about the arguments. You should infer the argument values from previous + user responses and the system message. + Here are the available tools: + +{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}} + . + + Return all function calls as a list of JSON objects within XML tags. + Each JSON object should contain a function name and arguments as follows: + [ + {"name": , "arguments": }, + {"name": , "arguments": }, + ... + ] +' + +=== Diff: With vs Without add_generation_prompt (single user message) === +Common Prefix: '<|begin_system|> +You are a thoughtful, systematic AI assistant from ServiceNow Language Models (SLAM) lab. + Analyze each question carefully, present your reasoning step-by-step, then provide the final + response after the marker [BEGIN FINAL RESPONSE]. +<|begin_user|> +Hello, please help me. +<|begin_assistant|> +Here are my reasoning steps: +' +Common Suffix: '' +Left (difference): '' +Right (difference): '' + +=== Diff: With vs Without reasoning_content (user, assistant) === +Common Prefix: '<|begin_system|> +You are a thoughtful, systematic AI assistant from ServiceNow Language Models (SLAM) lab. 
+ Analyze each question carefully, present your reasoning step-by-step, then provide the final + response after the marker [BEGIN FINAL RESPONSE]. +<|begin_user|> +Hello, please help me. +<|begin_assistant|> +' +Common Suffix: 'I can help you with that.' +Left (difference): '' +Right (difference): 'The user is asking for help. I should respond positively. +[BEGIN FINAL RESPONSE] +' + +=== Diff: With vs Without reasoning_content (user, assistant, user) === +Common Prefix: '<|begin_system|> +You are a thoughtful, systematic AI assistant from ServiceNow Language Models (SLAM) lab. + Analyze each question carefully, present your reasoning step-by-step, then provide the final + response after the marker [BEGIN FINAL RESPONSE]. +<|begin_user|> +Hello, please help me. +<|begin_assistant|> +I can help you with that. +<|end|> +<|begin_user|> +Thank you. +<|begin_assistant|> +Here are my reasoning steps: +' +Common Suffix: '' +Left (difference): '' +Right (difference): '' + +=== Diff: With vs Without tool call (user, assistant) === +Common Prefix: '<|begin_system|> +You are a thoughtful, systematic AI assistant from ServiceNow Language Models (SLAM) lab. + Analyze each question carefully, present your reasoning step-by-step, then provide the final + response after the marker [BEGIN FINAL RESPONSE]. +You are provided with function signatures within XML tags. + You may call one or more functions to assist with the user query. + Don't make assumptions about the arguments. You should infer the argument values from previous + user responses and the system message. + Here are the available tools: + +{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}} + . 
+ + Return all function calls as a list of JSON objects within XML tags. + Each JSON object should contain a function name and arguments as follows: + [ + {"name": , "arguments": }, + {"name": , "arguments": }, + ... + ] +<|begin_user|> +Hello, please help me. +<|begin_assistant|> +' +Common Suffix: '' +Left (difference): 'Let me help you.' +Right (difference): ' +[{"name": "test_function_name", "arguments": {"param1": "value1", "param2": "value2"}}]' + +=== Diff: With vs Without tool call (user, assistant, user) === +Common Prefix: '<|begin_system|> +You are a thoughtful, systematic AI assistant from ServiceNow Language Models (SLAM) lab. + Analyze each question carefully, present your reasoning step-by-step, then provide the final + response after the marker [BEGIN FINAL RESPONSE]. +You are provided with function signatures within XML tags. + You may call one or more functions to assist with the user query. + Don't make assumptions about the arguments. You should infer the argument values from previous + user responses and the system message. + Here are the available tools: + +{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}} + . + + Return all function calls as a list of JSON objects within XML tags. + Each JSON object should contain a function name and arguments as follows: + [ + {"name": , "arguments": }, + {"name": , "arguments": }, + ... + ] +<|begin_user|> +Hello, please help me. +<|begin_assistant|> +' +Common Suffix: ' +<|end|> +<|begin_user|> +Continue. +<|begin_assistant|> +Here are my reasoning steps: +' +Left (difference): 'Let me help you.' 
+Right (difference): ' +[{"name": "test_function_name", "arguments": {"param1": "value1", "param2": "value2"}, "id": "call_001"}]' + +=== Diff: One vs Two tool calls (user, assistant) === +Common Prefix: '<|begin_system|> +You are a thoughtful, systematic AI assistant from ServiceNow Language Models (SLAM) lab. + Analyze each question carefully, present your reasoning step-by-step, then provide the final + response after the marker [BEGIN FINAL RESPONSE]. +You are provided with function signatures within XML tags. + You may call one or more functions to assist with the user query. + Don't make assumptions about the arguments. You should infer the argument values from previous + user responses and the system message. + Here are the available tools: + +{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}} + . + + Return all function calls as a list of JSON objects within XML tags. + Each JSON object should contain a function name and arguments as follows: + [ + {"name": , "arguments": }, + {"name": , "arguments": }, + ... + ] +<|begin_user|> +Hello, please help me. +<|begin_assistant|> + +[{"name": "test_function_name", "arguments": {"param1": "value1", "param2": "value2"}}' +Common Suffix: ']' +Left (difference): '' +Right (difference): ', {"name": "test_function_name", "arguments": {"param1": "value3", "param2": "value4"}}' + +=== Diff: One vs Two tool calls (user, assistant, user) === +Common Prefix: '<|begin_system|> +You are a thoughtful, systematic AI assistant from ServiceNow Language Models (SLAM) lab. + Analyze each question carefully, present your reasoning step-by-step, then provide the final + response after the marker [BEGIN FINAL RESPONSE]. 
+You are provided with function signatures within XML tags. + You may call one or more functions to assist with the user query. + Don't make assumptions about the arguments. You should infer the argument values from previous + user responses and the system message. + Here are the available tools: + +{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}} + . + + Return all function calls as a list of JSON objects within XML tags. + Each JSON object should contain a function name and arguments as follows: + [ + {"name": , "arguments": }, + {"name": , "arguments": }, + ... + ] +<|begin_user|> +Hello, please help me. +<|begin_assistant|> + +[{"name": "test_function_name", "arguments": {"param1": "value1", "param2": "value2"}, "id": "call_001"}' +Common Suffix: '] +<|end|> +<|begin_user|> +Continue. +<|begin_assistant|> +Here are my reasoning steps: +' +Left (difference): '' +Right (difference): ', {"name": "test_function_name", "arguments": {"param1": "value3", "param2": "value4"}, "id": "call_002"}' + +=== Diff: Tool call with vs without reasoning_content (user, assistant) === +Common Prefix: '<|begin_system|> +You are a thoughtful, systematic AI assistant from ServiceNow Language Models (SLAM) lab. + Analyze each question carefully, present your reasoning step-by-step, then provide the final + response after the marker [BEGIN FINAL RESPONSE]. +You are provided with function signatures within XML tags. + You may call one or more functions to assist with the user query. + Don't make assumptions about the arguments. You should infer the argument values from previous + user responses and the system message. 
+ Here are the available tools: + +{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}} + . + + Return all function calls as a list of JSON objects within XML tags. + Each JSON object should contain a function name and arguments as follows: + [ + {"name": , "arguments": }, + {"name": , "arguments": }, + ... + ] +<|begin_user|> +Hello, please help me. +<|begin_assistant|> +' +Common Suffix: ' +[{"name": "test_function_name", "arguments": {"param1": "value1", "param2": "value2"}}]' +Left (difference): '' +Right (difference): 'I need to call the tool first. +[BEGIN FINAL RESPONSE] +' + +=== Checking Reasoning Variables === +No reasoning/thinking-related variables were queried by the template + +================================================================================ + ANALYZING TEMPLATE: models/templates/ByteDance-Seed-OSS.jinja +================================================================================ + +=== Template Capabilities (from jinja::caps) === +supports_tools: true +supports_tool_calls: true +supports_system_role: true +supports_parallel_tool_calls: true +requires_typed_content: false + +=== Diff: With vs Without Tools (single user message) === +Common Prefix: '' +Common Suffix: 'user +Hello, please help me.' +Left (difference): '' +Right (difference): 'system +You are Doubao, a helpful AI assistant. You may call one or more functions to assist with the user query. 
+ +Function: +def test_function_name(param1: str,param2: str): + """ + A test function for debugging + + Args: + - param1 (str) [必填]: First parameter + - param2 (str) [必填]: Second parameter + + """ +工具调用请遵循如下格式: + + +value_1 +This is the value for the second parameter +that can span +multiple lines + + +' + +=== Diff: With vs Without add_generation_prompt (single user message) === +Common Prefix: 'user +Hello, please help me.' +Common Suffix: '' +Left (difference): '' +Right (difference): 'assistant +' + +=== Diff: With vs Without reasoning_content (user, assistant) === +Common Prefix: 'user +Hello, please help me.assistant +' +Common Suffix: 'I can help you with that.' +Left (difference): '' +Right (difference): 'The user is asking for help. I should respond positively. +' + +=== Diff: With vs Without reasoning_content (user, assistant, user) === +Common Prefix: 'user +Hello, please help me.assistant +' +Common Suffix: 'I can help you with that.user +Thank you.' +Left (difference): '' +Right (difference): 'The user is asking for help. I should respond positively. +' + +=== Diff: With vs Without tool call (user, assistant) === +Common Prefix: 'system +You are Doubao, a helpful AI assistant. You may call one or more functions to assist with the user query. + +Function: +def test_function_name(param1: str,param2: str): + """ + A test function for debugging + + Args: + - param1 (str) [必填]: First parameter + - param2 (str) [必填]: Second parameter + + """ +工具调用请遵循如下格式: + + +value_1 +This is the value for the second parameter +that can span +multiple lines + + +user +Hello, please help me.assistant +' +Common Suffix: '' +Left (difference): 'Let me help you.' +Right (difference): ' + +value1 +value2 + +' + +=== Diff: With vs Without tool call (user, assistant, user) === +Common Prefix: 'system +You are Doubao, a helpful AI assistant. You may call one or more functions to assist with the user query. 
+ +Function: +def test_function_name(param1: str,param2: str): + """ + A test function for debugging + + Args: + - param1 (str) [必填]: First parameter + - param2 (str) [必填]: Second parameter + + """ +工具调用请遵循如下格式: + + +value_1 +This is the value for the second parameter +that can span +multiple lines + + +user +Hello, please help me.assistant +' +Common Suffix: 'user +Continue.' +Left (difference): 'Let me help you.' +Right (difference): ' + +value1 +value2 + +' + +=== Diff: One vs Two tool calls (user, assistant) === +Common Prefix: 'system +You are Doubao, a helpful AI assistant. You may call one or more functions to assist with the user query. + +Function: +def test_function_name(param1: str,param2: str): + """ + A test function for debugging + + Args: + - param1 (str) [必填]: First parameter + - param2 (str) [必填]: Second parameter + + """ +工具调用请遵循如下格式: + + +value_1 +This is the value for the second parameter +that can span +multiple lines + + +user +Hello, please help me.assistant + + +value1 +value2 + +' +Common Suffix: '' +Left (difference): '' +Right (difference): ' + + +value3 +value4 + +' + +=== Diff: One vs Two tool calls (user, assistant, user) === +Common Prefix: 'system +You are Doubao, a helpful AI assistant. You may call one or more functions to assist with the user query. + +Function: +def test_function_name(param1: str,param2: str): + """ + A test function for debugging + + Args: + - param1 (str) [必填]: First parameter + - param2 (str) [必填]: Second parameter + + """ +工具调用请遵循如下格式: + + +value_1 +This is the value for the second parameter +that can span +multiple lines + + +user +Hello, please help me.assistant + + +value1 +value2 + +' +Common Suffix: 'user +Continue.' +Left (difference): '' +Right (difference): ' + + +value3 +value4 + +' + +=== Diff: Tool call with vs without reasoning_content (user, assistant) === +Common Prefix: 'system +You are Doubao, a helpful AI assistant. You may call one or more functions to assist with the user query. 
+ +Function: +def test_function_name(param1: str,param2: str): + """ + A test function for debugging + + Args: + - param1 (str) [必填]: First parameter + - param2 (str) [必填]: Second parameter + + """ +工具调用请遵循如下格式: + + +value_1 +This is the value for the second parameter +that can span +multiple lines + + +user +Hello, please help me.assistant +' +Common Suffix: ' + +value1 +value2 + +' +Left (difference): '' +Right (difference): 'I need to call the tool first. +' + +=== Checking Reasoning Variables === +No reasoning/thinking-related variables were queried by the template + +================================================================================ + ANALYZING TEMPLATE: models/templates/CohereForAI-c4ai-command-r-plus-tool_use.jinja +================================================================================ + +=== Template Capabilities (from jinja::caps) === +supports_tools: true +supports_tool_calls: true +supports_system_role: true +supports_parallel_tool_calls: true +requires_typed_content: false + +=== Diff: With vs Without Tools (single user message) === +Common Prefix: '<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|># Safety Preamble +The instructions in this section override those in the task description and style guide sections. Don't answer questions that are harmful or immoral. + +# System Preamble +## Basic Rules +You are a powerful conversational AI trained by Cohere to help people. You are augmented by a number of tools, and your job is to use and consume the output of these tools to best help the user. You will see a conversation history between yourself and a user, ending with an utterance from the user. You will then see a specific instruction instructing you what kind of response to generate. When you answer the user's requests, you cite your sources in your answers, according to those instructions. + +# User Preamble +## Task and Context +You help people answer their questions and other requests interactively. 
You will be asked a very wide array of requests on all kinds of topics. You will be equipped with a wide range of search engines or similar tools to help you, which you use to research your answer. You should focus on serving the user's needs as best you can, which will be wide-ranging. + +## Style Guide +Unless the user asks for a different style of answer, you should answer in full sentences, using proper grammar and spelling. + +## Available Tools +Here is a list of tools that you have available to you: + +' +Common Suffix: '<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Hello, please help me.<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>Write 'Action:' followed by a json-formatted list of actions that you want to perform in order to produce a good response to the user's last input. You can use any of the supplied tools any number of times, but you should aim to execute the minimum number of necessary actions for the input. You should use the `directly-answer` tool if calling the other tools is unnecessary. The list of actions you want to call should be formatted as a list of json objects, for example: +```json +[ + { + "tool_name": title of the tool in the specification, + "parameters": a dict of parameters to input into the tool as they are defined in the specs, or {} if it takes no parameters + } +]```<|END_OF_TURN_TOKEN|>' +Left (difference): '' +Right (difference): '```python +def test_function_name(param1: str, param2: str) -> List[Dict]: + """A test function for debugging + + Args: + param1 (str): First parameter + param2 (str): Second parameter + """ + pass +```' + +=== Diff: With vs Without add_generation_prompt (single user message) === +Common Prefix: '<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|># Safety Preamble +The instructions in this section override those in the task description and style guide sections. Don't answer questions that are harmful or immoral. 
+ +# System Preamble +## Basic Rules +You are a powerful conversational AI trained by Cohere to help people. You are augmented by a number of tools, and your job is to use and consume the output of these tools to best help the user. You will see a conversation history between yourself and a user, ending with an utterance from the user. You will then see a specific instruction instructing you what kind of response to generate. When you answer the user's requests, you cite your sources in your answers, according to those instructions. + +# User Preamble +## Task and Context +You help people answer their questions and other requests interactively. You will be asked a very wide array of requests on all kinds of topics. You will be equipped with a wide range of search engines or similar tools to help you, which you use to research your answer. You should focus on serving the user's needs as best you can, which will be wide-ranging. + +## Style Guide +Unless the user asks for a different style of answer, you should answer in full sentences, using proper grammar and spelling. + +## Available Tools +Here is a list of tools that you have available to you: + +<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Hello, please help me.<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>Write 'Action:' followed by a json-formatted list of actions that you want to perform in order to produce a good response to the user's last input. You can use any of the supplied tools any number of times, but you should aim to execute the minimum number of necessary actions for the input. You should use the `directly-answer` tool if calling the other tools is unnecessary. 
The list of actions you want to call should be formatted as a list of json objects, for example: +```json +[ + { + "tool_name": title of the tool in the specification, + "parameters": a dict of parameters to input into the tool as they are defined in the specs, or {} if it takes no parameters + } +]```<|END_OF_TURN_TOKEN|>' +Common Suffix: '' +Left (difference): '' +Right (difference): '<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>' + +=== Diff: With vs Without reasoning_content (user, assistant) === +Common Prefix: '<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|># Safety Preamble +The instructions in this section override those in the task description and style guide sections. Don't answer questions that are harmful or immoral. + +# System Preamble +## Basic Rules +You are a powerful conversational AI trained by Cohere to help people. You are augmented by a number of tools, and your job is to use and consume the output of these tools to best help the user. You will see a conversation history between yourself and a user, ending with an utterance from the user. You will then see a specific instruction instructing you what kind of response to generate. When you answer the user's requests, you cite your sources in your answers, according to those instructions. + +# User Preamble +## Task and Context +You help people answer their questions and other requests interactively. You will be asked a very wide array of requests on all kinds of topics. You will be equipped with a wide range of search engines or similar tools to help you, which you use to research your answer. You should focus on serving the user's needs as best you can, which will be wide-ranging. + +## Style Guide +Unless the user asks for a different style of answer, you should answer in full sentences, using proper grammar and spelling. 
+ +## Available Tools +Here is a list of tools that you have available to you: + +<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Hello, please help me.<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>I can help you with that.<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>Write 'Action:' followed by a json-formatted list of actions that you want to perform in order to produce a good response to the user's last input. You can use any of the supplied tools any number of times, but you should aim to execute the minimum number of necessary actions for the input. You should use the `directly-answer` tool if calling the other tools is unnecessary. The list of actions you want to call should be formatted as a list of json objects, for example: +```json +[ + { + "tool_name": title of the tool in the specification, + "parameters": a dict of parameters to input into the tool as they are defined in the specs, or {} if it takes no parameters + } +]```<|END_OF_TURN_TOKEN|>' +Common Suffix: '' +Left (difference): '' +Right (difference): '' + +=== Diff: With vs Without reasoning_content (user, assistant, user) === +Common Prefix: '<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|># Safety Preamble +The instructions in this section override those in the task description and style guide sections. Don't answer questions that are harmful or immoral. + +# System Preamble +## Basic Rules +You are a powerful conversational AI trained by Cohere to help people. You are augmented by a number of tools, and your job is to use and consume the output of these tools to best help the user. You will see a conversation history between yourself and a user, ending with an utterance from the user. You will then see a specific instruction instructing you what kind of response to generate. When you answer the user's requests, you cite your sources in your answers, according to those instructions. 
+ +# User Preamble +## Task and Context +You help people answer their questions and other requests interactively. You will be asked a very wide array of requests on all kinds of topics. You will be equipped with a wide range of search engines or similar tools to help you, which you use to research your answer. You should focus on serving the user's needs as best you can, which will be wide-ranging. + +## Style Guide +Unless the user asks for a different style of answer, you should answer in full sentences, using proper grammar and spelling. + +## Available Tools +Here is a list of tools that you have available to you: + +<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Hello, please help me.<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>I can help you with that.<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Thank you.<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>Write 'Action:' followed by a json-formatted list of actions that you want to perform in order to produce a good response to the user's last input. You can use any of the supplied tools any number of times, but you should aim to execute the minimum number of necessary actions for the input. You should use the `directly-answer` tool if calling the other tools is unnecessary. The list of actions you want to call should be formatted as a list of json objects, for example: +```json +[ + { + "tool_name": title of the tool in the specification, + "parameters": a dict of parameters to input into the tool as they are defined in the specs, or {} if it takes no parameters + } +]```<|END_OF_TURN_TOKEN|>' +Common Suffix: '' +Left (difference): '' +Right (difference): '' + +=== Diff: With vs Without tool call (user, assistant) === +Common Prefix: '<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|># Safety Preamble +The instructions in this section override those in the task description and style guide sections. Don't answer questions that are harmful or immoral. 
+ +# System Preamble +## Basic Rules +You are a powerful conversational AI trained by Cohere to help people. You are augmented by a number of tools, and your job is to use and consume the output of these tools to best help the user. You will see a conversation history between yourself and a user, ending with an utterance from the user. You will then see a specific instruction instructing you what kind of response to generate. When you answer the user's requests, you cite your sources in your answers, according to those instructions. + +# User Preamble +## Task and Context +You help people answer their questions and other requests interactively. You will be asked a very wide array of requests on all kinds of topics. You will be equipped with a wide range of search engines or similar tools to help you, which you use to research your answer. You should focus on serving the user's needs as best you can, which will be wide-ranging. + +## Style Guide +Unless the user asks for a different style of answer, you should answer in full sentences, using proper grammar and spelling. + +## Available Tools +Here is a list of tools that you have available to you: + +```python +def test_function_name(param1: str, param2: str) -> List[Dict]: + """A test function for debugging + + Args: + param1 (str): First parameter + param2 (str): Second parameter + """ + pass +```<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Hello, please help me.<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>' +Common Suffix: '<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>Write 'Action:' followed by a json-formatted list of actions that you want to perform in order to produce a good response to the user's last input. You can use any of the supplied tools any number of times, but you should aim to execute the minimum number of necessary actions for the input. You should use the `directly-answer` tool if calling the other tools is unnecessary. 
The list of actions you want to call should be formatted as a list of json objects, for example: +```json +[ + { + "tool_name": title of the tool in the specification, + "parameters": a dict of parameters to input into the tool as they are defined in the specs, or {} if it takes no parameters + } +]```<|END_OF_TURN_TOKEN|>' +Left (difference): 'Let me help you.<|END_OF_TURN_TOKEN|>' +Right (difference): ' +Action: +```json +[ + { + "tool_name": "test_function_name", + "parameters": { + "param1": "value1", + "param2": "value2" +} + } +]``` +' + +=== Diff: With vs Without tool call (user, assistant, user) === +Common Prefix: '<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|># Safety Preamble +The instructions in this section override those in the task description and style guide sections. Don't answer questions that are harmful or immoral. + +# System Preamble +## Basic Rules +You are a powerful conversational AI trained by Cohere to help people. You are augmented by a number of tools, and your job is to use and consume the output of these tools to best help the user. You will see a conversation history between yourself and a user, ending with an utterance from the user. You will then see a specific instruction instructing you what kind of response to generate. When you answer the user's requests, you cite your sources in your answers, according to those instructions. + +# User Preamble +## Task and Context +You help people answer their questions and other requests interactively. You will be asked a very wide array of requests on all kinds of topics. You will be equipped with a wide range of search engines or similar tools to help you, which you use to research your answer. You should focus on serving the user's needs as best you can, which will be wide-ranging. + +## Style Guide +Unless the user asks for a different style of answer, you should answer in full sentences, using proper grammar and spelling. 
+ +## Available Tools +Here is a list of tools that you have available to you: + +```python +def test_function_name(param1: str, param2: str) -> List[Dict]: + """A test function for debugging + + Args: + param1 (str): First parameter + param2 (str): Second parameter + """ + pass +```<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Hello, please help me.<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>' +Common Suffix: '<|START_OF_TURN_TOKEN|><|USER_TOKEN|>Continue.<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>Write 'Action:' followed by a json-formatted list of actions that you want to perform in order to produce a good response to the user's last input. You can use any of the supplied tools any number of times, but you should aim to execute the minimum number of necessary actions for the input. You should use the `directly-answer` tool if calling the other tools is unnecessary. The list of actions you want to call should be formatted as a list of json objects, for example: +```json +[ + { + "tool_name": title of the tool in the specification, + "parameters": a dict of parameters to input into the tool as they are defined in the specs, or {} if it takes no parameters + } +]```<|END_OF_TURN_TOKEN|>' +Left (difference): 'Let me help you.<|END_OF_TURN_TOKEN|>' +Right (difference): ' +Action: +```json +[ + { + "tool_name": "test_function_name", + "parameters": { + "param1": "value1", + "param2": "value2" +} + } +]``` +' + +=== Diff: One vs Two tool calls (user, assistant) === +Common Prefix: '<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|># Safety Preamble +The instructions in this section override those in the task description and style guide sections. Don't answer questions that are harmful or immoral. + +# System Preamble +## Basic Rules +You are a powerful conversational AI trained by Cohere to help people. You are augmented by a number of tools, and your job is to use and consume the output of these tools to best help the user. 
You will see a conversation history between yourself and a user, ending with an utterance from the user. You will then see a specific instruction instructing you what kind of response to generate. When you answer the user's requests, you cite your sources in your answers, according to those instructions. + +# User Preamble +## Task and Context +You help people answer their questions and other requests interactively. You will be asked a very wide array of requests on all kinds of topics. You will be equipped with a wide range of search engines or similar tools to help you, which you use to research your answer. You should focus on serving the user's needs as best you can, which will be wide-ranging. + +## Style Guide +Unless the user asks for a different style of answer, you should answer in full sentences, using proper grammar and spelling. + +## Available Tools +Here is a list of tools that you have available to you: + +```python +def test_function_name(param1: str, param2: str) -> List[Dict]: + """A test function for debugging + + Args: + param1 (str): First parameter + param2 (str): Second parameter + """ + pass +```<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Hello, please help me.<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|> +Action: +```json +[ + { + "tool_name": "test_function_name", + "parameters": { + "param1": "value1", + "param2": "value2" +} + }' +Common Suffix: ' +]``` +<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>Write 'Action:' followed by a json-formatted list of actions that you want to perform in order to produce a good response to the user's last input. You can use any of the supplied tools any number of times, but you should aim to execute the minimum number of necessary actions for the input. You should use the `directly-answer` tool if calling the other tools is unnecessary. 
The list of actions you want to call should be formatted as a list of json objects, for example: +```json +[ + { + "tool_name": title of the tool in the specification, + "parameters": a dict of parameters to input into the tool as they are defined in the specs, or {} if it takes no parameters + } +]```<|END_OF_TURN_TOKEN|>' +Left (difference): '' +Right (difference): ', + { + "tool_name": "test_function_name", + "parameters": { + "param1": "value3", + "param2": "value4" +} + }' + +=== Diff: One vs Two tool calls (user, assistant, user) === +Common Prefix: '<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|># Safety Preamble +The instructions in this section override those in the task description and style guide sections. Don't answer questions that are harmful or immoral. + +# System Preamble +## Basic Rules +You are a powerful conversational AI trained by Cohere to help people. You are augmented by a number of tools, and your job is to use and consume the output of these tools to best help the user. You will see a conversation history between yourself and a user, ending with an utterance from the user. You will then see a specific instruction instructing you what kind of response to generate. When you answer the user's requests, you cite your sources in your answers, according to those instructions. + +# User Preamble +## Task and Context +You help people answer their questions and other requests interactively. You will be asked a very wide array of requests on all kinds of topics. You will be equipped with a wide range of search engines or similar tools to help you, which you use to research your answer. You should focus on serving the user's needs as best you can, which will be wide-ranging. + +## Style Guide +Unless the user asks for a different style of answer, you should answer in full sentences, using proper grammar and spelling. 
+ +## Available Tools +Here is a list of tools that you have available to you: + +```python +def test_function_name(param1: str, param2: str) -> List[Dict]: + """A test function for debugging + + Args: + param1 (str): First parameter + param2 (str): Second parameter + """ + pass +```<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Hello, please help me.<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|> +Action: +```json +[ + { + "tool_name": "test_function_name", + "parameters": { + "param1": "value1", + "param2": "value2" +} + }' +Common Suffix: ' +]``` +<|START_OF_TURN_TOKEN|><|USER_TOKEN|>Continue.<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>Write 'Action:' followed by a json-formatted list of actions that you want to perform in order to produce a good response to the user's last input. You can use any of the supplied tools any number of times, but you should aim to execute the minimum number of necessary actions for the input. You should use the `directly-answer` tool if calling the other tools is unnecessary. The list of actions you want to call should be formatted as a list of json objects, for example: +```json +[ + { + "tool_name": title of the tool in the specification, + "parameters": a dict of parameters to input into the tool as they are defined in the specs, or {} if it takes no parameters + } +]```<|END_OF_TURN_TOKEN|>' +Left (difference): '' +Right (difference): ', + { + "tool_name": "test_function_name", + "parameters": { + "param1": "value3", + "param2": "value4" +} + }' + +=== Diff: Tool call with vs without reasoning_content (user, assistant) === +Common Prefix: '<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|># Safety Preamble +The instructions in this section override those in the task description and style guide sections. Don't answer questions that are harmful or immoral. + +# System Preamble +## Basic Rules +You are a powerful conversational AI trained by Cohere to help people. 
You are augmented by a number of tools, and your job is to use and consume the output of these tools to best help the user. You will see a conversation history between yourself and a user, ending with an utterance from the user. You will then see a specific instruction instructing you what kind of response to generate. When you answer the user's requests, you cite your sources in your answers, according to those instructions. + +# User Preamble +## Task and Context +You help people answer their questions and other requests interactively. You will be asked a very wide array of requests on all kinds of topics. You will be equipped with a wide range of search engines or similar tools to help you, which you use to research your answer. You should focus on serving the user's needs as best you can, which will be wide-ranging. + +## Style Guide +Unless the user asks for a different style of answer, you should answer in full sentences, using proper grammar and spelling. + +## Available Tools +Here is a list of tools that you have available to you: + +```python +def test_function_name(param1: str, param2: str) -> List[Dict]: + """A test function for debugging + + Args: + param1 (str): First parameter + param2 (str): Second parameter + """ + pass +```<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Hello, please help me.<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|> +Action: +```json +[ + { + "tool_name": "test_function_name", + "parameters": { + "param1": "value1", + "param2": "value2" +} + } +]``` +<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>Write 'Action:' followed by a json-formatted list of actions that you want to perform in order to produce a good response to the user's last input. You can use any of the supplied tools any number of times, but you should aim to execute the minimum number of necessary actions for the input. You should use the `directly-answer` tool if calling the other tools is unnecessary. 
The list of actions you want to call should be formatted as a list of json objects, for example: +```json +[ + { + "tool_name": title of the tool in the specification, + "parameters": a dict of parameters to input into the tool as they are defined in the specs, or {} if it takes no parameters + } +]```<|END_OF_TURN_TOKEN|>' +Common Suffix: '' +Left (difference): '' +Right (difference): '' + +=== Checking Reasoning Variables === +No reasoning/thinking-related variables were queried by the template + +================================================================================ + ANALYZING TEMPLATE: models/templates/CohereForAI-c4ai-command-r7b-12-2024-tool_use.jinja +================================================================================ + +=== Template Capabilities (from jinja::caps) === +supports_tools: true +supports_tool_calls: true +supports_system_role: true +supports_parallel_tool_calls: true +requires_typed_content: false + +=== Diff: With vs Without Tools (single user message) === +Common Prefix: '<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|># System Preamble +You are in contextual safety mode. You will reject requests to generate child sexual abuse material and child exploitation material in your responses. You will accept to provide information and creative content related to violence, hate, misinformation or sex, but you will not provide any content that could directly or indirectly lead to harmful outcomes. + +Your information cutoff date is June 2024. + +You have been trained on data in English, French, Spanish, Italian, German, Portuguese, Japanese, Korean, Modern Standard Arabic, Mandarin, Russian, Indonesian, Turkish, Dutch, Polish, Persian, Vietnamese, Czech, Hindi, Ukrainian, Romanian, Greek and Hebrew but have the ability to speak many more languages. +' +Common Suffix: '# Default Preamble +The following instructions are your defaults unless specified elsewhere in developer preamble or user prompt. +- Your name is Command. 
+- You are a large language model built by Cohere. +- You reply conversationally with a friendly and informative tone and often include introductory statements and follow-up questions. +- If the input is ambiguous, ask clarifying follow-up questions. +- Use Markdown-specific formatting in your response (for example to highlight phrases in bold or italics, create tables, or format code blocks). +- Use LaTeX to generate mathematical notation for complex equations. +- When responding in English, use American English unless context indicates otherwise. +- When outputting responses of more than seven sentences, split the response into paragraphs. +- Prefer the active voice. +- Adhere to the APA style guidelines for punctuation, spelling, hyphenation, capitalization, numbers, lists, and quotation marks. Do not worry about them for other elements such as italics, citations, figures, or references. +- Use gender-neutral pronouns for unspecified persons. +- Limit lists to no more than 10 items unless the list is a set of finite instructions, in which case complete the list. +- Use the third person when asked to write a summary. +- When asked to extract values from source material, use the exact form, separated by commas. +- When generating code output, please provide an explanation after the code. +- When generating code output without specifying the programming language, please generate Python code. +- If you are asked a question that requires reasoning, first think through your answer, slowly and step by step, then answer.<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Hello, please help me.<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>' +Left (difference): '' +Right (difference): ' +You have been trained to have advanced reasoning and tool-use capabilities and you should make best use of these skills to serve user's requests. 
+ +## Tool Use +Think about how you can make best use of the provided tools to help with the task and come up with a high level plan that you will execute first. + +0. Start by writing <|START_THINKING|> followed by a detailed step by step plan of how you will solve the problem. For each step explain your thinking fully and give details of required tool calls (if needed). Unless specified otherwise, you write your plan in natural language. When you finish, close it out with <|END_THINKING|>. + You can optionally choose to skip this step when the user request is so straightforward to address that only a trivial plan would be needed. + NOTE: You MUST skip this step when you are directly responding to the user's request without using any tools. + +Then carry out your plan by repeatedly executing the following steps. +1. Action: write <|START_ACTION|> followed by a list of JSON-formatted tool calls, with each one containing "tool_name" and "parameters" fields. + When there are multiple tool calls which are completely independent of each other (i.e. they can be executed in parallel), you should list them out all together in one step. When you finish, close it out with <|END_ACTION|>. +2. Observation: you will then receive results of those tool calls in JSON format in the very next turn, wrapped around by <|START_TOOL_RESULT|> and <|END_TOOL_RESULT|>. Carefully observe those results and think about what to do next. Note that these results will be provided to you in a separate turn. NEVER hallucinate results. + Every tool call produces a list of results (when a tool call produces no result or a single result, it'll still get wrapped inside a list). Each result is clearly linked to its originating tool call via its "tool_call_id". +3. Reflection: start the next turn by writing <|START_THINKING|> followed by what you've figured out so far, any changes you need to make to your plan, and what you will do next. When you finish, close it out with <|END_THINKING|>. 
+ You can optionally choose to skip this step when everything is going according to plan and no special pieces of information or reasoning chains need to be recorded. + NOTE: You MUST skip this step when you are done with tool-use actions and are ready to respond to the user. + +You can repeat the above 3 steps multiple times (could be 0 times too if no suitable tool calls are available or needed), until you decide it's time to finally respond to the user. + +4. Response: then break out of the loop and write <|START_RESPONSE|> followed by a piece of text which serves as a response to the user's last request. Use all previous tool calls and results to help you when formulating your response. When you finish, close it out with <|END_RESPONSE|>. + +## Available Tools +Here is the list of tools that you have available to you. +You can ONLY use the tools listed here. When a tool is not listed below, it is NOT available and you should NEVER attempt to use it. +Each tool is represented as a JSON object with fields like "name", "description", "parameters" (per JSON Schema), and optionally, "responses" (per JSON Schema). + +```json +[ + {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}, "responses": null} +] +``` + +' + +=== Diff: With vs Without add_generation_prompt (single user message) === +Common Prefix: '<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|># System Preamble +You are in contextual safety mode. You will reject requests to generate child sexual abuse material and child exploitation material in your responses. You will accept to provide information and creative content related to violence, hate, misinformation or sex, but you will not provide any content that could directly or indirectly lead to harmful outcomes. 
+ +Your information cutoff date is June 2024. + +You have been trained on data in English, French, Spanish, Italian, German, Portuguese, Japanese, Korean, Modern Standard Arabic, Mandarin, Russian, Indonesian, Turkish, Dutch, Polish, Persian, Vietnamese, Czech, Hindi, Ukrainian, Romanian, Greek and Hebrew but have the ability to speak many more languages. +# Default Preamble +The following instructions are your defaults unless specified elsewhere in developer preamble or user prompt. +- Your name is Command. +- You are a large language model built by Cohere. +- You reply conversationally with a friendly and informative tone and often include introductory statements and follow-up questions. +- If the input is ambiguous, ask clarifying follow-up questions. +- Use Markdown-specific formatting in your response (for example to highlight phrases in bold or italics, create tables, or format code blocks). +- Use LaTeX to generate mathematical notation for complex equations. +- When responding in English, use American English unless context indicates otherwise. +- When outputting responses of more than seven sentences, split the response into paragraphs. +- Prefer the active voice. +- Adhere to the APA style guidelines for punctuation, spelling, hyphenation, capitalization, numbers, lists, and quotation marks. Do not worry about them for other elements such as italics, citations, figures, or references. +- Use gender-neutral pronouns for unspecified persons. +- Limit lists to no more than 10 items unless the list is a set of finite instructions, in which case complete the list. +- Use the third person when asked to write a summary. +- When asked to extract values from source material, use the exact form, separated by commas. +- When generating code output, please provide an explanation after the code. +- When generating code output without specifying the programming language, please generate Python code. 
+- If you are asked a question that requires reasoning, first think through your answer, slowly and step by step, then answer.<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Hello, please help me.<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>' +Common Suffix: '' +Left (difference): '' +Right (difference): '' + +=== Diff: With vs Without reasoning_content (user, assistant) === +Common Prefix: '<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|># System Preamble +You are in contextual safety mode. You will reject requests to generate child sexual abuse material and child exploitation material in your responses. You will accept to provide information and creative content related to violence, hate, misinformation or sex, but you will not provide any content that could directly or indirectly lead to harmful outcomes. + +Your information cutoff date is June 2024. + +You have been trained on data in English, French, Spanish, Italian, German, Portuguese, Japanese, Korean, Modern Standard Arabic, Mandarin, Russian, Indonesian, Turkish, Dutch, Polish, Persian, Vietnamese, Czech, Hindi, Ukrainian, Romanian, Greek and Hebrew but have the ability to speak many more languages. +# Default Preamble +The following instructions are your defaults unless specified elsewhere in developer preamble or user prompt. +- Your name is Command. +- You are a large language model built by Cohere. +- You reply conversationally with a friendly and informative tone and often include introductory statements and follow-up questions. +- If the input is ambiguous, ask clarifying follow-up questions. +- Use Markdown-specific formatting in your response (for example to highlight phrases in bold or italics, create tables, or format code blocks). +- Use LaTeX to generate mathematical notation for complex equations. +- When responding in English, use American English unless context indicates otherwise. 
+- When outputting responses of more than seven sentences, split the response into paragraphs. +- Prefer the active voice. +- Adhere to the APA style guidelines for punctuation, spelling, hyphenation, capitalization, numbers, lists, and quotation marks. Do not worry about them for other elements such as italics, citations, figures, or references. +- Use gender-neutral pronouns for unspecified persons. +- Limit lists to no more than 10 items unless the list is a set of finite instructions, in which case complete the list. +- Use the third person when asked to write a summary. +- When asked to extract values from source material, use the exact form, separated by commas. +- When generating code output, please provide an explanation after the code. +- When generating code output without specifying the programming language, please generate Python code. +- If you are asked a question that requires reasoning, first think through your answer, slowly and step by step, then answer.<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Hello, please help me.<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|><|START_RESPONSE|>I can help you with that.<|END_RESPONSE|><|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>' +Common Suffix: '' +Left (difference): '' +Right (difference): '' + +=== Diff: With vs Without reasoning_content (user, assistant, user) === +Common Prefix: '<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|># System Preamble +You are in contextual safety mode. You will reject requests to generate child sexual abuse material and child exploitation material in your responses. You will accept to provide information and creative content related to violence, hate, misinformation or sex, but you will not provide any content that could directly or indirectly lead to harmful outcomes. + +Your information cutoff date is June 2024. 
+ +You have been trained on data in English, French, Spanish, Italian, German, Portuguese, Japanese, Korean, Modern Standard Arabic, Mandarin, Russian, Indonesian, Turkish, Dutch, Polish, Persian, Vietnamese, Czech, Hindi, Ukrainian, Romanian, Greek and Hebrew but have the ability to speak many more languages. +# Default Preamble +The following instructions are your defaults unless specified elsewhere in developer preamble or user prompt. +- Your name is Command. +- You are a large language model built by Cohere. +- You reply conversationally with a friendly and informative tone and often include introductory statements and follow-up questions. +- If the input is ambiguous, ask clarifying follow-up questions. +- Use Markdown-specific formatting in your response (for example to highlight phrases in bold or italics, create tables, or format code blocks). +- Use LaTeX to generate mathematical notation for complex equations. +- When responding in English, use American English unless context indicates otherwise. +- When outputting responses of more than seven sentences, split the response into paragraphs. +- Prefer the active voice. +- Adhere to the APA style guidelines for punctuation, spelling, hyphenation, capitalization, numbers, lists, and quotation marks. Do not worry about them for other elements such as italics, citations, figures, or references. +- Use gender-neutral pronouns for unspecified persons. +- Limit lists to no more than 10 items unless the list is a set of finite instructions, in which case complete the list. +- Use the third person when asked to write a summary. +- When asked to extract values from source material, use the exact form, separated by commas. +- When generating code output, please provide an explanation after the code. +- When generating code output without specifying the programming language, please generate Python code. 
+- If you are asked a question that requires reasoning, first think through your answer, slowly and step by step, then answer.<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Hello, please help me.<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|><|START_RESPONSE|>I can help you with that.<|END_RESPONSE|><|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Thank you.<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>' +Common Suffix: '' +Left (difference): '' +Right (difference): '' + +=== Diff: With vs Without tool call (user, assistant) === +Common Prefix: '<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|># System Preamble +You are in contextual safety mode. You will reject requests to generate child sexual abuse material and child exploitation material in your responses. You will accept to provide information and creative content related to violence, hate, misinformation or sex, but you will not provide any content that could directly or indirectly lead to harmful outcomes. + +Your information cutoff date is June 2024. + +You have been trained on data in English, French, Spanish, Italian, German, Portuguese, Japanese, Korean, Modern Standard Arabic, Mandarin, Russian, Indonesian, Turkish, Dutch, Polish, Persian, Vietnamese, Czech, Hindi, Ukrainian, Romanian, Greek and Hebrew but have the ability to speak many more languages. + +You have been trained to have advanced reasoning and tool-use capabilities and you should make best use of these skills to serve user's requests. + +## Tool Use +Think about how you can make best use of the provided tools to help with the task and come up with a high level plan that you will execute first. + +0. Start by writing <|START_THINKING|> followed by a detailed step by step plan of how you will solve the problem. For each step explain your thinking fully and give details of required tool calls (if needed). Unless specified otherwise, you write your plan in natural language. 
When you finish, close it out with <|END_THINKING|>. + You can optionally choose to skip this step when the user request is so straightforward to address that only a trivial plan would be needed. + NOTE: You MUST skip this step when you are directly responding to the user's request without using any tools. + +Then carry out your plan by repeatedly executing the following steps. +1. Action: write <|START_ACTION|> followed by a list of JSON-formatted tool calls, with each one containing "tool_name" and "parameters" fields. + When there are multiple tool calls which are completely independent of each other (i.e. they can be executed in parallel), you should list them out all together in one step. When you finish, close it out with <|END_ACTION|>. +2. Observation: you will then receive results of those tool calls in JSON format in the very next turn, wrapped around by <|START_TOOL_RESULT|> and <|END_TOOL_RESULT|>. Carefully observe those results and think about what to do next. Note that these results will be provided to you in a separate turn. NEVER hallucinate results. + Every tool call produces a list of results (when a tool call produces no result or a single result, it'll still get wrapped inside a list). Each result is clearly linked to its originating tool call via its "tool_call_id". +3. Reflection: start the next turn by writing <|START_THINKING|> followed by what you've figured out so far, any changes you need to make to your plan, and what you will do next. When you finish, close it out with <|END_THINKING|>. + You can optionally choose to skip this step when everything is going according to plan and no special pieces of information or reasoning chains need to be recorded. + NOTE: You MUST skip this step when you are done with tool-use actions and are ready to respond to the user. 
+ +You can repeat the above 3 steps multiple times (could be 0 times too if no suitable tool calls are available or needed), until you decide it's time to finally respond to the user. + +4. Response: then break out of the loop and write <|START_RESPONSE|> followed by a piece of text which serves as a response to the user's last request. Use all previous tool calls and results to help you when formulating your response. When you finish, close it out with <|END_RESPONSE|>. + +## Available Tools +Here is the list of tools that you have available to you. +You can ONLY use the tools listed here. When a tool is not listed below, it is NOT available and you should NEVER attempt to use it. +Each tool is represented as a JSON object with fields like "name", "description", "parameters" (per JSON Schema), and optionally, "responses" (per JSON Schema). + +```json +[ + {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}, "responses": null} +] +``` + +# Default Preamble +The following instructions are your defaults unless specified elsewhere in developer preamble or user prompt. +- Your name is Command. +- You are a large language model built by Cohere. +- You reply conversationally with a friendly and informative tone and often include introductory statements and follow-up questions. +- If the input is ambiguous, ask clarifying follow-up questions. +- Use Markdown-specific formatting in your response (for example to highlight phrases in bold or italics, create tables, or format code blocks). +- Use LaTeX to generate mathematical notation for complex equations. +- When responding in English, use American English unless context indicates otherwise. +- When outputting responses of more than seven sentences, split the response into paragraphs. 
+- Prefer the active voice. +- Adhere to the APA style guidelines for punctuation, spelling, hyphenation, capitalization, numbers, lists, and quotation marks. Do not worry about them for other elements such as italics, citations, figures, or references. +- Use gender-neutral pronouns for unspecified persons. +- Limit lists to no more than 10 items unless the list is a set of finite instructions, in which case complete the list. +- Use the third person when asked to write a summary. +- When asked to extract values from source material, use the exact form, separated by commas. +- When generating code output, please provide an explanation after the code. +- When generating code output without specifying the programming language, please generate Python code. +- If you are asked a question that requires reasoning, first think through your answer, slowly and step by step, then answer.<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Hello, please help me.<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>' +Common Suffix: '<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>' +Left (difference): '<|START_RESPONSE|>Let me help you.<|END_RESPONSE|>' +Right (difference): '<|START_THINKING|><|END_THINKING|><|START_ACTION|>[ + {"tool_call_id": "0", "tool_name": "test_function_name", "parameters": {"param1": "value1", "param2": "value2"}} +]<|END_ACTION|>' + +=== Diff: With vs Without tool call (user, assistant, user) === +Common Prefix: '<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|># System Preamble +You are in contextual safety mode. You will reject requests to generate child sexual abuse material and child exploitation material in your responses. You will accept to provide information and creative content related to violence, hate, misinformation or sex, but you will not provide any content that could directly or indirectly lead to harmful outcomes. + +Your information cutoff date is June 2024. 
+ +You have been trained on data in English, French, Spanish, Italian, German, Portuguese, Japanese, Korean, Modern Standard Arabic, Mandarin, Russian, Indonesian, Turkish, Dutch, Polish, Persian, Vietnamese, Czech, Hindi, Ukrainian, Romanian, Greek and Hebrew but have the ability to speak many more languages. + +You have been trained to have advanced reasoning and tool-use capabilities and you should make best use of these skills to serve user's requests. + +## Tool Use +Think about how you can make best use of the provided tools to help with the task and come up with a high level plan that you will execute first. + +0. Start by writing <|START_THINKING|> followed by a detailed step by step plan of how you will solve the problem. For each step explain your thinking fully and give details of required tool calls (if needed). Unless specified otherwise, you write your plan in natural language. When you finish, close it out with <|END_THINKING|>. + You can optionally choose to skip this step when the user request is so straightforward to address that only a trivial plan would be needed. + NOTE: You MUST skip this step when you are directly responding to the user's request without using any tools. + +Then carry out your plan by repeatedly executing the following steps. +1. Action: write <|START_ACTION|> followed by a list of JSON-formatted tool calls, with each one containing "tool_name" and "parameters" fields. + When there are multiple tool calls which are completely independent of each other (i.e. they can be executed in parallel), you should list them out all together in one step. When you finish, close it out with <|END_ACTION|>. +2. Observation: you will then receive results of those tool calls in JSON format in the very next turn, wrapped around by <|START_TOOL_RESULT|> and <|END_TOOL_RESULT|>. Carefully observe those results and think about what to do next. Note that these results will be provided to you in a separate turn. NEVER hallucinate results. 
+ Every tool call produces a list of results (when a tool call produces no result or a single result, it'll still get wrapped inside a list). Each result is clearly linked to its originating tool call via its "tool_call_id". +3. Reflection: start the next turn by writing <|START_THINKING|> followed by what you've figured out so far, any changes you need to make to your plan, and what you will do next. When you finish, close it out with <|END_THINKING|>. + You can optionally choose to skip this step when everything is going according to plan and no special pieces of information or reasoning chains need to be recorded. + NOTE: You MUST skip this step when you are done with tool-use actions and are ready to respond to the user. + +You can repeat the above 3 steps multiple times (could be 0 times too if no suitable tool calls are available or needed), until you decide it's time to finally respond to the user. + +4. Response: then break out of the loop and write <|START_RESPONSE|> followed by a piece of text which serves as a response to the user's last request. Use all previous tool calls and results to help you when formulating your response. When you finish, close it out with <|END_RESPONSE|>. + +## Available Tools +Here is the list of tools that you have available to you. +You can ONLY use the tools listed here. When a tool is not listed below, it is NOT available and you should NEVER attempt to use it. +Each tool is represented as a JSON object with fields like "name", "description", "parameters" (per JSON Schema), and optionally, "responses" (per JSON Schema). 
+ +```json +[ + {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}, "responses": null} +] +``` + +# Default Preamble +The following instructions are your defaults unless specified elsewhere in developer preamble or user prompt. +- Your name is Command. +- You are a large language model built by Cohere. +- You reply conversationally with a friendly and informative tone and often include introductory statements and follow-up questions. +- If the input is ambiguous, ask clarifying follow-up questions. +- Use Markdown-specific formatting in your response (for example to highlight phrases in bold or italics, create tables, or format code blocks). +- Use LaTeX to generate mathematical notation for complex equations. +- When responding in English, use American English unless context indicates otherwise. +- When outputting responses of more than seven sentences, split the response into paragraphs. +- Prefer the active voice. +- Adhere to the APA style guidelines for punctuation, spelling, hyphenation, capitalization, numbers, lists, and quotation marks. Do not worry about them for other elements such as italics, citations, figures, or references. +- Use gender-neutral pronouns for unspecified persons. +- Limit lists to no more than 10 items unless the list is a set of finite instructions, in which case complete the list. +- Use the third person when asked to write a summary. +- When asked to extract values from source material, use the exact form, separated by commas. +- When generating code output, please provide an explanation after the code. +- When generating code output without specifying the programming language, please generate Python code. 
+- If you are asked a question that requires reasoning, first think through your answer, slowly and step by step, then answer.<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Hello, please help me.<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>' +Common Suffix: '<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Continue.<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>' +Left (difference): '<|START_RESPONSE|>Let me help you.<|END_RESPONSE|>' +Right (difference): '<|START_THINKING|><|END_THINKING|><|START_ACTION|>[ + {"tool_call_id": "0", "tool_name": "test_function_name", "parameters": {"param1": "value1", "param2": "value2"}} +]<|END_ACTION|>' + +=== Diff: One vs Two tool calls (user, assistant) === +Common Prefix: '<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|># System Preamble +You are in contextual safety mode. You will reject requests to generate child sexual abuse material and child exploitation material in your responses. You will accept to provide information and creative content related to violence, hate, misinformation or sex, but you will not provide any content that could directly or indirectly lead to harmful outcomes. + +Your information cutoff date is June 2024. + +You have been trained on data in English, French, Spanish, Italian, German, Portuguese, Japanese, Korean, Modern Standard Arabic, Mandarin, Russian, Indonesian, Turkish, Dutch, Polish, Persian, Vietnamese, Czech, Hindi, Ukrainian, Romanian, Greek and Hebrew but have the ability to speak many more languages. + +You have been trained to have advanced reasoning and tool-use capabilities and you should make best use of these skills to serve user's requests. + +## Tool Use +Think about how you can make best use of the provided tools to help with the task and come up with a high level plan that you will execute first. + +0. Start by writing <|START_THINKING|> followed by a detailed step by step plan of how you will solve the problem. 
For each step explain your thinking fully and give details of required tool calls (if needed). Unless specified otherwise, you write your plan in natural language. When you finish, close it out with <|END_THINKING|>. + You can optionally choose to skip this step when the user request is so straightforward to address that only a trivial plan would be needed. + NOTE: You MUST skip this step when you are directly responding to the user's request without using any tools. + +Then carry out your plan by repeatedly executing the following steps. +1. Action: write <|START_ACTION|> followed by a list of JSON-formatted tool calls, with each one containing "tool_name" and "parameters" fields. + When there are multiple tool calls which are completely independent of each other (i.e. they can be executed in parallel), you should list them out all together in one step. When you finish, close it out with <|END_ACTION|>. +2. Observation: you will then receive results of those tool calls in JSON format in the very next turn, wrapped around by <|START_TOOL_RESULT|> and <|END_TOOL_RESULT|>. Carefully observe those results and think about what to do next. Note that these results will be provided to you in a separate turn. NEVER hallucinate results. + Every tool call produces a list of results (when a tool call produces no result or a single result, it'll still get wrapped inside a list). Each result is clearly linked to its originating tool call via its "tool_call_id". +3. Reflection: start the next turn by writing <|START_THINKING|> followed by what you've figured out so far, any changes you need to make to your plan, and what you will do next. When you finish, close it out with <|END_THINKING|>. + You can optionally choose to skip this step when everything is going according to plan and no special pieces of information or reasoning chains need to be recorded. + NOTE: You MUST skip this step when you are done with tool-use actions and are ready to respond to the user. 
+ +You can repeat the above 3 steps multiple times (could be 0 times too if no suitable tool calls are available or needed), until you decide it's time to finally respond to the user. + +4. Response: then break out of the loop and write <|START_RESPONSE|> followed by a piece of text which serves as a response to the user's last request. Use all previous tool calls and results to help you when formulating your response. When you finish, close it out with <|END_RESPONSE|>. + +## Available Tools +Here is the list of tools that you have available to you. +You can ONLY use the tools listed here. When a tool is not listed below, it is NOT available and you should NEVER attempt to use it. +Each tool is represented as a JSON object with fields like "name", "description", "parameters" (per JSON Schema), and optionally, "responses" (per JSON Schema). + +```json +[ + {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}, "responses": null} +] +``` + +# Default Preamble +The following instructions are your defaults unless specified elsewhere in developer preamble or user prompt. +- Your name is Command. +- You are a large language model built by Cohere. +- You reply conversationally with a friendly and informative tone and often include introductory statements and follow-up questions. +- If the input is ambiguous, ask clarifying follow-up questions. +- Use Markdown-specific formatting in your response (for example to highlight phrases in bold or italics, create tables, or format code blocks). +- Use LaTeX to generate mathematical notation for complex equations. +- When responding in English, use American English unless context indicates otherwise. +- When outputting responses of more than seven sentences, split the response into paragraphs. 
+- Prefer the active voice. +- Adhere to the APA style guidelines for punctuation, spelling, hyphenation, capitalization, numbers, lists, and quotation marks. Do not worry about them for other elements such as italics, citations, figures, or references. +- Use gender-neutral pronouns for unspecified persons. +- Limit lists to no more than 10 items unless the list is a set of finite instructions, in which case complete the list. +- Use the third person when asked to write a summary. +- When asked to extract values from source material, use the exact form, separated by commas. +- When generating code output, please provide an explanation after the code. +- When generating code output without specifying the programming language, please generate Python code. +- If you are asked a question that requires reasoning, first think through your answer, slowly and step by step, then answer.<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Hello, please help me.<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|><|START_THINKING|><|END_THINKING|><|START_ACTION|>[ + {"tool_call_id": "0", "tool_name": "test_function_name", "parameters": {"param1": "value1", "param2": "value2"}}' +Common Suffix: ' +]<|END_ACTION|><|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>' +Left (difference): '' +Right (difference): ', + {"tool_call_id": "1", "tool_name": "test_function_name", "parameters": {"param1": "value3", "param2": "value4"}}' + +=== Diff: One vs Two tool calls (user, assistant, user) === +Common Prefix: '<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|># System Preamble +You are in contextual safety mode. You will reject requests to generate child sexual abuse material and child exploitation material in your responses. You will accept to provide information and creative content related to violence, hate, misinformation or sex, but you will not provide any content that could directly or indirectly lead to harmful outcomes. 
+ +Your information cutoff date is June 2024. + +You have been trained on data in English, French, Spanish, Italian, German, Portuguese, Japanese, Korean, Modern Standard Arabic, Mandarin, Russian, Indonesian, Turkish, Dutch, Polish, Persian, Vietnamese, Czech, Hindi, Ukrainian, Romanian, Greek and Hebrew but have the ability to speak many more languages. + +You have been trained to have advanced reasoning and tool-use capabilities and you should make best use of these skills to serve user's requests. + +## Tool Use +Think about how you can make best use of the provided tools to help with the task and come up with a high level plan that you will execute first. + +0. Start by writing <|START_THINKING|> followed by a detailed step by step plan of how you will solve the problem. For each step explain your thinking fully and give details of required tool calls (if needed). Unless specified otherwise, you write your plan in natural language. When you finish, close it out with <|END_THINKING|>. + You can optionally choose to skip this step when the user request is so straightforward to address that only a trivial plan would be needed. + NOTE: You MUST skip this step when you are directly responding to the user's request without using any tools. + +Then carry out your plan by repeatedly executing the following steps. +1. Action: write <|START_ACTION|> followed by a list of JSON-formatted tool calls, with each one containing "tool_name" and "parameters" fields. + When there are multiple tool calls which are completely independent of each other (i.e. they can be executed in parallel), you should list them out all together in one step. When you finish, close it out with <|END_ACTION|>. +2. Observation: you will then receive results of those tool calls in JSON format in the very next turn, wrapped around by <|START_TOOL_RESULT|> and <|END_TOOL_RESULT|>. Carefully observe those results and think about what to do next. 
Note that these results will be provided to you in a separate turn. NEVER hallucinate results. + Every tool call produces a list of results (when a tool call produces no result or a single result, it'll still get wrapped inside a list). Each result is clearly linked to its originating tool call via its "tool_call_id". +3. Reflection: start the next turn by writing <|START_THINKING|> followed by what you've figured out so far, any changes you need to make to your plan, and what you will do next. When you finish, close it out with <|END_THINKING|>. + You can optionally choose to skip this step when everything is going according to plan and no special pieces of information or reasoning chains need to be recorded. + NOTE: You MUST skip this step when you are done with tool-use actions and are ready to respond to the user. + +You can repeat the above 3 steps multiple times (could be 0 times too if no suitable tool calls are available or needed), until you decide it's time to finally respond to the user. + +4. Response: then break out of the loop and write <|START_RESPONSE|> followed by a piece of text which serves as a response to the user's last request. Use all previous tool calls and results to help you when formulating your response. When you finish, close it out with <|END_RESPONSE|>. + +## Available Tools +Here is the list of tools that you have available to you. +You can ONLY use the tools listed here. When a tool is not listed below, it is NOT available and you should NEVER attempt to use it. +Each tool is represented as a JSON object with fields like "name", "description", "parameters" (per JSON Schema), and optionally, "responses" (per JSON Schema). 
+ +```json +[ + {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}, "responses": null} +] +``` + +# Default Preamble +The following instructions are your defaults unless specified elsewhere in developer preamble or user prompt. +- Your name is Command. +- You are a large language model built by Cohere. +- You reply conversationally with a friendly and informative tone and often include introductory statements and follow-up questions. +- If the input is ambiguous, ask clarifying follow-up questions. +- Use Markdown-specific formatting in your response (for example to highlight phrases in bold or italics, create tables, or format code blocks). +- Use LaTeX to generate mathematical notation for complex equations. +- When responding in English, use American English unless context indicates otherwise. +- When outputting responses of more than seven sentences, split the response into paragraphs. +- Prefer the active voice. +- Adhere to the APA style guidelines for punctuation, spelling, hyphenation, capitalization, numbers, lists, and quotation marks. Do not worry about them for other elements such as italics, citations, figures, or references. +- Use gender-neutral pronouns for unspecified persons. +- Limit lists to no more than 10 items unless the list is a set of finite instructions, in which case complete the list. +- Use the third person when asked to write a summary. +- When asked to extract values from source material, use the exact form, separated by commas. +- When generating code output, please provide an explanation after the code. +- When generating code output without specifying the programming language, please generate Python code. 
+- If you are asked a question that requires reasoning, first think through your answer, slowly and step by step, then answer.<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Hello, please help me.<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|><|START_THINKING|><|END_THINKING|><|START_ACTION|>[ + {"tool_call_id": "0", "tool_name": "test_function_name", "parameters": {"param1": "value1", "param2": "value2"}}' +Common Suffix: ' +]<|END_ACTION|><|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Continue.<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>' +Left (difference): '' +Right (difference): ', + {"tool_call_id": "1", "tool_name": "test_function_name", "parameters": {"param1": "value3", "param2": "value4"}}' + +=== Diff: Tool call with vs without reasoning_content (user, assistant) === +Common Prefix: '<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|># System Preamble +You are in contextual safety mode. You will reject requests to generate child sexual abuse material and child exploitation material in your responses. You will accept to provide information and creative content related to violence, hate, misinformation or sex, but you will not provide any content that could directly or indirectly lead to harmful outcomes. + +Your information cutoff date is June 2024. + +You have been trained on data in English, French, Spanish, Italian, German, Portuguese, Japanese, Korean, Modern Standard Arabic, Mandarin, Russian, Indonesian, Turkish, Dutch, Polish, Persian, Vietnamese, Czech, Hindi, Ukrainian, Romanian, Greek and Hebrew but have the ability to speak many more languages. + +You have been trained to have advanced reasoning and tool-use capabilities and you should make best use of these skills to serve user's requests. + +## Tool Use +Think about how you can make best use of the provided tools to help with the task and come up with a high level plan that you will execute first. + +0. 
Start by writing <|START_THINKING|> followed by a detailed step by step plan of how you will solve the problem. For each step explain your thinking fully and give details of required tool calls (if needed). Unless specified otherwise, you write your plan in natural language. When you finish, close it out with <|END_THINKING|>. + You can optionally choose to skip this step when the user request is so straightforward to address that only a trivial plan would be needed. + NOTE: You MUST skip this step when you are directly responding to the user's request without using any tools. + +Then carry out your plan by repeatedly executing the following steps. +1. Action: write <|START_ACTION|> followed by a list of JSON-formatted tool calls, with each one containing "tool_name" and "parameters" fields. + When there are multiple tool calls which are completely independent of each other (i.e. they can be executed in parallel), you should list them out all together in one step. When you finish, close it out with <|END_ACTION|>. +2. Observation: you will then receive results of those tool calls in JSON format in the very next turn, wrapped around by <|START_TOOL_RESULT|> and <|END_TOOL_RESULT|>. Carefully observe those results and think about what to do next. Note that these results will be provided to you in a separate turn. NEVER hallucinate results. + Every tool call produces a list of results (when a tool call produces no result or a single result, it'll still get wrapped inside a list). Each result is clearly linked to its originating tool call via its "tool_call_id". +3. Reflection: start the next turn by writing <|START_THINKING|> followed by what you've figured out so far, any changes you need to make to your plan, and what you will do next. When you finish, close it out with <|END_THINKING|>. + You can optionally choose to skip this step when everything is going according to plan and no special pieces of information or reasoning chains need to be recorded. 
+ NOTE: You MUST skip this step when you are done with tool-use actions and are ready to respond to the user. + +You can repeat the above 3 steps multiple times (could be 0 times too if no suitable tool calls are available or needed), until you decide it's time to finally respond to the user. + +4. Response: then break out of the loop and write <|START_RESPONSE|> followed by a piece of text which serves as a response to the user's last request. Use all previous tool calls and results to help you when formulating your response. When you finish, close it out with <|END_RESPONSE|>. + +## Available Tools +Here is the list of tools that you have available to you. +You can ONLY use the tools listed here. When a tool is not listed below, it is NOT available and you should NEVER attempt to use it. +Each tool is represented as a JSON object with fields like "name", "description", "parameters" (per JSON Schema), and optionally, "responses" (per JSON Schema). + +```json +[ + {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}, "responses": null} +] +``` + +# Default Preamble +The following instructions are your defaults unless specified elsewhere in developer preamble or user prompt. +- Your name is Command. +- You are a large language model built by Cohere. +- You reply conversationally with a friendly and informative tone and often include introductory statements and follow-up questions. +- If the input is ambiguous, ask clarifying follow-up questions. +- Use Markdown-specific formatting in your response (for example to highlight phrases in bold or italics, create tables, or format code blocks). +- Use LaTeX to generate mathematical notation for complex equations. 
+- When responding in English, use American English unless context indicates otherwise. +- When outputting responses of more than seven sentences, split the response into paragraphs. +- Prefer the active voice. +- Adhere to the APA style guidelines for punctuation, spelling, hyphenation, capitalization, numbers, lists, and quotation marks. Do not worry about them for other elements such as italics, citations, figures, or references. +- Use gender-neutral pronouns for unspecified persons. +- Limit lists to no more than 10 items unless the list is a set of finite instructions, in which case complete the list. +- Use the third person when asked to write a summary. +- When asked to extract values from source material, use the exact form, separated by commas. +- When generating code output, please provide an explanation after the code. +- When generating code output without specifying the programming language, please generate Python code. +- If you are asked a question that requires reasoning, first think through your answer, slowly and step by step, then answer.<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Hello, please help me.<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|><|START_THINKING|><|END_THINKING|><|START_ACTION|>[ + {"tool_call_id": "0", "tool_name": "test_function_name", "parameters": {"param1": "value1", "param2": "value2"}} +]<|END_ACTION|><|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>' +Common Suffix: '' +Left (difference): '' +Right (difference): '' + +=== Checking Reasoning Variables === +No reasoning/thinking-related variables were queried by the template + +================================================================================ + ANALYZING TEMPLATE: models/templates/GLM-4.6.jinja +================================================================================ + +=== Template Capabilities (from jinja::caps) === +supports_tools: false +supports_tool_calls: true +supports_system_role: true 
+supports_parallel_tool_calls: true +requires_typed_content: false + +=== Diff: With vs Without Tools (single user message) === +Common Prefix: '[gMASK]' +Common Suffix: '<|user|> +Hello, please help me.' +Left (difference): '' +Right (difference): '<|system|> +# Tools + +You may call one or more functions to assist with the user query. + +You are provided with function signatures within XML tags: + +{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}} + + +For each function call, output the function name and arguments within the following XML format: +{function-name} +{arg-key-1} +{arg-value-1} +{arg-key-2} +{arg-value-2} +... +' + +=== Diff: With vs Without add_generation_prompt (single user message) === +Common Prefix: '[gMASK]<|user|> +Hello, please help me.' +Common Suffix: '' +Left (difference): '' +Right (difference): '<|assistant|>' + +=== Diff: With vs Without reasoning_content (user, assistant) === +Common Prefix: '[gMASK]<|user|> +Hello, please help me.<|assistant|> +' +Common Suffix: ' +I can help you with that.' +Left (difference): '' +Right (difference): 'The user is asking for help. I should respond positively.' + +=== Diff: With vs Without reasoning_content (user, assistant, user) === +Common Prefix: '[gMASK]<|user|> +Hello, please help me.<|assistant|> + +I can help you with that.<|user|> +Thank you.' +Common Suffix: '' +Left (difference): '' +Right (difference): '' + +=== Diff: With vs Without tool call (user, assistant) === +Common Prefix: '[gMASK]<|system|> +# Tools + +You may call one or more functions to assist with the user query. 
+ +You are provided with function signatures within XML tags: + +{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}} + + +For each function call, output the function name and arguments within the following XML format: +{function-name} +{arg-key-1} +{arg-value-1} +{arg-key-2} +{arg-value-2} +... +<|user|> +Hello, please help me.<|assistant|> + +' +Common Suffix: '' +Left (difference): 'Let me help you.' +Right (difference): 'test_function_name +param1 +value1 +param2 +value2 +' + +=== Diff: With vs Without tool call (user, assistant, user) === +Common Prefix: '[gMASK]<|system|> +# Tools + +You may call one or more functions to assist with the user query. + +You are provided with function signatures within XML tags: + +{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}} + + +For each function call, output the function name and arguments within the following XML format: +{function-name} +{arg-key-1} +{arg-value-1} +{arg-key-2} +{arg-value-2} +... +<|user|> +Hello, please help me.<|assistant|> + +' +Common Suffix: '<|user|> +Continue.' +Left (difference): 'Let me help you.' +Right (difference): 'test_function_name +param1 +value1 +param2 +value2 +' + +=== Diff: One vs Two tool calls (user, assistant) === +Common Prefix: '[gMASK]<|system|> +# Tools + +You may call one or more functions to assist with the user query. 
+ +You are provided with function signatures within XML tags: + +{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}} + + +For each function call, output the function name and arguments within the following XML format: +{function-name} +{arg-key-1} +{arg-value-1} +{arg-key-2} +{arg-value-2} +... +<|user|> +Hello, please help me.<|assistant|> + +test_function_name +param1 +value1 +param2 +value2 +' +Common Suffix: '' +Left (difference): '' +Right (difference): ' +test_function_name +param1 +value3 +param2 +value4 +' + +=== Diff: One vs Two tool calls (user, assistant, user) === +Common Prefix: '[gMASK]<|system|> +# Tools + +You may call one or more functions to assist with the user query. + +You are provided with function signatures within XML tags: + +{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}} + + +For each function call, output the function name and arguments within the following XML format: +{function-name} +{arg-key-1} +{arg-value-1} +{arg-key-2} +{arg-value-2} +... +<|user|> +Hello, please help me.<|assistant|> + +test_function_name +param1 +value1 +param2 +value2 +' +Common Suffix: '<|user|> +Continue.' +Left (difference): '' +Right (difference): ' +test_function_name +param1 +value3 +param2 +value4 +' + +=== Diff: Tool call with vs without reasoning_content (user, assistant) === +Common Prefix: '[gMASK]<|system|> +# Tools + +You may call one or more functions to assist with the user query. 
+ +You are provided with function signatures within XML tags: + +{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}} + + +For each function call, output the function name and arguments within the following XML format: +{function-name} +{arg-key-1} +{arg-value-1} +{arg-key-2} +{arg-value-2} +... +<|user|> +Hello, please help me.<|assistant|> +' +Common Suffix: ' +test_function_name +param1 +value1 +param2 +value2 +' +Left (difference): '' +Right (difference): 'I need to call the tool first.' + +=== Checking Reasoning Variables === +No reasoning/thinking-related variables were queried by the template + +================================================================================ + ANALYZING TEMPLATE: models/templates/GLM-4.7-Flash.jinja +================================================================================ + +=== Template Capabilities (from jinja::caps) === +supports_tools: false +supports_tool_calls: true +supports_system_role: true +supports_parallel_tool_calls: true +requires_typed_content: false + +=== Diff: With vs Without Tools (single user message) === +Common Prefix: '[gMASK]' +Common Suffix: '<|user|>Hello, please help me.' +Left (difference): '' +Right (difference): '<|system|> +# Tools + +You may call one or more functions to assist with the user query. 
+ +You are provided with function signatures within XML tags: + +{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}} + + +For each function call, output the function name and arguments within the following XML format: +{function-name}{arg-key-1}{arg-value-1}{arg-key-2}{arg-value-2}...' + +=== Diff: With vs Without add_generation_prompt (single user message) === +Common Prefix: '[gMASK]<|user|>Hello, please help me.' +Common Suffix: '' +Left (difference): '' +Right (difference): '<|assistant|>' + +=== Diff: With vs Without reasoning_content (user, assistant) === +Common Prefix: '[gMASK]<|user|>Hello, please help me.<|assistant|>' +Common Suffix: 'I can help you with that.' +Left (difference): '' +Right (difference): 'The user is asking for help. I should respond positively.' + +=== Diff: With vs Without reasoning_content (user, assistant, user) === +Common Prefix: '[gMASK]<|user|>Hello, please help me.<|assistant|>I can help you with that.<|user|>Thank you.' +Common Suffix: '' +Left (difference): '' +Right (difference): '' + +=== Diff: With vs Without tool call (user, assistant) === +Common Prefix: '[gMASK]<|system|> +# Tools + +You may call one or more functions to assist with the user query. 
+ +You are provided with function signatures within XML tags: + +{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}} + + +For each function call, output the function name and arguments within the following XML format: +{function-name}{arg-key-1}{arg-value-1}{arg-key-2}{arg-value-2}...<|user|>Hello, please help me.<|assistant|>' +Common Suffix: '' +Left (difference): 'Let me help you.' +Right (difference): 'test_function_nameparam1value1param2value2' + +=== Diff: With vs Without tool call (user, assistant, user) === +Common Prefix: '[gMASK]<|system|> +# Tools + +You may call one or more functions to assist with the user query. + +You are provided with function signatures within XML tags: + +{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}} + + +For each function call, output the function name and arguments within the following XML format: +{function-name}{arg-key-1}{arg-value-1}{arg-key-2}{arg-value-2}...<|user|>Hello, please help me.<|assistant|>' +Common Suffix: '<|user|>Continue.' +Left (difference): 'Let me help you.' +Right (difference): 'test_function_nameparam1value1param2value2' + +=== Diff: One vs Two tool calls (user, assistant) === +Common Prefix: '[gMASK]<|system|> +# Tools + +You may call one or more functions to assist with the user query. 
+ +You are provided with function signatures within XML tags: + +{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}} + + +For each function call, output the function name and arguments within the following XML format: +{function-name}{arg-key-1}{arg-value-1}{arg-key-2}{arg-value-2}...<|user|>Hello, please help me.<|assistant|>test_function_nameparam1value1param2value2' +Common Suffix: '' +Left (difference): '' +Right (difference): 'test_function_nameparam1value3param2value4' + +=== Diff: One vs Two tool calls (user, assistant, user) === +Common Prefix: '[gMASK]<|system|> +# Tools + +You may call one or more functions to assist with the user query. + +You are provided with function signatures within XML tags: + +{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}} + + +For each function call, output the function name and arguments within the following XML format: +{function-name}{arg-key-1}{arg-value-1}{arg-key-2}{arg-value-2}...<|user|>Hello, please help me.<|assistant|>test_function_nameparam1value1param2value2' +Common Suffix: '<|user|>Continue.' +Left (difference): '' +Right (difference): 'test_function_nameparam1value3param2value4' + +=== Diff: Tool call with vs without reasoning_content (user, assistant) === +Common Prefix: '[gMASK]<|system|> +# Tools + +You may call one or more functions to assist with the user query. 
+ +You are provided with function signatures within XML tags: + +{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}} + + +For each function call, output the function name and arguments within the following XML format: +{function-name}{arg-key-1}{arg-value-1}{arg-key-2}{arg-value-2}...<|user|>Hello, please help me.<|assistant|>' +Common Suffix: 'test_function_nameparam1value1param2value2' +Left (difference): '' +Right (difference): 'I need to call the tool first.' + +=== Checking Reasoning Variables === +No reasoning/thinking-related variables were queried by the template + +================================================================================ + ANALYZING TEMPLATE: models/templates/Kimi-K2-Instruct.jinja +================================================================================ + +=== Template Capabilities (from jinja::caps) === +supports_tools: false +supports_tool_calls: true +supports_system_role: true +supports_parallel_tool_calls: true +requires_typed_content: false + +=== Diff: With vs Without Tools (single user message) === +Common Prefix: '<|im_system|>' +Common Suffix: 'system<|im_middle|>You are Kimi, an AI assistant created by Moonshot AI.<|im_end|> +<|im_user|>user<|im_middle|>Hello, please help me.<|im_end|>' +Left (difference): '' +Right (difference): 'tool_declare<|im_middle|>[{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}}]<|im_end|><|im_system|>' + +=== Diff: With vs Without 
add_generation_prompt (single user message) === +Common Prefix: '<|im_system|>system<|im_middle|>You are Kimi, an AI assistant created by Moonshot AI.<|im_end|> +<|im_user|>user<|im_middle|>Hello, please help me.<|im_end|>' +Common Suffix: '' +Left (difference): '' +Right (difference): '<|im_assistant|>assistant<|im_middle|>' + +=== Diff: With vs Without reasoning_content (user, assistant) === +Common Prefix: '<|im_system|>system<|im_middle|>You are Kimi, an AI assistant created by Moonshot AI.<|im_end|> +<|im_user|>user<|im_middle|>Hello, please help me.<|im_end|><|im_assistant|>assistant<|im_middle|>I can help you with that.<|im_end|>' +Common Suffix: '' +Left (difference): '' +Right (difference): '' + +=== Diff: With vs Without reasoning_content (user, assistant, user) === +Common Prefix: '<|im_system|>system<|im_middle|>You are Kimi, an AI assistant created by Moonshot AI.<|im_end|> +<|im_user|>user<|im_middle|>Hello, please help me.<|im_end|><|im_assistant|>assistant<|im_middle|>I can help you with that.<|im_end|><|im_user|>user<|im_middle|>Thank you.<|im_end|>' +Common Suffix: '' +Left (difference): '' +Right (difference): '' + +=== Diff: With vs Without tool call (user, assistant) === +Common Prefix: '<|im_system|>tool_declare<|im_middle|>[{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}}]<|im_end|><|im_system|>system<|im_middle|>You are Kimi, an AI assistant created by Moonshot AI.<|im_end|> +<|im_user|>user<|im_middle|>Hello, please help me.<|im_end|><|im_assistant|>assistant<|im_middle|>' +Common Suffix: '<|im_end|>' +Left (difference): 'Let me help you.' 
+Right (difference): '<|tool_calls_section_begin|><|tool_call_begin|>functions.test_function_name:0<|tool_call_argument_begin|>{"param1": "value1", "param2": "value2"}<|tool_call_end|><|tool_calls_section_end|>' + +=== Diff: With vs Without tool call (user, assistant, user) === +Common Prefix: '<|im_system|>tool_declare<|im_middle|>[{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}}]<|im_end|><|im_system|>system<|im_middle|>You are Kimi, an AI assistant created by Moonshot AI.<|im_end|> +<|im_user|>user<|im_middle|>Hello, please help me.<|im_end|><|im_assistant|>assistant<|im_middle|>' +Common Suffix: '<|im_end|><|im_user|>user<|im_middle|>Continue.<|im_end|>' +Left (difference): 'Let me help you.' +Right (difference): '<|tool_calls_section_begin|><|tool_call_begin|>functions.test_function_name:0<|tool_call_argument_begin|>{"param1": "value1", "param2": "value2"}<|tool_call_end|><|tool_calls_section_end|>' + +=== Diff: One vs Two tool calls (user, assistant) === +Common Prefix: '<|im_system|>tool_declare<|im_middle|>[{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}}]<|im_end|><|im_system|>system<|im_middle|>You are Kimi, an AI assistant created by Moonshot AI.<|im_end|> +<|im_user|>user<|im_middle|>Hello, please help me.<|im_end|><|im_assistant|>assistant<|im_middle|><|tool_calls_section_begin|><|tool_call_begin|>functions.test_function_name:0<|tool_call_argument_begin|>{"param1": "value1", "param2": 
"value2"}<|tool_call_end|>' +Common Suffix: '<|tool_calls_section_end|><|im_end|>' +Left (difference): '' +Right (difference): '<|tool_call_begin|>functions.test_function_name:1<|tool_call_argument_begin|>{"param1": "value3", "param2": "value4"}<|tool_call_end|>' + +=== Diff: One vs Two tool calls (user, assistant, user) === +Common Prefix: '<|im_system|>tool_declare<|im_middle|>[{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}}]<|im_end|><|im_system|>system<|im_middle|>You are Kimi, an AI assistant created by Moonshot AI.<|im_end|> +<|im_user|>user<|im_middle|>Hello, please help me.<|im_end|><|im_assistant|>assistant<|im_middle|><|tool_calls_section_begin|><|tool_call_begin|>functions.test_function_name:0<|tool_call_argument_begin|>{"param1": "value1", "param2": "value2"}<|tool_call_end|>' +Common Suffix: '<|tool_calls_section_end|><|im_end|><|im_user|>user<|im_middle|>Continue.<|im_end|>' +Left (difference): '' +Right (difference): '<|tool_call_begin|>functions.test_function_name:1<|tool_call_argument_begin|>{"param1": "value3", "param2": "value4"}<|tool_call_end|>' + +=== Diff: Tool call with vs without reasoning_content (user, assistant) === +Common Prefix: '<|im_system|>tool_declare<|im_middle|>[{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}}]<|im_end|><|im_system|>system<|im_middle|>You are Kimi, an AI assistant created by Moonshot AI.<|im_end|> +<|im_user|>user<|im_middle|>Hello, please help 
me.<|im_end|><|im_assistant|>assistant<|im_middle|><|tool_calls_section_begin|><|tool_call_begin|>functions.test_function_name:0<|tool_call_argument_begin|>{"param1": "value1", "param2": "value2"}<|tool_call_end|><|tool_calls_section_end|><|im_end|>' +Common Suffix: '' +Left (difference): '' +Right (difference): '' + +=== Checking Reasoning Variables === +No reasoning/thinking-related variables were queried by the template + +================================================================================ + ANALYZING TEMPLATE: models/templates/Kimi-K2-Thinking.jinja +================================================================================ + +=== Template Capabilities (from jinja::caps) === +supports_tools: false +supports_tool_calls: true +supports_system_role: true +supports_parallel_tool_calls: true +requires_typed_content: false + +=== Diff: With vs Without Tools (single user message) === +Common Prefix: '<|im_system|>' +Common Suffix: 'system<|im_middle|>You are Kimi, an AI assistant created by Moonshot AI.<|im_end|><|im_user|>user<|im_middle|>Hello, please help me.<|im_end|>' +Left (difference): '' +Right (difference): 'tool_declare<|im_middle|>[{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}}]<|im_end|><|im_system|>' + +=== Diff: With vs Without add_generation_prompt (single user message) === +Common Prefix: '<|im_system|>system<|im_middle|>You are Kimi, an AI assistant created by Moonshot AI.<|im_end|><|im_user|>user<|im_middle|>Hello, please help me.<|im_end|>' +Common Suffix: '' +Left (difference): '' +Right (difference): '<|im_assistant|>assistant<|im_middle|>' + +=== Diff: With vs Without reasoning_content (user, assistant) === +Common Prefix: 
'<|im_system|>system<|im_middle|>You are Kimi, an AI assistant created by Moonshot AI.<|im_end|><|im_user|>user<|im_middle|>Hello, please help me.<|im_end|><|im_assistant|>assistant<|im_middle|>I can help you with that.<|im_end|>' +Common Suffix: '' +Left (difference): '' +Right (difference): '' + +=== Diff: With vs Without reasoning_content (user, assistant, user) === +Common Prefix: '<|im_system|>system<|im_middle|>You are Kimi, an AI assistant created by Moonshot AI.<|im_end|><|im_user|>user<|im_middle|>Hello, please help me.<|im_end|><|im_assistant|>assistant<|im_middle|>I can help you with that.<|im_end|><|im_user|>user<|im_middle|>Thank you.<|im_end|>' +Common Suffix: '' +Left (difference): '' +Right (difference): '' + +=== Diff: With vs Without tool call (user, assistant) === +Common Prefix: '<|im_system|>tool_declare<|im_middle|>[{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}}]<|im_end|><|im_system|>system<|im_middle|>You are Kimi, an AI assistant created by Moonshot AI.<|im_end|><|im_user|>user<|im_middle|>Hello, please help me.<|im_end|><|im_assistant|>assistant<|im_middle|>' +Common Suffix: '<|im_end|>' +Left (difference): 'Let me help you.' 
+Right (difference): '<|tool_calls_section_begin|><|tool_call_begin|>functions.test_function_name:0<|tool_call_argument_begin|>{"param1": "value1", "param2": "value2"}<|tool_call_end|><|tool_calls_section_end|>' + +=== Diff: With vs Without tool call (user, assistant, user) === +Common Prefix: '<|im_system|>tool_declare<|im_middle|>[{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}}]<|im_end|><|im_system|>system<|im_middle|>You are Kimi, an AI assistant created by Moonshot AI.<|im_end|><|im_user|>user<|im_middle|>Hello, please help me.<|im_end|><|im_assistant|>assistant<|im_middle|>' +Common Suffix: '<|im_end|><|im_user|>user<|im_middle|>Continue.<|im_end|>' +Left (difference): 'Let me help you.' +Right (difference): '<|tool_calls_section_begin|><|tool_call_begin|>functions.test_function_name:0<|tool_call_argument_begin|>{"param1": "value1", "param2": "value2"}<|tool_call_end|><|tool_calls_section_end|>' + +=== Diff: One vs Two tool calls (user, assistant) === +Common Prefix: '<|im_system|>tool_declare<|im_middle|>[{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}}]<|im_end|><|im_system|>system<|im_middle|>You are Kimi, an AI assistant created by Moonshot AI.<|im_end|><|im_user|>user<|im_middle|>Hello, please help me.<|im_end|><|im_assistant|>assistant<|im_middle|><|tool_calls_section_begin|><|tool_call_begin|>functions.test_function_name:0<|tool_call_argument_begin|>{"param1": "value1", "param2": "value2"}<|tool_call_end|>' 
+Common Suffix: '<|tool_calls_section_end|><|im_end|>' +Left (difference): '' +Right (difference): '<|tool_call_begin|>functions.test_function_name:1<|tool_call_argument_begin|>{"param1": "value3", "param2": "value4"}<|tool_call_end|>' + +=== Diff: One vs Two tool calls (user, assistant, user) === +Common Prefix: '<|im_system|>tool_declare<|im_middle|>[{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}}]<|im_end|><|im_system|>system<|im_middle|>You are Kimi, an AI assistant created by Moonshot AI.<|im_end|><|im_user|>user<|im_middle|>Hello, please help me.<|im_end|><|im_assistant|>assistant<|im_middle|><|tool_calls_section_begin|><|tool_call_begin|>functions.test_function_name:0<|tool_call_argument_begin|>{"param1": "value1", "param2": "value2"}<|tool_call_end|>' +Common Suffix: '<|tool_calls_section_end|><|im_end|><|im_user|>user<|im_middle|>Continue.<|im_end|>' +Left (difference): '' +Right (difference): '<|tool_call_begin|>functions.test_function_name:1<|tool_call_argument_begin|>{"param1": "value3", "param2": "value4"}<|tool_call_end|>' + +=== Diff: Tool call with vs without reasoning_content (user, assistant) === +Common Prefix: '<|im_system|>tool_declare<|im_middle|>[{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}}]<|im_end|><|im_system|>system<|im_middle|>You are Kimi, an AI assistant created by Moonshot AI.<|im_end|><|im_user|>user<|im_middle|>Hello, please help me.<|im_end|><|im_assistant|>assistant<|im_middle|>' 
+Common Suffix: '<|tool_calls_section_begin|><|tool_call_begin|>functions.test_function_name:0<|tool_call_argument_begin|>{"param1": "value1", "param2": "value2"}<|tool_call_end|><|tool_calls_section_end|><|im_end|>' +Left (difference): '' +Right (difference): 'I need to call the tool first.' + +=== Checking Reasoning Variables === +No reasoning/thinking-related variables were queried by the template + +================================================================================ + ANALYZING TEMPLATE: models/templates/MiMo-VL.jinja +================================================================================ + +=== Template Capabilities (from jinja::caps) === +supports_tools: false +supports_tool_calls: true +supports_system_role: true +supports_parallel_tool_calls: true +requires_typed_content: false + +=== Diff: With vs Without Tools (single user message) === +Common Prefix: '<|im_start|>system +You are MiMo, an AI assistant developed by Xiaomi.' +Common Suffix: '<|im_end|> +<|im_start|>user +Hello, please help me.<|im_end|> +' +Left (difference): '' +Right (difference): ' + +# Tools + +You may call one or more functions to assist with the user query. 
+ +You are provided with function signatures within XML tags: + +{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}} + + +For each function call, return a json object with function name and arguments within XML tags: + +{"name": , "arguments": } +' + +=== Diff: With vs Without add_generation_prompt (single user message) === +Common Prefix: '<|im_start|>system +You are MiMo, an AI assistant developed by Xiaomi.<|im_end|> +<|im_start|>user +Hello, please help me.<|im_end|> +' +Common Suffix: '' +Left (difference): '' +Right (difference): '<|im_start|>assistant +' + +=== Diff: With vs Without reasoning_content (user, assistant) === +Common Prefix: '<|im_start|>system +You are MiMo, an AI assistant developed by Xiaomi.<|im_end|> +<|im_start|>user +Hello, please help me.<|im_end|> +<|im_start|>assistant +I can help you with that.<|im_end|> +' +Common Suffix: '' +Left (difference): '' +Right (difference): '' + +=== Diff: With vs Without reasoning_content (user, assistant, user) === +Common Prefix: '<|im_start|>system +You are MiMo, an AI assistant developed by Xiaomi.<|im_end|> +<|im_start|>user +Hello, please help me.<|im_end|> +<|im_start|>assistant +I can help you with that.<|im_end|> +<|im_start|>user +Thank you.<|im_end|> +' +Common Suffix: '' +Left (difference): '' +Right (difference): '' + +=== Diff: With vs Without tool call (user, assistant) === +Common Prefix: '<|im_start|>system +You are MiMo, an AI assistant developed by Xiaomi. + +# Tools + +You may call one or more functions to assist with the user query. 
+ +You are provided with function signatures within XML tags: + +{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}} + + +For each function call, return a json object with function name and arguments within XML tags: + +{"name": , "arguments": } +<|im_end|> +<|im_start|>user +Hello, please help me.<|im_end|> +<|im_start|>assistant +' +Common Suffix: '<|im_end|> +' +Left (difference): 'Let me help you.' +Right (difference): ' +{"name": "test_function_name", "arguments": {"param1": "value1", "param2": "value2"}} +' + +=== Diff: With vs Without tool call (user, assistant, user) === +Common Prefix: '<|im_start|>system +You are MiMo, an AI assistant developed by Xiaomi. + +# Tools + +You may call one or more functions to assist with the user query. + +You are provided with function signatures within XML tags: + +{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}} + + +For each function call, return a json object with function name and arguments within XML tags: + +{"name": , "arguments": } +<|im_end|> +<|im_start|>user +Hello, please help me.<|im_end|> +<|im_start|>assistant +' +Common Suffix: '<|im_end|> +<|im_start|>user +Continue.<|im_end|> +' +Left (difference): 'Let me help you.' 
+Right (difference): ' +{"name": "test_function_name", "arguments": {"param1": "value1", "param2": "value2"}} +' + +=== Diff: One vs Two tool calls (user, assistant) === +Common Prefix: '<|im_start|>system +You are MiMo, an AI assistant developed by Xiaomi. + +# Tools + +You may call one or more functions to assist with the user query. + +You are provided with function signatures within XML tags: + +{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}} + + +For each function call, return a json object with function name and arguments within XML tags: + +{"name": , "arguments": } +<|im_end|> +<|im_start|>user +Hello, please help me.<|im_end|> +<|im_start|>assistant + +{"name": "test_function_name", "arguments": {"param1": "value1", "param2": "value2"}} +' +Common Suffix: '<|im_end|> +' +Left (difference): '' +Right (difference): ' + +{"name": "test_function_name", "arguments": {"param1": "value3", "param2": "value4"}} +' + +=== Diff: One vs Two tool calls (user, assistant, user) === +Common Prefix: '<|im_start|>system +You are MiMo, an AI assistant developed by Xiaomi. + +# Tools + +You may call one or more functions to assist with the user query. 
+ +You are provided with function signatures within XML tags: + +{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}} + + +For each function call, return a json object with function name and arguments within XML tags: + +{"name": , "arguments": } +<|im_end|> +<|im_start|>user +Hello, please help me.<|im_end|> +<|im_start|>assistant + +{"name": "test_function_name", "arguments": {"param1": "value1", "param2": "value2"}} +' +Common Suffix: '<|im_end|> +<|im_start|>user +Continue.<|im_end|> +' +Left (difference): '' +Right (difference): ' + +{"name": "test_function_name", "arguments": {"param1": "value3", "param2": "value4"}} +' + +=== Diff: Tool call with vs without reasoning_content (user, assistant) === +Common Prefix: '<|im_start|>system +You are MiMo, an AI assistant developed by Xiaomi. + +# Tools + +You may call one or more functions to assist with the user query. 
+ +You are provided with function signatures within XML tags: + +{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}} + + +For each function call, return a json object with function name and arguments within XML tags: + +{"name": , "arguments": } +<|im_end|> +<|im_start|>user +Hello, please help me.<|im_end|> +<|im_start|>assistant + +{"name": "test_function_name", "arguments": {"param1": "value1", "param2": "value2"}} +<|im_end|> +' +Common Suffix: '' +Left (difference): '' +Right (difference): '' + +=== Checking Reasoning Variables === +No reasoning/thinking-related variables were queried by the template + +================================================================================ + ANALYZING TEMPLATE: models/templates/MiniMax-M2.jinja +================================================================================ + +=== Template Capabilities (from jinja::caps) === +supports_tools: false +supports_tool_calls: true +supports_system_role: true +supports_parallel_tool_calls: true +requires_typed_content: false + +=== Diff: With vs Without Tools (single user message) === +Common Prefix: ']~!b[]~b]system +You are a helpful assistant.' +Common Suffix: '[e~[ +]~b]user +Hello, please help me.[e~[ +' +Left (difference): '' +Right (difference): ' + +# Tools +You may call one or more tools to assist with the user query. 
+Here are the tools available in JSONSchema format: + + +{"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}} + + +When making tool calls, use XML format to invoke tools and pass parameters: + + + +param-value-1 +param-value-2 +... + +' + +=== Diff: With vs Without add_generation_prompt (single user message) === +Common Prefix: ']~!b[]~b]system +You are a helpful assistant.[e~[ +]~b]user +Hello, please help me.[e~[ +' +Common Suffix: '' +Left (difference): '' +Right (difference): ']~b]ai + +' + +=== Diff: With vs Without reasoning_content (user, assistant) === +Common Prefix: ']~!b[]~b]system +You are a helpful assistant.[e~[ +]~b]user +Hello, please help me.[e~[ +]~b]ai +' +Common Suffix: 'I can help you with that.[e~[ +' +Left (difference): '' +Right (difference): ' +The user is asking for help. I should respond positively. + + +' + +=== Diff: With vs Without reasoning_content (user, assistant, user) === +Common Prefix: ']~!b[]~b]system +You are a helpful assistant.[e~[ +]~b]user +Hello, please help me.[e~[ +]~b]ai +I can help you with that.[e~[ +]~b]user +Thank you.[e~[ +' +Common Suffix: '' +Left (difference): '' +Right (difference): '' + +=== Diff: With vs Without tool call (user, assistant) === +Common Prefix: ']~!b[]~b]system +You are a helpful assistant. + +# Tools +You may call one or more tools to assist with the user query. 
+Here are the tools available in JSONSchema format: + + +{"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}} + + +When making tool calls, use XML format to invoke tools and pass parameters: + + + +param-value-1 +param-value-2 +... + +[e~[ +]~b]user +Hello, please help me.[e~[ +]~b]ai +' +Common Suffix: '[e~[ +' +Left (difference): 'Let me help you.' +Right (difference): ' + + +value1 +value2 + +' + +=== Diff: With vs Without tool call (user, assistant, user) === +Common Prefix: ']~!b[]~b]system +You are a helpful assistant. + +# Tools +You may call one or more tools to assist with the user query. +Here are the tools available in JSONSchema format: + + +{"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}} + + +When making tool calls, use XML format to invoke tools and pass parameters: + + + +param-value-1 +param-value-2 +... + +[e~[ +]~b]user +Hello, please help me.[e~[ +]~b]ai +' +Common Suffix: '[e~[ +]~b]user +Continue.[e~[ +' +Left (difference): 'Let me help you.' +Right (difference): ' + + +value1 +value2 + +' + +=== Diff: One vs Two tool calls (user, assistant) === +Common Prefix: ']~!b[]~b]system +You are a helpful assistant. + +# Tools +You may call one or more tools to assist with the user query. 
+Here are the tools available in JSONSchema format: + + +{"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}} + + +When making tool calls, use XML format to invoke tools and pass parameters: + + + +param-value-1 +param-value-2 +... + +[e~[ +]~b]user +Hello, please help me.[e~[ +]~b]ai + + + +value1 +value2 + +' +Common Suffix: '[e~[ +' +Left (difference): '' +Right (difference): ' +value3 +value4 + +' + +=== Diff: One vs Two tool calls (user, assistant, user) === +Common Prefix: ']~!b[]~b]system +You are a helpful assistant. + +# Tools +You may call one or more tools to assist with the user query. +Here are the tools available in JSONSchema format: + + +{"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}} + + +When making tool calls, use XML format to invoke tools and pass parameters: + + + +param-value-1 +param-value-2 +... + +[e~[ +]~b]user +Hello, please help me.[e~[ +]~b]ai + + + +value1 +value2 + +' +Common Suffix: '[e~[ +]~b]user +Continue.[e~[ +' +Left (difference): '' +Right (difference): ' +value3 +value4 + +' + +=== Diff: Tool call with vs without reasoning_content (user, assistant) === +Common Prefix: ']~!b[]~b]system +You are a helpful assistant. + +# Tools +You may call one or more tools to assist with the user query. 
+Here are the tools available in JSONSchema format: + + +{"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}} + + +When making tool calls, use XML format to invoke tools and pass parameters: + + + +param-value-1 +param-value-2 +... + +[e~[ +]~b]user +Hello, please help me.[e~[ +]~b]ai +' +Common Suffix: ' + + +value1 +value2 + +[e~[ +' +Left (difference): '' +Right (difference): ' +I need to call the tool first. + + +' + +=== Checking Reasoning Variables === +No reasoning/thinking-related variables were queried by the template + +================================================================================ + ANALYZING TEMPLATE: models/templates/Mistral-Small-3.2-24B-Instruct-2506.jinja +================================================================================ + +=== Template Capabilities (from jinja::caps) === +supports_tools: false +supports_tool_calls: false +supports_system_role: true +supports_parallel_tool_calls: true +requires_typed_content: false + +=== Diff: With vs Without Tools (single user message) === +Common Prefix: '[SYSTEM_PROMPT]You are Mistral Small 3, a Large Language Model (LLM) created by Mistral AI, a French startup headquartered in Paris. +Your knowledge base was last updated on 2023-10-01. The current date is 2026-01-26. + +When you're not sure about some information or when the user's request requires up-to-date or specific data, you must use the available tools to fetch the information. Do not hesitate to use tools whenever they can provide a more accurate or complete response. If no relevant tools are available, then clearly state that you don't have the information and avoid making up anything. 
+ +If the user's question is not clear, ambiguous, or does not provide enough context for you to accurately answer the question, you do not try to answer it right away and you rather ask the user to clarify their request (e.g. "What are some good restaurants around me?" => "Where are you?" or "When is the next flight to Tokyo" => "Where do you travel from?"). +You are always very attentive to dates, and when asked about information at specific dates, you discard information that is at another date. +You follow these instructions in all languages, and always respond to the user in the language they use or request. +Next sections describe the capabilities that you have. + +# WEB BROWSING INSTRUCTIONS + +You cannot perform any web search or access internet to open URLs, links etc. If it seems like the user is expecting you to do so, you clarify the situation and ask the user to copy paste the text directly in the chat. + +# MULTI-MODAL INSTRUCTIONS + +You have the ability to read images, but you cannot generate images. You also cannot transcribe audio files or videos. +You cannot read nor transcribe audio files or videos. + +# TOOL CALLING INSTRUCTIONS + +You may have access to tools that you can use to fetch information or perform actions. You must use these tools in the following situations: + +1. When the request requires up-to-date information. +2. When the request requires specific data that you do not have in your knowledge base. +3. When the request involves actions that you cannot perform without tools. + +Always prioritize using tools to provide the most accurate and helpful response. 
If tools are not available, inform the user that you cannot perform the requested action at the moment.[/SYSTEM_PROMPT][INST]Hello, please help me.[/INST]' +Common Suffix: '' +Left (difference): '' +Right (difference): '' + +=== Diff: With vs Without add_generation_prompt (single user message) === +Common Prefix: '[SYSTEM_PROMPT]You are Mistral Small 3, a Large Language Model (LLM) created by Mistral AI, a French startup headquartered in Paris. +Your knowledge base was last updated on 2023-10-01. The current date is 2026-01-26. + +When you're not sure about some information or when the user's request requires up-to-date or specific data, you must use the available tools to fetch the information. Do not hesitate to use tools whenever they can provide a more accurate or complete response. If no relevant tools are available, then clearly state that you don't have the information and avoid making up anything. + +If the user's question is not clear, ambiguous, or does not provide enough context for you to accurately answer the question, you do not try to answer it right away and you rather ask the user to clarify their request (e.g. "What are some good restaurants around me?" => "Where are you?" or "When is the next flight to Tokyo" => "Where do you travel from?"). +You are always very attentive to dates, and when asked about information at specific dates, you discard information that is at another date. +You follow these instructions in all languages, and always respond to the user in the language they use or request. +Next sections describe the capabilities that you have. + +# WEB BROWSING INSTRUCTIONS + +You cannot perform any web search or access internet to open URLs, links etc. If it seems like the user is expecting you to do so, you clarify the situation and ask the user to copy paste the text directly in the chat. + +# MULTI-MODAL INSTRUCTIONS + +You have the ability to read images, but you cannot generate images. 
You also cannot transcribe audio files or videos. +You cannot read nor transcribe audio files or videos. + +# TOOL CALLING INSTRUCTIONS + +You may have access to tools that you can use to fetch information or perform actions. You must use these tools in the following situations: + +1. When the request requires up-to-date information. +2. When the request requires specific data that you do not have in your knowledge base. +3. When the request involves actions that you cannot perform without tools. + +Always prioritize using tools to provide the most accurate and helpful response. If tools are not available, inform the user that you cannot perform the requested action at the moment.[/SYSTEM_PROMPT][INST]Hello, please help me.[/INST]' +Common Suffix: '' +Left (difference): '' +Right (difference): '' + +=== Diff: With vs Without reasoning_content (user, assistant) === +Common Prefix: '[SYSTEM_PROMPT]You are Mistral Small 3, a Large Language Model (LLM) created by Mistral AI, a French startup headquartered in Paris. +Your knowledge base was last updated on 2023-10-01. The current date is 2026-01-26. + +When you're not sure about some information or when the user's request requires up-to-date or specific data, you must use the available tools to fetch the information. Do not hesitate to use tools whenever they can provide a more accurate or complete response. If no relevant tools are available, then clearly state that you don't have the information and avoid making up anything. + +If the user's question is not clear, ambiguous, or does not provide enough context for you to accurately answer the question, you do not try to answer it right away and you rather ask the user to clarify their request (e.g. "What are some good restaurants around me?" => "Where are you?" or "When is the next flight to Tokyo" => "Where do you travel from?"). 
+You are always very attentive to dates, and when asked about information at specific dates, you discard information that is at another date. +You follow these instructions in all languages, and always respond to the user in the language they use or request. +Next sections describe the capabilities that you have. + +# WEB BROWSING INSTRUCTIONS + +You cannot perform any web search or access internet to open URLs, links etc. If it seems like the user is expecting you to do so, you clarify the situation and ask the user to copy paste the text directly in the chat. + +# MULTI-MODAL INSTRUCTIONS + +You have the ability to read images, but you cannot generate images. You also cannot transcribe audio files or videos. +You cannot read nor transcribe audio files or videos. + +# TOOL CALLING INSTRUCTIONS + +You may have access to tools that you can use to fetch information or perform actions. You must use these tools in the following situations: + +1. When the request requires up-to-date information. +2. When the request requires specific data that you do not have in your knowledge base. +3. When the request involves actions that you cannot perform without tools. + +Always prioritize using tools to provide the most accurate and helpful response. If tools are not available, inform the user that you cannot perform the requested action at the moment.[/SYSTEM_PROMPT][INST]Hello, please help me.[/INST]I can help you with that.' +Common Suffix: '' +Left (difference): '' +Right (difference): '' + +=== Diff: With vs Without reasoning_content (user, assistant, user) === +Common Prefix: '[SYSTEM_PROMPT]You are Mistral Small 3, a Large Language Model (LLM) created by Mistral AI, a French startup headquartered in Paris. +Your knowledge base was last updated on 2023-10-01. The current date is 2026-01-26. + +When you're not sure about some information or when the user's request requires up-to-date or specific data, you must use the available tools to fetch the information. 
Do not hesitate to use tools whenever they can provide a more accurate or complete response. If no relevant tools are available, then clearly state that you don't have the information and avoid making up anything. + +If the user's question is not clear, ambiguous, or does not provide enough context for you to accurately answer the question, you do not try to answer it right away and you rather ask the user to clarify their request (e.g. "What are some good restaurants around me?" => "Where are you?" or "When is the next flight to Tokyo" => "Where do you travel from?"). +You are always very attentive to dates, and when asked about information at specific dates, you discard information that is at another date. +You follow these instructions in all languages, and always respond to the user in the language they use or request. +Next sections describe the capabilities that you have. + +# WEB BROWSING INSTRUCTIONS + +You cannot perform any web search or access internet to open URLs, links etc. If it seems like the user is expecting you to do so, you clarify the situation and ask the user to copy paste the text directly in the chat. + +# MULTI-MODAL INSTRUCTIONS + +You have the ability to read images, but you cannot generate images. You also cannot transcribe audio files or videos. +You cannot read nor transcribe audio files or videos. + +# TOOL CALLING INSTRUCTIONS + +You may have access to tools that you can use to fetch information or perform actions. You must use these tools in the following situations: + +1. When the request requires up-to-date information. +2. When the request requires specific data that you do not have in your knowledge base. +3. When the request involves actions that you cannot perform without tools. + +Always prioritize using tools to provide the most accurate and helpful response. 
If tools are not available, inform the user that you cannot perform the requested action at the moment.[/SYSTEM_PROMPT][INST]Hello, please help me.[/INST]I can help you with that.[INST]Thank you.[/INST]' +Common Suffix: '' +Left (difference): '' +Right (difference): '' +Analysis failed: +------------ +While executing CallExpression at line 91, column 40 in source: +...↵ {{- raise_exception("Tool call IDs should be alphanumeric s... + ^ +Error: Jinja Exception: Tool call IDs should be alphanumeric strings with length 9! + +================================================================================ + ANALYZING TEMPLATE: models/templates/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16.jinja +================================================================================ + +=== Template Capabilities (from jinja::caps) === +supports_tools: true +supports_tool_calls: true +supports_system_role: true +supports_parallel_tool_calls: true +requires_typed_content: false + +=== Diff: With vs Without Tools (single user message) === +Common Prefix: '<|im_start|>system +' +Common Suffix: '<|im_end|> +<|im_start|>user +Hello, please help me.<|im_end|> +' +Left (difference): '' +Right (difference): '# Tools + +You have access to the following functions: + + + +test_function_name +A test function for debugging + + +param1 +string +First parameter + + +param2 +string +Second parameter + +["param1", "param2"] + + + + +If you choose to call a function ONLY reply in the following format with NO suffix: + + + + +value_1 + + +This is the value for the second parameter +that can span +multiple lines + + + + + +Reminder: +- Function calls MUST follow the specified format: an inner block must be nested within XML tags +- Required parameters MUST be specified +- You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after +- If there is no function call available, answer the question like normal with your current knowledge and do not tell the 
user about function calls +' + +=== Diff: With vs Without add_generation_prompt (single user message) === +Common Prefix: '<|im_start|>system +<|im_end|> +<|im_start|>user +Hello, please help me.<|im_end|> +' +Common Suffix: '' +Left (difference): '' +Right (difference): '<|im_start|>assistant + +' + +=== Diff: With vs Without reasoning_content (user, assistant) === +Common Prefix: '<|im_start|>system +<|im_end|> +<|im_start|>user +Hello, please help me.<|im_end|> +<|im_start|>assistant +' +Common Suffix: 'I can help you with that.<|im_end|> +' +Left (difference): '' +Right (difference): ' +The user is asking for help. I should respond positively. + +' + +=== Diff: With vs Without reasoning_content (user, assistant, user) === +Common Prefix: '<|im_start|>system +<|im_end|> +<|im_start|>user +Hello, please help me.<|im_end|> +<|im_start|>assistant +' +Common Suffix: 'I can help you with that.<|im_end|> +<|im_start|>user +Thank you.<|im_end|> +' +Left (difference): '' +Right (difference): ' +' + +=== Diff: With vs Without tool call (user, assistant) === +Common Prefix: '<|im_start|>system +# Tools + +You have access to the following functions: + + + +test_function_name +A test function for debugging + + +param1 +string +First parameter + + +param2 +string +Second parameter + +["param1", "param2"] + + + + +If you choose to call a function ONLY reply in the following format with NO suffix: + + + + +value_1 + + +This is the value for the second parameter +that can span +multiple lines + + + + + +Reminder: +- Function calls MUST follow the specified format: an inner block must be nested within XML tags +- Required parameters MUST be specified +- You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after +- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls +<|im_end|> +<|im_start|>user +Hello, please help me.<|im_end|> 
+<|im_start|>assistant +' +Common Suffix: '<|im_end|> +' +Left (difference): 'Let me help you.' +Right (difference): ' + + + +value1 + + +value2 + + + +' + +=== Diff: With vs Without tool call (user, assistant, user) === +Common Prefix: '<|im_start|>system +# Tools + +You have access to the following functions: + + + +test_function_name +A test function for debugging + + +param1 +string +First parameter + + +param2 +string +Second parameter + +["param1", "param2"] + + + + +If you choose to call a function ONLY reply in the following format with NO suffix: + + + + +value_1 + + +This is the value for the second parameter +that can span +multiple lines + + + + + +Reminder: +- Function calls MUST follow the specified format: an inner block must be nested within XML tags +- Required parameters MUST be specified +- You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after +- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls +<|im_end|> +<|im_start|>user +Hello, please help me.<|im_end|> +<|im_start|>assistant +' +Common Suffix: '<|im_end|> +<|im_start|>user +Continue.<|im_end|> +' +Left (difference): 'Let me help you.' 
+Right (difference): ' + + + +value1 + + +value2 + + + +' + +=== Diff: One vs Two tool calls (user, assistant) === +Common Prefix: '<|im_start|>system +# Tools + +You have access to the following functions: + + + +test_function_name +A test function for debugging + + +param1 +string +First parameter + + +param2 +string +Second parameter + +["param1", "param2"] + + + + +If you choose to call a function ONLY reply in the following format with NO suffix: + + + + +value_1 + + +This is the value for the second parameter +that can span +multiple lines + + + + + +Reminder: +- Function calls MUST follow the specified format: an inner block must be nested within XML tags +- Required parameters MUST be specified +- You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after +- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls +<|im_end|> +<|im_start|>user +Hello, please help me.<|im_end|> +<|im_start|>assistant + + + + +value1 + + +value2 + + + +' +Common Suffix: '<|im_end|> +' +Left (difference): '' +Right (difference): ' + + +value3 + + +value4 + + + +' + +=== Diff: One vs Two tool calls (user, assistant, user) === +Common Prefix: '<|im_start|>system +# Tools + +You have access to the following functions: + + + +test_function_name +A test function for debugging + + +param1 +string +First parameter + + +param2 +string +Second parameter + +["param1", "param2"] + + + + +If you choose to call a function ONLY reply in the following format with NO suffix: + + + + +value_1 + + +This is the value for the second parameter +that can span +multiple lines + + + + + +Reminder: +- Function calls MUST follow the specified format: an inner block must be nested within XML tags +- Required parameters MUST be specified +- You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after +- 
If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls +<|im_end|> +<|im_start|>user +Hello, please help me.<|im_end|> +<|im_start|>assistant + + + + +value1 + + +value2 + + + +' +Common Suffix: '<|im_end|> +<|im_start|>user +Continue.<|im_end|> +' +Left (difference): '' +Right (difference): ' + + +value3 + + +value4 + + + +' + +=== Diff: Tool call with vs without reasoning_content (user, assistant) === +Common Prefix: '<|im_start|>system +# Tools + +You have access to the following functions: + + + +test_function_name +A test function for debugging + + +param1 +string +First parameter + + +param2 +string +Second parameter + +["param1", "param2"] + + + + +If you choose to call a function ONLY reply in the following format with NO suffix: + + + + +value_1 + + +This is the value for the second parameter +that can span +multiple lines + + + + + +Reminder: +- Function calls MUST follow the specified format: an inner block must be nested within XML tags +- Required parameters MUST be specified +- You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after +- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls +<|im_end|> +<|im_start|>user +Hello, please help me.<|im_end|> +<|im_start|>assistant +' +Common Suffix: ' + + + +value1 + + +value2 + + + +<|im_end|> +' +Left (difference): '' +Right (difference): ' +I need to call the tool first. 
+' + +=== Checking Reasoning Variables === +No reasoning/thinking-related variables were queried by the template + +================================================================================ + ANALYZING TEMPLATE: models/templates/NVIDIA-Nemotron-Nano-v2.jinja +================================================================================ + +=== Template Capabilities (from jinja::caps) === +supports_tools: false +supports_tool_calls: true +supports_system_role: true +supports_parallel_tool_calls: true +requires_typed_content: false + +=== Diff: With vs Without Tools (single user message) === +Common Prefix: 'System +' +Common Suffix: ' + +User +Hello, please help me. +' +Left (difference): '' +Right (difference): 'You can use the following tools to assist the user if required: +[{"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}] + +If you decide to call any tool(s), use the following format: +[{{"name": "tool_name1", "arguments": "tool_args1"}}, {{"name": "tool_name2", "arguments": "tool_args2"}}] + +The user will execute tool-calls and return responses from tool(s) in this format: +[{{"tool_response1"}}, {{"tool_response2"}}] + +Based on the tool responses, you can call additional tools if needed, correct tool calls if any errors are found, or just respond to the user.' + +=== Diff: With vs Without add_generation_prompt (single user message) === +Common Prefix: 'System + + +User +Hello, please help me. +' +Common Suffix: '' +Left (difference): '' +Right (difference): 'Assistant + +' + +=== Diff: With vs Without reasoning_content (user, assistant) === +Common Prefix: 'System + + +User +Hello, please help me. +Assistant + +I can help you with that. 
+ +' +Common Suffix: '' +Left (difference): '' +Right (difference): '' + +=== Diff: With vs Without reasoning_content (user, assistant, user) === +Common Prefix: 'System + + +User +Hello, please help me. +Assistant +I can help you with that. + +User +Thank you. +' +Common Suffix: '' +Left (difference): '' +Right (difference): '' + +=== Diff: With vs Without tool call (user, assistant) === +Common Prefix: 'System +You can use the following tools to assist the user if required: +[{"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}] + +If you decide to call any tool(s), use the following format: +[{{"name": "tool_name1", "arguments": "tool_args1"}}, {{"name": "tool_name2", "arguments": "tool_args2"}}] + +The user will execute tool-calls and return responses from tool(s) in this format: +[{{"tool_response1"}}, {{"tool_response2"}}] + +Based on the tool responses, you can call additional tools if needed, correct tool calls if any errors are found, or just respond to the user. + +User +Hello, please help me. +Assistant +' +Common Suffix: ' +' +Left (difference): ' +Let me help you. 
+' +Right (difference): '[{"name": "test_function_name", "arguments": {"param1": "value1", "param2": "value2"}}] +' + +=== Diff: With vs Without tool call (user, assistant, user) === +Common Prefix: 'System +You can use the following tools to assist the user if required: +[{"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}] + +If you decide to call any tool(s), use the following format: +[{{"name": "tool_name1", "arguments": "tool_args1"}}, {{"name": "tool_name2", "arguments": "tool_args2"}}] + +The user will execute tool-calls and return responses from tool(s) in this format: +[{{"tool_response1"}}, {{"tool_response2"}}] + +Based on the tool responses, you can call additional tools if needed, correct tool calls if any errors are found, or just respond to the user. + +User +Hello, please help me. +Assistant +' +Common Suffix: ' + +User +Continue. +' +Left (difference): 'Let me help you.' 
+Right (difference): '[{"name": "test_function_name", "arguments": {"param1": "value1", "param2": "value2"}}]' + +=== Diff: One vs Two tool calls (user, assistant) === +Common Prefix: 'System +You can use the following tools to assist the user if required: +[{"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}] + +If you decide to call any tool(s), use the following format: +[{{"name": "tool_name1", "arguments": "tool_args1"}}, {{"name": "tool_name2", "arguments": "tool_args2"}}] + +The user will execute tool-calls and return responses from tool(s) in this format: +[{{"tool_response1"}}, {{"tool_response2"}}] + +Based on the tool responses, you can call additional tools if needed, correct tool calls if any errors are found, or just respond to the user. + +User +Hello, please help me. 
+Assistant +[{"name": "test_function_name", "arguments": {"param1": "value1", "param2": "value2"}}' +Common Suffix: '] + +' +Left (difference): '' +Right (difference): ', {"name": "test_function_name", "arguments": {"param1": "value3", "param2": "value4"}}' + +=== Diff: One vs Two tool calls (user, assistant, user) === +Common Prefix: 'System +You can use the following tools to assist the user if required: +[{"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}] + +If you decide to call any tool(s), use the following format: +[{{"name": "tool_name1", "arguments": "tool_args1"}}, {{"name": "tool_name2", "arguments": "tool_args2"}}] + +The user will execute tool-calls and return responses from tool(s) in this format: +[{{"tool_response1"}}, {{"tool_response2"}}] + +Based on the tool responses, you can call additional tools if needed, correct tool calls if any errors are found, or just respond to the user. + +User +Hello, please help me. +Assistant +[{"name": "test_function_name", "arguments": {"param1": "value1", "param2": "value2"}}' +Common Suffix: '] + +User +Continue. 
+' +Left (difference): '' +Right (difference): ', {"name": "test_function_name", "arguments": {"param1": "value3", "param2": "value4"}}' + +=== Diff: Tool call with vs without reasoning_content (user, assistant) === +Common Prefix: 'System +You can use the following tools to assist the user if required: +[{"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}] + +If you decide to call any tool(s), use the following format: +[{{"name": "tool_name1", "arguments": "tool_args1"}}, {{"name": "tool_name2", "arguments": "tool_args2"}}] + +The user will execute tool-calls and return responses from tool(s) in this format: +[{{"tool_response1"}}, {{"tool_response2"}}] + +Based on the tool responses, you can call additional tools if needed, correct tool calls if any errors are found, or just respond to the user. + +User +Hello, please help me. +Assistant +[{"name": "test_function_name", "arguments": {"param1": "value1", "param2": "value2"}}] + +' +Common Suffix: '' +Left (difference): '' +Right (difference): '' + +=== Checking Reasoning Variables === +No reasoning/thinking-related variables were queried by the template + +================================================================================ + ANALYZING TEMPLATE: models/templates/NousResearch-Hermes-2-Pro-Llama-3-8B-tool_use.jinja +================================================================================ + +=== Template Capabilities (from jinja::caps) === +supports_tools: true +supports_tool_calls: true +supports_system_role: true +supports_parallel_tool_calls: true +requires_typed_content: false + +=== Diff: With vs Without Tools (single user message) === +Common Prefix: '<|im_start|>system +You are a function calling AI model. 
You are provided with function signatures within XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools: ' +Common Suffix: ' Use the following pydantic model json schema for each tool call you will make: {"properties": {"name": {"title": "Name", "type": "string"}, "arguments": {"title": "Arguments", "type": "object"}}, "required": ["name", "arguments"], "title": "FunctionCall", "type": "object"}} +For each function call return a json object with function name and arguments within XML tags as follows: + +{"name": , "arguments": } +<|im_end|> +<|im_start|>user +Hello, please help me.<|im_end|> +' +Left (difference): '' +Right (difference): '{"type": "function", "function": {"name": "test_function_name", "description": "test_function_name(param1: str, param2: str) - A test function for debugging + + Args: + param1(str): First parameter param2(str): Second parameter", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}' + +=== Diff: With vs Without add_generation_prompt (single user message) === +Common Prefix: '<|im_start|>system +You are a function calling AI model. You are provided with function signatures within XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. 
Here are the available tools: Use the following pydantic model json schema for each tool call you will make: {"properties": {"name": {"title": "Name", "type": "string"}, "arguments": {"title": "Arguments", "type": "object"}}, "required": ["name", "arguments"], "title": "FunctionCall", "type": "object"}} +For each function call return a json object with function name and arguments within XML tags as follows: + +{"name": , "arguments": } +<|im_end|> +<|im_start|>user +Hello, please help me.<|im_end|> +' +Common Suffix: '' +Left (difference): '' +Right (difference): '<|im_start|>assistant +' + +=== Diff: With vs Without reasoning_content (user, assistant) === +Common Prefix: '<|im_start|>system +You are a function calling AI model. You are provided with function signatures within XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools: Use the following pydantic model json schema for each tool call you will make: {"properties": {"name": {"title": "Name", "type": "string"}, "arguments": {"title": "Arguments", "type": "object"}}, "required": ["name", "arguments"], "title": "FunctionCall", "type": "object"}} +For each function call return a json object with function name and arguments within XML tags as follows: + +{"name": , "arguments": } +<|im_end|> +<|im_start|>user +Hello, please help me.<|im_end|> +<|im_start|>assistant +I can help you with that.<|im_end|> +' +Common Suffix: '' +Left (difference): '' +Right (difference): '' + +=== Diff: With vs Without reasoning_content (user, assistant, user) === +Common Prefix: '<|im_start|>system +You are a function calling AI model. You are provided with function signatures within XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. 
Here are the available tools: Use the following pydantic model json schema for each tool call you will make: {"properties": {"name": {"title": "Name", "type": "string"}, "arguments": {"title": "Arguments", "type": "object"}}, "required": ["name", "arguments"], "title": "FunctionCall", "type": "object"}} +For each function call return a json object with function name and arguments within XML tags as follows: + +{"name": , "arguments": } +<|im_end|> +<|im_start|>user +Hello, please help me.<|im_end|> +<|im_start|>assistant +I can help you with that.<|im_end|> +<|im_start|>user +Thank you.<|im_end|> +' +Common Suffix: '' +Left (difference): '' +Right (difference): '' + +=== Diff: With vs Without tool call (user, assistant) === +Common Prefix: '<|im_start|>system +You are a function calling AI model. You are provided with function signatures within XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools: {"type": "function", "function": {"name": "test_function_name", "description": "test_function_name(param1: str, param2: str) - A test function for debugging + + Args: + param1(str): First parameter param2(str): Second parameter", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}} Use the following pydantic model json schema for each tool call you will make: {"properties": {"name": {"title": "Name", "type": "string"}, "arguments": {"title": "Arguments", "type": "object"}}, "required": ["name", "arguments"], "title": "FunctionCall", "type": "object"}} +For each function call return a json object with function name and arguments within XML tags as follows: + +{"name": , "arguments": } +<|im_end|> +<|im_start|>user +Hello, please help me.<|im_end|> +<|im_start|>assistant +' +Common Suffix: '<|im_end|> 
+' +Left (difference): 'Let me help you.' +Right (difference): ' +{"name": "test_function_name", "arguments": {"param1": "value1", "param2": "value2"}} +' + +=== Diff: With vs Without tool call (user, assistant, user) === +Common Prefix: '<|im_start|>system +You are a function calling AI model. You are provided with function signatures within XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools: {"type": "function", "function": {"name": "test_function_name", "description": "test_function_name(param1: str, param2: str) - A test function for debugging + + Args: + param1(str): First parameter param2(str): Second parameter", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}} Use the following pydantic model json schema for each tool call you will make: {"properties": {"name": {"title": "Name", "type": "string"}, "arguments": {"title": "Arguments", "type": "object"}}, "required": ["name", "arguments"], "title": "FunctionCall", "type": "object"}} +For each function call return a json object with function name and arguments within XML tags as follows: + +{"name": , "arguments": } +<|im_end|> +<|im_start|>user +Hello, please help me.<|im_end|> +<|im_start|>assistant +' +Common Suffix: '<|im_end|> +<|im_start|>user +Continue.<|im_end|> +' +Left (difference): 'Let me help you.' +Right (difference): ' +{"name": "test_function_name", "arguments": {"param1": "value1", "param2": "value2"}} +' + +=== Diff: One vs Two tool calls (user, assistant) === +Common Prefix: '<|im_start|>system +You are a function calling AI model. You are provided with function signatures within XML tags. You may call one or more functions to assist with the user query. 
Don't make assumptions about what values to plug into functions. Here are the available tools: {"type": "function", "function": {"name": "test_function_name", "description": "test_function_name(param1: str, param2: str) - A test function for debugging + + Args: + param1(str): First parameter param2(str): Second parameter", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}} Use the following pydantic model json schema for each tool call you will make: {"properties": {"name": {"title": "Name", "type": "string"}, "arguments": {"title": "Arguments", "type": "object"}}, "required": ["name", "arguments"], "title": "FunctionCall", "type": "object"}} +For each function call return a json object with function name and arguments within XML tags as follows: + +{"name": , "arguments": } +<|im_end|> +<|im_start|>user +Hello, please help me.<|im_end|> +<|im_start|>assistant + +{"name": "test_function_name", "arguments": {"param1": "value1", "param2": "value2"}} +' +Common Suffix: '<|im_end|> +' +Left (difference): '' +Right (difference): ' + +{"name": "test_function_name", "arguments": {"param1": "value3", "param2": "value4"}} +' + +=== Diff: One vs Two tool calls (user, assistant, user) === +Common Prefix: '<|im_start|>system +You are a function calling AI model. You are provided with function signatures within XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. 
Here are the available tools: {"type": "function", "function": {"name": "test_function_name", "description": "test_function_name(param1: str, param2: str) - A test function for debugging + + Args: + param1(str): First parameter param2(str): Second parameter", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}} Use the following pydantic model json schema for each tool call you will make: {"properties": {"name": {"title": "Name", "type": "string"}, "arguments": {"title": "Arguments", "type": "object"}}, "required": ["name", "arguments"], "title": "FunctionCall", "type": "object"}} +For each function call return a json object with function name and arguments within XML tags as follows: + +{"name": , "arguments": } +<|im_end|> +<|im_start|>user +Hello, please help me.<|im_end|> +<|im_start|>assistant + +{"name": "test_function_name", "arguments": {"param1": "value1", "param2": "value2"}} +' +Common Suffix: '<|im_end|> +<|im_start|>user +Continue.<|im_end|> +' +Left (difference): '' +Right (difference): ' + +{"name": "test_function_name", "arguments": {"param1": "value3", "param2": "value4"}} +' + +=== Diff: Tool call with vs without reasoning_content (user, assistant) === +Common Prefix: '<|im_start|>system +You are a function calling AI model. You are provided with function signatures within XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. 
Here are the available tools: {"type": "function", "function": {"name": "test_function_name", "description": "test_function_name(param1: str, param2: str) - A test function for debugging + + Args: + param1(str): First parameter param2(str): Second parameter", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}} Use the following pydantic model json schema for each tool call you will make: {"properties": {"name": {"title": "Name", "type": "string"}, "arguments": {"title": "Arguments", "type": "object"}}, "required": ["name", "arguments"], "title": "FunctionCall", "type": "object"}} +For each function call return a json object with function name and arguments within XML tags as follows: + +{"name": , "arguments": } +<|im_end|> +<|im_start|>user +Hello, please help me.<|im_end|> +<|im_start|>assistant + +{"name": "test_function_name", "arguments": {"param1": "value1", "param2": "value2"}} +<|im_end|> +' +Common Suffix: '' +Left (difference): '' +Right (difference): '' + +=== Checking Reasoning Variables === +No reasoning/thinking-related variables were queried by the template + +================================================================================ + ANALYZING TEMPLATE: models/templates/NousResearch-Hermes-3-Llama-3.1-8B-tool_use.jinja +================================================================================ + +=== Template Capabilities (from jinja::caps) === +supports_tools: true +supports_tool_calls: true +supports_system_role: true +supports_parallel_tool_calls: true +requires_typed_content: false + +=== Diff: With vs Without Tools (single user message) === +Common Prefix: '<|im_start|>system +You are a function calling AI model. You are provided with function signatures within XML tags. You may call one or more functions to assist with the user query. 
Don't make assumptions about what values to plug into functions. Here are the available tools: ' +Common Suffix: ' Use the following pydantic model json schema for each tool call you will make: {"properties": {"name": {"title": "Name", "type": "string"}, "arguments": {"title": "Arguments", "type": "object"}}, "required": ["name", "arguments"], "title": "FunctionCall", "type": "object"}} +For each function call return a json object with function name and arguments within XML tags as follows: + +{"name": , "arguments": } +<|im_end|> +<|im_start|>user +Hello, please help me.<|im_end|> +' +Left (difference): '' +Right (difference): '{"type": "function", "function": {"name": "test_function_name", "description": "test_function_name(param1: str, param2: str) - A test function for debugging + + Args: + param1(str): First parameter param2(str): Second parameter", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}' + +=== Diff: With vs Without add_generation_prompt (single user message) === +Common Prefix: '<|im_start|>system +You are a function calling AI model. You are provided with function signatures within XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. 
Here are the available tools: Use the following pydantic model json schema for each tool call you will make: {"properties": {"name": {"title": "Name", "type": "string"}, "arguments": {"title": "Arguments", "type": "object"}}, "required": ["name", "arguments"], "title": "FunctionCall", "type": "object"}} +For each function call return a json object with function name and arguments within XML tags as follows: + +{"name": , "arguments": } +<|im_end|> +<|im_start|>user +Hello, please help me.<|im_end|> +' +Common Suffix: '' +Left (difference): '' +Right (difference): '<|im_start|>assistant +' + +=== Diff: With vs Without reasoning_content (user, assistant) === +Common Prefix: '<|im_start|>system +You are a function calling AI model. You are provided with function signatures within XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools: Use the following pydantic model json schema for each tool call you will make: {"properties": {"name": {"title": "Name", "type": "string"}, "arguments": {"title": "Arguments", "type": "object"}}, "required": ["name", "arguments"], "title": "FunctionCall", "type": "object"}} +For each function call return a json object with function name and arguments within XML tags as follows: + +{"name": , "arguments": } +<|im_end|> +<|im_start|>user +Hello, please help me.<|im_end|> +<|im_start|>assistant +I can help you with that.<|im_end|> +' +Common Suffix: '' +Left (difference): '' +Right (difference): '' + +=== Diff: With vs Without reasoning_content (user, assistant, user) === +Common Prefix: '<|im_start|>system +You are a function calling AI model. You are provided with function signatures within XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. 
Here are the available tools: Use the following pydantic model json schema for each tool call you will make: {"properties": {"name": {"title": "Name", "type": "string"}, "arguments": {"title": "Arguments", "type": "object"}}, "required": ["name", "arguments"], "title": "FunctionCall", "type": "object"}} +For each function call return a json object with function name and arguments within XML tags as follows: + +{"name": , "arguments": } +<|im_end|> +<|im_start|>user +Hello, please help me.<|im_end|> +<|im_start|>assistant +I can help you with that.<|im_end|> +<|im_start|>user +Thank you.<|im_end|> +' +Common Suffix: '' +Left (difference): '' +Right (difference): '' + +=== Diff: With vs Without tool call (user, assistant) === +Common Prefix: '<|im_start|>system +You are a function calling AI model. You are provided with function signatures within XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools: {"type": "function", "function": {"name": "test_function_name", "description": "test_function_name(param1: str, param2: str) - A test function for debugging + + Args: + param1(str): First parameter param2(str): Second parameter", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}} Use the following pydantic model json schema for each tool call you will make: {"properties": {"name": {"title": "Name", "type": "string"}, "arguments": {"title": "Arguments", "type": "object"}}, "required": ["name", "arguments"], "title": "FunctionCall", "type": "object"}} +For each function call return a json object with function name and arguments within XML tags as follows: + +{"name": , "arguments": } +<|im_end|> +<|im_start|>user +Hello, please help me.<|im_end|> +<|im_start|>assistant +' +Common Suffix: '<|im_end|> 
+' +Left (difference): 'Let me help you.' +Right (difference): ' +{"name": "test_function_name", "arguments": {"param1": "value1", "param2": "value2"}} +' + +=== Diff: With vs Without tool call (user, assistant, user) === +Common Prefix: '<|im_start|>system +You are a function calling AI model. You are provided with function signatures within XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools: {"type": "function", "function": {"name": "test_function_name", "description": "test_function_name(param1: str, param2: str) - A test function for debugging + + Args: + param1(str): First parameter param2(str): Second parameter", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}} Use the following pydantic model json schema for each tool call you will make: {"properties": {"name": {"title": "Name", "type": "string"}, "arguments": {"title": "Arguments", "type": "object"}}, "required": ["name", "arguments"], "title": "FunctionCall", "type": "object"}} +For each function call return a json object with function name and arguments within XML tags as follows: + +{"name": , "arguments": } +<|im_end|> +<|im_start|>user +Hello, please help me.<|im_end|> +<|im_start|>assistant +' +Common Suffix: '<|im_end|> +<|im_start|>user +Continue.<|im_end|> +' +Left (difference): 'Let me help you.' +Right (difference): ' +{"name": "test_function_name", "arguments": {"param1": "value1", "param2": "value2"}} +' + +=== Diff: One vs Two tool calls (user, assistant) === +Common Prefix: '<|im_start|>system +You are a function calling AI model. You are provided with function signatures within XML tags. You may call one or more functions to assist with the user query. 
Don't make assumptions about what values to plug into functions. Here are the available tools: {"type": "function", "function": {"name": "test_function_name", "description": "test_function_name(param1: str, param2: str) - A test function for debugging + + Args: + param1(str): First parameter param2(str): Second parameter", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}} Use the following pydantic model json schema for each tool call you will make: {"properties": {"name": {"title": "Name", "type": "string"}, "arguments": {"title": "Arguments", "type": "object"}}, "required": ["name", "arguments"], "title": "FunctionCall", "type": "object"}} +For each function call return a json object with function name and arguments within XML tags as follows: + +{"name": , "arguments": } +<|im_end|> +<|im_start|>user +Hello, please help me.<|im_end|> +<|im_start|>assistant + +{"name": "test_function_name", "arguments": {"param1": "value1", "param2": "value2"}} +' +Common Suffix: '<|im_end|> +' +Left (difference): '' +Right (difference): ' + +{"name": "test_function_name", "arguments": {"param1": "value3", "param2": "value4"}} +' + +=== Diff: One vs Two tool calls (user, assistant, user) === +Common Prefix: '<|im_start|>system +You are a function calling AI model. You are provided with function signatures within XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. 
Here are the available tools: {"type": "function", "function": {"name": "test_function_name", "description": "test_function_name(param1: str, param2: str) - A test function for debugging + + Args: + param1(str): First parameter param2(str): Second parameter", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}} Use the following pydantic model json schema for each tool call you will make: {"properties": {"name": {"title": "Name", "type": "string"}, "arguments": {"title": "Arguments", "type": "object"}}, "required": ["name", "arguments"], "title": "FunctionCall", "type": "object"}} +For each function call return a json object with function name and arguments within XML tags as follows: + +{"name": , "arguments": } +<|im_end|> +<|im_start|>user +Hello, please help me.<|im_end|> +<|im_start|>assistant + +{"name": "test_function_name", "arguments": {"param1": "value1", "param2": "value2"}} +' +Common Suffix: '<|im_end|> +<|im_start|>user +Continue.<|im_end|> +' +Left (difference): '' +Right (difference): ' + +{"name": "test_function_name", "arguments": {"param1": "value3", "param2": "value4"}} +' + +=== Diff: Tool call with vs without reasoning_content (user, assistant) === +Common Prefix: '<|im_start|>system +You are a function calling AI model. You are provided with function signatures within XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. 
Here are the available tools: {"type": "function", "function": {"name": "test_function_name", "description": "test_function_name(param1: str, param2: str) - A test function for debugging + + Args: + param1(str): First parameter param2(str): Second parameter", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}} Use the following pydantic model json schema for each tool call you will make: {"properties": {"name": {"title": "Name", "type": "string"}, "arguments": {"title": "Arguments", "type": "object"}}, "required": ["name", "arguments"], "title": "FunctionCall", "type": "object"}} +For each function call return a json object with function name and arguments within XML tags as follows: + +{"name": , "arguments": } +<|im_end|> +<|im_start|>user +Hello, please help me.<|im_end|> +<|im_start|>assistant + +{"name": "test_function_name", "arguments": {"param1": "value1", "param2": "value2"}} +<|im_end|> +' +Common Suffix: '' +Left (difference): '' +Right (difference): '' + +=== Checking Reasoning Variables === +No reasoning/thinking-related variables were queried by the template + +================================================================================ + ANALYZING TEMPLATE: models/templates/Qwen-QwQ-32B.jinja +================================================================================ + +=== Template Capabilities (from jinja::caps) === +supports_tools: false +supports_tool_calls: true +supports_system_role: true +supports_parallel_tool_calls: true +requires_typed_content: false + +=== Diff: With vs Without Tools (single user message) === +Common Prefix: '<|im_start|>' +Common Suffix: 'user +Hello, please help me.<|im_end|> +' +Left (difference): '' +Right (difference): 'system + + +# Tools + +You may call one or more functions to assist with the user query. 
+ +You are provided with function signatures within XML tags: + +{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}} + + +For each function call, return a json object with function name and arguments within XML tags: + +{"name": , "arguments": } +<|im_end|> +<|im_start|>' + +=== Diff: With vs Without add_generation_prompt (single user message) === +Common Prefix: '<|im_start|>user +Hello, please help me.<|im_end|> +' +Common Suffix: '' +Left (difference): '' +Right (difference): '<|im_start|>assistant + +' + +=== Diff: With vs Without reasoning_content (user, assistant) === +Common Prefix: '<|im_start|>user +Hello, please help me.<|im_end|> +<|im_start|>assistant +I can help you with that.<|im_end|> +' +Common Suffix: '' +Left (difference): '' +Right (difference): '' + +=== Diff: With vs Without reasoning_content (user, assistant, user) === +Common Prefix: '<|im_start|>user +Hello, please help me.<|im_end|> +<|im_start|>assistant +I can help you with that.<|im_end|> +<|im_start|>user +Thank you.<|im_end|> +' +Common Suffix: '' +Left (difference): '' +Right (difference): '' + +=== Diff: With vs Without tool call (user, assistant) === +Common Prefix: '<|im_start|>system + + +# Tools + +You may call one or more functions to assist with the user query. 
+ +You are provided with function signatures within XML tags: + +{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}} + + +For each function call, return a json object with function name and arguments within XML tags: + +{"name": , "arguments": } +<|im_end|> +<|im_start|>user +Hello, please help me.<|im_end|> +<|im_start|>assistant +' +Common Suffix: '<|im_end|> +' +Left (difference): 'Let me help you.' +Right (difference): ' +{"name": "test_function_name", "arguments": {"param1": "value1", "param2": "value2"}} +' +Analysis failed: +------------ +While executing CallExpression at line 31, column 52 in source: +... {%- set content = message.content.split('')[-1].lstrip('\n') %}↵ ... + ^ +Error: Callee is not a function: got Undefined (hint: 'split') + +================================================================================ + ANALYZING TEMPLATE: models/templates/Qwen-Qwen2.5-7B-Instruct.jinja +================================================================================ + +=== Template Capabilities (from jinja::caps) === +supports_tools: false +supports_tool_calls: true +supports_system_role: true +supports_parallel_tool_calls: true +requires_typed_content: false + +=== Diff: With vs Without Tools (single user message) === +Common Prefix: '<|im_start|>system +You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' +Common Suffix: '<|im_end|> +<|im_start|>user +Hello, please help me.<|im_end|> +' +Left (difference): '' +Right (difference): ' + +# Tools + +You may call one or more functions to assist with the user query. 
+ +You are provided with function signatures within XML tags: + +{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}} + + +For each function call, return a json object with function name and arguments within XML tags: + +{"name": , "arguments": } +' + +=== Diff: With vs Without add_generation_prompt (single user message) === +Common Prefix: '<|im_start|>system +You are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|> +<|im_start|>user +Hello, please help me.<|im_end|> +' +Common Suffix: '' +Left (difference): '' +Right (difference): '<|im_start|>assistant +' + +=== Diff: With vs Without reasoning_content (user, assistant) === +Common Prefix: '<|im_start|>system +You are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|> +<|im_start|>user +Hello, please help me.<|im_end|> +<|im_start|>assistant +I can help you with that.<|im_end|> +' +Common Suffix: '' +Left (difference): '' +Right (difference): '' + +=== Diff: With vs Without reasoning_content (user, assistant, user) === +Common Prefix: '<|im_start|>system +You are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|> +<|im_start|>user +Hello, please help me.<|im_end|> +<|im_start|>assistant +I can help you with that.<|im_end|> +<|im_start|>user +Thank you.<|im_end|> +' +Common Suffix: '' +Left (difference): '' +Right (difference): '' + +=== Diff: With vs Without tool call (user, assistant) === +Common Prefix: '<|im_start|>system +You are Qwen, created by Alibaba Cloud. You are a helpful assistant. + +# Tools + +You may call one or more functions to assist with the user query. 
+ +You are provided with function signatures within XML tags: + +{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}} + + +For each function call, return a json object with function name and arguments within XML tags: + +{"name": , "arguments": } +<|im_end|> +<|im_start|>user +Hello, please help me.<|im_end|> +<|im_start|>assistant +' +Common Suffix: '<|im_end|> +' +Left (difference): 'Let me help you.' +Right (difference): ' +{"name": "test_function_name", "arguments": {"param1": "value1", "param2": "value2"}} +' + +=== Diff: With vs Without tool call (user, assistant, user) === +Common Prefix: '<|im_start|>system +You are Qwen, created by Alibaba Cloud. You are a helpful assistant. + +# Tools + +You may call one or more functions to assist with the user query. + +You are provided with function signatures within XML tags: + +{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}} + + +For each function call, return a json object with function name and arguments within XML tags: + +{"name": , "arguments": } +<|im_end|> +<|im_start|>user +Hello, please help me.<|im_end|> +<|im_start|>assistant +' +Common Suffix: '<|im_end|> +<|im_start|>user +Continue.<|im_end|> +' +Left (difference): 'Let me help you.' 
+Right (difference): ' +{"name": "test_function_name", "arguments": {"param1": "value1", "param2": "value2"}} +' + +=== Diff: One vs Two tool calls (user, assistant) === +Common Prefix: '<|im_start|>system +You are Qwen, created by Alibaba Cloud. You are a helpful assistant. + +# Tools + +You may call one or more functions to assist with the user query. + +You are provided with function signatures within XML tags: + +{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}} + + +For each function call, return a json object with function name and arguments within XML tags: + +{"name": , "arguments": } +<|im_end|> +<|im_start|>user +Hello, please help me.<|im_end|> +<|im_start|>assistant + +{"name": "test_function_name", "arguments": {"param1": "value1", "param2": "value2"}} +' +Common Suffix: '<|im_end|> +' +Left (difference): '' +Right (difference): ' + +{"name": "test_function_name", "arguments": {"param1": "value3", "param2": "value4"}} +' + +=== Diff: One vs Two tool calls (user, assistant, user) === +Common Prefix: '<|im_start|>system +You are Qwen, created by Alibaba Cloud. You are a helpful assistant. + +# Tools + +You may call one or more functions to assist with the user query. 
+ +You are provided with function signatures within XML tags: + +{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}} + + +For each function call, return a json object with function name and arguments within XML tags: + +{"name": , "arguments": } +<|im_end|> +<|im_start|>user +Hello, please help me.<|im_end|> +<|im_start|>assistant + +{"name": "test_function_name", "arguments": {"param1": "value1", "param2": "value2"}} +' +Common Suffix: '<|im_end|> +<|im_start|>user +Continue.<|im_end|> +' +Left (difference): '' +Right (difference): ' + +{"name": "test_function_name", "arguments": {"param1": "value3", "param2": "value4"}} +' + +=== Diff: Tool call with vs without reasoning_content (user, assistant) === +Common Prefix: '<|im_start|>system +You are Qwen, created by Alibaba Cloud. You are a helpful assistant. + +# Tools + +You may call one or more functions to assist with the user query. 
+ +You are provided with function signatures within XML tags: + +{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}} + + +For each function call, return a json object with function name and arguments within XML tags: + +{"name": , "arguments": } +<|im_end|> +<|im_start|>user +Hello, please help me.<|im_end|> +<|im_start|>assistant + +{"name": "test_function_name", "arguments": {"param1": "value1", "param2": "value2"}} +<|im_end|> +' +Common Suffix: '' +Left (difference): '' +Right (difference): '' + +=== Checking Reasoning Variables === +No reasoning/thinking-related variables were queried by the template + +================================================================================ + ANALYZING TEMPLATE: models/templates/Qwen3-Coder.jinja +================================================================================ + +=== Template Capabilities (from jinja::caps) === +supports_tools: true +supports_tool_calls: true +supports_system_role: true +supports_parallel_tool_calls: true +requires_typed_content: false + +=== Diff: With vs Without Tools (single user message) === +Common Prefix: '<|im_start|>' +Common Suffix: 'user +Hello, please help me.<|im_end|> +' +Left (difference): '' +Right (difference): 'system +You are Qwen, a helpful AI assistant that can interact with a computer to solve tasks. 
+ +# Tools + +You have access to the following tools: + + + +test_function_name +A test function for debugging + + +param1 +string +First parameter + + +param2 +string +Second parameter + +["param1", "param2"] + + + + +If you choose to call a tool ONLY reply in the following format with NO suffix: + + + + +value_1 + + +value_2 + + + + + +Reminder: +- Function calls MUST follow the specified format: the tool calling block MUST begin with an opening tag and end with a closing tag. +- Required parameters MUST be specified +- You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after +- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls +<|im_end|> +<|im_start|>' + +=== Diff: With vs Without add_generation_prompt (single user message) === +Common Prefix: '<|im_start|>user +Hello, please help me.<|im_end|> +' +Common Suffix: '' +Left (difference): '' +Right (difference): '<|im_start|>assistant +' + +=== Diff: With vs Without reasoning_content (user, assistant) === +Common Prefix: '<|im_start|>user +Hello, please help me.<|im_end|> +<|im_start|>assistant +I can help you with that.<|im_end|> +' +Common Suffix: '' +Left (difference): '' +Right (difference): '' + +=== Diff: With vs Without reasoning_content (user, assistant, user) === +Common Prefix: '<|im_start|>user +Hello, please help me.<|im_end|> +<|im_start|>assistant +I can help you with that.<|im_end|> +<|im_start|>user +Thank you.<|im_end|> +' +Common Suffix: '' +Left (difference): '' +Right (difference): '' + +=== Diff: With vs Without tool call (user, assistant) === +Common Prefix: '<|im_start|>system +You are Qwen, a helpful AI assistant that can interact with a computer to solve tasks. 
+ +# Tools + +You have access to the following tools: + + + +test_function_name +A test function for debugging + + +param1 +string +First parameter + + +param2 +string +Second parameter + +["param1", "param2"] + + + + +If you choose to call a tool ONLY reply in the following format with NO suffix: + + + + +value_1 + + +value_2 + + + + + +Reminder: +- Function calls MUST follow the specified format: the tool calling block MUST begin with an opening tag and end with a closing tag. +- Required parameters MUST be specified +- You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after +- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls +<|im_end|> +<|im_start|>user +Hello, please help me.<|im_end|> +<|im_start|>assistant +' +Common Suffix: '<|im_end|> +' +Left (difference): 'Let me help you.' +Right (difference): ' + + +value1 + + +value2 + + +' + +=== Diff: With vs Without tool call (user, assistant, user) === +Common Prefix: '<|im_start|>system +You are Qwen, a helpful AI assistant that can interact with a computer to solve tasks. + +# Tools + +You have access to the following tools: + + + +test_function_name +A test function for debugging + + +param1 +string +First parameter + + +param2 +string +Second parameter + +["param1", "param2"] + + + + +If you choose to call a tool ONLY reply in the following format with NO suffix: + + + + +value_1 + + +value_2 + + + + + +Reminder: +- Function calls MUST follow the specified format: the tool calling block MUST begin with an opening tag and end with a closing tag. 
+- Required parameters MUST be specified +- You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after +- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls +<|im_end|> +<|im_start|>user +Hello, please help me.<|im_end|> +<|im_start|>assistant +' +Common Suffix: '<|im_end|> +<|im_start|>user +Continue.<|im_end|> +' +Left (difference): 'Let me help you.' +Right (difference): ' + + +value1 + + +value2 + + +' + +=== Diff: One vs Two tool calls (user, assistant) === +Common Prefix: '<|im_start|>system +You are Qwen, a helpful AI assistant that can interact with a computer to solve tasks. + +# Tools + +You have access to the following tools: + + + +test_function_name +A test function for debugging + + +param1 +string +First parameter + + +param2 +string +Second parameter + +["param1", "param2"] + + + + +If you choose to call a tool ONLY reply in the following format with NO suffix: + + + + +value_1 + + +value_2 + + + + + +Reminder: +- Function calls MUST follow the specified format: the tool calling block MUST begin with an opening tag and end with a closing tag. +- Required parameters MUST be specified +- You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after +- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls +<|im_end|> +<|im_start|>user +Hello, please help me.<|im_end|> +<|im_start|>assistant + + + +value1 + + +value2 + + +' +Common Suffix: '<|im_end|> +' +Left (difference): '' +Right (difference): ' + + + +value3 + + +value4 + + +' + +=== Diff: One vs Two tool calls (user, assistant, user) === +Common Prefix: '<|im_start|>system +You are Qwen, a helpful AI assistant that can interact with a computer to solve tasks. 
+ +# Tools + +You have access to the following tools: + + + +test_function_name +A test function for debugging + + +param1 +string +First parameter + + +param2 +string +Second parameter + +["param1", "param2"] + + + + +If you choose to call a tool ONLY reply in the following format with NO suffix: + + + + +value_1 + + +value_2 + + + + + +Reminder: +- Function calls MUST follow the specified format: the tool calling block MUST begin with an opening tag and end with a closing tag. +- Required parameters MUST be specified +- You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after +- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls +<|im_end|> +<|im_start|>user +Hello, please help me.<|im_end|> +<|im_start|>assistant + + + +value1 + + +value2 + + +' +Common Suffix: '<|im_end|> +<|im_start|>user +Continue.<|im_end|> +' +Left (difference): '' +Right (difference): ' + + + +value3 + + +value4 + + +' + +=== Diff: Tool call with vs without reasoning_content (user, assistant) === +Common Prefix: '<|im_start|>system +You are Qwen, a helpful AI assistant that can interact with a computer to solve tasks. + +# Tools + +You have access to the following tools: + + + +test_function_name +A test function for debugging + + +param1 +string +First parameter + + +param2 +string +Second parameter + +["param1", "param2"] + + + + +If you choose to call a tool ONLY reply in the following format with NO suffix: + + + + +value_1 + + +value_2 + + + + + +Reminder: +- Function calls MUST follow the specified format: the tool calling block MUST begin with an opening tag and end with a closing tag. 
+- Required parameters MUST be specified +- You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after +- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls +<|im_end|> +<|im_start|>user +Hello, please help me.<|im_end|> +<|im_start|>assistant + + + +value1 + + +value2 + + +<|im_end|> +' +Common Suffix: '' +Left (difference): '' +Right (difference): '' + +=== Checking Reasoning Variables === +No reasoning/thinking-related variables were queried by the template + +================================================================================ + ANALYZING TEMPLATE: models/templates/deepseek-ai-DeepSeek-R1-Distill-Llama-8B.jinja +================================================================================ + +=== Template Capabilities (from jinja::caps) === +supports_tools: false +supports_tool_calls: false +supports_system_role: true +supports_parallel_tool_calls: false +requires_typed_content: false + +=== Diff: With vs Without Tools (single user message) === +Common Prefix: '<|User|>Hello, please help me.' +Common Suffix: '' +Left (difference): '' +Right (difference): '' + +=== Diff: With vs Without add_generation_prompt (single user message) === +Common Prefix: '<|User|>Hello, please help me.' +Common Suffix: '' +Left (difference): '' +Right (difference): '<|Assistant|> +' + +=== Diff: With vs Without reasoning_content (user, assistant) === +Common Prefix: '<|User|>Hello, please help me.<|Assistant|>I can help you with that.<|end▁of▁sentence|>' +Common Suffix: '' +Left (difference): '' +Right (difference): '' + +=== Diff: With vs Without reasoning_content (user, assistant, user) === +Common Prefix: '<|User|>Hello, please help me.<|Assistant|>I can help you with that.<|end▁of▁sentence|><|User|>Thank you.' 
+Common Suffix: '' +Left (difference): '' +Right (difference): '' + +=== Diff: With vs Without tool call (user, assistant) === +Common Prefix: '<|User|>Hello, please help me.<|Assistant|>' +Common Suffix: '' +Left (difference): 'Let me help you.<|end▁of▁sentence|>' +Right (difference): '<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>test_function_name +```json +{"param1":0x6414d8b028d0, "param2":0x6414d8abda40} +```<|tool▁call▁end|>' + +=== Diff: With vs Without tool call (user, assistant, user) === +Common Prefix: '<|User|>Hello, please help me.<|Assistant|>' +Common Suffix: '<|User|>Continue.' +Left (difference): 'Let me help you.<|end▁of▁sentence|>' +Right (difference): '<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>test_function_name +```json +{"param1":0x6414d8abea30, "param2":0x6414d8aba0e0} +```<|tool▁call▁end|>' + +=== Diff: One vs Two tool calls (user, assistant) === +Common Prefix: '<|User|>Hello, please help me.<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>test_function_name +```json +{"param1":0x6414d8a' +Common Suffix: '' +Left (difference): 'ce970, "param2":0x6414d8abbb70} +```<|tool▁call▁end|>' +Right (difference): 'b5ac0, "param2":0x6414d8aba960} +```<|tool▁call▁end|> +<|tool▁call▁begin|>function<|tool▁sep|>test_function_name +```json +{"param1":0x6414d8b09df0, "param2":0x6414d8b0d3a0} +```<|tool▁call▁end|><|tool▁calls▁end|><|end▁of▁sentence|>' + +=== Diff: One vs Two tool calls (user, assistant, user) === +Common Prefix: '<|User|>Hello, please help me.<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>test_function_name +```json +{"param1":0x6414d8' +Common Suffix: '<|User|>Continue.' 
+Left (difference): 'abb400, "param2":0x6414d8aff760} +```<|tool▁call▁end|>' +Right (difference): 'b17be0, "param2":0x6414d8ab7550} +```<|tool▁call▁end|> +<|tool▁call▁begin|>function<|tool▁sep|>test_function_name +```json +{"param1":0x6414d8aba960, "param2":0x6414d8ab5ac0} +```<|tool▁call▁end|><|tool▁calls▁end|><|end▁of▁sentence|>' + +=== Diff: Tool call with vs without reasoning_content (user, assistant) === +Common Prefix: '<|User|>Hello, please help me.<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>test_function_name +```json +{"param1":0x6414d8a' +Common Suffix: '0} +```<|tool▁call▁end|>' +Left (difference): 'def30, "param2":0x6414d8aba0e' +Right (difference): 'be4e0, "param2":0x6414d8ae266' + +=== Checking Reasoning Variables === +No reasoning/thinking-related variables were queried by the template + +================================================================================ + ANALYZING TEMPLATE: models/templates/deepseek-ai-DeepSeek-R1-Distill-Qwen-32B.jinja +================================================================================ + +=== Template Capabilities (from jinja::caps) === +supports_tools: false +supports_tool_calls: false +supports_system_role: true +supports_parallel_tool_calls: false +requires_typed_content: false + +=== Diff: With vs Without Tools (single user message) === +Common Prefix: '<|User|>Hello, please help me.' +Common Suffix: '' +Left (difference): '' +Right (difference): '' + +=== Diff: With vs Without add_generation_prompt (single user message) === +Common Prefix: '<|User|>Hello, please help me.' 
+Common Suffix: '' +Left (difference): '' +Right (difference): '<|Assistant|> +' + +=== Diff: With vs Without reasoning_content (user, assistant) === +Common Prefix: '<|User|>Hello, please help me.<|Assistant|>I can help you with that.<|end▁of▁sentence|>' +Common Suffix: '' +Left (difference): '' +Right (difference): '' + +=== Diff: With vs Without reasoning_content (user, assistant, user) === +Common Prefix: '<|User|>Hello, please help me.<|Assistant|>I can help you with that.<|end▁of▁sentence|><|User|>Thank you.' +Common Suffix: '' +Left (difference): '' +Right (difference): '' + +=== Diff: With vs Without tool call (user, assistant) === +Common Prefix: '<|User|>Hello, please help me.<|Assistant|>' +Common Suffix: '<|end▁of▁sentence|>' +Left (difference): 'Let me help you.' +Right (difference): '<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>test_function_name +```json +{"param1":0x6414d8ab9ec0, "param2":0x6414d8ac6240} +```<|tool▁call▁end|><|tool▁calls▁end|>' + +=== Diff: With vs Without tool call (user, assistant, user) === +Common Prefix: '<|User|>Hello, please help me.<|Assistant|>' +Common Suffix: '<|end▁of▁sentence|><|User|>Continue.' +Left (difference): 'Let me help you.' 
+Right (difference): '<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>test_function_name +```json +{"param1":0x6414d8aeabd0, "param2":0x6414d8abda40} +```<|tool▁call▁end|><|tool▁calls▁end|>' + +=== Diff: One vs Two tool calls (user, assistant) === +Common Prefix: '<|User|>Hello, please help me.<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>test_function_name +```json +{"param1":0x6414d8ab' +Common Suffix: '0} +```<|tool▁call▁end|><|tool▁calls▁end|><|end▁of▁sentence|>' +Left (difference): 'b620, "param2":0x6414d8abd82' +Right (difference): '9420, "param2":0x6414d8ac14b0} +```<|tool▁call▁end|> +<|tool▁call▁begin|>function<|tool▁sep|>test_function_name +```json +{"param1":0x6414d8abbb70, "param2":0x6414d8abda4' + +=== Diff: One vs Two tool calls (user, assistant, user) === +Common Prefix: '<|User|>Hello, please help me.<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>test_function_name +```json +{"param1":0x6414d8a' +Common Suffix: '0} +```<|tool▁call▁end|><|tool▁calls▁end|><|end▁of▁sentence|><|User|>Continue.' 
+Left (difference): 'c4150, "param2":0x6414d8abf2b' +Right (difference): 'bea30, "param2":0x6414d8aba410} +```<|tool▁call▁end|> +<|tool▁call▁begin|>function<|tool▁sep|>test_function_name +```json +{"param1":0x6414d8ab9420, "param2":0x6414d8ac46a' + +=== Diff: Tool call with vs without reasoning_content (user, assistant) === +Common Prefix: '<|User|>Hello, please help me.<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>test_function_name +```json +{"param1":0x6414d8ab' +Common Suffix: '0} +```<|tool▁call▁end|><|tool▁calls▁end|><|end▁of▁sentence|>' +Left (difference): '5ac0, "param2":0x6414d8adef3' +Right (difference): 'a960, "param2":0x6414d8ac4bf' + +=== Checking Reasoning Variables === +No reasoning/thinking-related variables were queried by the template + +================================================================================ + ANALYZING TEMPLATE: models/templates/deepseek-ai-DeepSeek-V3.1.jinja +================================================================================ + +=== Template Capabilities (from jinja::caps) === +supports_tools: false +supports_tool_calls: true +supports_system_role: true +supports_parallel_tool_calls: true +requires_typed_content: false + +=== Diff: With vs Without Tools (single user message) === +Common Prefix: '<|User|>Hello, please help me.' +Common Suffix: '' +Left (difference): '' +Right (difference): '' + +=== Diff: With vs Without add_generation_prompt (single user message) === +Common Prefix: '<|User|>Hello, please help me.' 
+Common Suffix: '' +Left (difference): '' +Right (difference): '<|Assistant|>' + +=== Diff: With vs Without reasoning_content (user, assistant) === +Common Prefix: '<|User|>Hello, please help me.<|Assistant|>I can help you with that.<|end▁of▁sentence|>' +Common Suffix: '' +Left (difference): '' +Right (difference): '' + +=== Diff: With vs Without reasoning_content (user, assistant, user) === +Common Prefix: '<|User|>Hello, please help me.<|Assistant|>I can help you with that.<|end▁of▁sentence|><|User|>Thank you.' +Common Suffix: '' +Left (difference): '' +Right (difference): '' + +=== Diff: With vs Without tool call (user, assistant) === +Common Prefix: '<|User|>Hello, please help me.<|Assistant|>' +Common Suffix: '<|end▁of▁sentence|>' +Left (difference): 'Let me help you.' +Right (difference): '<|tool▁calls▁begin|><|tool▁call▁begin|>test_function_name<|tool▁sep|>{"param1":0x6414d8ae5a10, "param2":0x6414d8abe810}<|tool▁call▁end|><|tool▁calls▁end|>' + +=== Diff: With vs Without tool call (user, assistant, user) === +Common Prefix: '<|User|>Hello, please help me.<|Assistant|>' +Common Suffix: '<|end▁of▁sentence|><|User|>Continue.' +Left (difference): 'Let me help you.' 
+Right (difference): '<|tool▁calls▁begin|><|tool▁call▁begin|>test_function_name<|tool▁sep|>{"param1":0x6414d8ac7cb0, "param2":0x6414d8abe4e0}<|tool▁call▁end|><|tool▁calls▁end|>' + +=== Diff: One vs Two tool calls (user, assistant) === +Common Prefix: '<|User|>Hello, please help me.<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>test_function_name<|tool▁sep|>{"param1":0x6414d8a' +Common Suffix: '0}<|tool▁call▁end|><|tool▁calls▁end|><|end▁of▁sentence|>' +Left (difference): 'c5f10, "param2":0x6414d8ac59c' +Right (difference): 'be4e0, "param2":0x6414d8ac2e30}<|tool▁call▁end|><|tool▁call▁begin|>test_function_name<|tool▁sep|>{"param1":0x6414d8acbf00, "param2":0x6414d8ac7cb' + +=== Diff: One vs Two tool calls (user, assistant, user) === +Common Prefix: '<|User|>Hello, please help me.<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>test_function_name<|tool▁sep|>{"param1":0x6414d8' +Common Suffix: '0}<|tool▁call▁end|><|tool▁calls▁end|><|end▁of▁sentence|><|User|>Continue.' +Left (difference): 'b0a810, "param2":0x6414d8adf7b' +Right (difference): 'ac2e30, "param2":0x6414d8ac46a0}<|tool▁call▁end|><|tool▁call▁begin|>test_function_name<|tool▁sep|>{"param1":0x6414d8ac3e20, "param2":0x6414d8abe4e' + +=== Diff: Tool call with vs without reasoning_content (user, assistant) === +Common Prefix: '<|User|>Hello, please help me.<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>test_function_name<|tool▁sep|>{"param1":0x6414d8ac' +Common Suffix: '0}<|tool▁call▁end|><|tool▁calls▁end|><|end▁of▁sentence|>' +Left (difference): '7cb0, "param2":0x6414d8ad63e' +Right (difference): '59c0, "param2":0x6414d8abc0c' + +=== Checking Reasoning Variables === +No reasoning/thinking-related variables were queried by the template + +================================================================================ + ANALYZING TEMPLATE: models/templates/fireworks-ai-llama-3-firefunction-v2.jinja +================================================================================ + +=== Template 
Capabilities (from jinja::caps) === +supports_tools: false +supports_tool_calls: true +supports_system_role: true +supports_parallel_tool_calls: true +requires_typed_content: false + +=== Diff: With vs Without Tools (single user message) === +Common Prefix: '<|start_header_id|>system<|end_header_id|> + +You are a helpful assistant with access to functions. +In addition to plain text responses, you can chose to call one or more of the provided functions. + +Use the following rule to decide when to call a function: + * if the response can be generated from your internal knowledge (e.g., as in the case of queries like "What is the capital of Poland?"), do so + * if you need external information that can be obtained by calling one or more of the provided functions, generate a function calls + +If you decide to call functions: + * prefix function calls with functools marker (no closing marker required) + * all function calls should be generated in a single JSON list formatted as functools[{"name": [function name], "arguments": [function arguments as JSON]}, ...] + * follow the provided JSON schema. Do not hallucinate arguments or values. Do to blindly copy values from the provided samples + * respect the argument type formatting. E.g., if the type if number and format is float, write value 7 as 7.0 + * make sure you pick the right functions that match the user intent + +Available functions as JSON spec: + +Today is .<|eot_id|><|start_header_id|>user<|end_header_id|> + +Hello, please help me.<|eot_id|><|start_header_id|>assistant<|end_header_id|> + +' +Common Suffix: '' +Left (difference): '' +Right (difference): '' + +=== Diff: With vs Without add_generation_prompt (single user message) === +Common Prefix: '<|start_header_id|>system<|end_header_id|> + +You are a helpful assistant with access to functions. +In addition to plain text responses, you can chose to call one or more of the provided functions. 
+ +Use the following rule to decide when to call a function: + * if the response can be generated from your internal knowledge (e.g., as in the case of queries like "What is the capital of Poland?"), do so + * if you need external information that can be obtained by calling one or more of the provided functions, generate a function calls + +If you decide to call functions: + * prefix function calls with functools marker (no closing marker required) + * all function calls should be generated in a single JSON list formatted as functools[{"name": [function name], "arguments": [function arguments as JSON]}, ...] + * follow the provided JSON schema. Do not hallucinate arguments or values. Do to blindly copy values from the provided samples + * respect the argument type formatting. E.g., if the type if number and format is float, write value 7 as 7.0 + * make sure you pick the right functions that match the user intent + +Available functions as JSON spec: + +Today is .<|eot_id|><|start_header_id|>user<|end_header_id|> + +Hello, please help me.<|eot_id|><|start_header_id|>assistant<|end_header_id|> + +' +Common Suffix: '' +Left (difference): '' +Right (difference): '' + +=== Diff: With vs Without reasoning_content (user, assistant) === +Common Prefix: '<|start_header_id|>system<|end_header_id|> + +You are a helpful assistant with access to functions. +In addition to plain text responses, you can chose to call one or more of the provided functions. 
+ +Use the following rule to decide when to call a function: + * if the response can be generated from your internal knowledge (e.g., as in the case of queries like "What is the capital of Poland?"), do so + * if you need external information that can be obtained by calling one or more of the provided functions, generate a function calls + +If you decide to call functions: + * prefix function calls with functools marker (no closing marker required) + * all function calls should be generated in a single JSON list formatted as functools[{"name": [function name], "arguments": [function arguments as JSON]}, ...] + * follow the provided JSON schema. Do not hallucinate arguments or values. Do to blindly copy values from the provided samples + * respect the argument type formatting. E.g., if the type if number and format is float, write value 7 as 7.0 + * make sure you pick the right functions that match the user intent + +Available functions as JSON spec: + +Today is .<|eot_id|><|start_header_id|>user<|end_header_id|> + +Hello, please help me.<|eot_id|><|start_header_id|>assistant<|end_header_id|> + +I can help you with that.<|eot_id|><|start_header_id|>assistant<|end_header_id|> + +' +Common Suffix: '' +Left (difference): '' +Right (difference): '' + +=== Diff: With vs Without reasoning_content (user, assistant, user) === +Common Prefix: '<|start_header_id|>system<|end_header_id|> + +You are a helpful assistant with access to functions. +In addition to plain text responses, you can chose to call one or more of the provided functions. 
+ +Use the following rule to decide when to call a function: + * if the response can be generated from your internal knowledge (e.g., as in the case of queries like "What is the capital of Poland?"), do so + * if you need external information that can be obtained by calling one or more of the provided functions, generate a function calls + +If you decide to call functions: + * prefix function calls with functools marker (no closing marker required) + * all function calls should be generated in a single JSON list formatted as functools[{"name": [function name], "arguments": [function arguments as JSON]}, ...] + * follow the provided JSON schema. Do not hallucinate arguments or values. Do to blindly copy values from the provided samples + * respect the argument type formatting. E.g., if the type if number and format is float, write value 7 as 7.0 + * make sure you pick the right functions that match the user intent + +Available functions as JSON spec: + +Today is .<|eot_id|><|start_header_id|>user<|end_header_id|> + +Hello, please help me.<|eot_id|><|start_header_id|>assistant<|end_header_id|> + +I can help you with that.<|eot_id|><|start_header_id|>user<|end_header_id|> + +Thank you.<|eot_id|><|start_header_id|>assistant<|end_header_id|> + +' +Common Suffix: '' +Left (difference): '' +Right (difference): '' + +=== Diff: With vs Without tool call (user, assistant) === +Common Prefix: '<|start_header_id|>system<|end_header_id|> + +You are a helpful assistant with access to functions. +In addition to plain text responses, you can chose to call one or more of the provided functions. 
+ +Use the following rule to decide when to call a function: + * if the response can be generated from your internal knowledge (e.g., as in the case of queries like "What is the capital of Poland?"), do so + * if you need external information that can be obtained by calling one or more of the provided functions, generate a function calls + +If you decide to call functions: + * prefix function calls with functools marker (no closing marker required) + * all function calls should be generated in a single JSON list formatted as functools[{"name": [function name], "arguments": [function arguments as JSON]}, ...] + * follow the provided JSON schema. Do not hallucinate arguments or values. Do to blindly copy values from the provided samples + * respect the argument type formatting. E.g., if the type if number and format is float, write value 7 as 7.0 + * make sure you pick the right functions that match the user intent + +Available functions as JSON spec: + +Today is .<|eot_id|><|start_header_id|>user<|end_header_id|> + +Hello, please help me.<|eot_id|><|start_header_id|>assistant<|end_header_id|> + +' +Common Suffix: '<|eot_id|><|start_header_id|>assistant<|end_header_id|> + +' +Left (difference): 'Let me help you.' +Right (difference): ' functools[{"name": "test_function_name", "arguments": {"param1":0x6414d8ab9970, "param2":0x6414d8ac3380}}]' + +=== Diff: With vs Without tool call (user, assistant, user) === +Common Prefix: '<|start_header_id|>system<|end_header_id|> + +You are a helpful assistant with access to functions. +In addition to plain text responses, you can chose to call one or more of the provided functions. 
+ +Use the following rule to decide when to call a function: + * if the response can be generated from your internal knowledge (e.g., as in the case of queries like "What is the capital of Poland?"), do so + * if you need external information that can be obtained by calling one or more of the provided functions, generate a function calls + +If you decide to call functions: + * prefix function calls with functools marker (no closing marker required) + * all function calls should be generated in a single JSON list formatted as functools[{"name": [function name], "arguments": [function arguments as JSON]}, ...] + * follow the provided JSON schema. Do not hallucinate arguments or values. Do to blindly copy values from the provided samples + * respect the argument type formatting. E.g., if the type if number and format is float, write value 7 as 7.0 + * make sure you pick the right functions that match the user intent + +Available functions as JSON spec: + +Today is .<|eot_id|><|start_header_id|>user<|end_header_id|> + +Hello, please help me.<|eot_id|><|start_header_id|>assistant<|end_header_id|> + +' +Common Suffix: '<|eot_id|><|start_header_id|>user<|end_header_id|> + +Continue.<|eot_id|><|start_header_id|>assistant<|end_header_id|> + +' +Left (difference): 'Let me help you.' +Right (difference): ' functools[{"name": "test_function_name", "arguments": {"param1":0x6414d8b04f50, "param2":0x6414d8ab6cd0}}]' + +=== Diff: One vs Two tool calls (user, assistant) === +Common Prefix: '<|start_header_id|>system<|end_header_id|> + +You are a helpful assistant with access to functions. +In addition to plain text responses, you can chose to call one or more of the provided functions. 
+ +Use the following rule to decide when to call a function: + * if the response can be generated from your internal knowledge (e.g., as in the case of queries like "What is the capital of Poland?"), do so + * if you need external information that can be obtained by calling one or more of the provided functions, generate a function calls + +If you decide to call functions: + * prefix function calls with functools marker (no closing marker required) + * all function calls should be generated in a single JSON list formatted as functools[{"name": [function name], "arguments": [function arguments as JSON]}, ...] + * follow the provided JSON schema. Do not hallucinate arguments or values. Do to blindly copy values from the provided samples + * respect the argument type formatting. E.g., if the type if number and format is float, write value 7 as 7.0 + * make sure you pick the right functions that match the user intent + +Available functions as JSON spec: + +Today is .<|eot_id|><|start_header_id|>user<|end_header_id|> + +Hello, please help me.<|eot_id|><|start_header_id|>assistant<|end_header_id|> + + functools[{"name": "test_function_name", "arguments": {"param1":0x6414d8a' +Common Suffix: '0}}]<|eot_id|><|start_header_id|>assistant<|end_header_id|> + +' +Left (difference): 'bb950, "param2":0x6414d8abb40' +Right (difference): 'c1290, "param2":0x6414d8abf2b0}}, {"name": "test_function_name", "arguments": {"param1":0x6414d8b04f50, "param2":0x6414d8adef3' + +=== Diff: One vs Two tool calls (user, assistant, user) === +Common Prefix: '<|start_header_id|>system<|end_header_id|> + +You are a helpful assistant with access to functions. +In addition to plain text responses, you can chose to call one or more of the provided functions. 
+ +Use the following rule to decide when to call a function: + * if the response can be generated from your internal knowledge (e.g., as in the case of queries like "What is the capital of Poland?"), do so + * if you need external information that can be obtained by calling one or more of the provided functions, generate a function calls + +If you decide to call functions: + * prefix function calls with functools marker (no closing marker required) + * all function calls should be generated in a single JSON list formatted as functools[{"name": [function name], "arguments": [function arguments as JSON]}, ...] + * follow the provided JSON schema. Do not hallucinate arguments or values. Do to blindly copy values from the provided samples + * respect the argument type formatting. E.g., if the type if number and format is float, write value 7 as 7.0 + * make sure you pick the right functions that match the user intent + +Available functions as JSON spec: + +Today is .<|eot_id|><|start_header_id|>user<|end_header_id|> + +Hello, please help me.<|eot_id|><|start_header_id|>assistant<|end_header_id|> + + functools[{"name": "test_function_name", "arguments": {"param1":0x6414d8a' +Common Suffix: '0}}]<|eot_id|><|start_header_id|>user<|end_header_id|> + +Continue.<|eot_id|><|start_header_id|>assistant<|end_header_id|> + +' +Left (difference): 'c4150, "param2":0x6414d8b103a' +Right (difference): 'bed60, "param2":0x6414d8ae0430}}, {"name": "test_function_name", "arguments": {"param1":0x6414d8adef30, "param2":0x6414d8b04f5' + +=== Diff: Tool call with vs without reasoning_content (user, assistant) === +Common Prefix: '<|start_header_id|>system<|end_header_id|> + +You are a helpful assistant with access to functions. +In addition to plain text responses, you can chose to call one or more of the provided functions. 
+ +Use the following rule to decide when to call a function: + * if the response can be generated from your internal knowledge (e.g., as in the case of queries like "What is the capital of Poland?"), do so + * if you need external information that can be obtained by calling one or more of the provided functions, generate a function calls + +If you decide to call functions: + * prefix function calls with functools marker (no closing marker required) + * all function calls should be generated in a single JSON list formatted as functools[{"name": [function name], "arguments": [function arguments as JSON]}, ...] + * follow the provided JSON schema. Do not hallucinate arguments or values. Do to blindly copy values from the provided samples + * respect the argument type formatting. E.g., if the type if number and format is float, write value 7 as 7.0 + * make sure you pick the right functions that match the user intent + +Available functions as JSON spec: + +Today is .<|eot_id|><|start_header_id|>user<|end_header_id|> + +Hello, please help me.<|eot_id|><|start_header_id|>assistant<|end_header_id|> + + functools[{"name": "test_function_name", "arguments": {"param1":0x6414d8a' +Common Suffix: '0}}]<|eot_id|><|start_header_id|>assistant<|end_header_id|> + +' +Left (difference): 'c46a0, "param2":0x6414d8b2231' +Right (difference): 'bb950, "param2":0x6414d8abb40' + +=== Checking Reasoning Variables === +No reasoning/thinking-related variables were queried by the template + +================================================================================ + ANALYZING TEMPLATE: models/templates/google-gemma-2-2b-it.jinja +================================================================================ + +=== Template Capabilities (from jinja::caps) === +supports_tools: false +supports_tool_calls: false +supports_system_role: false +supports_parallel_tool_calls: false +requires_typed_content: false + +=== Diff: With vs Without Tools (single user message) === +Common Prefix: 'user 
+Hello, please help me. +' +Common Suffix: '' +Left (difference): '' +Right (difference): '' + +=== Diff: With vs Without add_generation_prompt (single user message) === +Common Prefix: 'user +Hello, please help me. +' +Common Suffix: '' +Left (difference): '' +Right (difference): 'model +' + +=== Diff: With vs Without reasoning_content (user, assistant) === +Common Prefix: 'user +Hello, please help me. +model +I can help you with that. +' +Common Suffix: '' +Left (difference): '' +Right (difference): '' + +=== Diff: With vs Without reasoning_content (user, assistant, user) === +Common Prefix: 'user +Hello, please help me. +model +I can help you with that. +user +Thank you. +' +Common Suffix: '' +Left (difference): '' +Right (difference): '' + +=== Diff: With vs Without tool call (user, assistant) === +Common Prefix: 'user +Hello, please help me. +model +' +Common Suffix: ' +' +Left (difference): 'Let me help you.' +Right (difference): '' + +=== Diff: With vs Without tool call (user, assistant, user) === +Common Prefix: 'user +Hello, please help me. +model +' +Common Suffix: ' +user +Continue. +' +Left (difference): 'Let me help you.' +Right (difference): '' + +=== Diff: One vs Two tool calls (user, assistant) === +Common Prefix: 'user +Hello, please help me. +model + +' +Common Suffix: '' +Left (difference): '' +Right (difference): '' + +=== Diff: One vs Two tool calls (user, assistant, user) === +Common Prefix: 'user +Hello, please help me. +model + +user +Continue. +' +Common Suffix: '' +Left (difference): '' +Right (difference): '' + +=== Diff: Tool call with vs without reasoning_content (user, assistant) === +Common Prefix: 'user +Hello, please help me. 
+model + +' +Common Suffix: '' +Left (difference): '' +Right (difference): '' + +=== Checking Reasoning Variables === +No reasoning/thinking-related variables were queried by the template + +================================================================================ + ANALYZING TEMPLATE: models/templates/ibm-granite-granite-3.3-2B-Instruct.jinja +================================================================================ + +=== Template Capabilities (from jinja::caps) === +supports_tools: false +supports_tool_calls: false +supports_system_role: true +supports_parallel_tool_calls: false +requires_typed_content: false + +=== Diff: With vs Without Tools (single user message) === +Common Prefix: '<|start_of_role|>system<|end_of_role|>Knowledge Cutoff Date: April 2024. Today's Date: January 26, 2026. You are Granite, developed by IBM. You are a helpful ' +Common Suffix: '<|end_of_text|> +<|start_of_role|>user<|end_of_role|>Hello, please help me.<|end_of_text|> +' +Left (difference): 'AI assistant.' +Right (difference): 'assistant with access to the following tools. When a tool is required to answer the user's query, respond only with <|tool_call|> followed by a JSON list of tools used. If a tool does not exist in the provided list of tools, notify the user that you do not have the ability to fulfill the request.<|end_of_text|> +<|start_of_role|>available_tools<|end_of_role|>[ + { + "type": "function", + "function": { + "name": "test_function_name", + "description": "A test function for debugging", + "parameters": { + "type": "object", + "properties": { + "param1": { + "type": "string", + "description": "First parameter" + }, + "param2": { + "type": "string", + "description": "Second parameter" + } + }, + "required": [ + "param1", + "param2" + ] + } + } + } +]' + +=== Diff: With vs Without add_generation_prompt (single user message) === +Common Prefix: '<|start_of_role|>system<|end_of_role|>Knowledge Cutoff Date: April 2024. Today's Date: January 26, 2026. 
You are Granite, developed by IBM. You are a helpful AI assistant.<|end_of_text|> +<|start_of_role|>user<|end_of_role|>Hello, please help me.<|end_of_text|> +' +Common Suffix: '' +Left (difference): '' +Right (difference): '<|start_of_role|>assistant<|end_of_role|>' + +=== Diff: With vs Without reasoning_content (user, assistant) === +Common Prefix: '<|start_of_role|>system<|end_of_role|>Knowledge Cutoff Date: April 2024. Today's Date: January 26, 2026. You are Granite, developed by IBM. You are a helpful AI assistant.<|end_of_text|> +<|start_of_role|>user<|end_of_role|>Hello, please help me.<|end_of_text|> +<|start_of_role|>assistant<|end_of_role|>I can help you with that.<|end_of_text|> +' +Common Suffix: '' +Left (difference): '' +Right (difference): '' + +=== Diff: With vs Without reasoning_content (user, assistant, user) === +Common Prefix: '<|start_of_role|>system<|end_of_role|>Knowledge Cutoff Date: April 2024. Today's Date: January 26, 2026. You are Granite, developed by IBM. You are a helpful AI assistant.<|end_of_text|> +<|start_of_role|>user<|end_of_role|>Hello, please help me.<|end_of_text|> +<|start_of_role|>assistant<|end_of_role|>I can help you with that.<|end_of_text|> +<|start_of_role|>user<|end_of_role|>Thank you.<|end_of_text|> +' +Common Suffix: '' +Left (difference): '' +Right (difference): '' + +=== Diff: With vs Without tool call (user, assistant) === +Common Prefix: '<|start_of_role|>system<|end_of_role|>Knowledge Cutoff Date: April 2024. Today's Date: January 26, 2026. You are Granite, developed by IBM. You are a helpful assistant with access to the following tools. When a tool is required to answer the user's query, respond only with <|tool_call|> followed by a JSON list of tools used. 
If a tool does not exist in the provided list of tools, notify the user that you do not have the ability to fulfill the request.<|end_of_text|> +<|start_of_role|>available_tools<|end_of_role|>[ + { + "type": "function", + "function": { + "name": "test_function_name", + "description": "A test function for debugging", + "parameters": { + "type": "object", + "properties": { + "param1": { + "type": "string", + "description": "First parameter" + }, + "param2": { + "type": "string", + "description": "Second parameter" + } + }, + "required": [ + "param1", + "param2" + ] + } + } + } +]<|end_of_text|> +<|start_of_role|>user<|end_of_role|>Hello, please help me.<|end_of_text|> +<|start_of_role|>assistant<|end_of_role|>' +Common Suffix: '<|end_of_text|> +' +Left (difference): 'Let me help you.' +Right (difference): '' + +=== Diff: With vs Without tool call (user, assistant, user) === +Common Prefix: '<|start_of_role|>system<|end_of_role|>Knowledge Cutoff Date: April 2024. Today's Date: January 26, 2026. You are Granite, developed by IBM. You are a helpful assistant with access to the following tools. When a tool is required to answer the user's query, respond only with <|tool_call|> followed by a JSON list of tools used. 
If a tool does not exist in the provided list of tools, notify the user that you do not have the ability to fulfill the request.<|end_of_text|> +<|start_of_role|>available_tools<|end_of_role|>[ + { + "type": "function", + "function": { + "name": "test_function_name", + "description": "A test function for debugging", + "parameters": { + "type": "object", + "properties": { + "param1": { + "type": "string", + "description": "First parameter" + }, + "param2": { + "type": "string", + "description": "Second parameter" + } + }, + "required": [ + "param1", + "param2" + ] + } + } + } +]<|end_of_text|> +<|start_of_role|>user<|end_of_role|>Hello, please help me.<|end_of_text|> +<|start_of_role|>assistant<|end_of_role|>' +Common Suffix: '<|end_of_text|> +<|start_of_role|>user<|end_of_role|>Continue.<|end_of_text|> +' +Left (difference): 'Let me help you.' +Right (difference): '' + +=== Diff: One vs Two tool calls (user, assistant) === +Common Prefix: '<|start_of_role|>system<|end_of_role|>Knowledge Cutoff Date: April 2024. Today's Date: January 26, 2026. You are Granite, developed by IBM. You are a helpful assistant with access to the following tools. When a tool is required to answer the user's query, respond only with <|tool_call|> followed by a JSON list of tools used. 
If a tool does not exist in the provided list of tools, notify the user that you do not have the ability to fulfill the request.<|end_of_text|> +<|start_of_role|>available_tools<|end_of_role|>[ + { + "type": "function", + "function": { + "name": "test_function_name", + "description": "A test function for debugging", + "parameters": { + "type": "object", + "properties": { + "param1": { + "type": "string", + "description": "First parameter" + }, + "param2": { + "type": "string", + "description": "Second parameter" + } + }, + "required": [ + "param1", + "param2" + ] + } + } + } +]<|end_of_text|> +<|start_of_role|>user<|end_of_role|>Hello, please help me.<|end_of_text|> +<|start_of_role|>assistant<|end_of_role|><|end_of_text|> +' +Common Suffix: '' +Left (difference): '' +Right (difference): '' + +=== Diff: One vs Two tool calls (user, assistant, user) === +Common Prefix: '<|start_of_role|>system<|end_of_role|>Knowledge Cutoff Date: April 2024. Today's Date: January 26, 2026. You are Granite, developed by IBM. You are a helpful assistant with access to the following tools. When a tool is required to answer the user's query, respond only with <|tool_call|> followed by a JSON list of tools used. 
If a tool does not exist in the provided list of tools, notify the user that you do not have the ability to fulfill the request.<|end_of_text|> +<|start_of_role|>available_tools<|end_of_role|>[ + { + "type": "function", + "function": { + "name": "test_function_name", + "description": "A test function for debugging", + "parameters": { + "type": "object", + "properties": { + "param1": { + "type": "string", + "description": "First parameter" + }, + "param2": { + "type": "string", + "description": "Second parameter" + } + }, + "required": [ + "param1", + "param2" + ] + } + } + } +]<|end_of_text|> +<|start_of_role|>user<|end_of_role|>Hello, please help me.<|end_of_text|> +<|start_of_role|>assistant<|end_of_role|><|end_of_text|> +<|start_of_role|>user<|end_of_role|>Continue.<|end_of_text|> +' +Common Suffix: '' +Left (difference): '' +Right (difference): '' + +=== Diff: Tool call with vs without reasoning_content (user, assistant) === +Common Prefix: '<|start_of_role|>system<|end_of_role|>Knowledge Cutoff Date: April 2024. Today's Date: January 26, 2026. You are Granite, developed by IBM. You are a helpful assistant with access to the following tools. When a tool is required to answer the user's query, respond only with <|tool_call|> followed by a JSON list of tools used. 
If a tool does not exist in the provided list of tools, notify the user that you do not have the ability to fulfill the request.<|end_of_text|> +<|start_of_role|>available_tools<|end_of_role|>[ + { + "type": "function", + "function": { + "name": "test_function_name", + "description": "A test function for debugging", + "parameters": { + "type": "object", + "properties": { + "param1": { + "type": "string", + "description": "First parameter" + }, + "param2": { + "type": "string", + "description": "Second parameter" + } + }, + "required": [ + "param1", + "param2" + ] + } + } + } +]<|end_of_text|> +<|start_of_role|>user<|end_of_role|>Hello, please help me.<|end_of_text|> +<|start_of_role|>assistant<|end_of_role|><|end_of_text|> +' +Common Suffix: '' +Left (difference): '' +Right (difference): '' + +=== Checking Reasoning Variables === +No reasoning/thinking-related variables were queried by the template + +================================================================================ + ANALYZING TEMPLATE: models/templates/llama-cpp-deepseek-r1.jinja +================================================================================ + +=== Template Capabilities (from jinja::caps) === +supports_tools: false +supports_tool_calls: false +supports_system_role: true +supports_parallel_tool_calls: false +requires_typed_content: false + +=== Diff: With vs Without Tools (single user message) === +Common Prefix: '' +Common Suffix: '<|User|>Hello, please help me.<|end▁of▁sentence|>' +Left (difference): '' +Right (difference): 'You can call any of the following function tools to satisfy the user's requests: [ + { + "name": "test_function_name", + "description": "A test function for debugging", + "parameters": { + "type": "object", + "properties": { + "param1": { + "type": "string", + "description": "First parameter" + }, + "param2": { + "type": "string", + "description": "Second parameter" + } + }, + "required": [ + "param1", + "param2" + ] + } + } +] + +Example function tool call 
syntax: + +<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>example_function_name +```json +{ + "arg1": "some_value" + ... +} +``` +<|tool▁call▁end|><|tool▁calls▁end|> + +' + +=== Diff: With vs Without add_generation_prompt (single user message) === +Common Prefix: '<|User|>Hello, please help me.<|end▁of▁sentence|>' +Common Suffix: '' +Left (difference): '' +Right (difference): '<|Assistant|> +' + +=== Diff: With vs Without reasoning_content (user, assistant) === +Common Prefix: '<|User|>Hello, please help me.<|end▁of▁sentence|><|Assistant|>I can help you with that.<|end▁of▁sentence|>' +Common Suffix: '' +Left (difference): '' +Right (difference): '' + +=== Diff: With vs Without reasoning_content (user, assistant, user) === +Common Prefix: '<|User|>Hello, please help me.<|end▁of▁sentence|><|Assistant|>I can help you with that.<|end▁of▁sentence|><|User|>Thank you.<|end▁of▁sentence|>' +Common Suffix: '' +Left (difference): '' +Right (difference): '' + +=== Diff: With vs Without tool call (user, assistant) === +Common Prefix: 'You can call any of the following function tools to satisfy the user's requests: [ + { + "name": "test_function_name", + "description": "A test function for debugging", + "parameters": { + "type": "object", + "properties": { + "param1": { + "type": "string", + "description": "First parameter" + }, + "param2": { + "type": "string", + "description": "Second parameter" + } + }, + "required": [ + "param1", + "param2" + ] + } + } +] + +Example function tool call syntax: + +<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>example_function_name +```json +{ + "arg1": "some_value" + ... +} +``` +<|tool▁call▁end|><|tool▁calls▁end|> + +<|User|>Hello, please help me.<|end▁of▁sentence|><|Assistant|>' +Common Suffix: '<|end▁of▁sentence|>' +Left (difference): 'Let me help you.' 
+Right (difference): '<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>test_function_name +```json +{"param1":0x6414d8b06690, "param2":0x6414d8abf070} +```<|tool▁call▁end|><|tool▁calls▁end|>' + +=== Diff: With vs Without tool call (user, assistant, user) === +Common Prefix: 'You can call any of the following function tools to satisfy the user's requests: [ + { + "name": "test_function_name", + "description": "A test function for debugging", + "parameters": { + "type": "object", + "properties": { + "param1": { + "type": "string", + "description": "First parameter" + }, + "param2": { + "type": "string", + "description": "Second parameter" + } + }, + "required": [ + "param1", + "param2" + ] + } + } +] + +Example function tool call syntax: + +<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>example_function_name +```json +{ + "arg1": "some_value" + ... +} +``` +<|tool▁call▁end|><|tool▁calls▁end|> + +<|User|>Hello, please help me.<|end▁of▁sentence|><|Assistant|>' +Common Suffix: '<|end▁of▁sentence|><|User|>Continue.<|end▁of▁sentence|>' +Left (difference): 'Let me help you.' +Right (difference): '<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>test_function_name +```json +{"param1":0x6414d8acb270, "param2":0x6414d8ab6cd0} +```<|tool▁call▁end|><|tool▁calls▁end|>' + +=== Diff: One vs Two tool calls (user, assistant) === +Common Prefix: 'You can call any of the following function tools to satisfy the user's requests: [ + { + "name": "test_function_name", + "description": "A test function for debugging", + "parameters": { + "type": "object", + "properties": { + "param1": { + "type": "string", + "description": "First parameter" + }, + "param2": { + "type": "string", + "description": "Second parameter" + } + }, + "required": [ + "param1", + "param2" + ] + } + } +] + +Example function tool call syntax: + +<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>example_function_name +```json +{ + "arg1": "some_value" + ... 
+} +``` +<|tool▁call▁end|><|tool▁calls▁end|> + +<|User|>Hello, please help me.<|end▁of▁sentence|><|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>test_function_name +```json +{"param1":0x6414d8' +Common Suffix: '0} +```<|tool▁call▁end|><|tool▁calls▁end|><|end▁of▁sentence|>' +Left (difference): 'b09420, "param2":0x6414d8ac46a' +Right (difference): 'ac1290, "param2":0x6414d8ab6cd0} +```<|tool▁call▁end|> +<|tool▁call▁begin|>function<|tool▁sep|>test_function_name +```json +{"param1":0x6414d8b17be0, "param2":0x6414d8ae5f2' + +=== Diff: One vs Two tool calls (user, assistant, user) === +Common Prefix: 'You can call any of the following function tools to satisfy the user's requests: [ + { + "name": "test_function_name", + "description": "A test function for debugging", + "parameters": { + "type": "object", + "properties": { + "param1": { + "type": "string", + "description": "First parameter" + }, + "param2": { + "type": "string", + "description": "Second parameter" + } + }, + "required": [ + "param1", + "param2" + ] + } + } +] + +Example function tool call syntax: + +<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>example_function_name +```json +{ + "arg1": "some_value" + ... 
+} +``` +<|tool▁call▁end|><|tool▁calls▁end|> + +<|User|>Hello, please help me.<|end▁of▁sentence|><|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>test_function_name +```json +{"param1":0x6414d8' +Common Suffix: '0} +```<|tool▁call▁end|><|tool▁calls▁end|><|end▁of▁sentence|><|User|>Continue.<|end▁of▁sentence|>' +Left (difference): 'ac2c10, "param2":0x6414d8aba63' +Right (difference): 'b06690, "param2":0x6414d8ab6cd0} +```<|tool▁call▁end|> +<|tool▁call▁begin|>function<|tool▁sep|>test_function_name +```json +{"param1":0x6414d8aba960, "param2":0x6414d8aba0e' + +=== Diff: Tool call with vs without reasoning_content (user, assistant) === +Common Prefix: 'You can call any of the following function tools to satisfy the user's requests: [ + { + "name": "test_function_name", + "description": "A test function for debugging", + "parameters": { + "type": "object", + "properties": { + "param1": { + "type": "string", + "description": "First parameter" + }, + "param2": { + "type": "string", + "description": "Second parameter" + } + }, + "required": [ + "param1", + "param2" + ] + } + } +] + +Example function tool call syntax: + +<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>example_function_name +```json +{ + "arg1": "some_value" + ... 
+} +``` +<|tool▁call▁end|><|tool▁calls▁end|> + +<|User|>Hello, please help me.<|end▁of▁sentence|><|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>test_function_name +```json +{"param1":0x6414d8' +Common Suffix: '0} +```<|tool▁call▁end|><|tool▁calls▁end|><|end▁of▁sentence|>' +Left (difference): 'abf070, "param2":0x6414d8aba63' +Right (difference): 'b09420, "param2":0x6414d8b17be' + +=== Checking Reasoning Variables === +No reasoning/thinking-related variables were queried by the template + +================================================================================ + ANALYZING TEMPLATE: models/templates/meetkai-functionary-medium-v3.1.jinja +================================================================================ + +=== Template Capabilities (from jinja::caps) === +supports_tools: true +supports_tool_calls: true +supports_system_role: true +supports_parallel_tool_calls: true +requires_typed_content: false + +=== Diff: With vs Without Tools (single user message) === +Common Prefix: '<|start_header_id|>system<|end_header_id|> + + +Cutting Knowledge Date: December 2023 + +' +Common Suffix: '<|eot_id|><|start_header_id|>user<|end_header_id|> + +Hello, please help me.<|eot_id|>' +Left (difference): '' +Right (difference): ' +You have access to the following functions: + +Use the function 'test_function_name' to 'A test function for debugging' +{"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}} + + +Think very carefully before calling functions. +If a you choose to call a function ONLY reply in the following format: +<{start_tag}={function_name}>{parameters}{end_tag} +where + +start_tag => ` a JSON dict with the function argument name as key and function argument value as value. 
+end_tag => `` + +Here is an example, +{"example_name": "example_value"} + +Reminder: +- If looking for real time information use relevant functions before falling back to brave_search +- Function calls MUST follow the specified format, start with +- Required parameters MUST be specified +- Only call one function at a time +- Put the entire function call reply on one line + +' + +=== Diff: With vs Without add_generation_prompt (single user message) === +Common Prefix: '<|start_header_id|>system<|end_header_id|> + + +Cutting Knowledge Date: December 2023 + +<|eot_id|><|start_header_id|>user<|end_header_id|> + +Hello, please help me.<|eot_id|>' +Common Suffix: '' +Left (difference): '' +Right (difference): '<|start_header_id|>assistant<|end_header_id|> + +' + +=== Diff: With vs Without reasoning_content (user, assistant) === +Common Prefix: '<|start_header_id|>system<|end_header_id|> + + +Cutting Knowledge Date: December 2023 + +<|eot_id|><|start_header_id|>user<|end_header_id|> + +Hello, please help me.<|eot_id|><|start_header_id|>assistant<|end_header_id|> + +I can help you with that.<|eot_id|>' +Common Suffix: '' +Left (difference): '' +Right (difference): '' + +=== Diff: With vs Without reasoning_content (user, assistant, user) === +Common Prefix: '<|start_header_id|>system<|end_header_id|> + + +Cutting Knowledge Date: December 2023 + +<|eot_id|><|start_header_id|>user<|end_header_id|> + +Hello, please help me.<|eot_id|><|start_header_id|>assistant<|end_header_id|> + +I can help you with that.<|eot_id|><|start_header_id|>user<|end_header_id|> + +Thank you.<|eot_id|>' +Common Suffix: '' +Left (difference): '' +Right (difference): '' + +=== Diff: With vs Without tool call (user, assistant) === +Common Prefix: '<|start_header_id|>system<|end_header_id|> + + +Cutting Knowledge Date: December 2023 + + +You have access to the following functions: + +Use the function 'test_function_name' to 'A test function for debugging' +{"name": "test_function_name", "description": 
"A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}} + + +Think very carefully before calling functions. +If a you choose to call a function ONLY reply in the following format: +<{start_tag}={function_name}>{parameters}{end_tag} +where + +start_tag => ` a JSON dict with the function argument name as key and function argument value as value. +end_tag => `` + +Here is an example, +{"example_name": "example_value"} + +Reminder: +- If looking for real time information use relevant functions before falling back to brave_search +- Function calls MUST follow the specified format, start with +- Required parameters MUST be specified +- Only call one function at a time +- Put the entire function call reply on one line + +<|eot_id|><|start_header_id|>user<|end_header_id|> + +Hello, please help me.<|eot_id|><|start_header_id|>assistant<|end_header_id|> + +' +Common Suffix: '' +Left (difference): 'Let me help you.<|eot_id|>' +Right (difference): '{"param1":0x6414d8ae7330, "param2":0x6414d8aaf400}<|eom_id|>' + +=== Diff: With vs Without tool call (user, assistant, user) === +Common Prefix: '<|start_header_id|>system<|end_header_id|> + + +Cutting Knowledge Date: December 2023 + + +You have access to the following functions: + +Use the function 'test_function_name' to 'A test function for debugging' +{"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}} + + +Think very carefully before calling functions. 
+If a you choose to call a function ONLY reply in the following format: +<{start_tag}={function_name}>{parameters}{end_tag} +where + +start_tag => ` a JSON dict with the function argument name as key and function argument value as value. +end_tag => `` + +Here is an example, +{"example_name": "example_value"} + +Reminder: +- If looking for real time information use relevant functions before falling back to brave_search +- Function calls MUST follow the specified format, start with +- Required parameters MUST be specified +- Only call one function at a time +- Put the entire function call reply on one line + +<|eot_id|><|start_header_id|>user<|end_header_id|> + +Hello, please help me.<|eot_id|><|start_header_id|>assistant<|end_header_id|> + +' +Common Suffix: '<|start_header_id|>user<|end_header_id|> + +Continue.<|eot_id|>' +Left (difference): 'Let me help you.<|eot_id|>' +Right (difference): '{"param1":0x6414d8ac1f50, "param2":0x6414d8aba630}<|eom_id|>' + +=== Diff: One vs Two tool calls (user, assistant) === +Common Prefix: '<|start_header_id|>system<|end_header_id|> + + +Cutting Knowledge Date: December 2023 + + +You have access to the following functions: + +Use the function 'test_function_name' to 'A test function for debugging' +{"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}} + + +Think very carefully before calling functions. +If a you choose to call a function ONLY reply in the following format: +<{start_tag}={function_name}>{parameters}{end_tag} +where + +start_tag => ` a JSON dict with the function argument name as key and function argument value as value. 
+end_tag => `` + +Here is an example, +{"example_name": "example_value"} + +Reminder: +- If looking for real time information use relevant functions before falling back to brave_search +- Function calls MUST follow the specified format, start with +- Required parameters MUST be specified +- Only call one function at a time +- Put the entire function call reply on one line + +<|eot_id|><|start_header_id|>user<|end_header_id|> + +Hello, please help me.<|eot_id|><|start_header_id|>assistant<|end_header_id|> + +{"param1":0x6414d8ab' +Common Suffix: '0}<|eom_id|>' +Left (difference): '7aa0, "param2":0x6414d8abb62' +Right (difference): '62f0, "param2":0x6414d8aba410}{"param1":0x6414d8ac5360, "param2":0x6414d8b05ef' + +=== Diff: One vs Two tool calls (user, assistant, user) === +Common Prefix: '<|start_header_id|>system<|end_header_id|> + + +Cutting Knowledge Date: December 2023 + + +You have access to the following functions: + +Use the function 'test_function_name' to 'A test function for debugging' +{"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}} + + +Think very carefully before calling functions. +If a you choose to call a function ONLY reply in the following format: +<{start_tag}={function_name}>{parameters}{end_tag} +where + +start_tag => ` a JSON dict with the function argument name as key and function argument value as value. 
+end_tag => `` + +Here is an example, +{"example_name": "example_value"} + +Reminder: +- If looking for real time information use relevant functions before falling back to brave_search +- Function calls MUST follow the specified format, start with +- Required parameters MUST be specified +- Only call one function at a time +- Put the entire function call reply on one line + +<|eot_id|><|start_header_id|>user<|end_header_id|> + +Hello, please help me.<|eot_id|><|start_header_id|>assistant<|end_header_id|> + +{"param1":0x6414d8ab' +Common Suffix: '0}<|eom_id|><|start_header_id|>user<|end_header_id|> + +Continue.<|eot_id|>' +Left (difference): '9420, "param2":0x6414d8ae027' +Right (difference): 'e5f0, "param2":0x6414d8b24fa0}{"param1":0x6414d8ab7000, "param2":0x6414d8ac492' + +=== Diff: Tool call with vs without reasoning_content (user, assistant) === +Common Prefix: '<|start_header_id|>system<|end_header_id|> + + +Cutting Knowledge Date: December 2023 + + +You have access to the following functions: + +Use the function 'test_function_name' to 'A test function for debugging' +{"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}} + + +Think very carefully before calling functions. +If a you choose to call a function ONLY reply in the following format: +<{start_tag}={function_name}>{parameters}{end_tag} +where + +start_tag => ` a JSON dict with the function argument name as key and function argument value as value. 
+end_tag => `` + +Here is an example, +{"example_name": "example_value"} + +Reminder: +- If looking for real time information use relevant functions before falling back to brave_search +- Function calls MUST follow the specified format, start with +- Required parameters MUST be specified +- Only call one function at a time +- Put the entire function call reply on one line + +<|eot_id|><|start_header_id|>user<|end_header_id|> + +Hello, please help me.<|eot_id|><|start_header_id|>assistant<|end_header_id|> + +{"param1":0x6414d8ac' +Common Suffix: '0}<|eom_id|>' +Left (difference): 'b270, "param2":0x6414d8abf80' +Right (difference): '1290, "param2":0x6414d8abaeb' + +=== Checking Reasoning Variables === +No reasoning/thinking-related variables were queried by the template + +================================================================================ + ANALYZING TEMPLATE: models/templates/meetkai-functionary-medium-v3.2.jinja +================================================================================ + +=== Template Capabilities (from jinja::caps) === +supports_tools: true +supports_tool_calls: true +supports_system_role: true +supports_parallel_tool_calls: true +requires_typed_content: false + +=== Diff: With vs Without Tools (single user message) === +Common Prefix: '<|start_header_id|>system<|end_header_id|> + +You are capable of executing available function(s) if required. +Only execute function(s) when absolutely necessary. +Ask for the required input to:recipient==all +Use JSON for function arguments. +Respond in this format: +>>>${recipient} +${content} +Available functions: +// Supported function definitions that should be called when necessary. +namespace functions { + +' +Common Suffix: '} // namespace functions<|eot_id|><|start_header_id|>user<|end_header_id|> + +Hello, please help me.<|eot_id|>' +Left (difference): '' +Right (difference): '// A test function for debugging +type test_function_name = (_: { +// First parameter. 
+param1: string, +// Second parameter. +param2: string, +}) => any; + +' + +=== Diff: With vs Without add_generation_prompt (single user message) === +Common Prefix: '<|start_header_id|>system<|end_header_id|> + +You are capable of executing available function(s) if required. +Only execute function(s) when absolutely necessary. +Ask for the required input to:recipient==all +Use JSON for function arguments. +Respond in this format: +>>>${recipient} +${content} +Available functions: +// Supported function definitions that should be called when necessary. +namespace functions { + +} // namespace functions<|eot_id|><|start_header_id|>user<|end_header_id|> + +Hello, please help me.<|eot_id|>' +Common Suffix: '' +Left (difference): '' +Right (difference): '<|start_header_id|>assistant<|end_header_id|> + +>>>' + +=== Diff: With vs Without reasoning_content (user, assistant) === +Common Prefix: '<|start_header_id|>system<|end_header_id|> + +You are capable of executing available function(s) if required. +Only execute function(s) when absolutely necessary. +Ask for the required input to:recipient==all +Use JSON for function arguments. +Respond in this format: +>>>${recipient} +${content} +Available functions: +// Supported function definitions that should be called when necessary. +namespace functions { + +} // namespace functions<|eot_id|><|start_header_id|>user<|end_header_id|> + +Hello, please help me.<|eot_id|><|start_header_id|>assistant<|end_header_id|> + +>>>all +I can help you with that.<|eot_id|>' +Common Suffix: '' +Left (difference): '' +Right (difference): '' + +=== Diff: With vs Without reasoning_content (user, assistant, user) === +Common Prefix: '<|start_header_id|>system<|end_header_id|> + +You are capable of executing available function(s) if required. +Only execute function(s) when absolutely necessary. +Ask for the required input to:recipient==all +Use JSON for function arguments. 
+Respond in this format: +>>>${recipient} +${content} +Available functions: +// Supported function definitions that should be called when necessary. +namespace functions { + +} // namespace functions<|eot_id|><|start_header_id|>user<|end_header_id|> + +Hello, please help me.<|eot_id|><|start_header_id|>assistant<|end_header_id|> + +>>>all +I can help you with that.<|eot_id|><|start_header_id|>user<|end_header_id|> + +Thank you.<|eot_id|>' +Common Suffix: '' +Left (difference): '' +Right (difference): '' + +=== Diff: With vs Without tool call (user, assistant) === +Common Prefix: '<|start_header_id|>system<|end_header_id|> + +You are capable of executing available function(s) if required. +Only execute function(s) when absolutely necessary. +Ask for the required input to:recipient==all +Use JSON for function arguments. +Respond in this format: +>>>${recipient} +${content} +Available functions: +// Supported function definitions that should be called when necessary. +namespace functions { + +// A test function for debugging +type test_function_name = (_: { +// First parameter. +param1: string, +// Second parameter. +param2: string, +}) => any; + +} // namespace functions<|eot_id|><|start_header_id|>user<|end_header_id|> + +Hello, please help me.<|eot_id|><|start_header_id|>assistant<|end_header_id|> + +>>>' +Common Suffix: '<|eot_id|>' +Left (difference): 'all +Let me help you.' +Right (difference): 'test_function_name +{"param1":0x6414d8af9280, "param2":0x6414d8af8a90}' + +=== Diff: With vs Without tool call (user, assistant, user) === +Common Prefix: '<|start_header_id|>system<|end_header_id|> + +You are capable of executing available function(s) if required. +Only execute function(s) when absolutely necessary. +Ask for the required input to:recipient==all +Use JSON for function arguments. +Respond in this format: +>>>${recipient} +${content} +Available functions: +// Supported function definitions that should be called when necessary. 
+namespace functions { + +// A test function for debugging +type test_function_name = (_: { +// First parameter. +param1: string, +// Second parameter. +param2: string, +}) => any; + +} // namespace functions<|eot_id|><|start_header_id|>user<|end_header_id|> + +Hello, please help me.<|eot_id|><|start_header_id|>assistant<|end_header_id|> + +>>>' +Common Suffix: '<|eot_id|><|start_header_id|>user<|end_header_id|> + +Continue.<|eot_id|>' +Left (difference): 'all +Let me help you.' +Right (difference): 'test_function_name +{"param1":0x6414d8ae3c80, "param2":0x6414d8b39240}' + +=== Diff: One vs Two tool calls (user, assistant) === +Common Prefix: '<|start_header_id|>system<|end_header_id|> + +You are capable of executing available function(s) if required. +Only execute function(s) when absolutely necessary. +Ask for the required input to:recipient==all +Use JSON for function arguments. +Respond in this format: +>>>${recipient} +${content} +Available functions: +// Supported function definitions that should be called when necessary. +namespace functions { + +// A test function for debugging +type test_function_name = (_: { +// First parameter. +param1: string, +// Second parameter. +param2: string, +}) => any; + +} // namespace functions<|eot_id|><|start_header_id|>user<|end_header_id|> + +Hello, please help me.<|eot_id|><|start_header_id|>assistant<|end_header_id|> + +>>>test_function_name +{"param1":0x6414d8afff80, "param2":0x6414d8b3' +Common Suffix: '0}<|eot_id|>' +Left (difference): 'de2' +Right (difference): 'e8b0}>>>test_function_name +{"param1":0x6414d8ad7ff0, "param2":0x6414d8ae3c8' + +=== Diff: One vs Two tool calls (user, assistant, user) === +Common Prefix: '<|start_header_id|>system<|end_header_id|> + +You are capable of executing available function(s) if required. +Only execute function(s) when absolutely necessary. +Ask for the required input to:recipient==all +Use JSON for function arguments. 
+Respond in this format: +>>>${recipient} +${content} +Available functions: +// Supported function definitions that should be called when necessary. +namespace functions { + +// A test function for debugging +type test_function_name = (_: { +// First parameter. +param1: string, +// Second parameter. +param2: string, +}) => any; + +} // namespace functions<|eot_id|><|start_header_id|>user<|end_header_id|> + +Hello, please help me.<|eot_id|><|start_header_id|>assistant<|end_header_id|> + +>>>test_function_name +{"param1":0x6414d8a' +Common Suffix: '0}<|eot_id|><|start_header_id|>user<|end_header_id|> + +Continue.<|eot_id|>' +Left (difference): 'e4d40, "param2":0x6414d8abf04' +Right (difference): 'b5ac0, "param2":0x6414d8b3e8b0}>>>test_function_name +{"param1":0x6414d8b210c0, "param2":0x6414d8b0e31' + +=== Diff: Tool call with vs without reasoning_content (user, assistant) === +Common Prefix: '<|start_header_id|>system<|end_header_id|> + +You are capable of executing available function(s) if required. +Only execute function(s) when absolutely necessary. +Ask for the required input to:recipient==all +Use JSON for function arguments. +Respond in this format: +>>>${recipient} +${content} +Available functions: +// Supported function definitions that should be called when necessary. +namespace functions { + +// A test function for debugging +type test_function_name = (_: { +// First parameter. +param1: string, +// Second parameter. 
+param2: string, +}) => any; + +} // namespace functions<|eot_id|><|start_header_id|>user<|end_header_id|> + +Hello, please help me.<|eot_id|><|start_header_id|>assistant<|end_header_id|> + +>>>test_function_name +{"param1":0x6414d8a' +Common Suffix: '0}<|eot_id|>' +Left (difference): 'ba960, "param2":0x6414d8ab9ca' +Right (difference): 'c4bf0, "param2":0x6414d8ae4d4' + +=== Checking Reasoning Variables === +No reasoning/thinking-related variables were queried by the template + +================================================================================ + ANALYZING TEMPLATE: models/templates/meta-llama-Llama-3.1-8B-Instruct.jinja +================================================================================ + +=== Template Capabilities (from jinja::caps) === +supports_tools: false +supports_tool_calls: false +supports_system_role: true +supports_parallel_tool_calls: true +requires_typed_content: false + +=== Diff: With vs Without Tools (single user message) === +Common Prefix: '<|start_header_id|>system<|end_header_id|> + +Environment: ipython +Cutting Knowledge Date: December 2023 +Today Date: 26 Jul 2024 + +<|eot_id|><|start_header_id|>user<|end_header_id|> + +Given the following functions, please respond with a JSON for a function call with its proper arguments that best answers the given prompt. + +Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.Do not use variables. 
+ +' +Common Suffix: 'Hello, please help me.<|eot_id|>' +Left (difference): '' +Right (difference): '{ + "type": "function", + "function": { + "name": "test_function_name", + "description": "A test function for debugging", + "parameters": { + "type": "object", + "properties": { + "param1": { + "type": "string", + "description": "First parameter" + }, + "param2": { + "type": "string", + "description": "Second parameter" + } + }, + "required": [ + "param1", + "param2" + ] + } + } +} + +' + +=== Diff: With vs Without add_generation_prompt (single user message) === +Common Prefix: '<|start_header_id|>system<|end_header_id|> + +Environment: ipython +Cutting Knowledge Date: December 2023 +Today Date: 26 Jul 2024 + +<|eot_id|><|start_header_id|>user<|end_header_id|> + +Given the following functions, please respond with a JSON for a function call with its proper arguments that best answers the given prompt. + +Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.Do not use variables. + +Hello, please help me.<|eot_id|>' +Common Suffix: '' +Left (difference): '' +Right (difference): '<|start_header_id|>assistant<|end_header_id|> + +' + +=== Diff: With vs Without reasoning_content (user, assistant) === +Common Prefix: '<|start_header_id|>system<|end_header_id|> + +Environment: ipython +Cutting Knowledge Date: December 2023 +Today Date: 26 Jul 2024 + +<|eot_id|><|start_header_id|>user<|end_header_id|> + +Given the following functions, please respond with a JSON for a function call with its proper arguments that best answers the given prompt. + +Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.Do not use variables. 
+ +Hello, please help me.<|eot_id|><|start_header_id|>assistant<|end_header_id|> + +I can help you with that.<|eot_id|>' +Common Suffix: '' +Left (difference): '' +Right (difference): '' + +=== Diff: With vs Without reasoning_content (user, assistant, user) === +Common Prefix: '<|start_header_id|>system<|end_header_id|> + +Environment: ipython +Cutting Knowledge Date: December 2023 +Today Date: 26 Jul 2024 + +<|eot_id|><|start_header_id|>user<|end_header_id|> + +Given the following functions, please respond with a JSON for a function call with its proper arguments that best answers the given prompt. + +Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.Do not use variables. + +Hello, please help me.<|eot_id|><|start_header_id|>assistant<|end_header_id|> + +I can help you with that.<|eot_id|><|start_header_id|>user<|end_header_id|> + +Thank you.<|eot_id|>' +Common Suffix: '' +Left (difference): '' +Right (difference): '' + +=== Diff: With vs Without tool call (user, assistant) === +Common Prefix: '<|start_header_id|>system<|end_header_id|> + +Environment: ipython +Cutting Knowledge Date: December 2023 +Today Date: 26 Jul 2024 + +<|eot_id|><|start_header_id|>user<|end_header_id|> + +Given the following functions, please respond with a JSON for a function call with its proper arguments that best answers the given prompt. + +Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.Do not use variables. 
+ +{ + "type": "function", + "function": { + "name": "test_function_name", + "description": "A test function for debugging", + "parameters": { + "type": "object", + "properties": { + "param1": { + "type": "string", + "description": "First parameter" + }, + "param2": { + "type": "string", + "description": "Second parameter" + } + }, + "required": [ + "param1", + "param2" + ] + } + } +} + +Hello, please help me.<|eot_id|><|start_header_id|>assistant<|end_header_id|> + +' +Common Suffix: '<|eot_id|>' +Left (difference): 'Let me help you.' +Right (difference): '{"name": "test_function_name", "parameters": {"param1": "value1", "param2": "value2"}}' + +=== Diff: With vs Without tool call (user, assistant, user) === +Common Prefix: '<|start_header_id|>system<|end_header_id|> + +Environment: ipython +Cutting Knowledge Date: December 2023 +Today Date: 26 Jul 2024 + +<|eot_id|><|start_header_id|>user<|end_header_id|> + +Given the following functions, please respond with a JSON for a function call with its proper arguments that best answers the given prompt. + +Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.Do not use variables. + +{ + "type": "function", + "function": { + "name": "test_function_name", + "description": "A test function for debugging", + "parameters": { + "type": "object", + "properties": { + "param1": { + "type": "string", + "description": "First parameter" + }, + "param2": { + "type": "string", + "description": "Second parameter" + } + }, + "required": [ + "param1", + "param2" + ] + } + } +} + +Hello, please help me.<|eot_id|><|start_header_id|>assistant<|end_header_id|> + +' +Common Suffix: '<|eot_id|><|start_header_id|>user<|end_header_id|> + +Continue.<|eot_id|>' +Left (difference): 'Let me help you.' 
+Right (difference): '{"name": "test_function_name", "parameters": {"param1": "value1", "param2": "value2"}}' +Analysis failed: +------------ +While executing CallExpression at line 71, column 32 in source: +... == 1 %}↵ {{- raise_exception("This model only supports single tool-c... + ^ +Error: Jinja Exception: This model only supports single tool-calls at once! + +================================================================================ + ANALYZING TEMPLATE: models/templates/meta-llama-Llama-3.2-3B-Instruct.jinja +================================================================================ + +=== Template Capabilities (from jinja::caps) === +supports_tools: false +supports_tool_calls: false +supports_system_role: true +supports_parallel_tool_calls: true +requires_typed_content: false + +=== Diff: With vs Without Tools (single user message) === +Common Prefix: '<|start_header_id|>system<|end_header_id|> + +Environment: ipython +Cutting Knowledge Date: December 2023 +Today Date: 26 Jan 2026 + +<|eot_id|><|start_header_id|>user<|end_header_id|> + +Given the following functions, please respond with a JSON for a function call with its proper arguments that best answers the given prompt. + +Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.Do not use variables. 
+ +' +Common Suffix: 'Hello, please help me.<|eot_id|>' +Left (difference): '' +Right (difference): '{ + "type": "function", + "function": { + "name": "test_function_name", + "description": "A test function for debugging", + "parameters": { + "type": "object", + "properties": { + "param1": { + "type": "string", + "description": "First parameter" + }, + "param2": { + "type": "string", + "description": "Second parameter" + } + }, + "required": [ + "param1", + "param2" + ] + } + } +} + +' + +=== Diff: With vs Without add_generation_prompt (single user message) === +Common Prefix: '<|start_header_id|>system<|end_header_id|> + +Environment: ipython +Cutting Knowledge Date: December 2023 +Today Date: 26 Jan 2026 + +<|eot_id|><|start_header_id|>user<|end_header_id|> + +Given the following functions, please respond with a JSON for a function call with its proper arguments that best answers the given prompt. + +Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.Do not use variables. + +Hello, please help me.<|eot_id|>' +Common Suffix: '' +Left (difference): '' +Right (difference): '<|start_header_id|>assistant<|end_header_id|> + +' + +=== Diff: With vs Without reasoning_content (user, assistant) === +Common Prefix: '<|start_header_id|>system<|end_header_id|> + +Environment: ipython +Cutting Knowledge Date: December 2023 +Today Date: 26 Jan 2026 + +<|eot_id|><|start_header_id|>user<|end_header_id|> + +Given the following functions, please respond with a JSON for a function call with its proper arguments that best answers the given prompt. + +Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.Do not use variables. 
+ +Hello, please help me.<|eot_id|><|start_header_id|>assistant<|end_header_id|> + +I can help you with that.<|eot_id|>' +Common Suffix: '' +Left (difference): '' +Right (difference): '' + +=== Diff: With vs Without reasoning_content (user, assistant, user) === +Common Prefix: '<|start_header_id|>system<|end_header_id|> + +Environment: ipython +Cutting Knowledge Date: December 2023 +Today Date: 26 Jan 2026 + +<|eot_id|><|start_header_id|>user<|end_header_id|> + +Given the following functions, please respond with a JSON for a function call with its proper arguments that best answers the given prompt. + +Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.Do not use variables. + +Hello, please help me.<|eot_id|><|start_header_id|>assistant<|end_header_id|> + +I can help you with that.<|eot_id|><|start_header_id|>user<|end_header_id|> + +Thank you.<|eot_id|>' +Common Suffix: '' +Left (difference): '' +Right (difference): '' + +=== Diff: With vs Without tool call (user, assistant) === +Common Prefix: '<|start_header_id|>system<|end_header_id|> + +Environment: ipython +Cutting Knowledge Date: December 2023 +Today Date: 26 Jan 2026 + +<|eot_id|><|start_header_id|>user<|end_header_id|> + +Given the following functions, please respond with a JSON for a function call with its proper arguments that best answers the given prompt. + +Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.Do not use variables. 
+ +{ + "type": "function", + "function": { + "name": "test_function_name", + "description": "A test function for debugging", + "parameters": { + "type": "object", + "properties": { + "param1": { + "type": "string", + "description": "First parameter" + }, + "param2": { + "type": "string", + "description": "Second parameter" + } + }, + "required": [ + "param1", + "param2" + ] + } + } +} + +Hello, please help me.<|eot_id|><|start_header_id|>assistant<|end_header_id|> + +' +Common Suffix: '<|eot_id|>' +Left (difference): 'Let me help you.' +Right (difference): '{"name": "test_function_name", "parameters": {"param1": "value1", "param2": "value2"}}' + +=== Diff: With vs Without tool call (user, assistant, user) === +Common Prefix: '<|start_header_id|>system<|end_header_id|> + +Environment: ipython +Cutting Knowledge Date: December 2023 +Today Date: 26 Jan 2026 + +<|eot_id|><|start_header_id|>user<|end_header_id|> + +Given the following functions, please respond with a JSON for a function call with its proper arguments that best answers the given prompt. + +Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.Do not use variables. + +{ + "type": "function", + "function": { + "name": "test_function_name", + "description": "A test function for debugging", + "parameters": { + "type": "object", + "properties": { + "param1": { + "type": "string", + "description": "First parameter" + }, + "param2": { + "type": "string", + "description": "Second parameter" + } + }, + "required": [ + "param1", + "param2" + ] + } + } +} + +Hello, please help me.<|eot_id|><|start_header_id|>assistant<|end_header_id|> + +' +Common Suffix: '<|eot_id|><|start_header_id|>user<|end_header_id|> + +Continue.<|eot_id|>' +Left (difference): 'Let me help you.' 
+Right (difference): '{"name": "test_function_name", "parameters": {"param1": "value1", "param2": "value2"}}' +Analysis failed: +------------ +While executing CallExpression at line 72, column 32 in source: +... == 1 %}↵ {{- raise_exception("This model only supports single tool-c... + ^ +Error: Jinja Exception: This model only supports single tool-calls at once! + +================================================================================ + ANALYZING TEMPLATE: models/templates/meta-llama-Llama-3.3-70B-Instruct.jinja +================================================================================ + +=== Template Capabilities (from jinja::caps) === +supports_tools: false +supports_tool_calls: false +supports_system_role: true +supports_parallel_tool_calls: true +requires_typed_content: false + +=== Diff: With vs Without Tools (single user message) === +Common Prefix: '<|start_header_id|>system<|end_header_id|> + +Environment: ipython +Cutting Knowledge Date: December 2023 +Today Date: 26 Jul 2024 + +<|eot_id|><|start_header_id|>user<|end_header_id|> + +Given the following functions, please respond with a JSON for a function call with its proper arguments that best answers the given prompt. + +Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.Do not use variables. 
+ +' +Common Suffix: 'Hello, please help me.<|eot_id|>' +Left (difference): '' +Right (difference): '{ + "type": "function", + "function": { + "name": "test_function_name", + "description": "A test function for debugging", + "parameters": { + "type": "object", + "properties": { + "param1": { + "type": "string", + "description": "First parameter" + }, + "param2": { + "type": "string", + "description": "Second parameter" + } + }, + "required": [ + "param1", + "param2" + ] + } + } +} + +' + +=== Diff: With vs Without add_generation_prompt (single user message) === +Common Prefix: '<|start_header_id|>system<|end_header_id|> + +Environment: ipython +Cutting Knowledge Date: December 2023 +Today Date: 26 Jul 2024 + +<|eot_id|><|start_header_id|>user<|end_header_id|> + +Given the following functions, please respond with a JSON for a function call with its proper arguments that best answers the given prompt. + +Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.Do not use variables. + +Hello, please help me.<|eot_id|>' +Common Suffix: '' +Left (difference): '' +Right (difference): '<|start_header_id|>assistant<|end_header_id|> + +' + +=== Diff: With vs Without reasoning_content (user, assistant) === +Common Prefix: '<|start_header_id|>system<|end_header_id|> + +Environment: ipython +Cutting Knowledge Date: December 2023 +Today Date: 26 Jul 2024 + +<|eot_id|><|start_header_id|>user<|end_header_id|> + +Given the following functions, please respond with a JSON for a function call with its proper arguments that best answers the given prompt. + +Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.Do not use variables. 
+ +Hello, please help me.<|eot_id|><|start_header_id|>assistant<|end_header_id|> + +I can help you with that.<|eot_id|>' +Common Suffix: '' +Left (difference): '' +Right (difference): '' + +=== Diff: With vs Without reasoning_content (user, assistant, user) === +Common Prefix: '<|start_header_id|>system<|end_header_id|> + +Environment: ipython +Cutting Knowledge Date: December 2023 +Today Date: 26 Jul 2024 + +<|eot_id|><|start_header_id|>user<|end_header_id|> + +Given the following functions, please respond with a JSON for a function call with its proper arguments that best answers the given prompt. + +Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.Do not use variables. + +Hello, please help me.<|eot_id|><|start_header_id|>assistant<|end_header_id|> + +I can help you with that.<|eot_id|><|start_header_id|>user<|end_header_id|> + +Thank you.<|eot_id|>' +Common Suffix: '' +Left (difference): '' +Right (difference): '' + +=== Diff: With vs Without tool call (user, assistant) === +Common Prefix: '<|start_header_id|>system<|end_header_id|> + +Environment: ipython +Cutting Knowledge Date: December 2023 +Today Date: 26 Jul 2024 + +<|eot_id|><|start_header_id|>user<|end_header_id|> + +Given the following functions, please respond with a JSON for a function call with its proper arguments that best answers the given prompt. + +Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.Do not use variables. 
+ +{ + "type": "function", + "function": { + "name": "test_function_name", + "description": "A test function for debugging", + "parameters": { + "type": "object", + "properties": { + "param1": { + "type": "string", + "description": "First parameter" + }, + "param2": { + "type": "string", + "description": "Second parameter" + } + }, + "required": [ + "param1", + "param2" + ] + } + } +} + +Hello, please help me.<|eot_id|><|start_header_id|>assistant<|end_header_id|> + +' +Common Suffix: '<|eot_id|>' +Left (difference): 'Let me help you.' +Right (difference): '{"name": "test_function_name", "parameters": {"param1": "value1", "param2": "value2"}}' + +=== Diff: With vs Without tool call (user, assistant, user) === +Common Prefix: '<|start_header_id|>system<|end_header_id|> + +Environment: ipython +Cutting Knowledge Date: December 2023 +Today Date: 26 Jul 2024 + +<|eot_id|><|start_header_id|>user<|end_header_id|> + +Given the following functions, please respond with a JSON for a function call with its proper arguments that best answers the given prompt. + +Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.Do not use variables. + +{ + "type": "function", + "function": { + "name": "test_function_name", + "description": "A test function for debugging", + "parameters": { + "type": "object", + "properties": { + "param1": { + "type": "string", + "description": "First parameter" + }, + "param2": { + "type": "string", + "description": "Second parameter" + } + }, + "required": [ + "param1", + "param2" + ] + } + } +} + +Hello, please help me.<|eot_id|><|start_header_id|>assistant<|end_header_id|> + +' +Common Suffix: '<|eot_id|><|start_header_id|>user<|end_header_id|> + +Continue.<|eot_id|>' +Left (difference): 'Let me help you.' 
+Right (difference): '{"name": "test_function_name", "parameters": {"param1": "value1", "param2": "value2"}}' +Analysis failed: +------------ +While executing CallExpression at line 71, column 32 in source: +... == 1 %}↵ {{- raise_exception("This model only supports single tool-c... + ^ +Error: Jinja Exception: This model only supports single tool-calls at once! + +================================================================================ + ANALYZING TEMPLATE: models/templates/mistralai-Ministral-3-14B-Reasoning-2512.jinja +================================================================================ + +=== Template Capabilities (from jinja::caps) === +supports_tools: false +supports_tool_calls: false +supports_system_role: true +supports_parallel_tool_calls: true +requires_typed_content: false + +=== Diff: With vs Without Tools (single user message) === +Common Prefix: '[SYSTEM_PROMPT]# HOW YOU SHOULD THINK AND ANSWER + +First draft your thinking process (inner monologue) until you arrive at a response. Format your response using Markdown, and use LaTeX for any mathematical equations. Write both your thoughts and the response in the same language as the input. + +Your thinking process must follow the template below:[THINK]Your thoughts or/and draft, like working through an exercise on scratch paper. 
Be as casual and as long as you want until you are confident to generate the response to the user.[/THINK]Here, provide a self-contained response.[/SYSTEM_PROMPT]' +Common Suffix: '[INST]Hello, please help me.[/INST]' +Left (difference): '' +Right (difference): '[AVAILABLE_TOOLS][{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}}][/AVAILABLE_TOOLS]' + +=== Diff: With vs Without add_generation_prompt (single user message) === +Common Prefix: '[SYSTEM_PROMPT]# HOW YOU SHOULD THINK AND ANSWER + +First draft your thinking process (inner monologue) until you arrive at a response. Format your response using Markdown, and use LaTeX for any mathematical equations. Write both your thoughts and the response in the same language as the input. + +Your thinking process must follow the template below:[THINK]Your thoughts or/and draft, like working through an exercise on scratch paper. Be as casual and as long as you want until you are confident to generate the response to the user.[/THINK]Here, provide a self-contained response.[/SYSTEM_PROMPT][INST]Hello, please help me.[/INST]' +Common Suffix: '' +Left (difference): '' +Right (difference): '' + +=== Diff: With vs Without reasoning_content (user, assistant) === +Common Prefix: '[SYSTEM_PROMPT]# HOW YOU SHOULD THINK AND ANSWER + +First draft your thinking process (inner monologue) until you arrive at a response. Format your response using Markdown, and use LaTeX for any mathematical equations. Write both your thoughts and the response in the same language as the input. + +Your thinking process must follow the template below:[THINK]Your thoughts or/and draft, like working through an exercise on scratch paper. 
Be as casual and as long as you want until you are confident to generate the response to the user.[/THINK]Here, provide a self-contained response.[/SYSTEM_PROMPT][INST]Hello, please help me.[/INST]I can help you with that.' +Common Suffix: '' +Left (difference): '' +Right (difference): '' + +=== Diff: With vs Without reasoning_content (user, assistant, user) === +Common Prefix: '[SYSTEM_PROMPT]# HOW YOU SHOULD THINK AND ANSWER + +First draft your thinking process (inner monologue) until you arrive at a response. Format your response using Markdown, and use LaTeX for any mathematical equations. Write both your thoughts and the response in the same language as the input. + +Your thinking process must follow the template below:[THINK]Your thoughts or/and draft, like working through an exercise on scratch paper. Be as casual and as long as you want until you are confident to generate the response to the user.[/THINK]Here, provide a self-contained response.[/SYSTEM_PROMPT][INST]Hello, please help me.[/INST]I can help you with that.[INST]Thank you.[/INST]' +Common Suffix: '' +Left (difference): '' +Right (difference): '' +Analysis failed: +------------ +While executing FilterExpression at line 90, column 37 in source: +... }}↵ {%- elif message['content'] | length > 0 %}↵ {%- for bloc... 
+ ^ +Error: Unknown (built-in) filter 'length' for type None + +================================================================================ + ANALYZING TEMPLATE: models/templates/mistralai-Mistral-Nemo-Instruct-2407.jinja +================================================================================ + +=== Template Capabilities (from jinja::caps) === +supports_tools: false +supports_tool_calls: false +supports_system_role: true +supports_parallel_tool_calls: true +requires_typed_content: false + +=== Diff: With vs Without Tools (single user message) === +Common Prefix: '[INST]Hello, please help me.[/INST]' +Common Suffix: '' +Left (difference): '' +Right (difference): '' + +=== Diff: With vs Without add_generation_prompt (single user message) === +Common Prefix: '[INST]Hello, please help me.[/INST]' +Common Suffix: '' +Left (difference): '' +Right (difference): '' + +=== Diff: With vs Without reasoning_content (user, assistant) === +Common Prefix: '[INST]Hello, please help me.[/INST]I can help you with that.' +Common Suffix: '' +Left (difference): '' +Right (difference): '' + +=== Diff: With vs Without reasoning_content (user, assistant, user) === +Common Prefix: '[INST]Hello, please help me.[/INST]I can help you with that.[INST]Thank you.[/INST]' +Common Suffix: '' +Left (difference): '' +Right (difference): '' +Analysis failed: +------------ +While executing CallExpression at line 62, column 36 in source: +...9 %}↵ {{- raise_exception("Tool call IDs should be alphanumeric s... + ^ +Error: Jinja Exception: Tool call IDs should be alphanumeric strings with length 9! 
+ +================================================================================ + ANALYZING TEMPLATE: models/templates/moonshotai-Kimi-K2.jinja +================================================================================ + +=== Template Capabilities (from jinja::caps) === +supports_tools: false +supports_tool_calls: true +supports_system_role: true +supports_parallel_tool_calls: true +requires_typed_content: false + +=== Diff: With vs Without Tools (single user message) === +Common Prefix: '<|im_system|>' +Common Suffix: 'system<|im_middle|>You are a helpful assistant<|im_end|><|im_user|>user<|im_middle|>Hello, please help me.<|im_end|>' +Left (difference): '' +Right (difference): 'tool_declare<|im_middle|>[{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}}]<|im_end|><|im_system|>' + +=== Diff: With vs Without add_generation_prompt (single user message) === +Common Prefix: '<|im_system|>system<|im_middle|>You are a helpful assistant<|im_end|><|im_user|>user<|im_middle|>Hello, please help me.<|im_end|>' +Common Suffix: '' +Left (difference): '' +Right (difference): '<|im_assistant|>assistant<|im_middle|>' + +=== Diff: With vs Without reasoning_content (user, assistant) === +Common Prefix: '<|im_system|>system<|im_middle|>You are a helpful assistant<|im_end|><|im_user|>user<|im_middle|>Hello, please help me.<|im_end|><|im_assistant|>assistant<|im_middle|>I can help you with that.<|im_end|>' +Common Suffix: '' +Left (difference): '' +Right (difference): '' + +=== Diff: With vs Without reasoning_content (user, assistant, user) === +Common Prefix: '<|im_system|>system<|im_middle|>You are a helpful assistant<|im_end|><|im_user|>user<|im_middle|>Hello, please help 
me.<|im_end|><|im_assistant|>assistant<|im_middle|>I can help you with that.<|im_end|><|im_user|>user<|im_middle|>Thank you.<|im_end|>' +Common Suffix: '' +Left (difference): '' +Right (difference): '' + +=== Diff: With vs Without tool call (user, assistant) === +Common Prefix: '<|im_system|>tool_declare<|im_middle|>[{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}}]<|im_end|><|im_system|>system<|im_middle|>You are a helpful assistant<|im_end|><|im_user|>user<|im_middle|>Hello, please help me.<|im_end|><|im_assistant|>assistant<|im_middle|>' +Common Suffix: '<|im_end|>' +Left (difference): 'Let me help you.' +Right (difference): '<|tool_calls_section_begin|><|tool_call_begin|>functions.test_function_name:0<|tool_call_argument_begin|>{"param1": "value1", "param2": "value2"}<|tool_call_end|><|tool_calls_section_end|>' + +=== Diff: With vs Without tool call (user, assistant, user) === +Common Prefix: '<|im_system|>tool_declare<|im_middle|>[{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}}]<|im_end|><|im_system|>system<|im_middle|>You are a helpful assistant<|im_end|><|im_user|>user<|im_middle|>Hello, please help me.<|im_end|><|im_assistant|>assistant<|im_middle|>' +Common Suffix: '<|im_end|><|im_user|>user<|im_middle|>Continue.<|im_end|>' +Left (difference): 'Let me help you.' 
+Right (difference): '<|tool_calls_section_begin|><|tool_call_begin|>functions.test_function_name:0<|tool_call_argument_begin|>{"param1": "value1", "param2": "value2"}<|tool_call_end|><|tool_calls_section_end|>' + +=== Diff: One vs Two tool calls (user, assistant) === +Common Prefix: '<|im_system|>tool_declare<|im_middle|>[{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}}]<|im_end|><|im_system|>system<|im_middle|>You are a helpful assistant<|im_end|><|im_user|>user<|im_middle|>Hello, please help me.<|im_end|><|im_assistant|>assistant<|im_middle|><|tool_calls_section_begin|><|tool_call_begin|>functions.test_function_name:0<|tool_call_argument_begin|>{"param1": "value1", "param2": "value2"}<|tool_call_end|>' +Common Suffix: '<|tool_calls_section_end|><|im_end|>' +Left (difference): '' +Right (difference): '<|tool_call_begin|>functions.test_function_name:1<|tool_call_argument_begin|>{"param1": "value3", "param2": "value4"}<|tool_call_end|>' + +=== Diff: One vs Two tool calls (user, assistant, user) === +Common Prefix: '<|im_system|>tool_declare<|im_middle|>[{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}}]<|im_end|><|im_system|>system<|im_middle|>You are a helpful assistant<|im_end|><|im_user|>user<|im_middle|>Hello, please help me.<|im_end|><|im_assistant|>assistant<|im_middle|><|tool_calls_section_begin|><|tool_call_begin|>functions.test_function_name:0<|tool_call_argument_begin|>{"param1": "value1", "param2": 
"value2"}<|tool_call_end|>' +Common Suffix: '<|tool_calls_section_end|><|im_end|><|im_user|>user<|im_middle|>Continue.<|im_end|>' +Left (difference): '' +Right (difference): '<|tool_call_begin|>functions.test_function_name:1<|tool_call_argument_begin|>{"param1": "value3", "param2": "value4"}<|tool_call_end|>' + +=== Diff: Tool call with vs without reasoning_content (user, assistant) === +Common Prefix: '<|im_system|>tool_declare<|im_middle|>[{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}}]<|im_end|><|im_system|>system<|im_middle|>You are a helpful assistant<|im_end|><|im_user|>user<|im_middle|>Hello, please help me.<|im_end|><|im_assistant|>assistant<|im_middle|><|tool_calls_section_begin|><|tool_call_begin|>functions.test_function_name:0<|tool_call_argument_begin|>{"param1": "value1", "param2": "value2"}<|tool_call_end|><|tool_calls_section_end|><|im_end|>' +Common Suffix: '' +Left (difference): '' +Right (difference): '' + +=== Checking Reasoning Variables === +No reasoning/thinking-related variables were queried by the template + +================================================================================ + ANALYZING TEMPLATE: models/templates/openai-gpt-oss-120b.jinja +================================================================================ + +=== Template Capabilities (from jinja::caps) === +supports_tools: true +supports_tool_calls: true +supports_system_role: true +supports_parallel_tool_calls: false +requires_typed_content: false + +=== Diff: With vs Without Tools (single user message) === +Common Prefix: '<|start|>system<|message|>You are ChatGPT, a large language model trained by OpenAI. 
+Knowledge cutoff: 2024-06 +Current date: 2026-01-26 + +Reasoning: medium + +# Valid channels: analysis, commentary, final. Channel must be included for every message.' +Common Suffix: '<|end|><|start|>user<|message|>Hello, please help me.<|end|>' +Left (difference): '' +Right (difference): ' +Calls to these tools must go to the commentary channel: 'functions'.<|end|><|start|>developer<|message|># Tools + +## functions + +namespace functions { + +// A test function for debugging +type test_function_name = (_: { +// First parameter +param1: string, +// Second parameter +param2: string, +}) => any; + +} // namespace functions' + +=== Diff: With vs Without add_generation_prompt (single user message) === +Common Prefix: '<|start|>system<|message|>You are ChatGPT, a large language model trained by OpenAI. +Knowledge cutoff: 2024-06 +Current date: 2026-01-26 + +Reasoning: medium + +# Valid channels: analysis, commentary, final. Channel must be included for every message.<|end|><|start|>user<|message|>Hello, please help me.<|end|>' +Common Suffix: '' +Left (difference): '' +Right (difference): '<|start|>assistant' + +=== Diff: With vs Without reasoning_content (user, assistant) === +Common Prefix: '<|start|>system<|message|>You are ChatGPT, a large language model trained by OpenAI. +Knowledge cutoff: 2024-06 +Current date: 2026-01-26 + +Reasoning: medium + +# Valid channels: analysis, commentary, final. Channel must be included for every message.<|end|><|start|>user<|message|>Hello, please help me.<|end|><|start|>assistant<|channel|>final<|message|>I can help you with that.<|return|>' +Common Suffix: '' +Left (difference): '' +Right (difference): '' + +=== Diff: With vs Without reasoning_content (user, assistant, user) === +Common Prefix: '<|start|>system<|message|>You are ChatGPT, a large language model trained by OpenAI. +Knowledge cutoff: 2024-06 +Current date: 2026-01-26 + +Reasoning: medium + +# Valid channels: analysis, commentary, final. 
Channel must be included for every message.<|end|><|start|>user<|message|>Hello, please help me.<|end|><|start|>assistant<|channel|>final<|message|>I can help you with that.<|end|><|start|>user<|message|>Thank you.<|end|>' +Common Suffix: '' +Left (difference): '' +Right (difference): '' +Analysis failed: +------------ +While executing BinaryExpression at line 264, column 53 in source: +...{%- if "<|channel|>analysis<|message|>" in message.content or "<|channel|>final<... + ^ +Error: Cannot perform operation on null values + +================================================================================ + ANALYZING TEMPLATE: models/templates/unsloth-Apriel-1.5.jinja +================================================================================ + +=== Template Capabilities (from jinja::caps) === +supports_tools: false +supports_tool_calls: true +supports_system_role: true +supports_parallel_tool_calls: true +requires_typed_content: false + +=== Diff: With vs Without Tools (single user message) === +Common Prefix: '<|system|> +You are a thoughtful and systematic AI assistant built by ServiceNow Language Models (SLAM) lab. Before providing an answer, analyze the problem carefully and present your reasoning step by step. After explaining your thought process, provide the final solution in the following format: [BEGIN FINAL RESPONSE] ... [END FINAL RESPONSE].' +Common Suffix: ' +<|end|> +<|user|> +Hello, please help me. +<|end|> +' +Left (difference): '' +Right (difference): 'You are provided with function signatures within XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about the arguments. You should infer the argument values from previous user responses and the system message. 
Here are the available tools: + +{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}} + +Return all function calls as a list of json objects within XML tags. Each json object should contain a function name and arguments as follows: +[{"name": , "arguments": }, {"name": , "arguments": },...] +<|end|> +<|system|> +You are a thoughtful and systematic AI assistant built by ServiceNow Language Models (SLAM) lab. Before providing an answer, analyze the problem carefully and present your reasoning step by step. After explaining your thought process, provide the final solution in the following format: [BEGIN FINAL RESPONSE] ... [END FINAL RESPONSE].' + +=== Diff: With vs Without add_generation_prompt (single user message) === +Common Prefix: '<|system|> +You are a thoughtful and systematic AI assistant built by ServiceNow Language Models (SLAM) lab. Before providing an answer, analyze the problem carefully and present your reasoning step by step. After explaining your thought process, provide the final solution in the following format: [BEGIN FINAL RESPONSE] ... [END FINAL RESPONSE]. +<|end|> +<|user|> +Hello, please help me. +<|end|> +' +Common Suffix: '' +Left (difference): '' +Right (difference): '<|assistant|> +' + +=== Diff: With vs Without reasoning_content (user, assistant) === +Common Prefix: '<|system|> +You are a thoughtful and systematic AI assistant built by ServiceNow Language Models (SLAM) lab. Before providing an answer, analyze the problem carefully and present your reasoning step by step. After explaining your thought process, provide the final solution in the following format: [BEGIN FINAL RESPONSE] ... [END FINAL RESPONSE]. +<|end|> +<|user|> +Hello, please help me. 
+<|end|> +<|assistant|> +I can help you with that. +<|end|> +' +Common Suffix: '' +Left (difference): '' +Right (difference): '' + +=== Diff: With vs Without reasoning_content (user, assistant, user) === +Common Prefix: '<|system|> +You are a thoughtful and systematic AI assistant built by ServiceNow Language Models (SLAM) lab. Before providing an answer, analyze the problem carefully and present your reasoning step by step. After explaining your thought process, provide the final solution in the following format: [BEGIN FINAL RESPONSE] ... [END FINAL RESPONSE]. +<|end|> +<|user|> +Hello, please help me. +<|end|> +<|assistant|> +I can help you with that. +<|end|> +<|user|> +Thank you. +<|end|> +' +Common Suffix: '' +Left (difference): '' +Right (difference): '' + +=== Diff: With vs Without tool call (user, assistant) === +Common Prefix: '<|system|> +You are a thoughtful and systematic AI assistant built by ServiceNow Language Models (SLAM) lab. Before providing an answer, analyze the problem carefully and present your reasoning step by step. After explaining your thought process, provide the final solution in the following format: [BEGIN FINAL RESPONSE] ... [END FINAL RESPONSE].You are provided with function signatures within XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about the arguments. You should infer the argument values from previous user responses and the system message. Here are the available tools: + +{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}} + +Return all function calls as a list of json objects within XML tags. 
Each json object should contain a function name and arguments as follows: +[{"name": , "arguments": }, {"name": , "arguments": },...] +<|end|> +<|system|> +You are a thoughtful and systematic AI assistant built by ServiceNow Language Models (SLAM) lab. Before providing an answer, analyze the problem carefully and present your reasoning step by step. After explaining your thought process, provide the final solution in the following format: [BEGIN FINAL RESPONSE] ... [END FINAL RESPONSE]. +<|end|> +<|user|> +Hello, please help me. +<|end|> +<|assistant|> +' +Common Suffix: ' +<|end|> +' +Left (difference): 'Let me help you.' +Right (difference): ' +[{"name": "test_function_name", "arguments": {"param1": "value1", "param2": "value2"}}]' + +=== Diff: With vs Without tool call (user, assistant, user) === +Common Prefix: '<|system|> +You are a thoughtful and systematic AI assistant built by ServiceNow Language Models (SLAM) lab. Before providing an answer, analyze the problem carefully and present your reasoning step by step. After explaining your thought process, provide the final solution in the following format: [BEGIN FINAL RESPONSE] ... [END FINAL RESPONSE].You are provided with function signatures within XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about the arguments. You should infer the argument values from previous user responses and the system message. Here are the available tools: + +{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}} + +Return all function calls as a list of json objects within XML tags. Each json object should contain a function name and arguments as follows: +[{"name": , "arguments": }, {"name": , "arguments": },...] 
+<|end|> +<|system|> +You are a thoughtful and systematic AI assistant built by ServiceNow Language Models (SLAM) lab. Before providing an answer, analyze the problem carefully and present your reasoning step by step. After explaining your thought process, provide the final solution in the following format: [BEGIN FINAL RESPONSE] ... [END FINAL RESPONSE]. +<|end|> +<|user|> +Hello, please help me. +<|end|> +<|assistant|> +' +Common Suffix: ' +<|end|> +<|user|> +Continue. +<|end|> +' +Left (difference): 'Let me help you.' +Right (difference): ' +[{"name": "test_function_name", "arguments": {"param1": "value1", "param2": "value2"}, "id": "call_001"}]' + +=== Diff: One vs Two tool calls (user, assistant) === +Common Prefix: '<|system|> +You are a thoughtful and systematic AI assistant built by ServiceNow Language Models (SLAM) lab. Before providing an answer, analyze the problem carefully and present your reasoning step by step. After explaining your thought process, provide the final solution in the following format: [BEGIN FINAL RESPONSE] ... [END FINAL RESPONSE].You are provided with function signatures within XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about the arguments. You should infer the argument values from previous user responses and the system message. Here are the available tools: + +{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}} + +Return all function calls as a list of json objects within XML tags. Each json object should contain a function name and arguments as follows: +[{"name": , "arguments": }, {"name": , "arguments": },...] 
+<|end|> +<|system|> +You are a thoughtful and systematic AI assistant built by ServiceNow Language Models (SLAM) lab. Before providing an answer, analyze the problem carefully and present your reasoning step by step. After explaining your thought process, provide the final solution in the following format: [BEGIN FINAL RESPONSE] ... [END FINAL RESPONSE]. +<|end|> +<|user|> +Hello, please help me. +<|end|> +<|assistant|> + +[{"name": "test_function_name", "arguments": {"param1": "value1", "param2": "value2"}}' +Common Suffix: '] +<|end|> +' +Left (difference): '' +Right (difference): ', {"name": "test_function_name", "arguments": {"param1": "value3", "param2": "value4"}}' + +=== Diff: One vs Two tool calls (user, assistant, user) === +Common Prefix: '<|system|> +You are a thoughtful and systematic AI assistant built by ServiceNow Language Models (SLAM) lab. Before providing an answer, analyze the problem carefully and present your reasoning step by step. After explaining your thought process, provide the final solution in the following format: [BEGIN FINAL RESPONSE] ... [END FINAL RESPONSE].You are provided with function signatures within XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about the arguments. You should infer the argument values from previous user responses and the system message. Here are the available tools: + +{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}} + +Return all function calls as a list of json objects within XML tags. Each json object should contain a function name and arguments as follows: +[{"name": , "arguments": }, {"name": , "arguments": },...] 
+<|end|> +<|system|> +You are a thoughtful and systematic AI assistant built by ServiceNow Language Models (SLAM) lab. Before providing an answer, analyze the problem carefully and present your reasoning step by step. After explaining your thought process, provide the final solution in the following format: [BEGIN FINAL RESPONSE] ... [END FINAL RESPONSE]. +<|end|> +<|user|> +Hello, please help me. +<|end|> +<|assistant|> + +[{"name": "test_function_name", "arguments": {"param1": "value1", "param2": "value2"}, "id": "call_001"}' +Common Suffix: '] +<|end|> +<|user|> +Continue. +<|end|> +' +Left (difference): '' +Right (difference): ', {"name": "test_function_name", "arguments": {"param1": "value3", "param2": "value4"}, "id": "call_002"}' + +=== Diff: Tool call with vs without reasoning_content (user, assistant) === +Common Prefix: '<|system|> +You are a thoughtful and systematic AI assistant built by ServiceNow Language Models (SLAM) lab. Before providing an answer, analyze the problem carefully and present your reasoning step by step. After explaining your thought process, provide the final solution in the following format: [BEGIN FINAL RESPONSE] ... [END FINAL RESPONSE].You are provided with function signatures within XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about the arguments. You should infer the argument values from previous user responses and the system message. Here are the available tools: + +{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}} + +Return all function calls as a list of json objects within XML tags. 
Each json object should contain a function name and arguments as follows: +[{"name": , "arguments": }, {"name": , "arguments": },...] +<|end|> +<|system|> +You are a thoughtful and systematic AI assistant built by ServiceNow Language Models (SLAM) lab. Before providing an answer, analyze the problem carefully and present your reasoning step by step. After explaining your thought process, provide the final solution in the following format: [BEGIN FINAL RESPONSE] ... [END FINAL RESPONSE]. +<|end|> +<|user|> +Hello, please help me. +<|end|> +<|assistant|> + +[{"name": "test_function_name", "arguments": {"param1": "value1", "param2": "value2"}}] +<|end|> +' +Common Suffix: '' +Left (difference): '' +Right (difference): '' + +=== Checking Reasoning Variables === +No reasoning/thinking-related variables were queried by the template + +================================================================================ + ANALYZING TEMPLATE: models/templates/unsloth-mistral-Devstral-Small-2507.jinja +================================================================================ + +=== Template Capabilities (from jinja::caps) === +supports_tools: false +supports_tool_calls: true +supports_system_role: true +supports_parallel_tool_calls: true +requires_typed_content: false + +=== Diff: With vs Without Tools (single user message) === +Common Prefix: '[SYSTEM_PROMPT]You are Devstral, a helpful agentic model trained by Mistral AI and using the OpenHands scaffold. You can interact with a computer to solve tasks. + + +Your primary role is to assist users by executing commands, modifying code, and solving technical problems effectively. You should be thorough, methodical, and prioritize quality over speed. +* If the user asks a question, like "why is X happening", don't try to fix the problem. Just give an answer to the question. + + + +* Each action you take is somewhat expensive. Wherever possible, combine multiple actions into a single action, e.g. 
combine multiple bash commands into one, using sed and grep to edit/view multiple files at once. +* When exploring the codebase, use efficient tools like find, grep, and git commands with appropriate filters to minimize unnecessary operations. + + + +* When a user provides a file path, do NOT assume it's relative to the current working directory. First explore the file system to locate the file before working on it. +* If asked to edit a file, edit the file directly, rather than creating a new file with a different filename. +* For global search-and-replace operations, consider using `sed` instead of opening file editors multiple times. + + + +* Write clean, efficient code with minimal comments. Avoid redundancy in comments: Do not repeat information that can be easily inferred from the code itself. +* When implementing solutions, focus on making the minimal changes needed to solve the problem. +* Before implementing any changes, first thoroughly understand the codebase through exploration. +* If you are adding a lot of code to a function or file, consider splitting the function or file into smaller pieces when appropriate. + + + +* When configuring git credentials, use "openhands" as the user.name and "openhands@all-hands.dev" as the user.email by default, unless explicitly instructed otherwise. +* Exercise caution with git operations. Do NOT make potentially dangerous changes (e.g., pushing to main, deleting repositories) unless explicitly asked to do so. +* When committing changes, use `git status` to see all modified files, and stage all files necessary for the commit. Use `git commit -a` whenever possible. +* Do NOT commit files that typically shouldn't go into version control (e.g., node_modules/, .env files, build directories, cache files, large binaries) unless explicitly instructed by the user. +* If unsure about committing certain files, check for the presence of .gitignore files or ask the user for clarification. 
+ + + +* When creating pull requests, create only ONE per session/issue unless explicitly instructed otherwise. +* When working with an existing PR, update it with new commits rather than creating additional PRs for the same issue. +* When updating a PR, preserve the original PR title and purpose, updating description only when necessary. + + + +1. EXPLORATION: Thoroughly explore relevant files and understand the context before proposing solutions +2. ANALYSIS: Consider multiple approaches and select the most promising one +3. TESTING: + * For bug fixes: Create tests to verify issues before implementing fixes + * For new features: Consider test-driven development when appropriate + * If the repository lacks testing infrastructure and implementing tests would require extensive setup, consult with the user before investing time in building testing infrastructure + * If the environment is not set up to run tests, consult with the user first before investing time to install all dependencies +4. IMPLEMENTATION: Make focused, minimal changes to address the problem +5. VERIFICATION: If the environment is set up to run tests, test your implementation thoroughly, including edge cases. If the environment is not set up to run tests, consult with the user first before investing time to run tests. + + + +* Only use GITHUB_TOKEN and other credentials in ways the user has explicitly requested and would expect. +* Use APIs to work with GitHub or other platforms, unless the user asks otherwise or your task requires browsing. + + + +* When user asks you to run an application, don't stop if the application is not installed. Instead, please install the application and run the command again. +* If you encounter missing dependencies: + 1. First, look around in the repository for existing dependency files (requirements.txt, pyproject.toml, package.json, Gemfile, etc.) + 2. 
If dependency files exist, use them to install all dependencies at once (e.g., `pip install -r requirements.txt`, `npm install`, etc.) + 3. Only install individual packages directly if no dependency files are found or if only specific packages are needed +* Similarly, if you encounter missing dependencies for essential tools requested by the user, install them when possible. + + + +* If you've made repeated attempts to solve a problem but tests still fail or the user reports it's still broken: + 1. Step back and reflect on 5-7 different possible sources of the problem + 2. Assess the likelihood of each possible cause + 3. Methodically address the most likely causes, starting with the highest probability + 4. Document your reasoning process +* When you run into any major issue while executing a plan from the user, please don't try to directly work around it. Instead, propose a new plan and confirm with the user before proceeding. +[/SYSTEM_PROMPT]' +Common Suffix: '[INST]Hello, please help me.[/INST]' +Left (difference): '' +Right (difference): '[AVAILABLE_TOOLS][{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}}][/AVAILABLE_TOOLS]' + +=== Diff: With vs Without add_generation_prompt (single user message) === +Common Prefix: '[SYSTEM_PROMPT]You are Devstral, a helpful agentic model trained by Mistral AI and using the OpenHands scaffold. You can interact with a computer to solve tasks. + + +Your primary role is to assist users by executing commands, modifying code, and solving technical problems effectively. You should be thorough, methodical, and prioritize quality over speed. +* If the user asks a question, like "why is X happening", don't try to fix the problem. 
Just give an answer to the question. + + + +* Each action you take is somewhat expensive. Wherever possible, combine multiple actions into a single action, e.g. combine multiple bash commands into one, using sed and grep to edit/view multiple files at once. +* When exploring the codebase, use efficient tools like find, grep, and git commands with appropriate filters to minimize unnecessary operations. + + + +* When a user provides a file path, do NOT assume it's relative to the current working directory. First explore the file system to locate the file before working on it. +* If asked to edit a file, edit the file directly, rather than creating a new file with a different filename. +* For global search-and-replace operations, consider using `sed` instead of opening file editors multiple times. + + + +* Write clean, efficient code with minimal comments. Avoid redundancy in comments: Do not repeat information that can be easily inferred from the code itself. +* When implementing solutions, focus on making the minimal changes needed to solve the problem. +* Before implementing any changes, first thoroughly understand the codebase through exploration. +* If you are adding a lot of code to a function or file, consider splitting the function or file into smaller pieces when appropriate. + + + +* When configuring git credentials, use "openhands" as the user.name and "openhands@all-hands.dev" as the user.email by default, unless explicitly instructed otherwise. +* Exercise caution with git operations. Do NOT make potentially dangerous changes (e.g., pushing to main, deleting repositories) unless explicitly asked to do so. +* When committing changes, use `git status` to see all modified files, and stage all files necessary for the commit. Use `git commit -a` whenever possible. +* Do NOT commit files that typically shouldn't go into version control (e.g., node_modules/, .env files, build directories, cache files, large binaries) unless explicitly instructed by the user. 
+* If unsure about committing certain files, check for the presence of .gitignore files or ask the user for clarification. + + + +* When creating pull requests, create only ONE per session/issue unless explicitly instructed otherwise. +* When working with an existing PR, update it with new commits rather than creating additional PRs for the same issue. +* When updating a PR, preserve the original PR title and purpose, updating description only when necessary. + + + +1. EXPLORATION: Thoroughly explore relevant files and understand the context before proposing solutions +2. ANALYSIS: Consider multiple approaches and select the most promising one +3. TESTING: + * For bug fixes: Create tests to verify issues before implementing fixes + * For new features: Consider test-driven development when appropriate + * If the repository lacks testing infrastructure and implementing tests would require extensive setup, consult with the user before investing time in building testing infrastructure + * If the environment is not set up to run tests, consult with the user first before investing time to install all dependencies +4. IMPLEMENTATION: Make focused, minimal changes to address the problem +5. VERIFICATION: If the environment is set up to run tests, test your implementation thoroughly, including edge cases. If the environment is not set up to run tests, consult with the user first before investing time to run tests. + + + +* Only use GITHUB_TOKEN and other credentials in ways the user has explicitly requested and would expect. +* Use APIs to work with GitHub or other platforms, unless the user asks otherwise or your task requires browsing. + + + +* When user asks you to run an application, don't stop if the application is not installed. Instead, please install the application and run the command again. +* If you encounter missing dependencies: + 1. 
First, look around in the repository for existing dependency files (requirements.txt, pyproject.toml, package.json, Gemfile, etc.) + 2. If dependency files exist, use them to install all dependencies at once (e.g., `pip install -r requirements.txt`, `npm install`, etc.) + 3. Only install individual packages directly if no dependency files are found or if only specific packages are needed +* Similarly, if you encounter missing dependencies for essential tools requested by the user, install them when possible. + + + +* If you've made repeated attempts to solve a problem but tests still fail or the user reports it's still broken: + 1. Step back and reflect on 5-7 different possible sources of the problem + 2. Assess the likelihood of each possible cause + 3. Methodically address the most likely causes, starting with the highest probability + 4. Document your reasoning process +* When you run into any major issue while executing a plan from the user, please don't try to directly work around it. Instead, propose a new plan and confirm with the user before proceeding. +[/SYSTEM_PROMPT][INST]Hello, please help me.[/INST]' +Common Suffix: '' +Left (difference): '' +Right (difference): '' + +=== Diff: With vs Without reasoning_content (user, assistant) === +Common Prefix: '[SYSTEM_PROMPT]You are Devstral, a helpful agentic model trained by Mistral AI and using the OpenHands scaffold. You can interact with a computer to solve tasks. + + +Your primary role is to assist users by executing commands, modifying code, and solving technical problems effectively. You should be thorough, methodical, and prioritize quality over speed. +* If the user asks a question, like "why is X happening", don't try to fix the problem. Just give an answer to the question. + + + +* Each action you take is somewhat expensive. Wherever possible, combine multiple actions into a single action, e.g. combine multiple bash commands into one, using sed and grep to edit/view multiple files at once. 
+* When exploring the codebase, use efficient tools like find, grep, and git commands with appropriate filters to minimize unnecessary operations. + + + +* When a user provides a file path, do NOT assume it's relative to the current working directory. First explore the file system to locate the file before working on it. +* If asked to edit a file, edit the file directly, rather than creating a new file with a different filename. +* For global search-and-replace operations, consider using `sed` instead of opening file editors multiple times. + + + +* Write clean, efficient code with minimal comments. Avoid redundancy in comments: Do not repeat information that can be easily inferred from the code itself. +* When implementing solutions, focus on making the minimal changes needed to solve the problem. +* Before implementing any changes, first thoroughly understand the codebase through exploration. +* If you are adding a lot of code to a function or file, consider splitting the function or file into smaller pieces when appropriate. + + + +* When configuring git credentials, use "openhands" as the user.name and "openhands@all-hands.dev" as the user.email by default, unless explicitly instructed otherwise. +* Exercise caution with git operations. Do NOT make potentially dangerous changes (e.g., pushing to main, deleting repositories) unless explicitly asked to do so. +* When committing changes, use `git status` to see all modified files, and stage all files necessary for the commit. Use `git commit -a` whenever possible. +* Do NOT commit files that typically shouldn't go into version control (e.g., node_modules/, .env files, build directories, cache files, large binaries) unless explicitly instructed by the user. +* If unsure about committing certain files, check for the presence of .gitignore files or ask the user for clarification. + + + +* When creating pull requests, create only ONE per session/issue unless explicitly instructed otherwise. 
+* When working with an existing PR, update it with new commits rather than creating additional PRs for the same issue. +* When updating a PR, preserve the original PR title and purpose, updating description only when necessary. + + + +1. EXPLORATION: Thoroughly explore relevant files and understand the context before proposing solutions +2. ANALYSIS: Consider multiple approaches and select the most promising one +3. TESTING: + * For bug fixes: Create tests to verify issues before implementing fixes + * For new features: Consider test-driven development when appropriate + * If the repository lacks testing infrastructure and implementing tests would require extensive setup, consult with the user before investing time in building testing infrastructure + * If the environment is not set up to run tests, consult with the user first before investing time to install all dependencies +4. IMPLEMENTATION: Make focused, minimal changes to address the problem +5. VERIFICATION: If the environment is set up to run tests, test your implementation thoroughly, including edge cases. If the environment is not set up to run tests, consult with the user first before investing time to run tests. + + + +* Only use GITHUB_TOKEN and other credentials in ways the user has explicitly requested and would expect. +* Use APIs to work with GitHub or other platforms, unless the user asks otherwise or your task requires browsing. + + + +* When user asks you to run an application, don't stop if the application is not installed. Instead, please install the application and run the command again. +* If you encounter missing dependencies: + 1. First, look around in the repository for existing dependency files (requirements.txt, pyproject.toml, package.json, Gemfile, etc.) + 2. If dependency files exist, use them to install all dependencies at once (e.g., `pip install -r requirements.txt`, `npm install`, etc.) + 3. 
Only install individual packages directly if no dependency files are found or if only specific packages are needed +* Similarly, if you encounter missing dependencies for essential tools requested by the user, install them when possible. + + + +* If you've made repeated attempts to solve a problem but tests still fail or the user reports it's still broken: + 1. Step back and reflect on 5-7 different possible sources of the problem + 2. Assess the likelihood of each possible cause + 3. Methodically address the most likely causes, starting with the highest probability + 4. Document your reasoning process +* When you run into any major issue while executing a plan from the user, please don't try to directly work around it. Instead, propose a new plan and confirm with the user before proceeding. +[/SYSTEM_PROMPT][INST]Hello, please help me.[/INST]I can help you with that.' +Common Suffix: '' +Left (difference): '' +Right (difference): '' + +=== Diff: With vs Without reasoning_content (user, assistant, user) === +Common Prefix: '[SYSTEM_PROMPT]You are Devstral, a helpful agentic model trained by Mistral AI and using the OpenHands scaffold. You can interact with a computer to solve tasks. + + +Your primary role is to assist users by executing commands, modifying code, and solving technical problems effectively. You should be thorough, methodical, and prioritize quality over speed. +* If the user asks a question, like "why is X happening", don't try to fix the problem. Just give an answer to the question. + + + +* Each action you take is somewhat expensive. Wherever possible, combine multiple actions into a single action, e.g. combine multiple bash commands into one, using sed and grep to edit/view multiple files at once. +* When exploring the codebase, use efficient tools like find, grep, and git commands with appropriate filters to minimize unnecessary operations. + + + +* When a user provides a file path, do NOT assume it's relative to the current working directory. 
First explore the file system to locate the file before working on it. +* If asked to edit a file, edit the file directly, rather than creating a new file with a different filename. +* For global search-and-replace operations, consider using `sed` instead of opening file editors multiple times. + + + +* Write clean, efficient code with minimal comments. Avoid redundancy in comments: Do not repeat information that can be easily inferred from the code itself. +* When implementing solutions, focus on making the minimal changes needed to solve the problem. +* Before implementing any changes, first thoroughly understand the codebase through exploration. +* If you are adding a lot of code to a function or file, consider splitting the function or file into smaller pieces when appropriate. + + + +* When configuring git credentials, use "openhands" as the user.name and "openhands@all-hands.dev" as the user.email by default, unless explicitly instructed otherwise. +* Exercise caution with git operations. Do NOT make potentially dangerous changes (e.g., pushing to main, deleting repositories) unless explicitly asked to do so. +* When committing changes, use `git status` to see all modified files, and stage all files necessary for the commit. Use `git commit -a` whenever possible. +* Do NOT commit files that typically shouldn't go into version control (e.g., node_modules/, .env files, build directories, cache files, large binaries) unless explicitly instructed by the user. +* If unsure about committing certain files, check for the presence of .gitignore files or ask the user for clarification. + + + +* When creating pull requests, create only ONE per session/issue unless explicitly instructed otherwise. +* When working with an existing PR, update it with new commits rather than creating additional PRs for the same issue. +* When updating a PR, preserve the original PR title and purpose, updating description only when necessary. + + + +1. 
EXPLORATION: Thoroughly explore relevant files and understand the context before proposing solutions +2. ANALYSIS: Consider multiple approaches and select the most promising one +3. TESTING: + * For bug fixes: Create tests to verify issues before implementing fixes + * For new features: Consider test-driven development when appropriate + * If the repository lacks testing infrastructure and implementing tests would require extensive setup, consult with the user before investing time in building testing infrastructure + * If the environment is not set up to run tests, consult with the user first before investing time to install all dependencies +4. IMPLEMENTATION: Make focused, minimal changes to address the problem +5. VERIFICATION: If the environment is set up to run tests, test your implementation thoroughly, including edge cases. If the environment is not set up to run tests, consult with the user first before investing time to run tests. + + + +* Only use GITHUB_TOKEN and other credentials in ways the user has explicitly requested and would expect. +* Use APIs to work with GitHub or other platforms, unless the user asks otherwise or your task requires browsing. + + + +* When user asks you to run an application, don't stop if the application is not installed. Instead, please install the application and run the command again. +* If you encounter missing dependencies: + 1. First, look around in the repository for existing dependency files (requirements.txt, pyproject.toml, package.json, Gemfile, etc.) + 2. If dependency files exist, use them to install all dependencies at once (e.g., `pip install -r requirements.txt`, `npm install`, etc.) + 3. Only install individual packages directly if no dependency files are found or if only specific packages are needed +* Similarly, if you encounter missing dependencies for essential tools requested by the user, install them when possible. 
+ + + +* If you've made repeated attempts to solve a problem but tests still fail or the user reports it's still broken: + 1. Step back and reflect on 5-7 different possible sources of the problem + 2. Assess the likelihood of each possible cause + 3. Methodically address the most likely causes, starting with the highest probability + 4. Document your reasoning process +* When you run into any major issue while executing a plan from the user, please don't try to directly work around it. Instead, propose a new plan and confirm with the user before proceeding. +[/SYSTEM_PROMPT][INST]Hello, please help me.[/INST]I can help you with that.[INST]Thank you.[/INST]' +Common Suffix: '' +Left (difference): '' +Right (difference): '' +Analysis failed: +------------ +While executing MemberExpression at line 74, column 24 in source: +... {%- else %}↵ {{- message['content'][0]['text'] }}↵ {%- en... + ^ +Error: Cannot access property with non-string: got Integer + +================================================================================ + ANALYSIS COMPLETE +================================================================================ + \ No newline at end of file diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 1f29819f1f..ecc5e00c03 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -190,6 +190,7 @@ endif() llama_build_and_test(test-chat-peg-parser.cpp peg-parser/simple-tokenize.cpp) llama_build_and_test(test-jinja.cpp) llama_test(test-jinja NAME test-jinja-py ARGS -py LABEL python) +llama_build_and_test(test-chat-auto-parser.cpp) llama_build_and_test(test-json-partial.cpp) llama_build_and_test(test-log.cpp) llama_build_and_test( @@ -199,6 +200,7 @@ llama_build_and_test( peg-parser/test-gbnf-generation.cpp peg-parser/test-json-parser.cpp peg-parser/test-json-serialization.cpp + peg-parser/test-python-dict-parser.cpp peg-parser/test-unicode.cpp peg-parser/tests.h ) diff --git a/tests/peg-parser/test-python-dict-parser.cpp 
b/tests/peg-parser/test-python-dict-parser.cpp new file mode 100644 index 0000000000..9db1154b45 --- /dev/null +++ b/tests/peg-parser/test-python-dict-parser.cpp @@ -0,0 +1,279 @@ +#include "tests.h" + +void test_python_dict_parser(testing &t) { + // Test parsing a simple Python dict object with single quotes + t.test("simple Python dict object parsing", [](testing &t) { + auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.python_dict(); }); + + std::string input = "{'name': 'test', 'value': 42, 'flag': true}"; + common_peg_parse_context ctx(input); + + auto result = parser.parse(ctx); + + t.assert_equal("result_is_success", true, result.success()); + t.assert_equal("result_end", input.size(), result.end); + }); + + // Test parsing a Python dict array with mixed types + t.test("Python dict array with mixed types", [](testing &t) { + auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.python_dict(); }); + + std::string input = "[1, 'hello', true, null, 3.14]"; + common_peg_parse_context ctx(input); + + auto result = parser.parse(ctx); + + t.assert_equal("result_is_success", true, result.success()); + t.assert_equal("result_end", input.size(), result.end); + }); + + // Test parsing nested Python dict with objects and arrays + t.test("nested Python dict with objects and arrays", [](testing &t) { + auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.python_dict(); }); + + std::string input = + "{'users': [{'id': 1, 'name': 'Alice'}, {'id': 2, 'name': 'Bob'}], 'count': 2, 'metadata': {'version': '1.0', 'tags': ['admin', 'user']}}"; + common_peg_parse_context ctx(input); + + auto result = parser.parse(ctx); + + t.assert_equal("result_is_success", true, result.success()); + t.assert_equal("result_end", input.size(), result.end); + }); + + // Test parsing Python dict with escaped single quotes + t.test("Python dict with escaped single quotes", [](testing &t) { + auto parser = 
build_peg_parser([](common_peg_parser_builder & p) { return p.python_dict(); }); + + std::string input = "{'message': 'It\\'s working!'}"; + common_peg_parse_context ctx(input); + + auto result = parser.parse(ctx); + + t.assert_equal("result_is_success", true, result.success()); + t.assert_equal("result_end", input.size(), result.end); + }); + + // Test parsing Python dict with double quotes inside single quotes + t.test("Python dict with double quotes inside single quotes", [](testing &t) { + auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.python_dict(); }); + + std::string input = "{'quote': 'He said \"Hello\"'}"; + common_peg_parse_context ctx(input); + + auto result = parser.parse(ctx); + + t.assert_equal("result_is_success", true, result.success()); + t.assert_equal("result_end", input.size(), result.end); + }); + + // Test the example from the requirements + t.test("complex Python dict example from requirements", [](testing &t) { + auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.python_dict(); }); + + std::string input = "{ 'obj' : { 'something': 1, 'other \"something\"' : 'foo\\'s bar' } }"; + common_peg_parse_context ctx(input); + + auto result = parser.parse(ctx); + + t.assert_equal("result_is_success", true, result.success()); + t.assert_equal("result_end", input.size(), result.end); + }); + + // Test need_more_input() parsing - incomplete object + t.test("need_more_input() parsing - incomplete object", [](testing &t) { + auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.python_dict(); }); + + std::string input = "{'name': 'test', 'value': "; + common_peg_parse_context ctx(input, true); + + auto result = parser.parse(ctx); + + t.assert_equal("result_is_need_more_input", true, result.need_more_input()); + }); + + // Test need_more_input() parsing - incomplete single-quoted string + t.test("need_more_input() parsing - incomplete single-quoted string", [](testing &t) { + auto 
parser = build_peg_parser([](common_peg_parser_builder & p) { return p.python_dict(); }); + + std::string input = "{'name': 'test"; + common_peg_parse_context ctx(input, true); + + auto result = parser.parse(ctx); + + t.assert_equal("result_is_need_more_input", true, result.need_more_input()); + }); + + // Test unicode in Python dict strings + t.test("unicode in Python dict strings", [](testing &t) { + auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.python_dict(); }); + + std::string input = "{'message': 'Hello, 世界!'}"; + common_peg_parse_context ctx(input); + + auto result = parser.parse(ctx); + + t.assert_equal("result_is_success", true, result.success()); + t.assert_equal("result_end", input.size(), result.end); + }); + + // Test Python dict with unicode escapes + t.test("Python dict with unicode escapes", [](testing &t) { + auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.python_dict(); }); + + std::string input = "{'unicode': 'Hello\\u0041'}"; + common_peg_parse_context ctx(input); + + auto result = parser.parse(ctx); + + t.assert_equal("result_is_success", true, result.success()); + t.assert_equal("result_end", input.size(), result.end); + }); + + // Test that JSON double-quoted strings fail with Python dict parser + t.test("JSON double-quoted strings fail with Python dict parser", [](testing &t) { + auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.python_dict(); }); + + std::string input = "{\"name\": \"test\"}"; + common_peg_parse_context ctx(input); + + auto result = parser.parse(ctx); + + t.assert_equal("result_is_fail", true, result.fail()); + }); + + // Test Python dict string content parser directly + t.test("python dict string content parser", [](testing &t) { + auto parser = build_peg_parser([](common_peg_parser_builder & p) { + return p.sequence({ p.literal("'"), p.python_dict_string_content(), p.literal("'"), p.space() }); + }); + + t.test("simple string", [&](testing 
&t) { + std::string input = "'hello'"; + common_peg_parse_context ctx(input); + + auto result = parser.parse(ctx); + t.assert_true("success", result.success()); + t.assert_equal("end", input.size(), result.end); + }); + + t.test("string with escaped single quote", [&](testing &t) { + std::string input = "'it\\'s'"; + common_peg_parse_context ctx(input); + + auto result = parser.parse(ctx); + t.assert_true("success", result.success()); + t.assert_equal("end", input.size(), result.end); + }); + + t.test("string with double quotes", [&](testing &t) { + std::string input = "'say \"hello\"'"; + common_peg_parse_context ctx(input); + + auto result = parser.parse(ctx); + t.assert_true("success", result.success()); + t.assert_equal("end", input.size(), result.end); + }); + + t.test("incomplete string", [&](testing &t) { + std::string input = "'hello"; + common_peg_parse_context ctx(input, true); + + auto result = parser.parse(ctx); + t.assert_true("need_more_input", result.need_more_input()); + }); + }); + + // Test allow_python_dict_format flag usage + t.test("allow_python_dict_format flag", [](testing &t) { + t.test("flag is false by default", [&](testing &t) { + common_peg_parser_builder builder; + t.assert_equal("default_value", false, builder.get_allow_python_dict_format()); + }); + + t.test("flag can be set to true", [&](testing &t) { + common_peg_parser_builder builder; + builder.set_allow_python_dict_format(true); + t.assert_equal("after_set", true, builder.get_allow_python_dict_format()); + }); + + t.test("flag can be set back to false", [&](testing &t) { + common_peg_parser_builder builder; + builder.set_allow_python_dict_format(true); + builder.set_allow_python_dict_format(false); + t.assert_equal("after_reset", false, builder.get_allow_python_dict_format()); + }); + }); + + // Test that the flag actually affects json() parser behavior + t.test("json() parser with allow_python_dict_format flag", [](testing &t) { + t.test("json() rejects single quotes when flag 
is false", [&](testing &t) { + auto parser = build_peg_parser([](common_peg_parser_builder & p) { + p.set_allow_python_dict_format(false); + return p.json(); + }); + + std::string input = "{'name': 'test'}"; + common_peg_parse_context ctx(input); + + auto result = parser.parse(ctx); + t.assert_true("fail", result.fail()); + }); + + t.test("json() accepts single quotes when flag is true", [&](testing &t) { + auto parser = build_peg_parser([](common_peg_parser_builder & p) { + p.set_allow_python_dict_format(true); + return p.json(); + }); + + std::string input = "{'name': 'test'}"; + common_peg_parse_context ctx(input); + + auto result = parser.parse(ctx); + t.assert_true("success", result.success()); + t.assert_equal("end", input.size(), result.end); + }); + + t.test("json() still accepts double quotes when flag is true", [&](testing &t) { + auto parser = build_peg_parser([](common_peg_parser_builder & p) { + p.set_allow_python_dict_format(true); + return p.json(); + }); + + std::string input = "{\"name\": \"test\"}"; + common_peg_parse_context ctx(input); + + auto result = parser.parse(ctx); + t.assert_true("success", result.success()); + t.assert_equal("end", input.size(), result.end); + }); + + t.test("json() accepts mixed quote styles when flag is true", [&](testing &t) { + auto parser = build_peg_parser([](common_peg_parser_builder & p) { + p.set_allow_python_dict_format(true); + return p.json(); + }); + + std::string input = "{\"name\": 'test', 'value': \"hello\"}"; + common_peg_parse_context ctx(input); + + auto result = parser.parse(ctx); + t.assert_true("success", result.success()); + t.assert_equal("end", input.size(), result.end); + }); + + t.test("complex nested structure with flag true", [&](testing &t) { + auto parser = build_peg_parser([](common_peg_parser_builder & p) { + p.set_allow_python_dict_format(true); + return p.json(); + }); + + std::string input = "{ 'obj' : { 'something': 1, 'other \"something\"' : 'foo\\'s bar' } }"; + 
common_peg_parse_context ctx(input); + + auto result = parser.parse(ctx); + t.assert_true("success", result.success()); + t.assert_equal("end", input.size(), result.end); + }); + }); +} diff --git a/tests/peg-parser/tests.h b/tests/peg-parser/tests.h index 4d3f4e9eaf..debd4286c5 100644 --- a/tests/peg-parser/tests.h +++ b/tests/peg-parser/tests.h @@ -22,3 +22,4 @@ void test_json_parser(testing &t); void test_gbnf_generation(testing &t); void test_unicode(testing &t); void test_json_serialization(testing &t); +void test_python_dict_parser(testing &t); diff --git a/tests/test-chat-auto-parser.cpp b/tests/test-chat-auto-parser.cpp new file mode 100644 index 0000000000..015c90d408 --- /dev/null +++ b/tests/test-chat-auto-parser.cpp @@ -0,0 +1,1845 @@ +#include "chat-auto-parser-helpers.h" +#include "chat-diff-analyzer.h" +#include "chat-peg-parser.h" +#include "chat.h" +#include "peg-parser.h" +#include "testing.h" + +#include +#include +#include +#include + +static void test_calculate_diff_split_basic(testing & t); +static void test_calculate_diff_split_identical(testing & t); +static void test_calculate_diff_split_common_prefix(testing & t); +static void test_calculate_diff_split_common_suffix(testing & t); +static void test_calculate_diff_split_common_both(testing & t); +static void test_calculate_diff_split_empty_cases(testing & t); +static void test_calculate_diff_split_no_common(testing & t); +static void test_calculate_diff_split_single_char(testing & t); +static void test_calculate_diff_split_overlaps(testing & t); +static void test_calculate_diff_split_tag_boundaries(testing & t); +static void test_calculate_diff_split(testing & t); + +static void test_until_common_prefix_basic(testing & t); +static void test_until_common_prefix(testing & t); + +static void test_after_common_suffix_basic(testing & t); +static void test_after_common_suffix(testing & t); + +static void test_analyze_tool_call_pure_json(testing & t); +static void 
test_analyze_tool_call_function_name_markers(testing & t); +static void test_analyze_tool_call_full_markers(testing & t); +static void test_analyze_tool_call_edge_cases(testing & t); + +static void test_compare_variants_basic(testing & t); +static void test_compare_variants_messages_modifier(testing & t); +static void test_compare_variants_tools_modifier(testing & t); +static void test_compare_variants_both_modifiers(testing & t); +static void test_compare_variants_template_failure(testing & t); +static void test_compare_variants_identity(testing & t); +static void test_compare_variants(testing & t); + +// Seed-OSS template tool calling analysis tests +static void test_seed_oss_tool_analysis(testing & t); +static void test_seed_oss_tool_presence(testing & t); +static void test_seed_oss_call_count(testing & t); +static void test_seed_oss_function_names(testing & t); +static void test_seed_oss_argument_count(testing & t); +static void test_seed_oss_args_presence(testing & t); +static void test_seed_oss_tool_with_reasoning(testing & t); + +// Nemotron template analysis tests +static void test_nemotron_analysis(testing & t); +static void test_nemotron_reasoning_detection(testing & t); +static void test_nemotron_tool_format(testing & t); + +// CohereForAI template analysis tests +static void test_cohere_reasoning_detection(testing & t); +static void test_cohere_tool_format(testing & t); +static void test_cohere_analysis(testing & t); + +// Marker separation +static void test_marker_separation(testing & t); + +// standard_json_tools format tests +static void test_standard_json_tools_formats(testing & t); +static void test_standard_json_tools_openai(testing & t); +static void test_standard_json_tools_cohere(testing & t); +static void test_standard_json_tools_function_key(testing & t); + +// normalize_quotes_to_json tests +static void test_normalize_quotes_to_json(testing & t); +static void test_normalize_quotes_with_embedded_quotes(testing & t); + +// TAG_WITH_TAGGED 
argument parsing tests +static void test_tagged_args_with_embedded_quotes(testing & t); + +int main(int argc, char * argv[]) { + testing t(std::cout); + t.verbose = true; + + // usage: test-chat-auto-parser-helpers [filter_regex] + + if (argc > 1) { + t.set_filter(argv[1]); + } + + t.test("diff_split", test_calculate_diff_split); + t.test("common_prefix", test_until_common_prefix); + t.test("common_suffix", test_after_common_suffix); + t.test("compare_variants", test_compare_variants); + t.test("segments", test_marker_separation); + t.test("seed_oss_diffs", test_seed_oss_tool_analysis); + t.test("cohere", test_cohere_analysis); + t.test("nemotron", test_nemotron_analysis); + t.test("standard_json_tools", test_standard_json_tools_formats); + t.test("normalize_quotes_to_json", test_normalize_quotes_to_json); + t.test("tagged_args_embedded_quotes", test_tagged_args_with_embedded_quotes); + + return t.summary(); +} + +static void test_marker_separation(testing & t) { + auto single_square_marker = segmentize_markers("pre_marker[marker]post_marker"); + auto single_diag_marker = segmentize_markers("pre_markerpost_marker"); + auto paired_markers = segmentize_markers("world"); + auto double_different_markers = segmentize_markers("[hello][world]"); + auto in_between = segmentize_markers("imdabada[hey]"); + + t.test("single_square_marker", [&] (testing & t) { + t.assert_equal("first is text", segment_type::TEXT, single_square_marker[0].type); + t.assert_equal("second is marker", segment_type::MARKER, single_square_marker[1].type); + t.assert_equal("last is text", segment_type::TEXT, single_square_marker[2].type); + + t.assert_equal("first is 'pre_marker'", "pre_marker", single_square_marker[0].value); + t.assert_equal("second is '[marker]'", "[marker]", single_square_marker[1].value); + t.assert_equal("last is 'post_marker'", "post_marker", single_square_marker[2].value); + }); + + t.test("single_diagonal_marker", [&] (testing & t) { + t.assert_equal("first is text", 
segment_type::TEXT, single_diag_marker[0].type); + t.assert_equal("second is marker", segment_type::MARKER, single_diag_marker[1].type); + t.assert_equal("last is text", segment_type::TEXT, single_diag_marker[2].type); + + t.assert_equal("first is 'pre_marker'", "pre_marker", single_diag_marker[0].value); + t.assert_equal("second is ''", "", single_diag_marker[1].value); + t.assert_equal("last is 'post_marker'", "post_marker", single_diag_marker[2].value); + }); + + t.test("paired_markers", [&] (testing & t) { + t.assert_equal("first is marker", segment_type::MARKER, paired_markers[0].type); + t.assert_equal("second is text", segment_type::TEXT, paired_markers[1].type); + t.assert_equal("third is marker", segment_type::MARKER, paired_markers[2].type); + + t.assert_equal("first is ''", "", paired_markers[0].value); + t.assert_equal("second is 'world'", "world", paired_markers[1].value); + t.assert_equal("third is ''", "", paired_markers[2].value); + }); + + t.test("double_different_markers", [&] (testing & t) { + t.assert_equal("first is marker", segment_type::MARKER, double_different_markers[0].type); + t.assert_equal("second is marker", segment_type::MARKER, double_different_markers[1].type); + t.assert_equal("third is marker", segment_type::MARKER, double_different_markers[2].type); + t.assert_equal("fourth is marker", segment_type::MARKER, double_different_markers[3].type); + + t.assert_equal("first is ''", "", double_different_markers[0].value); + t.assert_equal("second is '[hello]'", "[hello]", double_different_markers[1].value); + t.assert_equal("third is ''", "", double_different_markers[2].value); + t.assert_equal("fourth is '[world]'", "[world]", double_different_markers[3].value); + }); + + t.test("in_between", [&] (testing & t) { + t.assert_equal("first is text", segment_type::TEXT, in_between[0].type); + t.assert_equal("second is marker", segment_type::MARKER, in_between[1].type); + t.assert_equal("third is text", segment_type::TEXT, 
in_between[2].type); + t.assert_equal("fourth is marker", segment_type::MARKER, in_between[3].type); + t.assert_equal("fifth is text", segment_type::TEXT, in_between[4].type); + t.assert_equal("sixth is marker", segment_type::MARKER, in_between[5].type); + + t.assert_equal("first is 'im'", "im", in_between[0].value); + t.assert_equal("second is ''", "", in_between[1].value); + t.assert_equal("third is 'daba'", "daba", in_between[2].value); + t.assert_equal("fourth is ''", "", in_between[3].value); + t.assert_equal("fifth is 'da'", "da", in_between[4].value); + t.assert_equal("sixth is '[hey]'", "[hey]", in_between[5].value); + }); +} + +static void test_calculate_diff_split(testing & t) { + t.test("calculate_diff_split basic", test_calculate_diff_split_basic); + t.test("calculate_diff_split identical", test_calculate_diff_split_identical); + t.test("calculate_diff_split common prefix", test_calculate_diff_split_common_prefix); + t.test("calculate_diff_split common suffix", test_calculate_diff_split_common_suffix); + t.test("calculate_diff_split common both", test_calculate_diff_split_common_both); + t.test("calculate_diff_split empty cases", test_calculate_diff_split_empty_cases); + t.test("calculate_diff_split no common", test_calculate_diff_split_no_common); + t.test("calculate_diff_split single char", test_calculate_diff_split_single_char); + t.test("calculate_diff_split overlaps", test_calculate_diff_split_overlaps); + t.test("calculate_diff_split tag boundaries", test_calculate_diff_split_tag_boundaries); +} + +static void test_calculate_diff_split_basic(testing & t) { + diff_split result = calculate_diff_split("hello world", "hello test"); + t.assert_equal("prefix should be 'hello '", "hello ", result.prefix); + t.assert_equal("left should be 'world'", "world", result.left); + t.assert_equal("right should be 'test'", "test", result.right); + t.assert_equal("suffix should be empty", "", result.suffix); + + result = calculate_diff_split("abc", "xyz"); + 
t.assert_equal("prefix should be empty", "", result.prefix); + t.assert_equal("left should be 'abc'", "abc", result.left); + t.assert_equal("right should be 'xyz'", "xyz", result.right); + t.assert_equal("suffix should be empty", "", result.suffix); + + result = calculate_diff_split("prefixA suffix", "prefixB suffix"); + t.assert_equal("prefix should be 'prefix'", "prefix", result.prefix); + t.assert_equal("left should be 'A'", "A", result.left); + t.assert_equal("right should be 'B'", "B", result.right); + t.assert_equal("suffix should be ' suffix'", " suffix", result.suffix); +} + +static void test_calculate_diff_split_identical(testing & t) { + diff_split result = calculate_diff_split("hello", "hello"); + t.assert_equal("prefix should be 'hello'", "hello", result.prefix); + t.assert_equal("left should be empty", "", result.left); + t.assert_equal("right should be empty", "", result.right); + t.assert_equal("suffix should be empty", "", result.suffix); + + result = calculate_diff_split("", ""); + t.assert_equal("prefix should be empty", "", result.prefix); + t.assert_equal("left should be empty", "", result.left); + t.assert_equal("right should be empty", "", result.right); + t.assert_equal("suffix should be empty", "", result.suffix); + + result = calculate_diff_split("a", "a"); + t.assert_equal("prefix should be 'a'", "a", result.prefix); + t.assert_equal("left should be empty", "", result.left); + t.assert_equal("right should be empty", "", result.right); + t.assert_equal("suffix should be empty", "", result.suffix); +} + +static void test_calculate_diff_split_common_prefix(testing & t) { + diff_split result = calculate_diff_split("abcdef", "abcxyz"); + t.assert_equal("prefix should be 'abc'", "abc", result.prefix); + t.assert_equal("left should be 'def'", "def", result.left); + t.assert_equal("right should be 'xyz'", "xyz", result.right); + t.assert_equal("suffix should be empty", "", result.suffix); + + result = calculate_diff_split("same", "sameagain"); + 
t.assert_equal("prefix should be 'same'", "same", result.prefix); + t.assert_equal("left should be empty", "", result.left); + t.assert_equal("right should be 'again'", "again", result.right); + t.assert_equal("suffix should be empty", "", result.suffix); + + result = calculate_diff_split("test", "testing"); + t.assert_equal("prefix should be 'test'", "test", result.prefix); + t.assert_equal("left should be empty", "", result.left); + t.assert_equal("right should be 'ing'", "ing", result.right); + t.assert_equal("suffix should be empty", "", result.suffix); +} + +static void test_calculate_diff_split_common_suffix(testing & t) { + diff_split result = calculate_diff_split("123end", "456end"); + t.assert_equal("prefix should be empty", "", result.prefix); + t.assert_equal("left should be '123'", "123", result.left); + t.assert_equal("right should be '456'", "456", result.right); + t.assert_equal("suffix should be 'end'", "end", result.suffix); + + result = calculate_diff_split("start", "end"); + t.assert_equal("prefix should be empty", "", result.prefix); + t.assert_equal("left should be 'start'", "start", result.left); + t.assert_equal("right should be 'end'", "end", result.right); + t.assert_equal("suffix should be empty", "", result.suffix); + + result = calculate_diff_split("abcsuffix", "xyzsuffix"); + t.assert_equal("prefix should be empty", "", result.prefix); + t.assert_equal("left should be 'abc'", "abc", result.left); + t.assert_equal("right should be 'xyz'", "xyz", result.right); + t.assert_equal("suffix should be 'suffix'", "suffix", result.suffix); +} + +static void test_calculate_diff_split_common_both(testing & t) { + diff_split result = calculate_diff_split("helloXworld", "helloYworld"); + t.assert_equal("prefix should be 'hello'", "hello", result.prefix); + t.assert_equal("left should be 'X'", "X", result.left); + t.assert_equal("right should be 'Y'", "Y", result.right); + t.assert_equal("suffix should be 'world'", "world", result.suffix); + + result 
= calculate_diff_split("ABCmiddleXYZ", "ABCdifferentXYZ"); + t.assert_equal("prefix should be 'ABC'", "ABC", result.prefix); + t.assert_equal("left should be 'middle'", "middle", result.left); + t.assert_equal("right should be 'different'", "different", result.right); + t.assert_equal("suffix should be 'XYZ'", "XYZ", result.suffix); + + result = calculate_diff_split("startAend", "startBend"); + t.assert_equal("prefix should be 'start'", "start", result.prefix); + t.assert_equal("left should be 'A'", "A", result.left); + t.assert_equal("right should be 'B'", "B", result.right); + t.assert_equal("suffix should be 'end'", "end", result.suffix); + + // Edge case: common prefix and suffix overlap + result = calculate_diff_split("aa", "ab"); + t.assert_equal("prefix should be 'a'", "a", result.prefix); + t.assert_equal("left should be 'a'", "a", result.left); + t.assert_equal("right should be 'b'", "b", result.right); + t.assert_equal("suffix should be empty", "", result.suffix); +} + +static void test_calculate_diff_split_empty_cases(testing & t) { + // Empty left, non-empty right + diff_split result = calculate_diff_split("", "hello"); + t.assert_equal("prefix should be empty", "", result.prefix); + t.assert_equal("left should be empty", "", result.left); + t.assert_equal("right should be 'hello'", "hello", result.right); + t.assert_equal("suffix should be empty", "", result.suffix); + + // Non-empty left, empty right + result = calculate_diff_split("hello", ""); + t.assert_equal("prefix should be empty", "", result.prefix); + t.assert_equal("left should be 'hello'", "hello", result.left); + t.assert_equal("right should be empty", "", result.right); + t.assert_equal("suffix should be empty", "", result.suffix); + + // Both empty + result = calculate_diff_split("", ""); + t.assert_equal("prefix should be empty", "", result.prefix); + t.assert_equal("left should be empty", "", result.left); + t.assert_equal("right should be empty", "", result.right); + 
t.assert_equal("suffix should be empty", "", result.suffix); + + // Left single char, empty right + result = calculate_diff_split("a", ""); + t.assert_equal("prefix should be empty", "", result.prefix); + t.assert_equal("left should be 'a'", "a", result.left); + t.assert_equal("right should be empty", "", result.right); + t.assert_equal("suffix should be empty", "", result.suffix); + + // Empty left, right single char + result = calculate_diff_split("", "a"); + t.assert_equal("prefix should be empty", "", result.prefix); + t.assert_equal("left should be empty", "", result.left); + t.assert_equal("right should be 'a'", "a", result.right); + t.assert_equal("suffix should be empty", "", result.suffix); +} + +static void test_calculate_diff_split_no_common(testing & t) { + diff_split result = calculate_diff_split("abc", "xyz"); + t.assert_equal("prefix should be empty", "", result.prefix); + t.assert_equal("left should be 'abc'", "abc", result.left); + t.assert_equal("right should be 'xyz'", "xyz", result.right); + t.assert_equal("suffix should be empty", "", result.suffix); + + result = calculate_diff_split("left", "right"); + // The algorithm finds "t" as a common suffix since both strings end with 't' + // This is the algorithm's actual behavior - it finds maximal common suffix + t.assert_equal("prefix should be empty", "", result.prefix); + t.assert_equal("left should be 'lef'", "lef", result.left); + t.assert_equal("right should be 'righ'", "righ", result.right); + t.assert_equal("suffix should be 't'", "t", result.suffix); + + result = calculate_diff_split("123", "456"); + t.assert_equal("prefix should be empty", "", result.prefix); + t.assert_equal("left should be '123'", "123", result.left); + t.assert_equal("right should be '456'", "456", result.right); + t.assert_equal("suffix should be empty", "", result.suffix); +} + +static void test_calculate_diff_split_single_char(testing & t) { + diff_split result = calculate_diff_split("a", "b"); + 
t.assert_equal("prefix should be empty", "", result.prefix); + t.assert_equal("left should be 'a'", "a", result.left); + t.assert_equal("right should be 'b'", "b", result.right); + t.assert_equal("suffix should be empty", "", result.suffix); + + result = calculate_diff_split("a", "a"); + t.assert_equal("prefix should be 'a'", "a", result.prefix); + t.assert_equal("left should be empty", "", result.left); + t.assert_equal("right should be empty", "", result.right); + t.assert_equal("suffix should be empty", "", result.suffix); + + result = calculate_diff_split("a", "ab"); + t.assert_equal("prefix should be 'a'", "a", result.prefix); + t.assert_equal("left should be empty", "", result.left); + t.assert_equal("right should be 'b'", "b", result.right); + t.assert_equal("suffix should be empty", "", result.suffix); + + result = calculate_diff_split("ab", "a"); + t.assert_equal("prefix should be 'a'", "a", result.prefix); + t.assert_equal("left should be 'b'", "b", result.left); + t.assert_equal("right should be empty", "", result.right); + t.assert_equal("suffix should be empty", "", result.suffix); +} + +static void test_calculate_diff_split_overlaps(testing & t) { + // One string is substring of another + diff_split result = calculate_diff_split("test", "testing"); + t.assert_equal("prefix should be 'test'", "test", result.prefix); + t.assert_equal("left should be empty", "", result.left); + t.assert_equal("right should be 'ing'", "ing", result.right); + t.assert_equal("suffix should be empty", "", result.suffix); + + result = calculate_diff_split("testing", "test"); + t.assert_equal("prefix should be 'test'", "test", result.prefix); + t.assert_equal("left should be 'ing'", "ing", result.left); + t.assert_equal("right should be empty", "", result.right); + t.assert_equal("suffix should be empty", "", result.suffix); + + // Similar strings with one extra char at start + result = calculate_diff_split("Xtest", "Ytest"); + // The algorithm finds "test" as a common suffix 
since both strings end with "test" + // This is the algorithm's actual behavior - it finds maximal common suffix + t.assert_equal("prefix should be empty", "", result.prefix); + t.assert_equal("left should be 'X'", "X", result.left); + t.assert_equal("right should be 'Y'", "Y", result.right); + t.assert_equal("suffix should be 'test'", "test", result.suffix); + + // Similar strings with one extra char at end + result = calculate_diff_split("testX", "testY"); + t.assert_equal("prefix should be 'test'", "test", result.prefix); + t.assert_equal("left should be 'X'", "X", result.left); + t.assert_equal("right should be 'Y'", "Y", result.right); + t.assert_equal("suffix should be empty", "", result.suffix); + + // Strings that are reverses + result = calculate_diff_split("abc", "cba"); + t.assert_equal("prefix should be empty", "", result.prefix); + t.assert_equal("left should be 'abc'", "abc", result.left); + t.assert_equal("right should be 'cba'", "cba", result.right); + t.assert_equal("suffix should be empty", "", result.suffix); +} + +static void test_calculate_diff_split_tag_boundaries(testing & t) { + // Test with unclosed XML tags + diff_split result = calculate_diff_split("testcontent"); + // The fix_tag_boundaries should move incomplete tags appropriately + t.assert_true("prefix should start with 'test'", result.prefix.find("test") == 0); + t.assert_true("should handle tag boundaries", result.left != "" || result.right != "" || result.suffix != ""); + + // Test with unclosed brackets + result = calculate_diff_split("test[", "test]value"); + t.assert_true("should handle bracket boundaries", result.left != "" || result.right != "" || result.suffix != ""); + + // Test with partial tags on both sides + result = calculate_diff_split("prefix", "prefixsuffix"); + // fix_tag_boundaries moves the incomplete '<' from prefix to left/right + t.assert_equal("prefix should be 'prefix'", "prefix", result.prefix); + t.assert_equal("left should be ''", "", result.left); + 
t.assert_equal("right should be 'suffix'", "suffix", result.right); + t.assert_equal("suffix should be empty", "", result.suffix); + + // Test with complex nested tags + result = calculate_diff_split("prefix
content
", "prefix
different
"); + // Algorithm finds "ent" as a common suffix because both strings end with it + // This is the actual algorithm behavior, though not semantically ideal + t.assert_equal("prefix should be 'prefix
'", "prefix
", result.prefix); + t.assert_equal("left should be 'cont'", "cont", result.left); + t.assert_equal("right should be 'differ'", "differ", result.right); + t.assert_equal("suffix should be 'ent
'", "ent
", result.suffix); + + // Test with unclosed angle bracket + result = calculate_diff_split("Hello ", "Hello test"); + t.assert_equal("prefix should be 'Hello '", "Hello ", result.prefix); + t.assert_true("left should contain ''", result.left.find("") != std::string::npos); + t.assert_equal("right should be 'test'", "test", result.right); + t.assert_equal("suffix should be empty", "", result.suffix); + + // Test with unclosed square bracket + result = calculate_diff_split("test [array]", "test other"); + t.assert_equal("prefix should be 'test '", "test ", result.prefix); + t.assert_true("left should contain '[array]'", result.left.find("[array]") != std::string::npos); + t.assert_equal("right should be 'other'", "other", result.right); + t.assert_equal("suffix should be empty", "", result.suffix); + + // Test empty prefix and suffix with tags + result = calculate_diff_split("left", "righ"); + t.assert_equal("prefix should be ''", "", result.prefix); + t.assert_equal("left should be 'left'", "left", result.left); + t.assert_equal("right should be 'righ'", "righ", result.right); + t.assert_equal("suffix should be ''", "", result.suffix); + + { + // real case from template tests, simplified + std::string left = "PREFIX
Sure"; + std::string right = "PREFIXLemme thinkSure"; + result = calculate_diff_split(left, right); + t.assert_equal("prefix should be PREFIX", "PREFIX", result.prefix); + t.assert_equal("suffix should be
Sure", "Sure", result.suffix); + t.assert_equal("left should be empty", "", result.left); + t.assert_equal("right should be Lemme think", "Lemme think", result.right); + } + + { + // Real case: special tokens with |> boundary issue + // The suffix starts with |> which should be moved to complete <|END_RESPONSE and <|END_ACTION + std::string prefix = "SOME_PREFIX"; + std::string suffix = "|><|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>"; + std::string left_diff = "<|START_RESPONSE|>Let me help you.<|END_RESPONSE"; + std::string right_diff = + "<|START_THINKING|><|END_THINKING|><|START_ACTION|>[\n" + " {\"tool_call_id\": \"0\", \"tool_name\": \"test_function_name\", " + "\"parameters\": {\"param1\": \"value1\", \"param2\": \"value2\"}}\n" + "]<|END_ACTION"; + + std::string left = prefix + left_diff + suffix; + std::string right = prefix + right_diff + suffix; + result = calculate_diff_split(left, right); + + t.assert_equal("special token prefix", prefix, result.prefix); + // The |> should be moved from suffix to complete the tokens + t.assert_equal("special token left", "<|START_RESPONSE|>Let me help you.<|END_RESPONSE|>", result.left); + t.assert_true("special token right ends with |>", result.right.find("<|END_ACTION|>") != std::string::npos); + t.assert_equal("special token suffix", "<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>", + result.suffix); + } +} + +static void test_until_common_prefix(testing & t) { + t.test("until_common_prefix basic", test_until_common_prefix_basic); +} + +static void test_until_common_prefix_basic(testing & t) { + // Test case from the user request + std::string result = until_common_prefix("", "", ""); + t.assert_equal("untilCommonPrefix should return ''", "", result); + + // Additional test cases to ensure robustness + // Test with different common prefix lengths + result = until_common_prefix("prefixsuffix", "different", "other"); + t.assert_equal("should return 'prefix'", "prefix", result); + + // 
Test when common prefix is at the start + result = until_common_prefix("rest", "left", "right"); + t.assert_equal("should return empty string when common prefix at start", "", result); + + // Test when there's no common prefix + result = until_common_prefix("something", "left", "right"); + t.assert_equal("should return empty string when no common prefix", "", result); + + // Test with empty strings + result = until_common_prefix("test", "", "right"); + t.assert_equal("should return empty string when left is empty", "", result); + + // Test with longer common prefix + result = until_common_prefix("abcXYZrest", "left", "right"); + t.assert_equal("should return 'abcXYZ'", "abcXYZ", result); +} + +static void test_after_common_suffix(testing & t) { + t.test("after_common_suffix basic", test_after_common_suffix_basic); +} + +static void test_after_common_suffix_basic(testing & t) { + // Test case from the user request + std::string result = after_common_suffix("100", + "100", + "535"); + t.assert_equal("afterCommonSuffix should return ''", "", result); + + // Test when common suffix is at the end + result = after_common_suffix("rest", "left", "right"); + t.assert_equal("should return empty string when common suffix at end", "", result); + + // Test with empty strings + result = after_common_suffix("test", "left", ""); + t.assert_equal("should return empty string when right is empty", "", result); + + // Test case with XML-like structure similar to the main example + result = after_common_suffix("value", + "value", + "different"); + t.assert_equal("should return ''", "", result); + + // Test with longer common suffix appearing at the end of full + result = after_common_suffix("prefixrest", "prefixleft", "prefixright"); + t.assert_equal("should return '' when common suffix is at end of full", "", result); + + // Test with common suffix appearing in middle but not at end + result = after_common_suffix("content", "value", "other"); + t.assert_equal("should return '' when 
common suffix appears before end", "", result); + + // Test with multi-character common suffix at the very end of full + result = after_common_suffix("startend", "prefixleft", "prefixright"); + t.assert_equal("should return '' when common suffix is at end of full", "", result); +} + +static void test_compare_variants(testing & t) { + t.test("compare_variants basic", test_compare_variants_basic); + t.test("compare_variants messages modifier", test_compare_variants_messages_modifier); + t.test("compare_variants tools modifier", test_compare_variants_tools_modifier); + t.test("compare_variants both modifiers", test_compare_variants_both_modifiers); + t.test("compare_variants template failure", test_compare_variants_template_failure); + t.test("compare_variants identity", test_compare_variants_identity); +} + +static void test_compare_variants_basic(testing & t) { + // Create a simple template that just echoes messages + common_chat_template tmpl("{{ messages[0]['content'] }}", "", ""); + + template_params params; + params.messages = json::array({ + json {{"role", "user"}, {"content", "Hello"}} + }); + + auto modifier = [](template_params & p) { + p.messages[0]["content"] = "World"; + }; + + auto result = differential_analyzer::compare_variants(tmpl, params, modifier); + + t.assert_true("result should have value", result.has_value()); + // The template might not output anything if messages is empty or format is different + // Check that we get a valid result + t.assert_true("prefix or left should have content", !result->diff.prefix.empty() || !result->diff.left.empty()); +} + +static void test_compare_variants_messages_modifier(testing & t) { + // Test with messages modifier only + common_chat_template tmpl("{% for message in messages %}{{ message['role'] }}:{{ message['content'] }}{% endfor %}", "", ""); + + template_params params; + params.messages = json::array({ + json {{"role", "user"}, {"content", "A"}} + }); + + auto modifier = [](template_params & p) { + 
p.messages[0]["content"] = "B"; + }; + + std::optional result = differential_analyzer::compare_variants(tmpl, params, modifier); + + t.assert_true("result should have value", result.has_value()); + t.assert_equal("left should be 'A'", "A", result->diff.left); + t.assert_equal("right should be 'B'", "B", result->diff.right); +} + +static void test_compare_variants_tools_modifier(testing & t) { + // Test with tools modifier only + common_chat_template tmpl( + "{% for tool in tools %}{{ tool['name'] }}{% endfor %}", "", ""); + + template_params params; + params.tools = json::array({ + json {{"name", "foo"}} + }); + + auto modifier = [](template_params & p) { + p.tools[0]["name"] = "bar"; + }; + + auto result = differential_analyzer::compare_variants(tmpl, params, modifier); + + t.assert_true("result should have value", result.has_value()); + t.assert_equal("left should be 'foo'", "foo", result->diff.left); + t.assert_equal("right should be 'bar'", "bar", result->diff.right); +} + +static void test_compare_variants_both_modifiers(testing & t) { + // Test with both messages and tools modifiers using the for loop approach + common_chat_template tmpl( + "{% for message in messages %}{{ message['role'] }}:{{ message['content'] }}{% endfor %}", "", ""); + + template_params params; + params.messages = json::array({ + json {{"role", "user"}, {"content", "A"}} + }); + + auto modifier = [](template_params & p) { + p.messages[0]["content"] = "B"; + p.messages[0]["role"] = "newuser"; + }; + + auto result = differential_analyzer::compare_variants(tmpl, params, modifier); + + t.assert_true("result should have value", result.has_value()); + t.assert_equal("left should be 'user:A'", "user:A", result->diff.left); + t.assert_equal("right should be 'newuser:B'", "newuser:B", result->diff.right); +} + +static void test_compare_variants_template_failure(testing & t) { + // Test with template that causes failure during application (not construction) + // We use a valid template syntax but 
one that will fail during application + common_chat_template tmpl("{{ messages[0]['nonexistent_field'] }}", "", ""); + + template_params params; + params.messages = json::array({ + json {{"role", "user"}, {"content", "Hello"}} + }); + + auto modifier = [](template_params & p) { + p.messages[0]["content"] = "World"; + }; + + auto result = differential_analyzer::compare_variants(tmpl, params, modifier); + + t.assert_true("result should be nullopt on template failure", !result.has_value()); +} + +static void test_compare_variants_identity(testing & t) { + // Test with identity modifier (no change) + common_chat_template tmpl("{{ messages[0]['content'] }}", "", ""); + + template_params params; + params.messages = json::array({ + json {{"role", "user"}, {"content", "Hello"}} + }); + + // No modifier - should use identity + auto result = differential_analyzer::compare_variants(tmpl, params, nullptr); + + t.assert_true("result should have value", result.has_value()); + t.assert_equal("prefix should be 'Hello'", "Hello", result->diff.prefix); + t.assert_equal("left should be empty", "", result->diff.left); + t.assert_equal("right should be empty", "", result->diff.right); + t.assert_equal("suffix should be empty", "", result->diff.suffix); +} + +// ============================================================================ +// Seed-OSS Template Tool Calling Analysis Tests +// ============================================================================ + +static void test_seed_oss_tool_analysis(testing & t) { + t.test("Seed-OSS tool presence", test_seed_oss_tool_presence); + t.test("Seed-OSS call count", test_seed_oss_call_count); + t.test("Seed-OSS function names", test_seed_oss_function_names); + t.test("Seed-OSS argument count", test_seed_oss_argument_count); + t.test("Seed-OSS args presence", test_seed_oss_args_presence); + t.test("Seed-OSS tool with reasoning", test_seed_oss_tool_with_reasoning); +} + +// Helper to load Seed-OSS template +static common_chat_template 
load_seed_oss_template(testing & t) { + std::string template_path = "models/templates/ByteDance-Seed-OSS.jinja"; + std::ifstream fin(template_path, std::ios::binary); + std::ostringstream buf; + if (fin.is_open()) { + buf << fin.rdbuf(); + } + std::string template_source = buf.str(); + common_chat_template tmpl(template_source, "", ""); + t.assert_true("Seed-OSS template loaded successfully", template_source.length() > 0); + return tmpl; +} + +// Helper to build tool call JSON +static json build_tool_call(const std::string & name, const json & args, const std::string & id = "call_001") { + return json{ + {"id", id}, + {"type", "function"}, + {"function", json{ + {"name", name}, + {"arguments", args} + }} + }; +} + +// Helper to build tools definition +static json build_tools_definition() { + json parameters_schema = json::object(); + parameters_schema["type"] = "object"; + parameters_schema["properties"] = json::object(); + parameters_schema["properties"]["param1"] = json::object({ + {"type", "string"}, + {"description", "First parameter"} + }); + parameters_schema["properties"]["param2"] = json::object({ + {"type", "string"}, + {"description", "Second parameter"} + }); + parameters_schema["required"] = json::array({"param1", "param2"}); + + return json::array({ + json{ + {"type", "function"}, + {"function", json{ + {"name", "test_function_name"}, + {"description", "A test function for debugging"}, + {"parameters", parameters_schema} + }} + } + }); +} + +// T1: Compare with/without tool call (user, assistant) +static void test_seed_oss_tool_presence(testing & t) { + common_chat_template tmpl = load_seed_oss_template(t); + + json assistant_no_tools = json{ + {"role", "assistant"}, + {"content", "Let me help you."} + }; + + json assistant_with_tools = json{ + {"role", "assistant"}, + {"content", nullptr}, + {"tool_calls", json::array({ + build_tool_call("test_function_name", json::object({{"param1", "value1"}, {"param2", "value2"}})) + })} + }; + + json user_msg = 
json{ + {"role", "user"}, + {"content", "Hello, please help me."} + }; + + template_params params_no_tools; + params_no_tools.messages = json::array({user_msg, assistant_no_tools}); + params_no_tools.tools = build_tools_definition(); + params_no_tools.add_generation_prompt = false; + params_no_tools.enable_thinking = true; + + template_params params_with_tools; + params_with_tools.messages = json::array({user_msg, assistant_with_tools}); + params_with_tools.tools = build_tools_definition(); + params_with_tools.add_generation_prompt = false; + params_with_tools.enable_thinking = true; + + auto result = differential_analyzer::compare_variants(tmpl, params_no_tools, + [&](template_params & p) { + p.messages = params_with_tools.messages; + }); + + t.assert_true("T1 result should have value", result.has_value()); + + const auto & diff = result->diff; + t.assert_true("T1 prefix should contain system", diff.prefix.find("system") != std::string::npos); + t.assert_true("T1 prefix should contain user", diff.prefix.find("user") != std::string::npos); + t.assert_true("T1 prefix should contain assistant", diff.prefix.find("assistant") != std::string::npos); + + // Left should be the assistant content without tool + t.assert_equal("T1 left should contain 'Let me help you.'", "Let me help you.", diff.left); + + // Right should contain the tool call markers + t.assert_true("T1 right should contain tool_call begin", diff.right.find("") != std::string::npos); + t.assert_true("T1 right should contain function tag", diff.right.find("") != std::string::npos); + t.assert_true("T1 right should contain parameter=param1", diff.right.find("") != std::string::npos); + t.assert_true("T1 right should contain parameter=param2", diff.right.find("") != std::string::npos); + t.assert_true("T1 right should contain value1", diff.right.find("value1") != std::string::npos); + t.assert_true("T1 right should contain value2", diff.right.find("value2") != std::string::npos); + t.assert_true("T1 right 
should contain tool_call end", diff.right.find("") != std::string::npos); + + // Suffix should be the eos token + t.assert_equal("T1 suffix should be ''", "", diff.suffix); +} + +// T2: Compare one vs two tool calls +static void test_seed_oss_call_count(testing & t) { + common_chat_template tmpl = load_seed_oss_template(t); + + json assistant_one_call = json{ + {"role", "assistant"}, + {"content", nullptr}, + {"tool_calls", json::array({ + build_tool_call("test_function_name", json::object({{"param1", "value1"}, {"param2", "value2"}})) + })} + }; + + json assistant_two_calls = json{ + {"role", "assistant"}, + {"content", nullptr}, + {"tool_calls", json::array({ + build_tool_call("test_function_name", json::object({{"param1", "value1"}, {"param2", "value2"}})), + build_tool_call("test_function_name", json::object({{"param1", "value3"}, {"param2", "value4"}}), "call_002") + })} + }; + + json user_msg = json{ + {"role", "user"}, + {"content", "Hello, please help me."} + }; + + template_params params_one; + params_one.messages = json::array({user_msg, assistant_one_call}); + params_one.tools = build_tools_definition(); + params_one.add_generation_prompt = false; + params_one.enable_thinking = true; + + auto result = differential_analyzer::compare_variants(tmpl, params_one, + [&](template_params & p) { + p.messages = json::array({user_msg, assistant_two_calls}); + }); + + t.assert_true("T2 result should have value", result.has_value()); + + const auto & diff = result->diff; + + // Prefix should include the first tool call + t.assert_true("T2 prefix should contain first tool_call begin", diff.prefix.find("") != std::string::npos); + t.assert_true("T2 prefix should contain first function", diff.prefix.find("") != std::string::npos); + t.assert_true("T2 prefix should contain value1", diff.prefix.find("value1") != std::string::npos); + t.assert_true("T2 prefix should contain value2", diff.prefix.find("value2") != std::string::npos); + t.assert_true("T2 prefix should contain 
first tool_call end", diff.prefix.find("") != std::string::npos); + + // Left should be empty (no second tool call in variant A) + t.assert_equal("T2 left should be empty", "", diff.left); + + // Right should contain the second tool call + t.assert_true("T2 right should contain second tool_call begin", diff.right.find("") != std::string::npos); + t.assert_true("T2 right should contain second function", diff.right.find("") != std::string::npos); + t.assert_true("T2 right should contain value3", diff.right.find("value3") != std::string::npos); + t.assert_true("T2 right should contain value4", diff.right.find("value4") != std::string::npos); + t.assert_true("T2 right should contain second tool_call end", diff.right.find("") != std::string::npos); + + // Suffix should be the eos token + t.assert_equal("T2 suffix should be ''", "", diff.suffix); +} + +// T3: Compare different function names +static void test_seed_oss_function_names(testing & t) { + common_chat_template tmpl = load_seed_oss_template(t); + + // Build tools with two different function names + json parameters_schema = json::object(); + parameters_schema["type"] = "object"; + parameters_schema["properties"] = json::object(); + parameters_schema["properties"]["arg1"] = json::object({ + {"type", "string"}, + {"description", "Argument 1"} + }); + parameters_schema["required"] = json::array({"arg1"}); + + json tools = json::array({ + json{ + {"type", "function"}, + {"function", json{ + {"name", "func_alpha"}, + {"description", "First function"}, + {"parameters", parameters_schema} + }} + }, + json{ + {"type", "function"}, + {"function", json{ + {"name", "func_beta"}, + {"description", "Second function"}, + {"parameters", parameters_schema} + }} + } + }); + + json assistant_func_alpha = json{ + {"role", "assistant"}, + {"content", nullptr}, + {"tool_calls", json::array({ + build_tool_call("func_alpha", json::object({{"arg1", "test_value"}})) + })} + }; + + json assistant_func_beta = json{ + {"role", "assistant"}, 
+ {"content", nullptr}, + {"tool_calls", json::array({ + build_tool_call("func_beta", json::object({{"arg1", "test_value"}})) + })} + }; + + json user_msg = json{ + {"role", "user"}, + {"content", "Hello"} + }; + + template_params params_alpha; + params_alpha.messages = json::array({user_msg, assistant_func_alpha}); + params_alpha.tools = tools; + params_alpha.add_generation_prompt = false; + params_alpha.enable_thinking = true; + + auto result = differential_analyzer::compare_variants(tmpl, params_alpha, + [&](template_params & p) { + p.messages = json::array({user_msg, assistant_func_beta}); + }); + + t.assert_true("T3 result should have value", result.has_value()); + + const auto & diff = result->diff; + + bool func_alpha_in_left = diff.left.find("func_alpha") != std::string::npos; + bool func_alpha_in_prefix = diff.prefix.find("func_alpha") != std::string::npos; + bool func_beta_in_right = diff.right.find("func_beta") != std::string::npos; + bool func_beta_in_prefix = diff.prefix.find("func_beta") != std::string::npos; + bool func_beta_in_suffix = diff.suffix.find("func_beta") != std::string::npos; + + // Left should contain func_alpha (or be in prefix) + t.assert_true("T3 left should contain func_alpha (or prefix)", func_alpha_in_left || func_alpha_in_prefix); + + // Right should contain func_beta + t.assert_true("T3 right should contain func_beta", func_beta_in_right || func_beta_in_prefix || func_beta_in_suffix); + + // Both should have the same parameter value (in common parts, not in diffs) + // Since both have same args, test_value will be in prefix/suffix + t.assert_true("T3 diff should contain test_value (in prefix or suffix)", + diff.prefix.find("test_value") != std::string::npos || diff.suffix.find("test_value") != std::string::npos); +} + +// T4: Compare different argument counts (zero, one, two parameters) +static void test_seed_oss_argument_count(testing & t) { + common_chat_template tmpl = load_seed_oss_template(t); + + // Build tools with 0, 1, 
or 2 required parameters + json params_2_required = json::object(); + params_2_required["type"] = "object"; + params_2_required["properties"] = json::object(); + params_2_required["properties"]["arg1"] = json::object({ + {"type", "string"}, + {"description", "Argument 1"} + }); + params_2_required["properties"]["arg2"] = json::object({ + {"type", "string"}, + {"description", "Argument 2"} + }); + params_2_required["required"] = json::array({"arg1", "arg2"}); + + json params_1_required = json::object(); + params_1_required["type"] = "object"; + params_1_required["properties"] = json::object(); + params_1_required["properties"]["arg1"] = json::object({ + {"type", "string"}, + {"description", "Argument 1"} + }); + params_1_required["required"] = json::array({"arg1"}); + + json tools = json::array({ + json{ + {"type", "function"}, + {"function", json{ + {"name", "test_func"}, + {"description", "Test function"}, + {"parameters", params_2_required} + }} + } + }); + + // Test: zero args vs one arg + json assistant_zero_args = json{ + {"role", "assistant"}, + {"content", nullptr}, + {"tool_calls", json::array({ + build_tool_call("test_func", json::object()) + })} + }; + + json assistant_one_arg = json{ + {"role", "assistant"}, + {"content", nullptr}, + {"tool_calls", json::array({ + build_tool_call("test_func", json::object({{"arg1", "value1"}})) + })} + }; + + json assistant_two_args = json{ + {"role", "assistant"}, + {"content", nullptr}, + {"tool_calls", json::array({ + build_tool_call("test_func", json::object({{"arg1", "value1"}, {"arg2", "value2"}})) + })} + }; + + json user_msg = json{ + {"role", "user"}, + {"content", "Hello"} + }; + + // Test zero vs one + template_params params_zero; + params_zero.messages = json::array({user_msg, assistant_zero_args}); + params_zero.tools = tools; + params_zero.add_generation_prompt = false; + params_zero.enable_thinking = true; + + auto result_zero_one = differential_analyzer::compare_variants(tmpl, params_zero, + 
[&](template_params & p) { + p.messages = json::array({user_msg, assistant_one_arg}); + }); + + t.assert_true("T4 zero vs one result should have value", result_zero_one.has_value()); + t.assert_true("T4 zero vs one left should be empty or minimal", result_zero_one->diff.left.empty() || result_zero_one->diff.left == ""); + t.assert_true("T4 zero vs one right should contain arg1", result_zero_one->diff.right.find("arg1") != std::string::npos); + + // Test one vs two + template_params params_one; + params_one.messages = json::array({user_msg, assistant_one_arg}); + params_one.tools = tools; + params_one.add_generation_prompt = false; + params_one.enable_thinking = true; + + auto result_one_two = differential_analyzer::compare_variants(tmpl, params_one, + [&](template_params & p) { + p.messages = json::array({user_msg, assistant_two_args}); + }); + + t.assert_true("T4 one vs two result should have value", result_one_two.has_value()); + + const auto & diff4 = result_one_two->diff; + t.assert_true("T4 one vs two left should contain arg1 (or prefix)", + diff4.left.find("arg1") != std::string::npos || diff4.prefix.find("arg1") != std::string::npos); + t.assert_true("T4 one vs two right should contain arg1 (or prefix)", + diff4.right.find("arg1") != std::string::npos || diff4.prefix.find("arg1") != std::string::npos); + t.assert_true("T4 one vs two right should contain arg2 (or prefix/suffix)", + diff4.right.find("arg2") != std::string::npos || diff4.prefix.find("arg2") != std::string::npos || diff4.suffix.find("arg2") != std::string::npos); +} + +// T5: Compare different argument values +static void test_seed_oss_args_presence(testing & t) { + common_chat_template tmpl = load_seed_oss_template(t); + + json assistant_same_arg = json{ + {"role", "assistant"}, + {"content", nullptr}, + {"tool_calls", json::array({ + build_tool_call("test_function_name", json::object({{"param1", "value1"}})) + })} + }; + + json assistant_other_arg = json{ + {"role", "assistant"}, + {"content", 
nullptr}, + {"tool_calls", json::array({ + build_tool_call("test_function_name", json::object({{"param2", "value2"}})) + })} + }; + + json assistant_both_args = json{ + {"role", "assistant"}, + {"content", nullptr}, + {"tool_calls", json::array({ + build_tool_call("test_function_name", json::object({{"param1", "value1"}, {"param2", "value2"}})) + })} + }; + + json user_msg = json{ + {"role", "user"}, + {"content", "Hello"} + }; + + template_params params_same; + params_same.messages = json::array({user_msg, assistant_same_arg}); + params_same.tools = build_tools_definition(); + params_same.add_generation_prompt = false; + params_same.enable_thinking = true; + + // Test same arg vs other arg + auto result_same_other = differential_analyzer::compare_variants(tmpl, params_same, + [&](template_params & p) { + p.messages = json::array({user_msg, assistant_other_arg}); + }); + + t.assert_true("T5 same vs other result should have value", result_same_other.has_value()); + const auto & diff5a = result_same_other->diff; + t.assert_true("T5 same vs other left should contain param1 (or prefix/suffix)", + diff5a.left.find("param1") != std::string::npos || diff5a.prefix.find("param1") != std::string::npos || diff5a.suffix.find("param1") != std::string::npos); + t.assert_true("T5 same vs other left should contain value1 (or prefix/suffix)", + diff5a.left.find("value1") != std::string::npos || diff5a.prefix.find("value1") != std::string::npos); + t.assert_true("T5 same vs other right should contain param2 (or prefix/suffix)", + diff5a.right.find("param2") != std::string::npos || diff5a.prefix.find("param2") != std::string::npos || diff5a.suffix.find("param2") != std::string::npos); + t.assert_true("T5 same vs other right should contain value2 (or prefix/suffix)", + diff5a.right.find("value2") != std::string::npos || diff5a.prefix.find("value2") != std::string::npos || diff5a.suffix.find("value2") != std::string::npos); + + // Test same arg vs both args + auto result_same_both = 
differential_analyzer::compare_variants(tmpl, params_same, + [&](template_params & p) { + p.messages = json::array({user_msg, assistant_both_args}); + }); + + t.assert_true("T5 same vs both result should have value", result_same_both.has_value()); + const auto & diff5b = result_same_both->diff; + t.assert_true("T5 same vs both left should contain param1 (or prefix/suffix)", + diff5b.left.find("param1") != std::string::npos || diff5b.prefix.find("param1") != std::string::npos || diff5b.suffix.find("param1") != std::string::npos); + t.assert_true("T5 same vs both right should contain param1 (or prefix/suffix)", + diff5b.right.find("param1") != std::string::npos || diff5b.prefix.find("param1") != std::string::npos || diff5b.suffix.find("param1") != std::string::npos); + t.assert_true("T5 same vs both right should contain param2 (or prefix/suffix)", + diff5b.right.find("param2") != std::string::npos || diff5b.prefix.find("param2") != std::string::npos || diff5b.suffix.find("param2") != std::string::npos); +} + +// T6: Tool call with vs without reasoning_content +static void test_seed_oss_tool_with_reasoning(testing & t) { + common_chat_template tmpl = load_seed_oss_template(t); + + json assistant_tool_only = json{ + {"role", "assistant"}, + {"content", nullptr}, + {"tool_calls", json::array({ + build_tool_call("test_function_name", json::object({{"param1", "value1"}, {"param2", "value2"}})) + })} + }; + + json assistant_tool_with_reasoning = json{ + {"role", "assistant"}, + {"content", nullptr}, + {"tool_calls", json::array({ + build_tool_call("test_function_name", json::object({{"param1", "value1"}, {"param2", "value2"}})) + })}, + {"reasoning_content", "I need to call the tool first."} + }; + + json user_msg = json{ + {"role", "user"}, + {"content", "Hello, please help me."} + }; + + template_params params_tool_only; + params_tool_only.messages = json::array({user_msg, assistant_tool_only}); + params_tool_only.tools = build_tools_definition(); + 
params_tool_only.add_generation_prompt = false; + params_tool_only.enable_thinking = true; + + auto result = differential_analyzer::compare_variants(tmpl, params_tool_only, + [&](template_params & p) { + p.messages = json::array({user_msg, assistant_tool_with_reasoning}); + }); + + t.assert_true("T6 result should have value", result.has_value()); + + const auto & diff = result->diff; + + // Left should be empty (no reasoning in variant A) + t.assert_equal("T6 left should be empty", "", diff.left); + + // Right should contain the thinking token with reasoning content + t.assert_true("T6 right should contain think begin", diff.right.find("") != std::string::npos); + t.assert_true("T6 right should contain reasoning content", diff.right.find("I need to call the tool first.") != std::string::npos); + t.assert_true("T6 right should contain think end", diff.right.find("") != std::string::npos); + + // Prefix should contain the assistant role + t.assert_true("T6 prefix should contain assistant", diff.prefix.find("assistant") != std::string::npos); + + // Suffix should contain the tool call + t.assert_true("T6 suffix should contain tool_call begin", diff.suffix.find("") != std::string::npos); + t.assert_true("T6 suffix should contain function name", diff.suffix.find("test_function_name") != std::string::npos); + t.assert_true("T6 suffix should contain eos", diff.suffix.find("") != std::string::npos); +} + +static common_chat_template load_template(testing & t, const std::string & template_path) { + std::ifstream fin(template_path, std::ios::binary); + std::ostringstream buf; + if (fin.is_open()) { + buf << fin.rdbuf(); + } + std::string template_source = buf.str(); + common_chat_template tmpl(template_source, "", ""); + t.assert_true("Nemotron template loaded successfully", template_source.length() > 0); + return tmpl; +} + +// ============================================================================ +// Nemotron Template Analysis Tests +// 
============================================================================ +static common_chat_template load_nemotron_template(testing & t) { + return load_template(t, "models/templates/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16.jinja"); +} + +static void test_nemotron_analysis(testing & t) { + t.test("Nemotron reasoning detection", test_nemotron_reasoning_detection); + t.test("Nemotron tool format", test_nemotron_tool_format); +} + +static void test_nemotron_reasoning_detection(testing & t) { + common_chat_template tmpl = load_nemotron_template(t); + + // Test the comparison manually to see what's happening + json user_msg = json{ { "role", "user" }, { "content", "Hello" } }; + json assistant_no_reasoning = json{ + { "role", "assistant" }, + { "content", "I can help." } + }; + json assistant_with_reasoning = json{ + { "role", "assistant" }, + { "content", "I can help." }, + { "reasoning_content", "Let me think about this." } + }; + + template_params params; + params.messages = json::array({ user_msg, assistant_no_reasoning }); + params.add_generation_prompt = false; + params.enable_thinking = true; + + // Run differential analysis + auto analysis = differential_analyzer::analyze(tmpl); + + // Check reasoning markers + t.assert_equal("reasoning_start should be ''", "", analysis.markers.reasoning_start); + t.assert_equal("reasoning_end should be ''", "", analysis.markers.reasoning_end); + + // Check reasoning mode detection + // Nemotron uses forced closed reasoning with add_generation_prompt + t.assert_equal("reasoning should be FORCED_CLOSED", reasoning_mode::FORCED_CLOSED, analysis.reasoning); + + // Make sure reasoning markers don't spill over to content markers + t.assert_equal("content start should be empty", "", analysis.markers.content_start); + t.assert_equal("content end should be empty", "", analysis.markers.content_end); + + t.assert_equal("content should be PLAIN", content_mode::PLAIN, analysis.content); +} + +static void test_nemotron_tool_format(testing & 
t) { + common_chat_template tmpl = load_nemotron_template(t); + + // Run differential analysis + auto analysis = differential_analyzer::analyze(tmpl); + + // Check tool markers - Nemotron uses per-call wrapping (each call individually wrapped) + t.assert_equal("tool_section_start should be empty (per-call format)", "", analysis.markers.tool_section_start); + t.assert_equal("tool_section_end should be empty (per-call format)", "", analysis.markers.tool_section_end); + t.assert_equal("per_call_start should be '\\n'", "\n", analysis.markers.per_call_start); + t.assert_equal("per_call_end should be ''", "", analysis.markers.per_call_end); + t.assert_true("should support parallel calls", analysis.supports_parallel_calls); + + // Check function markers + t.assert_equal("func_name_prefix should be '\\n'", ">\n", analysis.markers.func_name_suffix); + t.assert_equal("func_close should be ''", "", analysis.markers.func_close); + + // Check argument markers (note: markers retain trailing newlines for proper parsing) + t.assert_equal("arg_name_prefix should be '\\n'", ">\n", analysis.markers.arg_name_suffix); + t.assert_equal("arg_value_suffix should be '\\n'", "\n", analysis.markers.arg_value_suffix); + + // Check format classification + t.assert_true("tool format should be TAG_WITH_TAGGED", analysis.tools == tool_format::TAG_WITH_TAGGED); + + // Verify tool support + t.assert_true("should support tools", analysis.supports_tools); +} + +static common_chat_template load_cohere_template(testing & t) { + return load_template(t, "models/templates/CohereForAI-c4ai-command-r7b-12-2024-tool_use.jinja"); +} + +static void test_cohere_analysis(testing & t) { + t.test("Cohere reasoning detection", test_cohere_reasoning_detection); + t.test("Cohere tool format", test_cohere_tool_format); +} + +static void test_cohere_reasoning_detection(testing & t) { + common_chat_template tmpl = load_cohere_template(t); + + // Run differential analysis + auto analysis = 
differential_analyzer::analyze(tmpl); + + // Check reasoning markers - Cohere uses special token format + t.assert_equal("reasoning_start should be '<|START_THINKING|>'", "<|START_THINKING|>", analysis.markers.reasoning_start); + t.assert_equal("reasoning_end should be '<|END_THINKING|>'", "<|END_THINKING|>", analysis.markers.reasoning_end); + + // Check reasoning mode - Cohere only shows reasoning with tool calls (TOOLS_ONLY) + t.assert_equal("reasoning should be TOOLS_ONLY", reasoning_mode::TOOLS_ONLY, analysis.reasoning); + + // Check content markers - Cohere wraps all content with START/END_RESPONSE + t.assert_equal("content_start should be '<|START_RESPONSE|>'", "<|START_RESPONSE|>", analysis.markers.content_start); + t.assert_equal("content_end should be '<|END_RESPONSE|>'", "<|END_RESPONSE|>", analysis.markers.content_end); + + // Content is always wrapped (both with and without tools) + t.assert_equal("content should be ALWAYS_WRAPPED", content_mode::ALWAYS_WRAPPED, analysis.content); +} + +static void test_cohere_tool_format(testing & t) { + common_chat_template tmpl = load_cohere_template(t); + + // Run differential analysis + auto analysis = differential_analyzer::analyze(tmpl); + + // Check tool section markers - Cohere uses ACTION markers + t.assert_equal("tool_section_start should be '<|START_ACTION|>'", "<|START_ACTION|>", analysis.markers.tool_section_start); + t.assert_equal("tool_section_end should be '<|END_ACTION|>'", "<|END_ACTION|>", analysis.markers.tool_section_end); + + // JSON_NATIVE format has no per-call markers + t.assert_equal("per_call_start should be empty", "", analysis.markers.per_call_start); + t.assert_equal("per_call_end should be empty", "", analysis.markers.per_call_end); + + // JSON_NATIVE format has empty function markers (no XML-style markers) + t.assert_equal("func_name_prefix should be empty", "", analysis.markers.func_name_prefix); + t.assert_equal("func_name_suffix should be empty", "", 
analysis.markers.func_name_suffix); + t.assert_equal("func_close should be empty", "", analysis.markers.func_close); + + // JSON_NATIVE format has empty args markers + t.assert_equal("args_start should be empty", "", analysis.markers.args_start); + t.assert_equal("args_end should be empty", "", analysis.markers.args_end); + + // JSON_NATIVE format has empty argument markers + t.assert_equal("arg_name_prefix should be empty", "", analysis.markers.arg_name_prefix); + t.assert_equal("arg_name_suffix should be empty", "", analysis.markers.arg_name_suffix); + t.assert_equal("arg_value_prefix should be empty", "", analysis.markers.arg_value_prefix); + t.assert_equal("arg_value_suffix should be empty", "", analysis.markers.arg_value_suffix); + t.assert_equal("arg_separator should be empty", "", analysis.markers.arg_separator); + + // Check JSON field names - Cohere uses non-standard names + t.assert_equal("name_field should be 'tool_name'", "tool_name", analysis.name_field); + t.assert_equal("args_field should be 'parameters'", "parameters", analysis.args_field); + // This isn't a real tool call id field, i.e. 
with the OpenAI tool call ID format + t.assert_equal("id_field should be 'tool_call_id'", "", analysis.id_field); + + // Check format classification + t.assert_equal("tool format should be JSON_NATIVE", tool_format::JSON_NATIVE, analysis.tools); + + // Check flags + t.assert_true("should support tools", analysis.supports_tools); + t.assert_true("should support parallel calls", analysis.supports_parallel_calls); + t.assert_true("should not require nonnull content", !analysis.requires_nonnull_content); + t.assert_true("tools_array_wrapped should be true", analysis.tools_array_wrapped); +} + +// ============================================================================ +// standard_json_tools Format Tests +// ============================================================================ + +// Helper to build tools definition for tests +static json build_test_tools() { + json parameters_schema = json::object(); + parameters_schema["type"] = "object"; + parameters_schema["properties"] = json::object(); + parameters_schema["properties"]["location"] = json::object({ + {"type", "string"}, + {"description", "The city and state"} + }); + parameters_schema["properties"]["unit"] = json::object({ + {"type", "string"}, + {"description", "Temperature unit"}, + {"enum", json::array({"celsius", "fahrenheit"})} + }); + parameters_schema["required"] = json::array({"location"}); + + return json::array({ + json{ + {"type", "function"}, + {"function", json{ + {"name", "get_current_weather"}, + {"description", "Get the current weather in a given location"}, + {"parameters", parameters_schema} + }} + } + }); +} + +static void test_standard_json_tools_formats(testing & t) { + t.test("OpenAI format", test_standard_json_tools_openai); + t.test("Cohere format", test_standard_json_tools_cohere); + t.test("function-as-key format", test_standard_json_tools_function_key); +} + +// Test 1: OpenAI Standard Format +// {"id": "call_abc", "function": {"name": "get_weather", "arguments": {"location": 
"NYC"}}} +static void test_standard_json_tools_openai(testing & t) { + json tools = build_test_tools(); + + auto parser = build_chat_peg_unified_parser([&](common_chat_peg_unified_builder & p) { + auto tool_call = p.standard_json_tools( + "", "", tools, + /* parallel */ true, + /* force */ false, + /* name_key */ "function.name", + /* args_key */ "function.arguments", + /* array_wrapped */ false, + /* function_is_key */ false, + /* call_id_key */ "id", + /* gen_call_id_key */ "", + /* parameters_order */ {} + ); + return p.content(p.until("")) + p.optional(tool_call) + p.end(); + }); + + std::string input = + "Let me check the weather." + "" + R"({"id": "call_abc123", "function": {"name": "get_current_weather", "arguments": {"location": "NYC"}}})" + ""; + + common_peg_parse_context ctx(input, false); + auto result = parser.parse(ctx); + + t.assert_true("parse success", result.success()); + + common_chat_msg msg; + auto mapper = common_chat_peg_unified_mapper(msg); + mapper.from_ast(ctx.ast, result); + + t.assert_equal("tool calls count", 1u, msg.tool_calls.size()); + if (!msg.tool_calls.empty()) { + t.assert_equal("tool name", "get_current_weather", msg.tool_calls[0].name); + t.assert_equal("tool id", "call_abc123", msg.tool_calls[0].id); + } + t.assert_true("content present", msg.content.find("Let me check the weather") != std::string::npos); +} + +// Test 2: Cohere Format +// {"tool_call_id": 0, "tool_name": "get_weather", "parameters": {"location": "NYC"}} +static void test_standard_json_tools_cohere(testing & t) { + json tools = build_test_tools(); + + auto parser = build_chat_peg_unified_parser([&](common_chat_peg_unified_builder & p) { + auto tool_call = p.standard_json_tools( + "<|START_ACTION|>[", "]<|END_ACTION|>", tools, + /* parallel */ true, + /* force */ false, + /* name_key */ "tool_name", + /* args_key */ "parameters", + /* array_wrapped */ false, // Brackets are part of section markers + /* function_is_key */ false, + /* call_id_key */ "", + /* 
gen_call_id_key */ "tool_call_id", + /* parameters_order */ {"tool_call_id", "tool_name", "parameters"} + ); + return p.content(p.until("<|START_ACTION|>")) + p.optional(tool_call) + p.end(); + }); + + std::string input = + "Let me search for that." + "<|START_ACTION|>[" + R"({"tool_call_id": 0, "tool_name": "get_current_weather", "parameters": {"location": "NYC", "unit": "celsius"}})" + "]<|END_ACTION|>"; + + common_peg_parse_context ctx(input, false); + auto result = parser.parse(ctx); + + t.assert_true("parse success", result.success()); + + common_chat_msg msg; + auto mapper = common_chat_peg_unified_mapper(msg); + mapper.from_ast(ctx.ast, result); + + t.assert_equal("tool calls count", 1u, msg.tool_calls.size()); + if (!msg.tool_calls.empty()) { + t.assert_equal("tool name", "get_current_weather", msg.tool_calls[0].name); + t.assert_equal("tool id", "0", msg.tool_calls[0].id); + } + t.assert_true("content present", msg.content.find("Let me search") != std::string::npos); +} + +// Test 3: Function-as-Key Format +// {"get_current_weather": {"id": "call-0001", "args": {"location": "NYC"}}} +static void test_standard_json_tools_function_key(testing & t) { + json tools = build_test_tools(); + + auto parser = build_chat_peg_unified_parser([&](common_chat_peg_unified_builder & p) { + auto tool_call = p.standard_json_tools( + "[", "]", tools, + /* parallel */ true, + /* force */ false, + /* name_key */ "", // Name is the key itself + /* args_key */ "args", + /* array_wrapped */ false, + /* function_is_key */ true, + /* call_id_key */ "id", + /* gen_call_id_key */ "", + /* parameters_order */ {} + ); + return p.content(p.until("")) + p.optional(tool_call) + p.end(); + }); + + std::string input = + "I'll call the weather function." 
+ "[" + R"({"get_current_weather": {"id": "call-0001", "args": {"location": "NYC", "unit": "celsius"}}})" + "]"; + + common_peg_parse_context ctx(input, false); + auto result = parser.parse(ctx); + + t.assert_true("parse success", result.success()); + + common_chat_msg msg; + auto mapper = common_chat_peg_unified_mapper(msg); + mapper.from_ast(ctx.ast, result); + + t.assert_equal("tool calls count", 1u, msg.tool_calls.size()); + if (!msg.tool_calls.empty()) { + t.assert_equal("tool name", "get_current_weather", msg.tool_calls[0].name); + t.assert_equal("tool id", "call-0001", msg.tool_calls[0].id); + } + t.assert_true("content present", msg.content.find("I'll call the weather") != std::string::npos); +} + +// ============================================================================ +// normalize_quotes_to_json Tests +// ============================================================================ + +// Copy of the function for isolated testing (original is static in chat-peg-parser.cpp) +static std::string normalize_quotes_to_json(const std::string & input) { + std::string result; + result.reserve(input.size() + 16); + + bool in_single_quoted = false; + bool in_double_quoted = false; + + for (size_t i = 0; i < input.size(); ++i) { + char c = input[i]; + + if (c == '\\' && i + 1 < input.size()) { + char next = input[i + 1]; + + if (in_single_quoted) { + if (next == '\'') { + result += '\''; + ++i; + continue; + } + if (next == '"') { + result += "\\\""; + ++i; + continue; + } + result += c; + result += next; + ++i; + continue; + } + + if (in_double_quoted) { + result += c; + result += next; + ++i; + continue; + } + + result += c; + continue; + } + + if (c == '"') { + if (in_single_quoted) { + result += "\\\""; + } else { + in_double_quoted = !in_double_quoted; + result += c; + } + } else if (c == '\'') { + if (in_double_quoted) { + result += c; + } else if (in_single_quoted) { + in_single_quoted = false; + result += '"'; + } else { + in_single_quoted = true; + 
result += '"'; + } + } else { + result += c; + } + } + + return result; +} + +static void test_normalize_quotes_to_json(testing & t) { + t.test("basic single to double quotes", [](testing & t) { + std::string input = "{'key': 'value'}"; + std::string expected = "{\"key\": \"value\"}"; + std::string result = normalize_quotes_to_json(input); + t.assert_equal("basic conversion", expected, result); + }); + + t.test("escaped single quote inside single-quoted string", [](testing & t) { + std::string input = "{'code': 'print(\\'hello\\')'}"; + std::string expected = "{\"code\": \"print('hello')\"}"; + std::string result = normalize_quotes_to_json(input); + t.assert_equal("escaped single quote", expected, result); + }); + + t.test("double quote inside single-quoted string", [](testing & t) { + std::string input = "{'msg': 'He said \"hi\"'}"; + std::string expected = "{\"msg\": \"He said \\\"hi\\\"\"}"; + std::string result = normalize_quotes_to_json(input); + t.assert_equal("double quote escaping", expected, result); + }); + + t.test("nested backslash escapes", [](testing & t) { + std::string input = "{'path': 'C:\\\\Users\\\\test'}"; + std::string expected = "{\"path\": \"C:\\\\Users\\\\test\"}"; + std::string result = normalize_quotes_to_json(input); + t.assert_equal("backslash escaping", expected, result); + }); + + t.test("newline escapes", [](testing & t) { + std::string input = "{'text': 'line1\\nline2'}"; + std::string expected = "{\"text\": \"line1\\nline2\"}"; + std::string result = normalize_quotes_to_json(input); + t.assert_equal("newline escaping", expected, result); + }); + + t.test("mixed quotes", [](testing & t) { + std::string input = "{\"already_double\": 'single_value'}"; + std::string expected = "{\"already_double\": \"single_value\"}"; + std::string result = normalize_quotes_to_json(input); + t.assert_equal("mixed quotes", expected, result); + }); + + t.test("embedded quotes - the test case", test_normalize_quotes_with_embedded_quotes); +} + +// Test 
case that mirrors the Seed-OSS failing test scenario
+static void test_normalize_quotes_with_embedded_quotes(testing & t) {
+    // This is similar to the Seed-OSS template test case
+    // The input has embedded double quotes like "14" and "bar" inside string values
+    std::string input = "{'filename': 'foo.cpp', 'oldString': 'def foo(arg = \"14\"):\\n    return arg + \"bar\"\\n', 'newString': 'def foo(arg = \"15\"):\\n    pass\\n'}";
+
+    // Expected: Python single quotes -> JSON double quotes, internal double quotes escaped
+    std::string expected = "{\"filename\": \"foo.cpp\", \"oldString\": \"def foo(arg = \\\"14\\\"):\\n    return arg + \\\"bar\\\"\\n\", \"newString\": \"def foo(arg = \\\"15\\\"):\\n    pass\\n\"}";
+
+    std::string result = normalize_quotes_to_json(input);
+
+    t.assert_equal("normalize quotes with embedded double quotes", expected, result);
+
+    // Also verify the result is valid JSON
+    try {
+        json parsed = json::parse(result);
+        t.assert_true("result is valid JSON", true);
+        t.assert_equal("filename field", "foo.cpp", parsed["filename"].get<std::string>());
+        t.assert_true("oldString contains embedded quotes",
+                      parsed["oldString"].get<std::string>().find("\"14\"") != std::string::npos);
+        t.assert_true("newString contains embedded quotes",
+                      parsed["newString"].get<std::string>().find("\"15\"") != std::string::npos);
+    } catch (const std::exception & e) {
+        t.assert_true(std::string("JSON parse failed: ") + e.what(), false);
+    }
+}
+
+// ============================================================================
+// TAG_WITH_TAGGED Argument Parsing Tests
+// ============================================================================
+
+// Build tools definition for edit function
+static json build_edit_tool() {
+    json parameters_schema = json::object();
+    parameters_schema["type"] = "object";
+    parameters_schema["properties"] = json::object();
+    parameters_schema["properties"]["filename"] = json::object({
+        {"type", "string"},
+        {"description", "Path of file to edit"}
+    });
+
parameters_schema["properties"]["oldString"] = json::object({
+        {"type", "string"},
+        {"description", "String to replace"}
+    });
+    parameters_schema["properties"]["newString"] = json::object({
+        {"type", "string"},
+        {"description", "New (replacement) value"}
+    });
+    parameters_schema["required"] = json::array({"filename", "oldString", "newString"});
+
+    return json::array({
+        json{
+            {"type", "function"},
+            {"function", json{
+                {"name", "edit"},
+                {"description", "Edit a file"},
+                {"parameters", parameters_schema}
+            }}
+        }
+    });
+}
+
+// Test that reproduces the Seed-OSS template issue with embedded quotes
+static void test_tagged_args_with_embedded_quotes(testing & t) {
+    json tools = build_edit_tool();
+
+    // Build a parser for TAG_WITH_TAGGED format like Seed-OSS/Nemotron
+    auto parser = build_chat_peg_unified_parser([&](common_chat_peg_unified_builder & p) {
+        // Build tool choice for the edit function
+        auto tool_choice = p.choice();
+
+        for (const auto & tool_def : tools) {
+            if (!tool_def.contains("function")) continue;
+            const auto & function = tool_def.at("function");
+            std::string name = function.at("name");
+            const auto & params = function.at("parameters");
+
+            if (!params.contains("properties") || !params.at("properties").is_object()) continue;
+
+            const auto & properties = params.at("properties");
+
+            // Build argument parsers
+            // NOTE(review): the empty p.literal("")/p.until("") strings below look like
+            // open/close tag literals stripped by text extraction — restore the original
+            // tag strings from the source commit before applying this patch.
+            std::vector<common_peg_parser> arg_parsers;
+            for (const auto & [param_name, param_schema] : properties.items()) {
+                auto arg = p.tool_arg(
+                    p.tool_arg_open(p.literal("")) +
+                    p.space() +
+                    p.tool_arg_string_value(p.until("")) +
+                    p.space() +
+                    p.tool_arg_close(p.literal(""))
+                );
+                arg_parsers.push_back(p.optional(p.rule("arg-" + param_name, arg)));
+            }
+
+            // Build arg sequence with space() between
+            common_peg_parser args_seq = p.eps();
+            for (size_t i = 0; i < arg_parsers.size(); i++) {
+                if (i > 0) {
+                    args_seq = args_seq + p.space();
+                }
+                args_seq = args_seq + arg_parsers[i];
+            }
+
+            auto func_parser =
+                p.tool_open(p.literal("")) +
+
p.space() + args_seq + p.space() + + p.tool_close(p.literal("")); + + tool_choice |= p.rule("tool-" + name, p.tool(func_parser)); + } + + auto tool_section = + p.literal("") + p.space() + + tool_choice + + p.space() + p.literal(""); + + return p.content(p.until("")) + p.optional(tool_section) + p.end(); + }); + + // The exact input from the failing test + std::string input = + "\n" + "\n" + "\n" + "foo.cpp\n" + "\n" + "" + "def foo(arg = \"14\"):\n" + " return arg + \"bar\"\n" + "\n" + "\n" + "" + "def foo(arg = \"15\"):\n" + " pass\n" + "\n" + "\n" + "\n" + ""; + + common_peg_parse_context ctx(input, false); + auto result = parser.parse(ctx); + + t.assert_true("parse success", result.success()); + + common_chat_msg msg; + auto mapper = common_chat_peg_unified_mapper(msg); + mapper.from_ast(ctx.ast, result); + + t.assert_equal("tool calls count", 1u, msg.tool_calls.size()); + + if (!msg.tool_calls.empty()) { + t.assert_equal("tool name", "edit", msg.tool_calls[0].name); + + // Parse the arguments as JSON to verify they're valid + std::string args = msg.tool_calls[0].arguments; + + try { + json parsed = json::parse(args); + t.assert_true("arguments is valid JSON", true); + + // Verify each field has proper value + t.assert_equal("filename", "foo.cpp", parsed.value("filename", "")); + + std::string oldString = parsed.value("oldString", ""); + t.assert_true("oldString contains embedded quotes", + oldString.find("\"14\"") != std::string::npos); + t.assert_true("oldString contains bar with quotes", + oldString.find("\"bar\"") != std::string::npos); + + std::string newString = parsed.value("newString", ""); + t.assert_true("newString contains embedded quotes", + newString.find("\"15\"") != std::string::npos); + + } catch (const std::exception & e) { + t.assert_true(std::string("arguments should be valid JSON: ") + e.what(), false); + } + } +} + diff --git a/tests/test-chat.cpp b/tests/test-chat.cpp index ad2953f6da..38798ec9d7 100644 --- a/tests/test-chat.cpp +++ 
b/tests/test-chat.cpp @@ -14,6 +14,7 @@ #include "log.h" #include +#include #include #include #include @@ -439,7 +440,7 @@ const common_chat_msg message_assist_call_idx = const common_chat_msg message_assist_thoughts_call_idx = simple_assist_msg("", "I'm\nthinking", "special_function", "{\"arg1\": 1}", /* id = */ "0"); const common_chat_msg message_assist_thoughts_partial_call = - simple_assist_msg("", "I'm\nthinking", "", "", /* id = */ "0"); + simple_assist_msg("", "I'm\nthinking", "special_function", "", /* id = */ "0"); const common_chat_msg message_assist_call_python = simple_assist_msg("", "", "python", "{\"code\":\"print('hey')\"}"); const common_chat_msg message_assist_call_python_lines = simple_assist_msg("", "", "python", "{\"code\":\"# This is a program:\\nprint('hey')\"}"); @@ -541,14 +542,14 @@ static void test_templates(const struct common_chat_templates * tmpls, if (expect_grammar_triggered) { // TODO @ngxson : refactor common_chat_parse to avoid passing format/reasoning_format every time - common_chat_parser_params params; - params.format = data.params.format; - params.reasoning_format = reasoning_format; - if (!params.parser.empty()) { - syntax.parser = common_peg_arena(); - syntax.parser.load(params.parser); + common_chat_parser_params parser_params; + parser_params.format = data.params.format; + parser_params.reasoning_format = reasoning_format; + if (!parser_params.parser.empty()) { + parser_params.parser = common_peg_arena(); + parser_params.parser.load(params.parser); } - const auto msg = common_chat_parse(data.delta, /* is_partial= */ false, params); + const auto msg = common_chat_parse(data.delta, /* is_partial= */ false, parser_params); assert_msg_equals(test_message, msg, ignore_whitespace_differences); } @@ -670,10 +671,12 @@ static void test_parser_with_streaming(const common_chat_msg & expected, const s auto last_msg = parse_msg(""); for (size_t i = 1; i <= raw_message.size(); ++i) { auto curr_msg = 
parse_msg(std::string(utf8_truncate_safe_view(std::string_view(raw_message).substr(0, i)))); - if (curr_msg == simple_assist_msg("")) continue; - LOG_INF("Streaming msg: %s\n", common_chat_msgs_to_json_oaicompat({curr_msg}).dump().c_str()); - for (auto diff: common_chat_msg_diff::compute_diffs(last_msg, curr_msg)) { - LOG_INF("Streaming diff: %s\n", common_chat_msg_diff_to_json_oaicompat(diff).dump().c_str()); + if (curr_msg == simple_assist_msg("")) { + continue; + } + LOG_INF("Streaming msg: %s\n", common_chat_msgs_to_json_oaicompat({ curr_msg }).dump().c_str()); + for (auto diff : common_chat_msg_diff::compute_diffs(last_msg, curr_msg)) { + LOG_INF("Streaming diff: %s\n", common_chat_msg_diff_to_json_oaicompat(diff).dump().c_str()); if (!diff.reasoning_content_delta.empty()) { merged.reasoning_content += diff.reasoning_content_delta; } @@ -689,7 +692,7 @@ static void test_parser_with_streaming(const common_chat_msg & expected, const s merged.tool_calls.back().arguments += diff.tool_call_delta.arguments; } } - LOG_INF("Streaming merged: %s\n", common_chat_msgs_to_json_oaicompat({merged}).dump().c_str()); + LOG_INF("Streaming merged: %s\n", common_chat_msgs_to_json_oaicompat({ merged }).dump().c_str()); } assert_msg_equals(curr_msg, merged, true); last_msg = curr_msg; @@ -719,10 +722,12 @@ struct make_peg_parser { arena_.load(params_.parser); } - common_chat_msg parse(const std::string & msg, bool is_partial) { -common_chat_parser_params parser_params; + common_chat_msg parse(const std::string & msg, bool is_partial) const { + common_chat_parser_params parser_params; parser_params.format = params_.format; + parser_params.debug = detailed_debug_; return common_chat_peg_parse(arena_, msg, is_partial, parser_params); + } }; static void test_peg_parser(common_chat_templates * tmpls, @@ -809,7 +814,12 @@ static void test_peg_parser(common_chat_templates * tmpls, } } } - assert_msg_equals(msg_current, msg_accum, true); + try { + assert_msg_equals(msg_current, msg_accum, 
true); + } catch (std::exception & e) { + throw std::runtime_error((std::string("Error comparing accumulated message to current: ") + e.what()).c_str()); + } + msg_prev = msg_current; } @@ -916,8 +926,7 @@ class peg_test_builder { return; } } - LOG_DBG("\n================================\nRunning test for template: %s\n================================\n", - tester_.template_path().c_str()); + LOG_INF("\n\x1b[38;5;126m[%s]\x1b[0m\n%s\n\n", tester_.template_path().c_str(), tc_.input.c_str()); test_peg_parser(tester_.tmpls_.get(), [this](peg_test_case & t) { t = tc_; }, tester_.detailed_debug_); } }; @@ -940,51 +949,47 @@ static void test_msgs_oaicompat_json_conversion() { message_assist_call_python, }; for (const auto & msg : msgs) { - auto oai_json = common_chat_msgs_to_json_oaicompat({msg}); - auto msgs2 = common_chat_msgs_parse_oaicompat(oai_json); + auto oai_json = common_chat_msgs_to_json_oaicompat({ msg }); + auto msgs2 = common_chat_msgs_parse_oaicompat(oai_json); assert_equals((size_t) 1, msgs2.size()); const auto & msg2 = msgs2[0]; assert_msg_equals(msg, msg2); } - assert_equals( - std::string( - "[\n" - " {\n" - " \"role\": \"user\",\n" - " \"content\": [\n" - " {\n" - " \"type\": \"text\",\n" - " \"text\": \"Hey\"\n" - " },\n" - " {\n" - " \"type\": \"text\",\n" - " \"text\": \"there\"\n" - " }\n" - " ]\n" - " }\n" - "]" - ), - common_chat_msgs_to_json_oaicompat({message_user_parts}).dump(2)); + assert_equals(std::string("[\n" + " {\n" + " \"role\": \"user\",\n" + " \"content\": [\n" + " {\n" + " \"type\": \"text\",\n" + " \"text\": \"Hey\"\n" + " },\n" + " {\n" + " \"type\": \"text\",\n" + " \"text\": \"there\"\n" + " }\n" + " ]\n" + " }\n" + "]"), + common_chat_msgs_to_json_oaicompat({ message_user_parts }).dump(2)); - assert_equals( - std::string( - "[\n" - " {\n" - " \"role\": \"assistant\",\n" - " \"content\": \"\",\n" - " \"tool_calls\": [\n" - " {\n" - " \"type\": \"function\",\n" - " \"function\": {\n" - " \"name\": \"python\",\n" - " 
\"arguments\": \"{\\\"code\\\":\\\"print('hey')\\\"}\"\n" - " }\n" - " }\n" - " ]\n" - " }\n" - "]" - ), - common_chat_msgs_to_json_oaicompat({message_assist_call_python}).dump(2)); + // Note: content is "" instead of null due to workaround for templates that render null as "None" + // Arguments are serialized as string for OAI compatibility + assert_equals(std::string("[\n" + " {\n" + " \"role\": \"assistant\",\n" + " \"content\": \"\",\n" + " \"tool_calls\": [\n" + " {\n" + " \"type\": \"function\",\n" + " \"function\": {\n" + " \"name\": \"python\",\n" + " \"arguments\": \"{\\\"code\\\":\\\"print('hey')\\\"}\"\n" + " }\n" + " }\n" + " ]\n" + " }\n" + "]"), + common_chat_msgs_to_json_oaicompat({ message_assist_call_python }).dump(2)); auto res = common_chat_msgs_parse_oaicompat(json::parse("[{\"role\": \"assistant\", \"tool_calls\": []}]")); assert_equals(1, res.size()); @@ -1010,8 +1015,8 @@ static void test_tools_oaicompat_json_conversion() { }; for (const auto & tool : tools) { - auto oai_json = common_chat_tools_to_json_oaicompat({tool}); - auto tools2 = common_chat_tools_parse_oaicompat(oai_json); + auto oai_json = common_chat_tools_to_json_oaicompat({ tool }); + auto tools2 = common_chat_tools_parse_oaicompat(oai_json); assert_equals((size_t) 1, tools2.size()); auto tool2 = tools2[0]; assert_equals(tool.name, tool2.name); @@ -1019,364 +1024,267 @@ static void test_tools_oaicompat_json_conversion() { assert_equals(json::parse(tool.parameters).dump(2), json::parse(tool2.parameters).dump(2)); } - assert_equals( - std::string( - "[\n" - " {\n" - " \"type\": \"function\",\n" - " \"function\": {\n" - " \"name\": \"special_function\",\n" - " \"description\": \"I'm special\",\n" - " \"parameters\": {\n" - " \"type\": \"object\",\n" - " \"properties\": {\n" - " \"arg1\": {\n" - " \"type\": \"integer\",\n" - " \"description\": \"The arg.\"\n" - " }\n" - " },\n" - " \"required\": [\n" - " \"arg1\"\n" - " ]\n" - " }\n" - " }\n" - " }\n" - "]" - ), - 
common_chat_tools_to_json_oaicompat({special_function_tool}).dump(2)); - - { - auto tools_no_params = common_chat_tools_parse_oaicompat(json::parse( - R"([{"type": "function", "function": {"name": "test_func", "description": "A test"}}])")); - assert_equals((size_t) 1, tools_no_params.size()); - assert_equals(std::string("test_func"), tools_no_params[0].name); - assert_equals(std::string("A test"), tools_no_params[0].description); - assert_equals(std::string("{}"), tools_no_params[0].parameters); - } - { - auto tools_no_desc = common_chat_tools_parse_oaicompat(json::parse( - R"([{"type": "function", "function": {"name": "test_func", "parameters": {"type": "object"}}}])")); - assert_equals((size_t) 1, tools_no_desc.size()); - assert_equals(std::string("test_func"), tools_no_desc[0].name); - assert_equals(std::string(""), tools_no_desc[0].description); - } - { - auto tools_minimal = common_chat_tools_parse_oaicompat(json::parse( - R"([{"type": "function", "function": {"name": "test_func"}}])")); - assert_equals((size_t) 1, tools_minimal.size()); - assert_equals(std::string("test_func"), tools_minimal[0].name); - assert_equals(std::string(""), tools_minimal[0].description); - assert_equals(std::string("{}"), tools_minimal[0].parameters); - } + assert_equals(std::string("[\n" + " {\n" + " \"type\": \"function\",\n" + " \"function\": {\n" + " \"name\": \"special_function\",\n" + " \"description\": \"I'm special\",\n" + " \"parameters\": {\n" + " \"type\": \"object\",\n" + " \"properties\": {\n" + " \"arg1\": {\n" + " \"type\": \"integer\",\n" + " \"description\": \"The arg.\"\n" + " }\n" + " },\n" + " \"required\": [\n" + " \"arg1\"\n" + " ]\n" + " }\n" + " }\n" + " }\n" + "]"), + common_chat_tools_to_json_oaicompat({ special_function_tool }).dump(2)); } -// for compat; ref: https://github.com/ggml-org/llama.cpp/pull/18961 -struct test_parser_params { - common_chat_format format = COMMON_CHAT_FORMAT_CONTENT_ONLY; - common_reasoning_format reasoning_format = 
COMMON_REASONING_FORMAT_NONE; - bool reasoning_in_content = false; - bool thinking_forced_open = false; - bool parse_tool_calls = true; -}; +static void test_template_output_peg_parsers(bool detailed_debug) { + LOG_DBG("%s\n", __func__); -static common_chat_msg test_chat_parse(const std::string & input, bool is_partial, const test_parser_params & syntax) { - common_chat_parser_params params; - params.format = syntax.format; - params.reasoning_format = syntax.reasoning_format; - params.reasoning_in_content = syntax.reasoning_in_content; - params.thinking_forced_open = syntax.thinking_forced_open; - params.parse_tool_calls = syntax.parse_tool_calls; - return common_chat_parse(input, is_partial, params); -} - -static void test_template_output_parsers() { - printf("[%s]\n", __func__); - - common_chat_templates_inputs inputs_no_tools; - inputs_no_tools.messages = {message_user}; - - common_chat_templates_inputs inputs_tools; - inputs_tools.messages = {message_user}; - inputs_tools.tools = {special_function_tool}; - - common_chat_templates_inputs inputs_tools_builtin; - inputs_tools_builtin.messages = {message_user}; - inputs_tools_builtin.tools = {python_tool}; - - { - // Not supported yet - auto tmpls = read_templates("models/templates/CohereForAI-c4ai-command-r-plus-tool_use.jinja"); - assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format); - assert_equals(COMMON_CHAT_FORMAT_GENERIC, common_chat_templates_apply(tmpls.get(), inputs_tools).format); - } - { - auto tmpls = read_templates("models/templates/CohereForAI-c4ai-command-r7b-12-2024-tool_use.jinja"); - std::vector end_tokens{ "<|END_OF_TURN_TOKEN|>" }; - - for (const auto & inputs : { inputs_no_tools, inputs_tools }) { - auto params = common_chat_templates_apply(tmpls.get(), inputs); - assert_equals(COMMON_CHAT_FORMAT_COMMAND_R7B, params.format); - assert_equals(false, params.thinking_forced_open); + // JSON schemas + const char * invoice_schema = R"({ + 
"type": "object", + "properties": { + "amount": {"type": "number"}, + "date": {"type": "string"} } + })"; - assert_msg_equals(message_assist, - test_chat_parse( - "Hello, world!\nWhat's up?", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_COMMAND_R7B})); - assert_msg_equals(message_assist, - test_chat_parse( - "<|START_RESPONSE|>Hello, world!\nWhat's up?<|END_RESPONSE|>", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_COMMAND_R7B})); - assert_msg_equals(message_assist_thoughts, - test_chat_parse( - "<|START_THINKING|>I'm\nthinking<|END_THINKING|>" - "<|START_RESPONSE|>Hello, world!\nWhat's up?<|END_RESPONSE|>", - /* is_partial= */ false, - { - /* .format = */ COMMON_CHAT_FORMAT_COMMAND_R7B, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, - })); - assert_msg_equals(message_assist_thoughts_unparsed_deepseek, - test_chat_parse( - "<|START_THINKING|>I'm\nthinking<|END_THINKING|>" - "<|START_RESPONSE|>Hello, world!\nWhat's up?<|END_RESPONSE|>", - /* is_partial= */ false, - { - /* .format = */ COMMON_CHAT_FORMAT_COMMAND_R7B, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, - /* .reasoning_in_content = */ true, - /* .thinking_forced_open = */ false, - })); - assert_msg_equals(message_assist_thoughts_unparsed_r7b, - test_chat_parse( - "<|START_THINKING|>I'm\nthinking<|END_THINKING|>" - "<|START_RESPONSE|>Hello, world!\nWhat's up?<|END_RESPONSE|>", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_COMMAND_R7B})); - assert_msg_equals(message_assist_thoughts, - test_chat_parse( - "<|START_THINKING|>I'm\nthinking<|END_THINKING|>" - "<|START_RESPONSE|>Hello, world!\nWhat's up?<|END_RESPONSE|>", - /* is_partial= */ false, - { - /* .format = */ COMMON_CHAT_FORMAT_COMMAND_R7B, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, - })); - assert_msg_equals(message_assist_thoughts_call_idx, - test_chat_parse( - "<|START_THINKING|>I'm\nthinking<|END_THINKING|>" - "<|START_ACTION|>[\n" - " {\"tool_call_id\": \"0\", \"tool_name\": 
\"special_function\", \"parameters\": {\"arg1\": 1}}\n" - "]<|END_ACTION|>", - /* is_partial= */ false, - { - /* .format = */ COMMON_CHAT_FORMAT_COMMAND_R7B, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, - })); - assert_msg_equals(message_assist_thoughts_no_content, - test_chat_parse( - "<|START_THINKING|>I'm\nthinking<|END_THINKING|>" - "<|START_ACTION|>[\n" - " {\"tool_call_id\": \"0\", \"tool_name\": \"special", - /* is_partial= */ true, - { - /* .format = */ COMMON_CHAT_FORMAT_COMMAND_R7B, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, - })); - - test_templates(tmpls.get(), end_tokens, message_assist_call_idx, tools, - "<|START_THINKING|><|END_THINKING|>" - "<|START_ACTION|>[\n" - " {\"tool_call_id\": \"0\", \"tool_name\": \"special_function\", \"parameters\": {\"arg1\": 1}}\n" - "]<|END_ACTION|>", - /* expect_grammar_triggered= */ true, - /* test_grammar_if_triggered= */ true, - COMMON_REASONING_FORMAT_DEEPSEEK); - test_templates(tmpls.get(), end_tokens, message_assist, tools, - "<|START_RESPONSE|>Hello, world!\n" - "What's up?<|END_RESPONSE|>", - /* expect_grammar_triggered= */ false); - } - // TODO @ngxson : generic tool calls is too costly to maintain, consider removing it in the future { - auto tmpls = read_templates("models/templates/google-gemma-2-2b-it.jinja"); - std::vector end_tokens{ "" }; + // Ministral-3-14B-Reasoning-2512 + auto tst = peg_tester("models/templates/mistralai-Ministral-3-14B-Reasoning-2512.jinja", detailed_debug); - assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format); - assert_equals(COMMON_CHAT_FORMAT_GENERIC, common_chat_templates_apply(tmpls.get(), inputs_tools).format); - assert_equals(COMMON_CHAT_FORMAT_GENERIC, - common_chat_templates_apply( - read_templates("models/templates/microsoft-Phi-3.5-mini-instruct.jinja").get(), - inputs_tools) - .format); + tst.test("Hello, world!\nWhat's up?").expect(message_assist).run(); - // Generic tool 
calls doesn't generate / parse content-only messages symmetrically. + tst.test("[THINK]I'm\nthinking[/THINK]Hello, world!\nWhat's up?") + .expect_content("[THINK]I'm\nthinking[/THINK]Hello, world!\nWhat's up?") + .run(); - assert_equals( - simple_assist_msg("{ \"tool_call\" : { \"name\" : \"t"), - test_chat_parse( - "{ \"tool_call\" : { \"name\" : \"t", - /* is_partial= */ true, - { - /* .format = */ COMMON_CHAT_FORMAT_GENERIC, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, - /* .reasoning_in_content = */ false, - /* .thinking_forced_open = */ true, - /* .parse_tool_calls = */ false, - })); - assert_equals( - message_assist_empty, - test_chat_parse( - "{ \"tool_call\" : { \"name\" : \"t", - /* is_partial= */ true, - {COMMON_CHAT_FORMAT_GENERIC})); + tst.test("[THINK]I'm\nthinking[/THINK]Hello, world!\nWhat's up?") + .reasoning_format(COMMON_REASONING_FORMAT_AUTO) + .expect(message_assist_thoughts) + .run(); - assert_equals( - simple_assist_msg("", "", "puppeteer_screenshot", "{\"name\":\"servethehome_homepage\","), - test_chat_parse( - R"({"tool_call": {"name": "puppeteer_screenshot", "arguments": {"name": "servethehome_homepage",)", - /* is_partial= */ true, - {COMMON_CHAT_FORMAT_GENERIC})); - - assert_equals( - message_assist_call_empty_args, - test_chat_parse( - "{ \"tool_call\" : { \"name\" : \"special_function\"", - /* is_partial= */ true, - {COMMON_CHAT_FORMAT_GENERIC})); - assert_equals( - message_assist_call_cutoff_args, - test_chat_parse( - "{ \"tool_call\" : { \"name\" : \"special_function\", \"arguments\" : { \"arg", - /* is_partial= */ true, - {COMMON_CHAT_FORMAT_GENERIC})); - - assert_msg_equals(message_assist, - test_chat_parse( - "{\n" - " \"response\": \"Hello, world!\\nWhat's up?\"\n" - "}", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_GENERIC})); -#if 0 - test_templates(tmpls.get(), end_tokens, message_assist_call_id, tools, - "{\n" - " \"tool_calls\": [\n" - " {\n" - " \"name\": \"special_function\",\n" - " \"arguments\": {\n" 
- " \"arg1\": 1\n" - " },\n" - " \"id\": \"123456789\"\n" - " }\n" - " ],\n" - " \"content\": \"\"\n" - "}"); -#endif - } - { - auto tmpls = read_templates("models/templates/mistralai-Mistral-Nemo-Instruct-2407.jinja"); - std::vector end_tokens{ "
" }; - - assert_equals(COMMON_CHAT_FORMAT_MISTRAL_NEMO, common_chat_templates_apply(tmpls.get(), inputs_tools).format); - - test_templates(tmpls.get(), end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false); - test_templates( - tmpls.get(), end_tokens, message_assist_call_id, tools, - "[TOOL_CALLS][{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}, \"id\": \"123456789\"}]"); - } - { - assert_msg_equals( - simple_assist_msg("Réponse", "raisonnement"), - test_chat_parse( - message_assist_thoughts_unparsed_magistral.content, - /* is_partial= */ false, - { - /* .format = */ COMMON_CHAT_FORMAT_MAGISTRAL, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO, - })); - } - { - auto tmpls = read_templates("models/templates/Qwen-QwQ-32B.jinja"); - std::vector end_tokens{ "<|im_end|>" }; - - assert_equals(COMMON_CHAT_FORMAT_HERMES_2_PRO, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format); - assert_equals(COMMON_CHAT_FORMAT_HERMES_2_PRO, common_chat_templates_apply(tmpls.get(), inputs_tools).format); - } - { - auto tmpls = read_templates("models/templates/NousResearch-Hermes-2-Pro-Llama-3-8B-tool_use.jinja"); - std::vector end_tokens{ "<|im_end|>" }; - - assert_equals(COMMON_CHAT_FORMAT_HERMES_2_PRO, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format); - assert_equals(COMMON_CHAT_FORMAT_HERMES_2_PRO, common_chat_templates_apply(tmpls.get(), inputs_tools).format); - assert_equals( - COMMON_CHAT_FORMAT_HERMES_2_PRO, - common_chat_templates_apply( - read_templates("models/templates/NousResearch-Hermes-3-Llama-3.1-8B-tool_use.jinja").get(), - inputs_tools) - .format); - assert_equals( - COMMON_CHAT_FORMAT_HERMES_2_PRO, - common_chat_templates_apply( - read_templates("models/templates/Qwen-Qwen2.5-7B-Instruct.jinja").get(), - inputs_tools) - .format); - - // Test parsing - assert_msg_equals( - simple_assist_msg("", "", "python", ""), - common_chat_parse( - "```json\n" - " { \"name\" : 
\"python\"", - /* is_partial= */ true, - {COMMON_CHAT_FORMAT_HERMES_2_PRO})); - assert_msg_equals( - simple_assist_msg("Let's call something\n"), - common_chat_parse( - "Let's call something\n" - "{\"name\"", - /* is_partial= */ true, - { - /* .format = */ COMMON_CHAT_FORMAT_HERMES_2_PRO, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, - })); - assert_msg_equals( - simple_assist_msg("Let's call something\n"), - common_chat_parse( - "Let's call something\n" - "{\"name", - /* is_partial= */ true, - { - /* .format = */ COMMON_CHAT_FORMAT_HERMES_2_PRO, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, - })); - assert_msg_equals(message_assist_call_thoughts, - common_chat_parse( - // QwQ-32B's template adds a trailing if add_generation_prompt - "I'm\nthinking\n" - "{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}", - /* is_partial= */ false, - { - /* .format = */ COMMON_CHAT_FORMAT_HERMES_2_PRO, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, - /* .reasoning_in_content = */ false, - /* .thinking_forced_open = */ true, - })); - assert_msg_equals( - message_assist_call, - common_chat_parse( - "\n" - "{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n" - "") - .enable_thinking(true) + tst.test(R"([TOOL_CALLS]special_function[ARGS]{"arg1":1})") .reasoning_format(COMMON_REASONING_FORMAT_AUTO) .tools({ special_function_tool }) - .expect(simple_assist_msg("", "I should use a tool", "special_function", R"({"arg1": 1})")) + .expect(message_assist_call) + .run(); + + tst.test( + "[THINK]I'm\nthinking[/THINK]" + R"([TOOL_CALLS]special_function[ARGS]{"arg1":1})") + .reasoning_format(COMMON_REASONING_FORMAT_AUTO) + .tools({ special_function_tool }) + .expect(message_assist_call_thoughts) + .run(); + + tst.test(R"([TOOL_CALLS]special_function[ARGS]{"arg1": 1})" + R"([TOOL_CALLS]special_function_with_opt[ARGS]{"arg1": 1, "arg2": 2})") + .reasoning_format(COMMON_REASONING_FORMAT_AUTO) + .parallel_tool_calls(true) + 
.tools({ + special_function_tool, special_function_tool_with_optional_param + }) + .expect_tool_calls({ + { "special_function", R"({"arg1": 1})", {} }, + { "special_function_with_opt", R"({"arg1": 1, "arg2": 2})", {} }, + }) + .run(); + + tst.test( + "[THINK]I need to output the invoice details in JSON[/THINK]" + "```json\n" + R"({"amount": 123.45, "date": "2025-12-03"})" + "\n```") + .reasoning_format(COMMON_REASONING_FORMAT_AUTO) + .json_schema(invoice_schema) + .expect_reasoning("I need to output the invoice details in JSON") + .expect_content(R"({"amount": 123.45, "date": "2025-12-03"})") .run(); } + { + // NVIDIA Nemotron-3 Nano + auto tst = peg_tester("models/templates/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16.jinja", detailed_debug); + + tst.test("Hello, world!\nWhat's up?").enable_thinking(false).expect(message_assist).run(); + + tst.test("I'm\nthinking\n\nHello, world!\nWhat's up?") + .enable_thinking(false) + .reasoning_format(COMMON_REASONING_FORMAT_NONE) + .expect_content("I'm\nthinking\n\nHello, world!\nWhat's up?") + .run(); + + tst.test("I'm\nthinking\n\nHello, world!\nWhat's up?") + .enable_thinking(true) + .reasoning_format(COMMON_REASONING_FORMAT_AUTO) + .expect(message_assist_thoughts) + .run(); + + tst.test( + "\n" + "\n" + "\n1\n\n" + "\n" + "") + .enable_thinking(false) + .reasoning_format(COMMON_REASONING_FORMAT_AUTO) + .tools({ special_function_tool }) + .expect(message_assist_call) + .run(); + + tst.test( + "I'm\nthinking\n\n" + "\n" + "\n" + "\n1\n\n" + "\n" + "") + .reasoning_format(COMMON_REASONING_FORMAT_AUTO) + .tools({ special_function_tool }) + .expect(message_assist_call_thoughts) + .run(); + + tst.test( + "\n" + "\n" + "\n1\n\n" + "\n" + "\n" + "\n" + "\n" + "\n1\n\n" + "\n2\n\n" + "\n" + "") + .enable_thinking(false) + .reasoning_format(COMMON_REASONING_FORMAT_AUTO) + .parallel_tool_calls(true) + .tools({ + special_function_tool, special_function_tool_with_optional_param + }) + .expect_tool_calls({ + { "special_function", R"({"arg1": 
1})", {} }, + { "special_function_with_opt", R"({"arg1": 1, "arg2": 2})", {} }, + }) + .run(); + + tst.test( + "\n" + "\n" + "\n" + "def hello():\n" + " print(\"Hello, world!\")\n" + "\n" + "hello()\n" + "\n" + "\n" + "") + .enable_thinking(false) + .reasoning_format(COMMON_REASONING_FORMAT_AUTO) + .tools({ + python_tool + }) + .expect_tool_calls({ + { "python", "{\"code\": \"def hello():\\n print(\\\"Hello, world!\\\")\\n\\nhello()\"}", {} }, + }) + .run(); + + tst.test( + "I need to output the invoice details in JSON\n" + "\n" + R"({"amount": 123.45, "date": "2025-12-03"})") + .reasoning_format(COMMON_REASONING_FORMAT_AUTO) + .enable_thinking(true) + .json_schema(invoice_schema) + .expect_reasoning("I need to output the invoice details in JSON") + .expect_content(R"({"amount": 123.45, "date": "2025-12-03"})") + .run(); + } + + { + // CohereForAI Command-R 7B (2024-tool_use) + auto tst = peg_tester("models/templates/CohereForAI-c4ai-command-r7b-12-2024-tool_use.jinja", detailed_debug); + + tst.test("<|START_RESPONSE|>Hello, world!\nWhat's up?<|END_RESPONSE|>").expect(message_assist).run(); + + tst.test( + "<|START_THINKING|>I'm\nthinking<|END_THINKING|>" + "<|START_RESPONSE|>Hello, world!\nWhat's up?<|END_RESPONSE|>") + .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK) + .expect(message_assist_thoughts) + .run(); + + tst.test( + "<|START_THINKING|>I'm\nthinking<|END_THINKING|>" + "<|START_RESPONSE|>Hello, world!\nWhat's up?<|END_RESPONSE|>") + .expect(message_assist_thoughts_unparsed_r7b) + .run(); + + tst.test( + "<|START_THINKING|>I'm\nthinking<|END_THINKING|>" + "<|START_ACTION|>[\n" + " {\"tool_call_id\": \"0\", \"tool_name\": \"special_function\", \"parameters\": {\"arg1\": 1}}\n" + "]<|END_ACTION|>") + .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK) + .tools({ special_function_tool }) + .expect(message_assist_thoughts_call_idx) + .run(); + + tst.test( + "<|START_THINKING|>I'm\nthinking<|END_THINKING|>" + "<|START_ACTION|>[\n" + " {\"tool_call_id\": 
\"0\", \"tool_name\": \"special_function\", ") + .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK) + .tools({ special_function_tool }) + .is_partial(true) + .expect(message_assist_thoughts_partial_call) + .run(); + + tst.test( + "<|START_THINKING|><|END_THINKING|>" + "<|START_ACTION|>[\n" + " {\"tool_call_id\": \"0\", \"tool_name\": \"special_function\", \"parameters\": {\"arg1\": 1}}\n" + "]<|END_ACTION|>") + .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK) + .tools({ special_function_tool }) + .expect(message_assist_call_idx) + .run(); + } + + { + // Google Gemma 2 2B - does not support tool calling + auto tst = peg_tester("models/templates/google-gemma-2-2b-it.jinja"); + + tst.test("Hello, world!").expect(simple_assist_msg("Hello, world!")).run(); + + tst.test("Line 1\nLine 2\nLine 3").expect(simple_assist_msg("Line 1\nLine 2\nLine 3")).run(); + } + + { + // Qwen-QwQ-32B (reasoning model) + auto tst = peg_tester("models/templates/Qwen-QwQ-32B.jinja"); + + // QwQ always has thinking forced open - input starts after the \n in the prompt + tst.test("Let me think about this...\n\nThe answer is 42.") + .enable_thinking(true) + .reasoning_format(COMMON_REASONING_FORMAT_AUTO) + .expect(simple_assist_msg("The answer is 42.", "Let me think about this...")) + .run(); + + tst.test("Hello, world!").expect(simple_assist_msg("Hello, world!")).run(); + } { // NousResearch-Hermes-2-Pro and Hermes-3 (tool calling models) auto tst = peg_tester("models/templates/NousResearch-Hermes-2-Pro-Llama-3-8B-tool_use.jinja", detailed_debug); @@ -1400,99 +1308,12 @@ static void test_template_output_parsers() { // Note: Hermes template doesn't support thinking/reasoning natively // Note: We only support one tool calling format per template, no alternate formats } - { - auto tmpls = read_templates("models/templates/ibm-granite-granite-3.3-2B-Instruct.jinja"); - std::vector end_tokens{ "<|end_of_text|>" }; - - assert_equals(COMMON_CHAT_FORMAT_GRANITE, 
common_chat_templates_apply(tmpls.get(), inputs_no_tools).format); - - assert_equals(COMMON_CHAT_FORMAT_GRANITE, common_chat_templates_apply(tmpls.get(), inputs_tools).format); - - // Test parsing regular content - assert_msg_equals(message_assist, - common_chat_parse( - "Hello, world!\nWhat's up?", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_GRANITE})); - assert_msg_equals( - message_assist, - common_chat_parse( - "Hello, world!\nWhat's up?", - /* is_partial= */ true, - {COMMON_CHAT_FORMAT_GRANITE})); - - // Test parsing content with thinking - assert_msg_equals(message_assist_thoughts, - common_chat_parse( - "I'm\nthinkingHello, world!\nWhat's up?", - /* is_partial= */ false, - { - /* .format = */ COMMON_CHAT_FORMAT_GRANITE, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, - })); - assert_msg_equals(message_assist_thoughts_unparsed_deepseek, - common_chat_parse( - "I'm\nthinkingHello, world!\nWhat's up?", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_GRANITE})); - assert_msg_equals(message_assist_thoughts, - common_chat_parse( - "I'm\nthinkingHello, world!\nWhat's up?", - /* is_partial= */ true, - { - /* .format = */ COMMON_CHAT_FORMAT_GRANITE, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, - })); - assert_msg_equals(message_assist_thoughts, - common_chat_parse( - "I'm\nthinkingHello, world!\nWhat's up?", - /* is_partial= */ false, - { - /* .format = */ COMMON_CHAT_FORMAT_GRANITE, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, - })); - assert_msg_equals(simple_assist_msg("I'm\nthinkingHello, world!\nWhat's up?"), - common_chat_parse( - "I'm\nthinkingHello, world!\nWhat's up?", - /* is_partial= */ false, - {COMMON_CHAT_FORMAT_GRANITE})); - assert_msg_equals(message_assist_empty, - common_chat_parse( - "I'm\nthinking", - /* is_partial= */ true, - { - /* .format = */ COMMON_CHAT_FORMAT_GRANITE, - /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, - })); - assert_msg_equals( - message_assist_empty, - 
common_chat_parse( - "I'm\nthinkingI'm\nthinkingHello, world!\nWhat's up?") - .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK) - .expect(message_assist_thoughts) - .run(); + // TODO: pending support for WRAPPED_WITH_REASONING + // tst.test("I'm\nthinkingHello, world!\nWhat's up?") + // .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK) + // .expect(message_assist_thoughts) + // .run(); } { @@ -1531,38 +1353,6 @@ static void test_template_output_parsers() { .expect(message_assist_call) .run(); - // Test deltas: the number of tool calls in partial parses should never decrease - std::string tool_msg = "\n" - "\n" - "[1, 2, 3]\n" - ""; - std::size_t previousToolCalls = 0; - for (std::size_t i = std::string("").length(); i < tool_msg.length() - 1; i++) { - auto partial = tool_msg.substr(0, i); - auto partial_res = common_chat_parse(partial, true, { COMMON_CHAT_FORMAT_SEED_OSS, COMMON_REASONING_FORMAT_DEEPSEEK }); - if (partial_res.tool_calls.size() < previousToolCalls) { - throw std::runtime_error("Tool call size decreased on partial: " + partial + " from " + std::to_string(previousToolCalls) + " to " + std::to_string(partial_res.tool_calls.size())); - } - previousToolCalls = partial_res.tool_calls.size(); - } - - // Test multiple parameters in tool call - common_chat_msg msg_multi_param; - msg_multi_param.role = "assistant"; - msg_multi_param.tool_calls.push_back({"process_data", "{\"input\": \"test\", \"format\": \"json\"}", ""}); - assert_msg_equals( - msg_multi_param, - common_chat_parse( - "\n" - "\n" - "1\n" - "\n" - "") - .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK) - .tools({ special_function_tool }) - .expect(simple_assist_msg("", "I need to call a function", "special_function", R"({"arg1": 1})")) - .run(); - tst.test( "\n" "\n" @@ -1588,9 +1378,7 @@ static void test_template_output_parsers() { tst.test( "\n" "\n" - "\n" - "[{\"item\": \"Check stuff\", \"selected\": false}, {\"item\": \"Prepare stuff\", \"selected\": true}]\n" - "\n" + "[{\"item\": 
\"Check stuff\", \"selected\": false}, {\"item\": \"Prepare stuff\", \"selected\": true}]\n" "\n" "") .tools({ @@ -1605,9 +1393,7 @@ static void test_template_output_parsers() { tst.test( "\n" "\n" - "\n" - "[{'item': 'Check stuff', 'selected': false}, {'item': 'Prepare stuff', 'selected': true}]\n" - "\n" + "[{'item': 'Check stuff', 'selected': false}, {'item': 'Prepare stuff', 'selected': true}]\n" "\n" "") .tools({ @@ -1618,19 +1404,19 @@ static void test_template_output_parsers() { }) .run(); - // single-quote normalization and tool call with inside quotes + // tool call with inside quotes tst.test( "\n" "\n" "\n" "foo.cpp\n" "\n" - "\n" + "" "def foo(arg = \"14\"):\n" " return arg + \"bar\"\n" "\n" "\n" - "\n" + "" "def foo(arg = \"15\"):\n" " pass\n" "\n" @@ -1809,7 +1595,7 @@ static void test_template_output_parsers() { // GLM-4.6 tests - format: function_name\n...\n...\n { - auto tst = peg_tester("models/templates/GLM-4.6.jinja"); + auto tst = peg_tester("models/templates/GLM-4.6.jinja", detailed_debug); tst.test( "special_function\n" "arg1\n1\n" @@ -1904,8 +1690,8 @@ static void test_template_output_parsers() { { auto tst = peg_tester("models/templates/MiniMax-M2.jinja", detailed_debug); tst.test( - "\n1\n") + "\n\n1\n\n") .tools({ special_function_tool }) .expect(message_assist_call) .run(); @@ -1915,7 +1701,7 @@ static void test_template_output_parsers() { // Format: [{"name": "func", "arguments": {...}}] { auto tst = peg_tester("models/templates/NVIDIA-Nemotron-Nano-v2.jinja", detailed_debug); - tst.test("[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}]") + tst.test("[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}]") .tools({ special_function_tool }) .expect(message_assist_call) .run(); @@ -2011,7 +1797,7 @@ static void test_template_output_parsers() { .run(); tst.test( "<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>special_function\n" - "```json\n{\"arg1\": 1}\n```<|tool▁call▁end|><|tool▁calls▁end|>") + 
"```json\n{\"arg1\": 1}```<|tool▁call▁end|><|tool▁calls▁end|>") .tools({ special_function_tool }) .parallel_tool_calls(true) .expect(message_assist_call) @@ -2028,7 +1814,7 @@ static void test_template_output_parsers() { .run(); tst.test( "<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>special_function\n" - "```json\n{\"arg1\": 1}\n```<|tool▁call▁end|><|tool▁calls▁end|>") + "```json\n{\"arg1\": 1}```<|tool▁call▁end|><|tool▁calls▁end|>") .tools({ special_function_tool }) .expect(message_assist_call) .run(); @@ -2361,7 +2147,6 @@ static void test_msg_diffs_compute() { } int main(int argc, char ** argv) { - common_log_set_verbosity_thold(999); bool detailed_debug = false; bool only_run_filtered = false; @@ -2375,6 +2160,7 @@ int main(int argc, char ** argv) { } if (arg == "--detailed") { detailed_debug = true; + common_log_set_verbosity_thold(999); } } diff --git a/tests/test-peg-parser.cpp b/tests/test-peg-parser.cpp index 220745d029..7d22d77612 100644 --- a/tests/test-peg-parser.cpp +++ b/tests/test-peg-parser.cpp @@ -20,6 +20,7 @@ int main(int argc, char *argv[]) { t.test("json", test_json_parser); t.test("gbnf", test_gbnf_generation); t.test("serialization", test_json_serialization); + t.test("python-dict", test_python_dict_parser); return t.summary(); } diff --git a/tools/parser/CMakeLists.txt b/tools/parser/CMakeLists.txt index 4bf40a8717..73157b0a0e 100644 --- a/tools/parser/CMakeLists.txt +++ b/tools/parser/CMakeLists.txt @@ -6,3 +6,12 @@ target_compile_features(${TARGET} PRIVATE cxx_std_17) if(LLAMA_TOOLS_INSTALL) install(TARGETS ${TARGET} RUNTIME) endif() + +set(TARGET llama-template-analysis) +add_executable(${TARGET} template-analysis.cpp) +target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT}) +target_compile_features(${TARGET} PRIVATE cxx_std_17) + +if(LLAMA_TOOLS_INSTALL) + install(TARGETS ${TARGET} RUNTIME) +endif() diff --git a/tools/parser/debug-template-parser.cpp b/tools/parser/debug-template-parser.cpp index 
551d2bcf9d..b8b4f3dfd3 100644 --- a/tools/parser/debug-template-parser.cpp +++ b/tools/parser/debug-template-parser.cpp @@ -1,15 +1,19 @@ #include "../src/llama-grammar.h" #include "chat-auto-parser.h" +#include "chat-diff-analyzer.h" #include "chat.h" #include "common.h" #include "gguf.h" +#include "jinja/runtime.h" #include "log.h" #include +#include #include #include #include "nlohmann/json.hpp" +#include "peg-parser.h" using json = nlohmann::ordered_json; @@ -239,7 +243,7 @@ static json build_tools_definition() { { "type", "string" }, { "description", "Second parameter" } }); - parameters_schema["required"] = json::array({ "param1", "param2" }); + parameters_schema["required"] = json::array({ "param1" }); return json::array({ json{ { "type", "function" }, @@ -324,68 +328,21 @@ static void render_all_scenarios(const common_chat_template & tmpl, } } -static const char * reasoning_mode_to_str(content_structure::reasoning_mode_type mode) { - switch (mode) { - case content_structure::REASONING_NONE: - return "NONE"; - case content_structure::REASONING_OPTIONAL: - return "OPTIONAL"; - case content_structure::REASONING_FORCED_OPEN: - return "FORCED_OPEN"; - } - return "UNKNOWN"; -} - -static const char * content_mode_to_str(content_structure::content_mode_type mode) { - switch (mode) { - case content_structure::CONTENT_PLAIN: - return "PLAIN"; - case content_structure::CONTENT_ALWAYS_WRAPPED: - return "ALWAYS_WRAPPED"; - case content_structure::CONTENT_WRAPPED_WITH_REASONING: - return "WRAPPED_WITH_REASONING"; - } - return "UNKNOWN"; -} - -static const char * function_format_to_str(enum tool_call_structure::function_format fmt) { - switch (fmt) { - case tool_call_structure::FUNC_JSON_OBJECT: - return "JSON_OBJECT"; - case tool_call_structure::FUNC_TAG_WITH_NAME: - return "TAG_WITH_NAME"; - case tool_call_structure::FUNC_TAG_NAME_ONLY: - return "TAG_NAME_ONLY"; - case tool_call_structure::FUNC_PREFIXED_INDEXED: - return "PREFIXED_INDEXED"; - case 
tool_call_structure::FUNC_NAME_AS_KEY: - return "NAME_AS_KEY"; - case tool_call_structure::FUNC_BRACKET_TAG: - return "BRACKET_TAG"; - case tool_call_structure::FUNC_RECIPIENT_BASED: - return "RECIPIENT_BASED"; - case tool_call_structure::FUNC_MARKDOWN_CODE_BLOCK: - return "MARKDOWN_CODE_BLOCK"; - } - return "UNKNOWN"; -} - -static const char * argument_format_to_str(enum tool_call_structure::argument_format fmt) { - switch (fmt) { - case tool_call_structure::ARGS_JSON: - return "JSON"; - case tool_call_structure::ARGS_TAGGED: - return "TAGGED"; - case tool_call_structure::ARGS_KEY_VALUE_TAGS: - return "KEY_VALUE_TAGS"; - } - return "UNKNOWN"; +template +static std::string mode_to_str(T mode) { + std::ostringstream os; + os << mode; + return os.str(); } int main(int argc, char ** argv) { // Set log level to most verbose to capture all debug output common_log_set_verbosity_thold(99); + if (std::getenv("LLAMA_DEBUG_JINJA") != nullptr) { + jinja::enable_debug(true); + } + debug_options opts; if (!parse_options(argc, argv, opts)) { return 1; @@ -434,48 +391,7 @@ int main(int argc, char ** argv) { LOG_ERR(" TEMPLATE ANALYSIS\n"); LOG_ERR("================================================================================\n"); - template_analysis_result analysis = template_analyzer::analyze_template(chat_template); - - LOG_ERR("\n=== Analysis Results ===\n"); - - LOG_ERR("\n--- Content Structure (Phase 1) ---\n"); - LOG_ERR("reasoning_mode: %s\n", reasoning_mode_to_str(analysis.content.reasoning_mode)); - LOG_ERR("reasoning_start: '%s'\n", analysis.content.reasoning_start.c_str()); - LOG_ERR("reasoning_end: '%s'\n", analysis.content.reasoning_end.c_str()); - LOG_ERR("content_mode: %s\n", content_mode_to_str(analysis.content.content_mode)); - LOG_ERR("content_start: '%s'\n", analysis.content.content_start.c_str()); - LOG_ERR("content_end: '%s'\n", analysis.content.content_end.c_str()); - - LOG_ERR("\n--- Tool Structure (Phase 2) ---\n"); - LOG_ERR("supports_tools: %s\n", 
analysis.tools.supports_tools ? "true" : "false"); - LOG_ERR("function_format: %s\n", function_format_to_str(analysis.tools.function_format)); - LOG_ERR("argument_format: %s\n", argument_format_to_str(analysis.tools.argument_format)); - LOG_ERR("tool_section_start: '%s'\n", analysis.tools.tool_section_start.c_str()); - LOG_ERR("tool_section_end: '%s'\n", analysis.tools.tool_section_end.c_str()); - LOG_ERR("function_prefix: '%s'\n", analysis.tools.function_prefix.c_str()); - LOG_ERR("function_suffix: '%s'\n", analysis.tools.function_suffix.c_str()); - LOG_ERR("function_close: '%s'\n", analysis.tools.function_close.c_str()); - LOG_ERR("arg_prefix: '%s'\n", analysis.tools.arg_prefix.c_str()); - LOG_ERR("arg_suffix: '%s'\n", analysis.tools.arg_suffix.c_str()); - LOG_ERR("arg_close: '%s'\n", analysis.tools.arg_close.c_str()); - LOG_ERR("name_field: '%s'\n", analysis.tools.name_field.c_str()); - LOG_ERR("args_field: '%s'\n", analysis.tools.args_field.c_str()); - LOG_ERR("id_field: '%s'\n", analysis.tools.id_field.c_str()); - - // Additional fields for special formats - if (analysis.tools.function_format == tool_call_structure::FUNC_PREFIXED_INDEXED) { - LOG_ERR("\n--- Prefixed-Indexed Format Details ---\n"); - LOG_ERR("per_call_start: '%s'\n", analysis.tools.per_call_start.c_str()); - LOG_ERR("function_namespace: '%s'\n", analysis.tools.function_namespace.c_str()); - LOG_ERR("args_marker: '%s'\n", analysis.tools.args_marker.c_str()); - LOG_ERR("per_call_end: '%s'\n", analysis.tools.per_call_end.c_str()); - } - if (analysis.tools.function_format == tool_call_structure::FUNC_BRACKET_TAG) { - LOG_ERR("\n--- Bracket-Tag Format Details ---\n"); - LOG_ERR("per_call_start: '%s'\n", analysis.tools.per_call_start.c_str()); - LOG_ERR("id_marker: '%s'\n", analysis.tools.id_marker.c_str()); - LOG_ERR("args_marker: '%s'\n", analysis.tools.args_marker.c_str()); - } + diff_analysis_result analysis = differential_analyzer::analyze(chat_template); // Generate Parser templates_params 
params; @@ -494,10 +410,45 @@ int main(int argc, char ** argv) { } params.parallel_tool_calls = false; - auto parser_data = universal_peg_generator::generate_parser(analysis, chat_template, params); + auto parser_data = universal_peg_generator::generate_parser(chat_template, params, analysis); + + LOG_ERR("\n=== Differential Analysis Results ===\n"); + + LOG_ERR("\n--- Reasoning & Content Structure ---\n"); + LOG_ERR("reasoning_mode: %s\n", mode_to_str(analysis.reasoning).c_str()); + LOG_ERR("reasoning_start: '%s'\n", analysis.markers.reasoning_start.c_str()); + LOG_ERR("reasoning_end: '%s'\n", analysis.markers.reasoning_end.c_str()); + LOG_ERR("content_mode: %s\n", mode_to_str(analysis.content).c_str()); + LOG_ERR("content_start: '%s'\n", analysis.markers.content_start.c_str()); + LOG_ERR("content_end: '%s'\n", analysis.markers.content_end.c_str()); + + LOG_ERR("\n--- Tool Call Structure ---\n"); + LOG_ERR("tool_mode: %s\n", mode_to_str(analysis.tools).c_str()); + LOG_ERR("supports_tools: %s\n", analysis.supports_tools ? "true" : "false"); + LOG_ERR("supports_parallel_calls: %s\n", analysis.supports_parallel_calls ? 
"true" : "false"); + LOG_ERR("tool_section_start: '%s'\n", analysis.markers.tool_section_start.c_str()); + LOG_ERR("tool_section_end: '%s'\n", analysis.markers.tool_section_end.c_str()); + LOG_ERR("per_call_start: '%s'\n", analysis.markers.per_call_start.c_str()); + LOG_ERR("per_call_end: '%s'\n", analysis.markers.per_call_end.c_str()); + LOG_ERR("func_name_prefix: '%s'\n", analysis.markers.func_name_prefix.c_str()); + LOG_ERR("func_name_suffix: '%s'\n", analysis.markers.func_name_suffix.c_str()); + LOG_ERR("func_close: '%s'\n", analysis.markers.func_close.c_str()); + LOG_ERR("arg_name_prefix: '%s'\n", analysis.markers.arg_name_prefix.c_str()); + LOG_ERR("arg_name_suffix: '%s'\n", analysis.markers.arg_name_suffix.c_str()); + LOG_ERR("arg_value_prefix: '%s'\n", analysis.markers.arg_value_prefix.c_str()); + LOG_ERR("arg_value_suffix: '%s'\n", analysis.markers.arg_value_suffix.c_str()); + LOG_ERR("name_field: '%s'\n", analysis.name_field.c_str()); + LOG_ERR("args_field: '%s'\n", analysis.args_field.c_str()); + LOG_ERR("id_field: '%s'\n", analysis.id_field.c_str()); + LOG_ERR("gen_id_field: '%s'\n", analysis.gen_id_field.c_str()); + LOG_ERR("parameter_order: '%s'\n", std::accumulate(analysis.parameter_order.begin(), analysis.parameter_order.end(), + std::string(""), [] (const std::string & a, const std::string & b) { return a.empty() ? 
b : a + ", " + b; } + ).c_str()); LOG_ERR("\n=== Generated Parser ===\n"); - LOG_ERR("%s\n", json::parse(parser_data.parser).dump(4).c_str()); + common_peg_arena arena; + arena.load(parser_data.parser); + LOG_ERR("%s\n", arena.dump(arena.root()).c_str()); LOG_ERR("\n=== Generated Grammar ===\n"); LOG_ERR("%s\n", parser_data.grammar.c_str()); diff --git a/tools/parser/template-analysis.cpp b/tools/parser/template-analysis.cpp new file mode 100644 index 0000000000..0fbcc09390 --- /dev/null +++ b/tools/parser/template-analysis.cpp @@ -0,0 +1,610 @@ +#include "chat-auto-parser.h" +#include "chat-auto-parser-helpers.h" +#include "chat.h" +#include "log.h" +#include "jinja/caps.h" +#include "jinja/runtime.h" + +#include +#include +#include +#include +#include + +#include "nlohmann/json.hpp" + +using json = nlohmann::ordered_json; + +// ANSI color codes - using 256-color palette for brighter colors (all bold) +#define ANSI_RESET "\033[0m" +#define ANSI_PURPLE "\033[1m\x1b[38;5;126m" // Bold bright purple for main headers +#define ANSI_CYAN "\033[1m\x1b[38;5;81m" // Bold bright cyan for section headers +#define ANSI_BLUE "\033[1m\x1b[38;5;12m" // Bold bright blue for labels +#define ANSI_ORANGE "\033[1m\x1b[38;5;209m" // Bold orange for right differences +#define ANSI_GREEN "\033[1m\x1b[38;5;83m" // Bold bright green for left differences +#define ANSI_GRAY "\033[1m\x1b[38;5;240m" // Bold gray (used for "no variables" message) +#define ANSI_BOLD "\033[1m" // Standalone bold +#define ANSI_PREFIX "\033[1m\x1b[38;5;176m" // Bold color for common prefix +#define ANSI_SUFFIX "\033[1m\x1b[38;5;61m" // Bold color for common suffix + +// All template paths extracted from tests/test-chat.cpp +static const std::vector ALL_TEMPLATE_PATHS = { + "models/templates/Apertus-8B-Instruct.jinja", + "models/templates/Apriel-1.6-15b-Thinker-fixed.jinja", + "models/templates/ByteDance-Seed-OSS.jinja", + "models/templates/CohereForAI-c4ai-command-r-plus-tool_use.jinja", + 
"models/templates/CohereForAI-c4ai-command-r7b-12-2024-tool_use.jinja", + "models/templates/GLM-4.6.jinja", + "models/templates/GLM-4.7-Flash.jinja", + "models/templates/Kimi-K2-Instruct.jinja", + "models/templates/Kimi-K2-Thinking.jinja", + "models/templates/MiMo-VL.jinja", + "models/templates/MiniMax-M2.jinja", + "models/templates/Mistral-Small-3.2-24B-Instruct-2506.jinja", + "models/templates/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16.jinja", + "models/templates/NVIDIA-Nemotron-Nano-v2.jinja", + "models/templates/NousResearch-Hermes-2-Pro-Llama-3-8B-tool_use.jinja", + "models/templates/NousResearch-Hermes-3-Llama-3.1-8B-tool_use.jinja", + "models/templates/Qwen-QwQ-32B.jinja", + "models/templates/Qwen-Qwen2.5-7B-Instruct.jinja", + "models/templates/Qwen3-Coder.jinja", + "models/templates/deepseek-ai-DeepSeek-R1-Distill-Llama-8B.jinja", + "models/templates/deepseek-ai-DeepSeek-R1-Distill-Qwen-32B.jinja", + "models/templates/deepseek-ai-DeepSeek-V3.1.jinja", + "models/templates/fireworks-ai-llama-3-firefunction-v2.jinja", + "models/templates/google-gemma-2-2b-it.jinja", + "models/templates/ibm-granite-granite-3.3-2B-Instruct.jinja", + "models/templates/llama-cpp-deepseek-r1.jinja", + "models/templates/meetkai-functionary-medium-v3.1.jinja", + "models/templates/meetkai-functionary-medium-v3.2.jinja", + "models/templates/meta-llama-Llama-3.1-8B-Instruct.jinja", + "models/templates/meta-llama-Llama-3.2-3B-Instruct.jinja", + "models/templates/meta-llama-Llama-3.3-70B-Instruct.jinja", + "models/templates/mistralai-Ministral-3-14B-Reasoning-2512.jinja", + "models/templates/mistralai-Mistral-Nemo-Instruct-2407.jinja", + "models/templates/moonshotai-Kimi-K2.jinja", + "models/templates/openai-gpt-oss-120b.jinja", + "models/templates/unsloth-Apriel-1.5.jinja", + "models/templates/unsloth-mistral-Devstral-Small-2507.jinja", +}; + +struct analysis_options { + std::vector template_paths; + bool analyze_all = false; +}; + +static std::string read_file(const std::string & path) { + 
std::ifstream fin(path, std::ios::binary); + if (!fin.is_open()) { + throw std::runtime_error("Could not open file: " + path); + } + std::ostringstream buf; + buf << fin.rdbuf(); + return buf.str(); +} + +static void print_usage(const char * program_name) { + LOG_ERR("Usage: %s [options]\n", program_name); + LOG_ERR("\nOptions:\n"); + LOG_ERR(" --template Analyze specific template from test suite (e.g., 'deepseek' or 'DeepSeek-V3.1')\n"); + LOG_ERR(" --template-file Analyze custom template file\n"); + LOG_ERR(" --all Analyze all templates from test suite\n"); + LOG_ERR("\nExamples:\n"); + LOG_ERR(" %s --all\n", program_name); + LOG_ERR(" %s --template deepseek\n", program_name); + LOG_ERR(" %s --template-file my-template.jinja\n", program_name); +} + +static bool parse_options(int argc, char ** argv, analysis_options & opts) { + if (argc < 2) { + print_usage(argv[0]); + return false; + } + + for (int i = 1; i < argc; ++i) { + std::string arg = argv[i]; + + if (arg == "--all") { + opts.analyze_all = true; + } else if (arg == "--template") { + if (i + 1 >= argc) { + LOG_ERR("--template requires an argument\n"); + return false; + } + std::string pattern = argv[++i]; + std::transform(pattern.begin(), pattern.end(), pattern.begin(), ::tolower); + + // Find matching templates + bool found = false; + for (const auto & path : ALL_TEMPLATE_PATHS) { + std::string path_lower = path; + std::transform(path_lower.begin(), path_lower.end(), path_lower.begin(), ::tolower); + if (path_lower.find(pattern) != std::string::npos) { + opts.template_paths.push_back(path); + found = true; + } + } + + if (!found) { + LOG_ERR("No templates found matching: %s\n", pattern.c_str()); + return false; + } + } else if (arg == "--template-file") { + if (i + 1 >= argc) { + LOG_ERR("--template-file requires an argument\n"); + return false; + } + opts.template_paths.push_back(argv[++i]); + } else { + LOG_ERR("Unknown option: %s\n", arg.c_str()); + print_usage(argv[0]); + return false; + } + } + + if 
(opts.analyze_all) { + opts.template_paths = ALL_TEMPLATE_PATHS; + } + + if (opts.template_paths.empty()) { + LOG_ERR("No templates specified\n"); + print_usage(argv[0]); + return false; + } + + return true; +} + +static json build_tools_definition() { + json parameters_schema = json::object(); + parameters_schema["type"] = "object"; + parameters_schema["properties"] = json::object(); + parameters_schema["properties"]["param1"] = json::object({ + { "type", "string" }, + { "description", "First parameter" } + }); + parameters_schema["properties"]["param2"] = json::object({ + { "type", "string" }, + { "description", "Second parameter" } + }); + parameters_schema["required"] = json::array({ "param1", "param2" }); + + return json::array({ + json{ { "type", "function" }, + { "function", json{ { "name", "test_function_name" }, + { "description", "A test function for debugging" }, + { "parameters", parameters_schema } } } } + }); +} + +// Helper to create a tool call with arguments as JSON object +static json build_tool_call(const std::string & name, const json & args_object, const std::string & id = "call_001") { + return json{ + {"id", id}, + {"type", "function"}, + {"function", json{ + {"name", name}, + {"arguments", args_object} // Pass as JSON object, not serialized string + }} + }; +} + +// Helper functions to create repeating message definitions +static json make_user_msg() { + return json{ + {"role", "user"}, + {"content", "Hello, please help me."} + }; +} + +static json make_user_msg2() { + return json{ + {"role", "user"}, + {"content", "Thank you."} + }; +} + +static json make_user_msg2_continue() { + return json{ + {"role", "user"}, + {"content", "Continue."} + }; +} + +static json make_assistant_no_tool() { + return json{ + {"role", "assistant"}, + {"content", "Let me help you."} + }; +} + +static json make_assistant_one_tool() { + return json{ + {"role", "assistant"}, + {"content", nullptr}, + {"tool_calls", json::array({ + 
build_tool_call("test_function_name", json::object({{"param1", "value1"}, {"param2", "value2"}})) + })} + }; +} + +static json make_assistant_two_tools() { + return json{ + {"role", "assistant"}, + {"content", nullptr}, + {"tool_calls", json::array({ + build_tool_call("test_function_name", json::object({{"param1", "value1"}, {"param2", "value2"}})), + build_tool_call("test_function_name", json::object({{"param1", "value3"}, {"param2", "value4"}}), "call_002") + })} + }; +} + +static json make_assistant_no_reasoning() { + return json{ + {"role", "assistant"}, + {"content", "I can help you with that."} + }; +} + +static json make_assistant_with_reasoning() { + return json{ + {"role", "assistant"}, + {"content", "I can help you with that."}, + {"reasoning_content", "The user is asking for help. I should respond positively."} + }; +} + +static json make_assistant_one_tool_with_reasoning() { + return json{ + {"role", "assistant"}, + {"content", nullptr}, + {"tool_calls", json::array({ + build_tool_call("test_function_name", json::object({{"param1", "value1"}, {"param2", "value2"}})) + })}, + {"reasoning_content", "I need to call the tool first."} + }; +} + +static void print_diff_split(const std::string & title, const diff_split & diff) { + LOG_ERR("\n%s=== %s ===%s\n", ANSI_CYAN, title.c_str(), ANSI_RESET); + LOG_ERR("%sCommon Prefix:%s '%s'\n", ANSI_PREFIX, ANSI_RESET, diff.prefix.c_str()); + LOG_ERR("%sCommon Suffix:%s '%s'\n", ANSI_SUFFIX, ANSI_RESET, diff.suffix.c_str()); + LOG_ERR("%sLeft (difference):%s '%s'\n", ANSI_GREEN, ANSI_RESET, diff.left.c_str()); + LOG_ERR("%sRight (difference):%s '%s'\n", ANSI_ORANGE, ANSI_RESET, diff.right.c_str()); +} + +static void check_reasoning_variables(const common_chat_template & tmpl) { + LOG_ERR("\n%s=== Checking Reasoning Variables ===%s\n", ANSI_CYAN, ANSI_RESET); + + try { + // Create a list of candidate reasoning/thinking variable names to probe + std::vector candidate_vars = { + "enable_reasoning", + "use_reasoning", + 
"reasoning_enabled", + "has_reasoning", + "reasoning_mode", + "reasoning_format", + "reasoning_active", + "with_reasoning", + "use_thinking", + "thinking_enabled", + "has_thinking", + "thinking_mode", + "thinking_format", + "thinking_active", + "with_thinking", + "enable_reason", + "reason_enabled", + "enable_think", + "think_enabled", + }; + + jinja::context ctx; + ctx.is_get_stats = true; + + json messages = json::array({ + json{ + {"role", "user"}, + {"content", "Test message"} + }, + json{ + {"role", "assistant"}, + {"content", "Response"}, + {"reasoning_content", "Some reasoning"} + } + }); + + // Set up base context + jinja::global_from_json(ctx, json{ + {"messages", messages}, + {"tools", json::array()}, + {"bos_token", ""}, + {"eos_token", ""}, + {"add_generation_prompt", false}, + {"enable_thinking", true} // Already passed, so we'll exclude this from results + }, true); + + // Add candidate variables as undefined to probe which ones are accessed + for (const auto & var_name : candidate_vars) { + ctx.set_val(var_name, jinja::mk_val(var_name)); + } + + try { + jinja::runtime runtime(ctx); + runtime.execute(tmpl.prog); + } catch (const std::exception & e) { + // Execution may fail, that's okay - we just want to see what variables were accessed + } + + // Check which candidate variables were accessed (stats.used = true) + std::vector accessed_vars; + for (const auto & var_name : candidate_vars) { + auto val = ctx.get_val(var_name); + if (!val->is_undefined()) { + // Variable was overwritten, skip it + continue; + } + if (val->stats.used) { + accessed_vars.push_back(var_name); + } + } + + if (accessed_vars.empty()) { + LOG_ERR("%sNo reasoning/thinking-related variables were queried by the template%s\n", ANSI_GRAY, ANSI_RESET); + } else { + LOG_ERR("Template queries the following reasoning/thinking-related variables:\n"); + for (const auto & var : accessed_vars) { + LOG_ERR(" %s- %s%s\n", ANSI_ORANGE, var.c_str(), ANSI_RESET); + } + } + + } catch (const 
std::exception & e) { + LOG_ERR("Error checking reasoning variables: %s\n", e.what()); + } +} + +static void analyze_template(const std::string & template_path) { + LOG_ERR("\n"); + LOG_ERR("%s", ANSI_PURPLE); + LOG_ERR("================================================================================\n"); + LOG_ERR(" ANALYZING TEMPLATE: %s\n", template_path.c_str()); + LOG_ERR("================================================================================\n"); + LOG_ERR("%s", ANSI_RESET); + + std::string template_source; + try { + template_source = read_file(template_path); + } catch (const std::exception & e) { + LOG_ERR("Error reading template: %s\n", e.what()); + return; + } + + try { + common_chat_template chat_template(template_source, "", ""); + json tools = build_tools_definition(); + + // ===== CAPABILITIES ANALYSIS ===== + LOG_ERR("\n%s=== Template Capabilities (from jinja::caps) ===%s\n", ANSI_CYAN, ANSI_RESET); + auto caps = chat_template.original_caps(); + LOG_ERR("%ssupports_tools:%s %s\n", ANSI_BLUE, ANSI_RESET, caps.supports_tools ? "true" : "false"); + LOG_ERR("%ssupports_tool_calls:%s %s\n", ANSI_BLUE, ANSI_RESET, caps.supports_tool_calls ? "true" : "false"); + LOG_ERR("%ssupports_system_role:%s %s\n", ANSI_BLUE, ANSI_RESET, caps.supports_system_role ? "true" : "false"); + LOG_ERR("%ssupports_parallel_tool_calls:%s %s\n", ANSI_BLUE, ANSI_RESET, caps.supports_parallel_tool_calls ? "true" : "false"); + LOG_ERR("%srequires_typed_content:%s %s\n", ANSI_BLUE, ANSI_RESET, caps.requires_typed_content ? 
"true" : "false"); + + // ===== DIFFERENTIAL ANALYSIS ===== + + // Test 1: With and without tools (single user message) + { + json user_msg = make_user_msg(); + + templates_params params_no_tools; + params_no_tools.messages = json::array({ user_msg }); + params_no_tools.add_generation_prompt = false; + params_no_tools.tools = json::array(); + + templates_params params_with_tools = params_no_tools; + params_with_tools.tools = tools; + + std::string output_no_tools = common_chat_template_direct_apply(chat_template, params_no_tools); + std::string output_with_tools = common_chat_template_direct_apply(chat_template, params_with_tools); + + auto diff = calculate_diff_split(output_no_tools, output_with_tools); + print_diff_split("Diff: With vs Without Tools (single user message)", diff); + } + + // Test 2: With and without add_generation_prompt (single user message) + { + json user_msg = make_user_msg(); + + templates_params params_no_prompt; + params_no_prompt.messages = json::array({ user_msg }); + params_no_prompt.add_generation_prompt = false; + params_no_prompt.tools = json::array(); + + templates_params params_with_prompt = params_no_prompt; + params_with_prompt.add_generation_prompt = true; + + std::string output_no_prompt = common_chat_template_direct_apply(chat_template, params_no_prompt); + std::string output_with_prompt = common_chat_template_direct_apply(chat_template, params_with_prompt); + + auto diff = calculate_diff_split(output_no_prompt, output_with_prompt); + print_diff_split("Diff: With vs Without add_generation_prompt (single user message)", diff); + } + + // Test 3: Assistant with reasoning_content (user, assistant) + { + json user_msg = make_user_msg(); + + templates_params params_no_reasoning; + params_no_reasoning.messages = json::array({ user_msg, make_assistant_no_reasoning() }); + params_no_reasoning.add_generation_prompt = false; + params_no_reasoning.enable_thinking = true; + + templates_params params_with_reasoning = params_no_reasoning; + 
params_with_reasoning.messages = json::array({ user_msg, make_assistant_with_reasoning() }); + + std::string output_no_reasoning = common_chat_template_direct_apply(chat_template, params_no_reasoning); + std::string output_with_reasoning = common_chat_template_direct_apply(chat_template, params_with_reasoning); + + auto diff = calculate_diff_split(output_no_reasoning, output_with_reasoning); + print_diff_split("Diff: With vs Without reasoning_content (user, assistant)", diff); + } + + // Test 4: Assistant with reasoning_content (user, assistant, user) + { + json user_msg = make_user_msg(); + json user_msg2 = make_user_msg2(); + + templates_params params_no_reasoning; + params_no_reasoning.messages = json::array({ user_msg, make_assistant_no_reasoning(), user_msg2 }); + params_no_reasoning.add_generation_prompt = false; + params_no_reasoning.enable_thinking = true; + + templates_params params_with_reasoning = params_no_reasoning; + params_with_reasoning.messages = json::array({ user_msg, make_assistant_with_reasoning(), user_msg2 }); + + std::string output_no_reasoning = common_chat_template_direct_apply(chat_template, params_no_reasoning); + std::string output_with_reasoning = common_chat_template_direct_apply(chat_template, params_with_reasoning); + + auto diff = calculate_diff_split(output_no_reasoning, output_with_reasoning); + print_diff_split("Diff: With vs Without reasoning_content (user, assistant, user)", diff); + } + + // Test 5: Tool call in last assistant message (user, assistant) + { + json user_msg = make_user_msg(); + + templates_params params_no_tool; + params_no_tool.messages = json::array({ user_msg, make_assistant_no_tool() }); + params_no_tool.add_generation_prompt = false; + params_no_tool.tools = tools; + + templates_params params_with_tool = params_no_tool; + params_with_tool.messages = json::array({ user_msg, make_assistant_one_tool() }); + + std::string output_no_tool = common_chat_template_direct_apply(chat_template, params_no_tool); + 
std::string output_with_tool = common_chat_template_direct_apply(chat_template, params_with_tool); + + auto diff = calculate_diff_split(output_no_tool, output_with_tool); + print_diff_split("Diff: With vs Without tool call (user, assistant)", diff); + } + + // Test 6: Tool call in last assistant message (user, assistant, user) + { + json user_msg = make_user_msg(); + json user_msg2 = make_user_msg2_continue(); + + templates_params params_no_tool; + params_no_tool.messages = json::array({ user_msg, make_assistant_no_tool(), user_msg2 }); + params_no_tool.add_generation_prompt = false; + params_no_tool.tools = tools; + + templates_params params_with_tool = params_no_tool; + params_with_tool.messages = json::array({ user_msg, make_assistant_one_tool(), user_msg2 }); + + std::string output_no_tool = common_chat_template_direct_apply(chat_template, params_no_tool); + std::string output_with_tool = common_chat_template_direct_apply(chat_template, params_with_tool); + + auto diff = calculate_diff_split(output_no_tool, output_with_tool); + print_diff_split("Diff: With vs Without tool call (user, assistant, user)", diff); + } + + // Test 7: One vs two tool calls (user, assistant) + { + json user_msg = make_user_msg(); + + templates_params params_one_tool; + params_one_tool.messages = json::array({ user_msg, make_assistant_one_tool() }); + params_one_tool.add_generation_prompt = false; + params_one_tool.tools = tools; + + templates_params params_two_tools = params_one_tool; + params_two_tools.messages = json::array({ user_msg, make_assistant_two_tools() }); + + std::string output_one_tool = common_chat_template_direct_apply(chat_template, params_one_tool); + std::string output_two_tools = common_chat_template_direct_apply(chat_template, params_two_tools); + + auto diff = calculate_diff_split(output_one_tool, output_two_tools); + print_diff_split("Diff: One vs Two tool calls (user, assistant)", diff); + } + + // Test 8: One vs two tool calls (user, assistant, user) + { + json 
user_msg = make_user_msg(); + json user_msg2 = make_user_msg2_continue(); + + templates_params params_one_tool; + params_one_tool.messages = json::array({ user_msg, make_assistant_one_tool(), user_msg2 }); + params_one_tool.add_generation_prompt = false; + params_one_tool.tools = tools; + + templates_params params_two_tools = params_one_tool; + params_two_tools.messages = json::array({ user_msg, make_assistant_two_tools(), user_msg2 }); + + std::string output_one_tool = common_chat_template_direct_apply(chat_template, params_one_tool); + std::string output_two_tools = common_chat_template_direct_apply(chat_template, params_two_tools); + + auto diff = calculate_diff_split(output_one_tool, output_two_tools); + print_diff_split("Diff: One vs Two tool calls (user, assistant, user)", diff); + } + + // Test 9: Tool call with vs without reasoning_content (user, assistant) + { + json user_msg = make_user_msg(); + + templates_params params_no_reasoning; + params_no_reasoning.messages = json::array({ user_msg, make_assistant_one_tool() }); + params_no_reasoning.add_generation_prompt = false; + params_no_reasoning.tools = tools; + params_no_reasoning.enable_thinking = true; + + templates_params params_with_reasoning = params_no_reasoning; + params_with_reasoning.messages = json::array({ user_msg, make_assistant_one_tool_with_reasoning() }); + + std::string output_no_reasoning = common_chat_template_direct_apply(chat_template, params_no_reasoning); + std::string output_with_reasoning = common_chat_template_direct_apply(chat_template, params_with_reasoning); + + auto diff = calculate_diff_split(output_no_reasoning, output_with_reasoning); + print_diff_split("Diff: Tool call with vs without reasoning_content (user, assistant)", diff); + } + + // Check reasoning variables + check_reasoning_variables(chat_template); + + } catch (const std::exception & e) { + LOG_ERR("Analysis failed: %s\n", e.what()); + } +} + +int main(int argc, char ** argv) { + // Set log level to capture all 
output + common_log_set_verbosity_thold(99); + + analysis_options opts; + if (!parse_options(argc, argv, opts)) { + return 1; + } + + LOG_ERR("\n"); + LOG_ERR("%s", ANSI_PURPLE); + LOG_ERR("================================================================================\n"); + LOG_ERR(" TEMPLATE ANALYSIS TOOL\n"); + LOG_ERR("================================================================================\n"); + LOG_ERR("%s", ANSI_RESET); + LOG_ERR("Analyzing %s%zu%s template(s)\n", ANSI_CYAN, opts.template_paths.size(), ANSI_RESET); + + for (const auto & path : opts.template_paths) { + analyze_template(path); + } + + LOG_ERR("\n"); + LOG_ERR("%s", ANSI_GREEN); + LOG_ERR("================================================================================\n"); + LOG_ERR(" ANALYSIS COMPLETE\n"); + LOG_ERR("================================================================================\n"); + LOG_ERR("%s", ANSI_RESET); + + return 0; +} From c7029f858ddf48e7f480ef817e724355074001f9 Mon Sep 17 00:00:00 2001 From: Piotr Wilkin Date: Mon, 2 Feb 2026 23:53:18 +0100 Subject: [PATCH 07/39] Missed this. 
--- 1 | 12 - template.ans | 7774 -------------------------------------------------- 2 files changed, 7786 deletions(-) delete mode 100644 1 delete mode 100644 template.ans diff --git a/1 b/1 deleted file mode 100644 index b77756c404..0000000000 --- a/1 +++ /dev/null @@ -1,12 +0,0 @@ -Unknown option: 2 -Usage: llama-template-analysis [options] - -Options: - --template Analyze specific template from test suite (e.g., 'deepseek' or 'DeepSeek-V3.1') - --template-file Analyze custom template file - --all Analyze all templates from test suite - -Examples: - llama-template-analysis --all - llama-template-analysis --template deepseek - llama-template-analysis --template-file my-template.jinja diff --git a/template.ans b/template.ans deleted file mode 100644 index da602b1a07..0000000000 --- a/template.ans +++ /dev/null @@ -1,7774 +0,0 @@ - -================================================================================ - TEMPLATE ANALYSIS TOOL -================================================================================ -Analyzing 37 template(s) - -================================================================================ - ANALYZING TEMPLATE: models/templates/Apertus-8B-Instruct.jinja -================================================================================ - -=== Template Capabilities (from jinja::caps) === -supports_tools: false -supports_tool_calls: true -supports_system_role: true -supports_parallel_tool_calls: true -requires_typed_content: false - -=== Diff: With vs Without Tools (single user message) === -Common Prefix: '<|system_start|>You are Apertus, a helpful assistant created by the SwissAI initiative. 
-Knowledge cutoff: 2024-04 -Current date: 2026-01-26<|system_end|><|developer_start|>Deliberation: enabled -Tool Capabilities:' -Common Suffix: '<|developer_end|><|user_start|>Hello, please help me.<|user_end|>' -Left (difference): ' disabled' -Right (difference): ' -// -type = () => any;' - -=== Diff: With vs Without add_generation_prompt (single user message) === -Common Prefix: '<|system_start|>You are Apertus, a helpful assistant created by the SwissAI initiative. -Knowledge cutoff: 2024-04 -Current date: 2026-01-26<|system_end|><|developer_start|>Deliberation: enabled -Tool Capabilities: disabled<|developer_end|><|user_start|>Hello, please help me.<|user_end|>' -Common Suffix: '' -Left (difference): '' -Right (difference): '<|assistant_start|>' - -=== Diff: With vs Without reasoning_content (user, assistant) === -Common Prefix: '<|system_start|>You are Apertus, a helpful assistant created by the SwissAI initiative. -Knowledge cutoff: 2024-04 -Current date: 2026-01-26<|system_end|><|developer_start|>Deliberation: enabled -Tool Capabilities: disabled<|developer_end|><|user_start|>Hello, please help me.<|user_end|><|assistant_start|>I can help you with that.' -Common Suffix: '' -Left (difference): '' -Right (difference): '' - -=== Diff: With vs Without reasoning_content (user, assistant, user) === -Common Prefix: '<|system_start|>You are Apertus, a helpful assistant created by the SwissAI initiative. -Knowledge cutoff: 2024-04 -Current date: 2026-01-26<|system_end|><|developer_start|>Deliberation: enabled -Tool Capabilities: disabled<|developer_end|><|user_start|>Hello, please help me.<|user_end|><|assistant_start|>I can help you with that.<|assistant_end|><|user_start|>Thank you.<|user_end|>' -Common Suffix: '' -Left (difference): '' -Right (difference): '' - -=== Diff: With vs Without tool call (user, assistant) === -Common Prefix: '<|system_start|>You are Apertus, a helpful assistant created by the SwissAI initiative. 
-Knowledge cutoff: 2024-04 -Current date: 2026-01-26<|system_end|><|developer_start|>Deliberation: enabled -Tool Capabilities: -// -type = () => any;<|developer_end|><|user_start|>Hello, please help me.<|user_end|><|assistant_start|>' -Common Suffix: '' -Left (difference): 'Let me help you.' -Right (difference): '<|tools_prefix|>[{"test_function_name": {"param1":0x6414d8ab7770, "param2":0x6414d8b07d80}}]<|tools_suffix|>' - -=== Diff: With vs Without tool call (user, assistant, user) === -Common Prefix: '<|system_start|>You are Apertus, a helpful assistant created by the SwissAI initiative. -Knowledge cutoff: 2024-04 -Current date: 2026-01-26<|system_end|><|developer_start|>Deliberation: enabled -Tool Capabilities: -// -type = () => any;<|developer_end|><|user_start|>Hello, please help me.<|user_end|><|assistant_start|>' -Common Suffix: '<|assistant_end|><|user_start|>Continue.<|user_end|>' -Left (difference): 'Let me help you.' -Right (difference): '<|tools_prefix|>[{"test_function_name": {"param1":0x6414d8b0a5f0, "param2":0x6414d8b03350}}]<|tools_suffix|>' - -=== Diff: One vs Two tool calls (user, assistant) === -Common Prefix: '<|system_start|>You are Apertus, a helpful assistant created by the SwissAI initiative. -Knowledge cutoff: 2024-04 -Current date: 2026-01-26<|system_end|><|developer_start|>Deliberation: enabled -Tool Capabilities: -// -type = () => any;<|developer_end|><|user_start|>Hello, please help me.<|user_end|><|assistant_start|><|tools_prefix|>[{"test_function_name": {"param1":0x6414d8ab' -Common Suffix: '0}}]<|tools_suffix|>' -Left (difference): '8210, "param2":0x6414d8b1315' -Right (difference): '7220, "param2":0x6414d8b06690}}, {"test_function_name": {"param1":0x6414d8ae81e0, "param2":0x6414d8b0d43' - -=== Diff: One vs Two tool calls (user, assistant, user) === -Common Prefix: '<|system_start|>You are Apertus, a helpful assistant created by the SwissAI initiative. 
-Knowledge cutoff: 2024-04 -Current date: 2026-01-26<|system_end|><|developer_start|>Deliberation: enabled -Tool Capabilities: -// -type = () => any;<|developer_end|><|user_start|>Hello, please help me.<|user_end|><|assistant_start|><|tools_prefix|>[{"test_function_name": {"param1":0x6414d8b0' -Common Suffix: '0}}]<|tools_suffix|><|assistant_end|><|user_start|>Continue.<|user_end|>' -Left (difference): '6690, "param2":0x6414d8b0e31' -Right (difference): 'd430, "param2":0x6414d8b06540}}, {"test_function_name": {"param1":0x6414d8b04530, "param2":0x6414d8b05b8' - -=== Diff: Tool call with vs without reasoning_content (user, assistant) === -Common Prefix: '<|system_start|>You are Apertus, a helpful assistant created by the SwissAI initiative. -Knowledge cutoff: 2024-04 -Current date: 2026-01-26<|system_end|><|developer_start|>Deliberation: enabled -Tool Capabilities: -// -type = () => any;<|developer_end|><|user_start|>Hello, please help me.<|user_end|><|assistant_start|><|tools_prefix|>[{"test_function_name": {"param1":0x6414d8' -Common Suffix: '0}}]<|tools_suffix|>' -Left (difference): 'b05b80, "param2":0x6414d8b0a1c' -Right (difference): 'ae81e0, "param2":0x6414d8b0d43' - -=== Checking Reasoning Variables === -No reasoning/thinking-related variables were queried by the template - -================================================================================ - ANALYZING TEMPLATE: models/templates/Apriel-1.6-15b-Thinker-fixed.jinja -================================================================================ - -=== Template Capabilities (from jinja::caps) === -supports_tools: false -supports_tool_calls: true -supports_system_role: true -supports_parallel_tool_calls: true -requires_typed_content: false - -=== Diff: With vs Without Tools (single user message) === -Common Prefix: '<|begin_system|> -You are a thoughtful, systematic AI assistant from ServiceNow Language Models (SLAM) lab. 
- Analyze each question carefully, present your reasoning step-by-step, then provide the final - response after the marker [BEGIN FINAL RESPONSE]. -' -Common Suffix: '<|begin_user|> -Hello, please help me. -<|begin_assistant|> -Here are my reasoning steps: -' -Left (difference): '' -Right (difference): 'You are provided with function signatures within XML tags. - You may call one or more functions to assist with the user query. - Don't make assumptions about the arguments. You should infer the argument values from previous - user responses and the system message. - Here are the available tools: - -{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}} - . - - Return all function calls as a list of JSON objects within XML tags. - Each JSON object should contain a function name and arguments as follows: - [ - {"name": , "arguments": }, - {"name": , "arguments": }, - ... - ] -' - -=== Diff: With vs Without add_generation_prompt (single user message) === -Common Prefix: '<|begin_system|> -You are a thoughtful, systematic AI assistant from ServiceNow Language Models (SLAM) lab. - Analyze each question carefully, present your reasoning step-by-step, then provide the final - response after the marker [BEGIN FINAL RESPONSE]. -<|begin_user|> -Hello, please help me. -<|begin_assistant|> -Here are my reasoning steps: -' -Common Suffix: '' -Left (difference): '' -Right (difference): '' - -=== Diff: With vs Without reasoning_content (user, assistant) === -Common Prefix: '<|begin_system|> -You are a thoughtful, systematic AI assistant from ServiceNow Language Models (SLAM) lab. 
- Analyze each question carefully, present your reasoning step-by-step, then provide the final - response after the marker [BEGIN FINAL RESPONSE]. -<|begin_user|> -Hello, please help me. -<|begin_assistant|> -' -Common Suffix: 'I can help you with that.' -Left (difference): '' -Right (difference): 'The user is asking for help. I should respond positively. -[BEGIN FINAL RESPONSE] -' - -=== Diff: With vs Without reasoning_content (user, assistant, user) === -Common Prefix: '<|begin_system|> -You are a thoughtful, systematic AI assistant from ServiceNow Language Models (SLAM) lab. - Analyze each question carefully, present your reasoning step-by-step, then provide the final - response after the marker [BEGIN FINAL RESPONSE]. -<|begin_user|> -Hello, please help me. -<|begin_assistant|> -I can help you with that. -<|end|> -<|begin_user|> -Thank you. -<|begin_assistant|> -Here are my reasoning steps: -' -Common Suffix: '' -Left (difference): '' -Right (difference): '' - -=== Diff: With vs Without tool call (user, assistant) === -Common Prefix: '<|begin_system|> -You are a thoughtful, systematic AI assistant from ServiceNow Language Models (SLAM) lab. - Analyze each question carefully, present your reasoning step-by-step, then provide the final - response after the marker [BEGIN FINAL RESPONSE]. -You are provided with function signatures within XML tags. - You may call one or more functions to assist with the user query. - Don't make assumptions about the arguments. You should infer the argument values from previous - user responses and the system message. - Here are the available tools: - -{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}} - . 
- - Return all function calls as a list of JSON objects within XML tags. - Each JSON object should contain a function name and arguments as follows: - [ - {"name": , "arguments": }, - {"name": , "arguments": }, - ... - ] -<|begin_user|> -Hello, please help me. -<|begin_assistant|> -' -Common Suffix: '' -Left (difference): 'Let me help you.' -Right (difference): ' -[{"name": "test_function_name", "arguments": {"param1": "value1", "param2": "value2"}}]' - -=== Diff: With vs Without tool call (user, assistant, user) === -Common Prefix: '<|begin_system|> -You are a thoughtful, systematic AI assistant from ServiceNow Language Models (SLAM) lab. - Analyze each question carefully, present your reasoning step-by-step, then provide the final - response after the marker [BEGIN FINAL RESPONSE]. -You are provided with function signatures within XML tags. - You may call one or more functions to assist with the user query. - Don't make assumptions about the arguments. You should infer the argument values from previous - user responses and the system message. - Here are the available tools: - -{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}} - . - - Return all function calls as a list of JSON objects within XML tags. - Each JSON object should contain a function name and arguments as follows: - [ - {"name": , "arguments": }, - {"name": , "arguments": }, - ... - ] -<|begin_user|> -Hello, please help me. -<|begin_assistant|> -' -Common Suffix: ' -<|end|> -<|begin_user|> -Continue. -<|begin_assistant|> -Here are my reasoning steps: -' -Left (difference): 'Let me help you.' 
-Right (difference): ' -[{"name": "test_function_name", "arguments": {"param1": "value1", "param2": "value2"}, "id": "call_001"}]' - -=== Diff: One vs Two tool calls (user, assistant) === -Common Prefix: '<|begin_system|> -You are a thoughtful, systematic AI assistant from ServiceNow Language Models (SLAM) lab. - Analyze each question carefully, present your reasoning step-by-step, then provide the final - response after the marker [BEGIN FINAL RESPONSE]. -You are provided with function signatures within XML tags. - You may call one or more functions to assist with the user query. - Don't make assumptions about the arguments. You should infer the argument values from previous - user responses and the system message. - Here are the available tools: - -{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}} - . - - Return all function calls as a list of JSON objects within XML tags. - Each JSON object should contain a function name and arguments as follows: - [ - {"name": , "arguments": }, - {"name": , "arguments": }, - ... - ] -<|begin_user|> -Hello, please help me. -<|begin_assistant|> - -[{"name": "test_function_name", "arguments": {"param1": "value1", "param2": "value2"}}' -Common Suffix: ']' -Left (difference): '' -Right (difference): ', {"name": "test_function_name", "arguments": {"param1": "value3", "param2": "value4"}}' - -=== Diff: One vs Two tool calls (user, assistant, user) === -Common Prefix: '<|begin_system|> -You are a thoughtful, systematic AI assistant from ServiceNow Language Models (SLAM) lab. - Analyze each question carefully, present your reasoning step-by-step, then provide the final - response after the marker [BEGIN FINAL RESPONSE]. 
-You are provided with function signatures within XML tags. - You may call one or more functions to assist with the user query. - Don't make assumptions about the arguments. You should infer the argument values from previous - user responses and the system message. - Here are the available tools: - -{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}} - . - - Return all function calls as a list of JSON objects within XML tags. - Each JSON object should contain a function name and arguments as follows: - [ - {"name": , "arguments": }, - {"name": , "arguments": }, - ... - ] -<|begin_user|> -Hello, please help me. -<|begin_assistant|> - -[{"name": "test_function_name", "arguments": {"param1": "value1", "param2": "value2"}, "id": "call_001"}' -Common Suffix: '] -<|end|> -<|begin_user|> -Continue. -<|begin_assistant|> -Here are my reasoning steps: -' -Left (difference): '' -Right (difference): ', {"name": "test_function_name", "arguments": {"param1": "value3", "param2": "value4"}, "id": "call_002"}' - -=== Diff: Tool call with vs without reasoning_content (user, assistant) === -Common Prefix: '<|begin_system|> -You are a thoughtful, systematic AI assistant from ServiceNow Language Models (SLAM) lab. - Analyze each question carefully, present your reasoning step-by-step, then provide the final - response after the marker [BEGIN FINAL RESPONSE]. -You are provided with function signatures within XML tags. - You may call one or more functions to assist with the user query. - Don't make assumptions about the arguments. You should infer the argument values from previous - user responses and the system message. 
- Here are the available tools: - -{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}} - . - - Return all function calls as a list of JSON objects within XML tags. - Each JSON object should contain a function name and arguments as follows: - [ - {"name": , "arguments": }, - {"name": , "arguments": }, - ... - ] -<|begin_user|> -Hello, please help me. -<|begin_assistant|> -' -Common Suffix: ' -[{"name": "test_function_name", "arguments": {"param1": "value1", "param2": "value2"}}]' -Left (difference): '' -Right (difference): 'I need to call the tool first. -[BEGIN FINAL RESPONSE] -' - -=== Checking Reasoning Variables === -No reasoning/thinking-related variables were queried by the template - -================================================================================ - ANALYZING TEMPLATE: models/templates/ByteDance-Seed-OSS.jinja -================================================================================ - -=== Template Capabilities (from jinja::caps) === -supports_tools: true -supports_tool_calls: true -supports_system_role: true -supports_parallel_tool_calls: true -requires_typed_content: false - -=== Diff: With vs Without Tools (single user message) === -Common Prefix: '' -Common Suffix: 'user -Hello, please help me.' -Left (difference): '' -Right (difference): 'system -You are Doubao, a helpful AI assistant. You may call one or more functions to assist with the user query. 
- -Function: -def test_function_name(param1: str,param2: str): - """ - A test function for debugging - - Args: - - param1 (str) [必填]: First parameter - - param2 (str) [必填]: Second parameter - - """ -工具调用请遵循如下格式: - - -value_1 -This is the value for the second parameter -that can span -multiple lines - - -' - -=== Diff: With vs Without add_generation_prompt (single user message) === -Common Prefix: 'user -Hello, please help me.' -Common Suffix: '' -Left (difference): '' -Right (difference): 'assistant -' - -=== Diff: With vs Without reasoning_content (user, assistant) === -Common Prefix: 'user -Hello, please help me.assistant -' -Common Suffix: 'I can help you with that.' -Left (difference): '' -Right (difference): 'The user is asking for help. I should respond positively. -' - -=== Diff: With vs Without reasoning_content (user, assistant, user) === -Common Prefix: 'user -Hello, please help me.assistant -' -Common Suffix: 'I can help you with that.user -Thank you.' -Left (difference): '' -Right (difference): 'The user is asking for help. I should respond positively. -' - -=== Diff: With vs Without tool call (user, assistant) === -Common Prefix: 'system -You are Doubao, a helpful AI assistant. You may call one or more functions to assist with the user query. - -Function: -def test_function_name(param1: str,param2: str): - """ - A test function for debugging - - Args: - - param1 (str) [必填]: First parameter - - param2 (str) [必填]: Second parameter - - """ -工具调用请遵循如下格式: - - -value_1 -This is the value for the second parameter -that can span -multiple lines - - -user -Hello, please help me.assistant -' -Common Suffix: '' -Left (difference): 'Let me help you.' -Right (difference): ' - -value1 -value2 - -' - -=== Diff: With vs Without tool call (user, assistant, user) === -Common Prefix: 'system -You are Doubao, a helpful AI assistant. You may call one or more functions to assist with the user query. 
- -Function: -def test_function_name(param1: str,param2: str): - """ - A test function for debugging - - Args: - - param1 (str) [必填]: First parameter - - param2 (str) [必填]: Second parameter - - """ -工具调用请遵循如下格式: - - -value_1 -This is the value for the second parameter -that can span -multiple lines - - -user -Hello, please help me.assistant -' -Common Suffix: 'user -Continue.' -Left (difference): 'Let me help you.' -Right (difference): ' - -value1 -value2 - -' - -=== Diff: One vs Two tool calls (user, assistant) === -Common Prefix: 'system -You are Doubao, a helpful AI assistant. You may call one or more functions to assist with the user query. - -Function: -def test_function_name(param1: str,param2: str): - """ - A test function for debugging - - Args: - - param1 (str) [必填]: First parameter - - param2 (str) [必填]: Second parameter - - """ -工具调用请遵循如下格式: - - -value_1 -This is the value for the second parameter -that can span -multiple lines - - -user -Hello, please help me.assistant - - -value1 -value2 - -' -Common Suffix: '' -Left (difference): '' -Right (difference): ' - - -value3 -value4 - -' - -=== Diff: One vs Two tool calls (user, assistant, user) === -Common Prefix: 'system -You are Doubao, a helpful AI assistant. You may call one or more functions to assist with the user query. - -Function: -def test_function_name(param1: str,param2: str): - """ - A test function for debugging - - Args: - - param1 (str) [必填]: First parameter - - param2 (str) [必填]: Second parameter - - """ -工具调用请遵循如下格式: - - -value_1 -This is the value for the second parameter -that can span -multiple lines - - -user -Hello, please help me.assistant - - -value1 -value2 - -' -Common Suffix: 'user -Continue.' -Left (difference): '' -Right (difference): ' - - -value3 -value4 - -' - -=== Diff: Tool call with vs without reasoning_content (user, assistant) === -Common Prefix: 'system -You are Doubao, a helpful AI assistant. You may call one or more functions to assist with the user query. 
- -Function: -def test_function_name(param1: str,param2: str): - """ - A test function for debugging - - Args: - - param1 (str) [必填]: First parameter - - param2 (str) [必填]: Second parameter - - """ -工具调用请遵循如下格式: - - -value_1 -This is the value for the second parameter -that can span -multiple lines - - -user -Hello, please help me.assistant -' -Common Suffix: ' - -value1 -value2 - -' -Left (difference): '' -Right (difference): 'I need to call the tool first. -' - -=== Checking Reasoning Variables === -No reasoning/thinking-related variables were queried by the template - -================================================================================ - ANALYZING TEMPLATE: models/templates/CohereForAI-c4ai-command-r-plus-tool_use.jinja -================================================================================ - -=== Template Capabilities (from jinja::caps) === -supports_tools: true -supports_tool_calls: true -supports_system_role: true -supports_parallel_tool_calls: true -requires_typed_content: false - -=== Diff: With vs Without Tools (single user message) === -Common Prefix: '<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|># Safety Preamble -The instructions in this section override those in the task description and style guide sections. Don't answer questions that are harmful or immoral. - -# System Preamble -## Basic Rules -You are a powerful conversational AI trained by Cohere to help people. You are augmented by a number of tools, and your job is to use and consume the output of these tools to best help the user. You will see a conversation history between yourself and a user, ending with an utterance from the user. You will then see a specific instruction instructing you what kind of response to generate. When you answer the user's requests, you cite your sources in your answers, according to those instructions. - -# User Preamble -## Task and Context -You help people answer their questions and other requests interactively. 
You will be asked a very wide array of requests on all kinds of topics. You will be equipped with a wide range of search engines or similar tools to help you, which you use to research your answer. You should focus on serving the user's needs as best you can, which will be wide-ranging. - -## Style Guide -Unless the user asks for a different style of answer, you should answer in full sentences, using proper grammar and spelling. - -## Available Tools -Here is a list of tools that you have available to you: - -' -Common Suffix: '<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Hello, please help me.<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>Write 'Action:' followed by a json-formatted list of actions that you want to perform in order to produce a good response to the user's last input. You can use any of the supplied tools any number of times, but you should aim to execute the minimum number of necessary actions for the input. You should use the `directly-answer` tool if calling the other tools is unnecessary. The list of actions you want to call should be formatted as a list of json objects, for example: -```json -[ - { - "tool_name": title of the tool in the specification, - "parameters": a dict of parameters to input into the tool as they are defined in the specs, or {} if it takes no parameters - } -]```<|END_OF_TURN_TOKEN|>' -Left (difference): '' -Right (difference): '```python -def test_function_name(param1: str, param2: str) -> List[Dict]: - """A test function for debugging - - Args: - param1 (str): First parameter - param2 (str): Second parameter - """ - pass -```' - -=== Diff: With vs Without add_generation_prompt (single user message) === -Common Prefix: '<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|># Safety Preamble -The instructions in this section override those in the task description and style guide sections. Don't answer questions that are harmful or immoral. 
- -# System Preamble -## Basic Rules -You are a powerful conversational AI trained by Cohere to help people. You are augmented by a number of tools, and your job is to use and consume the output of these tools to best help the user. You will see a conversation history between yourself and a user, ending with an utterance from the user. You will then see a specific instruction instructing you what kind of response to generate. When you answer the user's requests, you cite your sources in your answers, according to those instructions. - -# User Preamble -## Task and Context -You help people answer their questions and other requests interactively. You will be asked a very wide array of requests on all kinds of topics. You will be equipped with a wide range of search engines or similar tools to help you, which you use to research your answer. You should focus on serving the user's needs as best you can, which will be wide-ranging. - -## Style Guide -Unless the user asks for a different style of answer, you should answer in full sentences, using proper grammar and spelling. - -## Available Tools -Here is a list of tools that you have available to you: - -<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Hello, please help me.<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>Write 'Action:' followed by a json-formatted list of actions that you want to perform in order to produce a good response to the user's last input. You can use any of the supplied tools any number of times, but you should aim to execute the minimum number of necessary actions for the input. You should use the `directly-answer` tool if calling the other tools is unnecessary. 
The list of actions you want to call should be formatted as a list of json objects, for example: -```json -[ - { - "tool_name": title of the tool in the specification, - "parameters": a dict of parameters to input into the tool as they are defined in the specs, or {} if it takes no parameters - } -]```<|END_OF_TURN_TOKEN|>' -Common Suffix: '' -Left (difference): '' -Right (difference): '<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>' - -=== Diff: With vs Without reasoning_content (user, assistant) === -Common Prefix: '<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|># Safety Preamble -The instructions in this section override those in the task description and style guide sections. Don't answer questions that are harmful or immoral. - -# System Preamble -## Basic Rules -You are a powerful conversational AI trained by Cohere to help people. You are augmented by a number of tools, and your job is to use and consume the output of these tools to best help the user. You will see a conversation history between yourself and a user, ending with an utterance from the user. You will then see a specific instruction instructing you what kind of response to generate. When you answer the user's requests, you cite your sources in your answers, according to those instructions. - -# User Preamble -## Task and Context -You help people answer their questions and other requests interactively. You will be asked a very wide array of requests on all kinds of topics. You will be equipped with a wide range of search engines or similar tools to help you, which you use to research your answer. You should focus on serving the user's needs as best you can, which will be wide-ranging. - -## Style Guide -Unless the user asks for a different style of answer, you should answer in full sentences, using proper grammar and spelling. 
- -## Available Tools -Here is a list of tools that you have available to you: - -<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Hello, please help me.<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>I can help you with that.<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>Write 'Action:' followed by a json-formatted list of actions that you want to perform in order to produce a good response to the user's last input. You can use any of the supplied tools any number of times, but you should aim to execute the minimum number of necessary actions for the input. You should use the `directly-answer` tool if calling the other tools is unnecessary. The list of actions you want to call should be formatted as a list of json objects, for example: -```json -[ - { - "tool_name": title of the tool in the specification, - "parameters": a dict of parameters to input into the tool as they are defined in the specs, or {} if it takes no parameters - } -]```<|END_OF_TURN_TOKEN|>' -Common Suffix: '' -Left (difference): '' -Right (difference): '' - -=== Diff: With vs Without reasoning_content (user, assistant, user) === -Common Prefix: '<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|># Safety Preamble -The instructions in this section override those in the task description and style guide sections. Don't answer questions that are harmful or immoral. - -# System Preamble -## Basic Rules -You are a powerful conversational AI trained by Cohere to help people. You are augmented by a number of tools, and your job is to use and consume the output of these tools to best help the user. You will see a conversation history between yourself and a user, ending with an utterance from the user. You will then see a specific instruction instructing you what kind of response to generate. When you answer the user's requests, you cite your sources in your answers, according to those instructions. 
- -# User Preamble -## Task and Context -You help people answer their questions and other requests interactively. You will be asked a very wide array of requests on all kinds of topics. You will be equipped with a wide range of search engines or similar tools to help you, which you use to research your answer. You should focus on serving the user's needs as best you can, which will be wide-ranging. - -## Style Guide -Unless the user asks for a different style of answer, you should answer in full sentences, using proper grammar and spelling. - -## Available Tools -Here is a list of tools that you have available to you: - -<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Hello, please help me.<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>I can help you with that.<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Thank you.<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>Write 'Action:' followed by a json-formatted list of actions that you want to perform in order to produce a good response to the user's last input. You can use any of the supplied tools any number of times, but you should aim to execute the minimum number of necessary actions for the input. You should use the `directly-answer` tool if calling the other tools is unnecessary. The list of actions you want to call should be formatted as a list of json objects, for example: -```json -[ - { - "tool_name": title of the tool in the specification, - "parameters": a dict of parameters to input into the tool as they are defined in the specs, or {} if it takes no parameters - } -]```<|END_OF_TURN_TOKEN|>' -Common Suffix: '' -Left (difference): '' -Right (difference): '' - -=== Diff: With vs Without tool call (user, assistant) === -Common Prefix: '<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|># Safety Preamble -The instructions in this section override those in the task description and style guide sections. Don't answer questions that are harmful or immoral. 
- -# System Preamble -## Basic Rules -You are a powerful conversational AI trained by Cohere to help people. You are augmented by a number of tools, and your job is to use and consume the output of these tools to best help the user. You will see a conversation history between yourself and a user, ending with an utterance from the user. You will then see a specific instruction instructing you what kind of response to generate. When you answer the user's requests, you cite your sources in your answers, according to those instructions. - -# User Preamble -## Task and Context -You help people answer their questions and other requests interactively. You will be asked a very wide array of requests on all kinds of topics. You will be equipped with a wide range of search engines or similar tools to help you, which you use to research your answer. You should focus on serving the user's needs as best you can, which will be wide-ranging. - -## Style Guide -Unless the user asks for a different style of answer, you should answer in full sentences, using proper grammar and spelling. - -## Available Tools -Here is a list of tools that you have available to you: - -```python -def test_function_name(param1: str, param2: str) -> List[Dict]: - """A test function for debugging - - Args: - param1 (str): First parameter - param2 (str): Second parameter - """ - pass -```<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Hello, please help me.<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>' -Common Suffix: '<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>Write 'Action:' followed by a json-formatted list of actions that you want to perform in order to produce a good response to the user's last input. You can use any of the supplied tools any number of times, but you should aim to execute the minimum number of necessary actions for the input. You should use the `directly-answer` tool if calling the other tools is unnecessary. 
The list of actions you want to call should be formatted as a list of json objects, for example: -```json -[ - { - "tool_name": title of the tool in the specification, - "parameters": a dict of parameters to input into the tool as they are defined in the specs, or {} if it takes no parameters - } -]```<|END_OF_TURN_TOKEN|>' -Left (difference): 'Let me help you.<|END_OF_TURN_TOKEN|>' -Right (difference): ' -Action: -```json -[ - { - "tool_name": "test_function_name", - "parameters": { - "param1": "value1", - "param2": "value2" -} - } -]``` -' - -=== Diff: With vs Without tool call (user, assistant, user) === -Common Prefix: '<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|># Safety Preamble -The instructions in this section override those in the task description and style guide sections. Don't answer questions that are harmful or immoral. - -# System Preamble -## Basic Rules -You are a powerful conversational AI trained by Cohere to help people. You are augmented by a number of tools, and your job is to use and consume the output of these tools to best help the user. You will see a conversation history between yourself and a user, ending with an utterance from the user. You will then see a specific instruction instructing you what kind of response to generate. When you answer the user's requests, you cite your sources in your answers, according to those instructions. - -# User Preamble -## Task and Context -You help people answer their questions and other requests interactively. You will be asked a very wide array of requests on all kinds of topics. You will be equipped with a wide range of search engines or similar tools to help you, which you use to research your answer. You should focus on serving the user's needs as best you can, which will be wide-ranging. - -## Style Guide -Unless the user asks for a different style of answer, you should answer in full sentences, using proper grammar and spelling. 
- -## Available Tools -Here is a list of tools that you have available to you: - -```python -def test_function_name(param1: str, param2: str) -> List[Dict]: - """A test function for debugging - - Args: - param1 (str): First parameter - param2 (str): Second parameter - """ - pass -```<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Hello, please help me.<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>' -Common Suffix: '<|START_OF_TURN_TOKEN|><|USER_TOKEN|>Continue.<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>Write 'Action:' followed by a json-formatted list of actions that you want to perform in order to produce a good response to the user's last input. You can use any of the supplied tools any number of times, but you should aim to execute the minimum number of necessary actions for the input. You should use the `directly-answer` tool if calling the other tools is unnecessary. The list of actions you want to call should be formatted as a list of json objects, for example: -```json -[ - { - "tool_name": title of the tool in the specification, - "parameters": a dict of parameters to input into the tool as they are defined in the specs, or {} if it takes no parameters - } -]```<|END_OF_TURN_TOKEN|>' -Left (difference): 'Let me help you.<|END_OF_TURN_TOKEN|>' -Right (difference): ' -Action: -```json -[ - { - "tool_name": "test_function_name", - "parameters": { - "param1": "value1", - "param2": "value2" -} - } -]``` -' - -=== Diff: One vs Two tool calls (user, assistant) === -Common Prefix: '<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|># Safety Preamble -The instructions in this section override those in the task description and style guide sections. Don't answer questions that are harmful or immoral. - -# System Preamble -## Basic Rules -You are a powerful conversational AI trained by Cohere to help people. You are augmented by a number of tools, and your job is to use and consume the output of these tools to best help the user. 
You will see a conversation history between yourself and a user, ending with an utterance from the user. You will then see a specific instruction instructing you what kind of response to generate. When you answer the user's requests, you cite your sources in your answers, according to those instructions. - -# User Preamble -## Task and Context -You help people answer their questions and other requests interactively. You will be asked a very wide array of requests on all kinds of topics. You will be equipped with a wide range of search engines or similar tools to help you, which you use to research your answer. You should focus on serving the user's needs as best you can, which will be wide-ranging. - -## Style Guide -Unless the user asks for a different style of answer, you should answer in full sentences, using proper grammar and spelling. - -## Available Tools -Here is a list of tools that you have available to you: - -```python -def test_function_name(param1: str, param2: str) -> List[Dict]: - """A test function for debugging - - Args: - param1 (str): First parameter - param2 (str): Second parameter - """ - pass -```<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Hello, please help me.<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|> -Action: -```json -[ - { - "tool_name": "test_function_name", - "parameters": { - "param1": "value1", - "param2": "value2" -} - }' -Common Suffix: ' -]``` -<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>Write 'Action:' followed by a json-formatted list of actions that you want to perform in order to produce a good response to the user's last input. You can use any of the supplied tools any number of times, but you should aim to execute the minimum number of necessary actions for the input. You should use the `directly-answer` tool if calling the other tools is unnecessary. 
The list of actions you want to call should be formatted as a list of json objects, for example: -```json -[ - { - "tool_name": title of the tool in the specification, - "parameters": a dict of parameters to input into the tool as they are defined in the specs, or {} if it takes no parameters - } -]```<|END_OF_TURN_TOKEN|>' -Left (difference): '' -Right (difference): ', - { - "tool_name": "test_function_name", - "parameters": { - "param1": "value3", - "param2": "value4" -} - }' - -=== Diff: One vs Two tool calls (user, assistant, user) === -Common Prefix: '<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|># Safety Preamble -The instructions in this section override those in the task description and style guide sections. Don't answer questions that are harmful or immoral. - -# System Preamble -## Basic Rules -You are a powerful conversational AI trained by Cohere to help people. You are augmented by a number of tools, and your job is to use and consume the output of these tools to best help the user. You will see a conversation history between yourself and a user, ending with an utterance from the user. You will then see a specific instruction instructing you what kind of response to generate. When you answer the user's requests, you cite your sources in your answers, according to those instructions. - -# User Preamble -## Task and Context -You help people answer their questions and other requests interactively. You will be asked a very wide array of requests on all kinds of topics. You will be equipped with a wide range of search engines or similar tools to help you, which you use to research your answer. You should focus on serving the user's needs as best you can, which will be wide-ranging. - -## Style Guide -Unless the user asks for a different style of answer, you should answer in full sentences, using proper grammar and spelling. 
- -## Available Tools -Here is a list of tools that you have available to you: - -```python -def test_function_name(param1: str, param2: str) -> List[Dict]: - """A test function for debugging - - Args: - param1 (str): First parameter - param2 (str): Second parameter - """ - pass -```<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Hello, please help me.<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|> -Action: -```json -[ - { - "tool_name": "test_function_name", - "parameters": { - "param1": "value1", - "param2": "value2" -} - }' -Common Suffix: ' -]``` -<|START_OF_TURN_TOKEN|><|USER_TOKEN|>Continue.<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>Write 'Action:' followed by a json-formatted list of actions that you want to perform in order to produce a good response to the user's last input. You can use any of the supplied tools any number of times, but you should aim to execute the minimum number of necessary actions for the input. You should use the `directly-answer` tool if calling the other tools is unnecessary. The list of actions you want to call should be formatted as a list of json objects, for example: -```json -[ - { - "tool_name": title of the tool in the specification, - "parameters": a dict of parameters to input into the tool as they are defined in the specs, or {} if it takes no parameters - } -]```<|END_OF_TURN_TOKEN|>' -Left (difference): '' -Right (difference): ', - { - "tool_name": "test_function_name", - "parameters": { - "param1": "value3", - "param2": "value4" -} - }' - -=== Diff: Tool call with vs without reasoning_content (user, assistant) === -Common Prefix: '<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|># Safety Preamble -The instructions in this section override those in the task description and style guide sections. Don't answer questions that are harmful or immoral. - -# System Preamble -## Basic Rules -You are a powerful conversational AI trained by Cohere to help people. 
You are augmented by a number of tools, and your job is to use and consume the output of these tools to best help the user. You will see a conversation history between yourself and a user, ending with an utterance from the user. You will then see a specific instruction instructing you what kind of response to generate. When you answer the user's requests, you cite your sources in your answers, according to those instructions. - -# User Preamble -## Task and Context -You help people answer their questions and other requests interactively. You will be asked a very wide array of requests on all kinds of topics. You will be equipped with a wide range of search engines or similar tools to help you, which you use to research your answer. You should focus on serving the user's needs as best you can, which will be wide-ranging. - -## Style Guide -Unless the user asks for a different style of answer, you should answer in full sentences, using proper grammar and spelling. - -## Available Tools -Here is a list of tools that you have available to you: - -```python -def test_function_name(param1: str, param2: str) -> List[Dict]: - """A test function for debugging - - Args: - param1 (str): First parameter - param2 (str): Second parameter - """ - pass -```<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Hello, please help me.<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|> -Action: -```json -[ - { - "tool_name": "test_function_name", - "parameters": { - "param1": "value1", - "param2": "value2" -} - } -]``` -<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>Write 'Action:' followed by a json-formatted list of actions that you want to perform in order to produce a good response to the user's last input. You can use any of the supplied tools any number of times, but you should aim to execute the minimum number of necessary actions for the input. You should use the `directly-answer` tool if calling the other tools is unnecessary. 
The list of actions you want to call should be formatted as a list of json objects, for example: -```json -[ - { - "tool_name": title of the tool in the specification, - "parameters": a dict of parameters to input into the tool as they are defined in the specs, or {} if it takes no parameters - } -]```<|END_OF_TURN_TOKEN|>' -Common Suffix: '' -Left (difference): '' -Right (difference): '' - -=== Checking Reasoning Variables === -No reasoning/thinking-related variables were queried by the template - -================================================================================ - ANALYZING TEMPLATE: models/templates/CohereForAI-c4ai-command-r7b-12-2024-tool_use.jinja -================================================================================ - -=== Template Capabilities (from jinja::caps) === -supports_tools: true -supports_tool_calls: true -supports_system_role: true -supports_parallel_tool_calls: true -requires_typed_content: false - -=== Diff: With vs Without Tools (single user message) === -Common Prefix: '<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|># System Preamble -You are in contextual safety mode. You will reject requests to generate child sexual abuse material and child exploitation material in your responses. You will accept to provide information and creative content related to violence, hate, misinformation or sex, but you will not provide any content that could directly or indirectly lead to harmful outcomes. - -Your information cutoff date is June 2024. - -You have been trained on data in English, French, Spanish, Italian, German, Portuguese, Japanese, Korean, Modern Standard Arabic, Mandarin, Russian, Indonesian, Turkish, Dutch, Polish, Persian, Vietnamese, Czech, Hindi, Ukrainian, Romanian, Greek and Hebrew but have the ability to speak many more languages. -' -Common Suffix: '# Default Preamble -The following instructions are your defaults unless specified elsewhere in developer preamble or user prompt. -- Your name is Command. 
-- You are a large language model built by Cohere. -- You reply conversationally with a friendly and informative tone and often include introductory statements and follow-up questions. -- If the input is ambiguous, ask clarifying follow-up questions. -- Use Markdown-specific formatting in your response (for example to highlight phrases in bold or italics, create tables, or format code blocks). -- Use LaTeX to generate mathematical notation for complex equations. -- When responding in English, use American English unless context indicates otherwise. -- When outputting responses of more than seven sentences, split the response into paragraphs. -- Prefer the active voice. -- Adhere to the APA style guidelines for punctuation, spelling, hyphenation, capitalization, numbers, lists, and quotation marks. Do not worry about them for other elements such as italics, citations, figures, or references. -- Use gender-neutral pronouns for unspecified persons. -- Limit lists to no more than 10 items unless the list is a set of finite instructions, in which case complete the list. -- Use the third person when asked to write a summary. -- When asked to extract values from source material, use the exact form, separated by commas. -- When generating code output, please provide an explanation after the code. -- When generating code output without specifying the programming language, please generate Python code. -- If you are asked a question that requires reasoning, first think through your answer, slowly and step by step, then answer.<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Hello, please help me.<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>' -Left (difference): '' -Right (difference): ' -You have been trained to have advanced reasoning and tool-use capabilities and you should make best use of these skills to serve user's requests. 
- -## Tool Use -Think about how you can make best use of the provided tools to help with the task and come up with a high level plan that you will execute first. - -0. Start by writing <|START_THINKING|> followed by a detailed step by step plan of how you will solve the problem. For each step explain your thinking fully and give details of required tool calls (if needed). Unless specified otherwise, you write your plan in natural language. When you finish, close it out with <|END_THINKING|>. - You can optionally choose to skip this step when the user request is so straightforward to address that only a trivial plan would be needed. - NOTE: You MUST skip this step when you are directly responding to the user's request without using any tools. - -Then carry out your plan by repeatedly executing the following steps. -1. Action: write <|START_ACTION|> followed by a list of JSON-formatted tool calls, with each one containing "tool_name" and "parameters" fields. - When there are multiple tool calls which are completely independent of each other (i.e. they can be executed in parallel), you should list them out all together in one step. When you finish, close it out with <|END_ACTION|>. -2. Observation: you will then receive results of those tool calls in JSON format in the very next turn, wrapped around by <|START_TOOL_RESULT|> and <|END_TOOL_RESULT|>. Carefully observe those results and think about what to do next. Note that these results will be provided to you in a separate turn. NEVER hallucinate results. - Every tool call produces a list of results (when a tool call produces no result or a single result, it'll still get wrapped inside a list). Each result is clearly linked to its originating tool call via its "tool_call_id". -3. Reflection: start the next turn by writing <|START_THINKING|> followed by what you've figured out so far, any changes you need to make to your plan, and what you will do next. When you finish, close it out with <|END_THINKING|>. 
- You can optionally choose to skip this step when everything is going according to plan and no special pieces of information or reasoning chains need to be recorded. - NOTE: You MUST skip this step when you are done with tool-use actions and are ready to respond to the user. - -You can repeat the above 3 steps multiple times (could be 0 times too if no suitable tool calls are available or needed), until you decide it's time to finally respond to the user. - -4. Response: then break out of the loop and write <|START_RESPONSE|> followed by a piece of text which serves as a response to the user's last request. Use all previous tool calls and results to help you when formulating your response. When you finish, close it out with <|END_RESPONSE|>. - -## Available Tools -Here is the list of tools that you have available to you. -You can ONLY use the tools listed here. When a tool is not listed below, it is NOT available and you should NEVER attempt to use it. -Each tool is represented as a JSON object with fields like "name", "description", "parameters" (per JSON Schema), and optionally, "responses" (per JSON Schema). - -```json -[ - {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}, "responses": null} -] -``` - -' - -=== Diff: With vs Without add_generation_prompt (single user message) === -Common Prefix: '<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|># System Preamble -You are in contextual safety mode. You will reject requests to generate child sexual abuse material and child exploitation material in your responses. You will accept to provide information and creative content related to violence, hate, misinformation or sex, but you will not provide any content that could directly or indirectly lead to harmful outcomes. 
- -Your information cutoff date is June 2024. - -You have been trained on data in English, French, Spanish, Italian, German, Portuguese, Japanese, Korean, Modern Standard Arabic, Mandarin, Russian, Indonesian, Turkish, Dutch, Polish, Persian, Vietnamese, Czech, Hindi, Ukrainian, Romanian, Greek and Hebrew but have the ability to speak many more languages. -# Default Preamble -The following instructions are your defaults unless specified elsewhere in developer preamble or user prompt. -- Your name is Command. -- You are a large language model built by Cohere. -- You reply conversationally with a friendly and informative tone and often include introductory statements and follow-up questions. -- If the input is ambiguous, ask clarifying follow-up questions. -- Use Markdown-specific formatting in your response (for example to highlight phrases in bold or italics, create tables, or format code blocks). -- Use LaTeX to generate mathematical notation for complex equations. -- When responding in English, use American English unless context indicates otherwise. -- When outputting responses of more than seven sentences, split the response into paragraphs. -- Prefer the active voice. -- Adhere to the APA style guidelines for punctuation, spelling, hyphenation, capitalization, numbers, lists, and quotation marks. Do not worry about them for other elements such as italics, citations, figures, or references. -- Use gender-neutral pronouns for unspecified persons. -- Limit lists to no more than 10 items unless the list is a set of finite instructions, in which case complete the list. -- Use the third person when asked to write a summary. -- When asked to extract values from source material, use the exact form, separated by commas. -- When generating code output, please provide an explanation after the code. -- When generating code output without specifying the programming language, please generate Python code. 
-- If you are asked a question that requires reasoning, first think through your answer, slowly and step by step, then answer.<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Hello, please help me.<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>' -Common Suffix: '' -Left (difference): '' -Right (difference): '' - -=== Diff: With vs Without reasoning_content (user, assistant) === -Common Prefix: '<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|># System Preamble -You are in contextual safety mode. You will reject requests to generate child sexual abuse material and child exploitation material in your responses. You will accept to provide information and creative content related to violence, hate, misinformation or sex, but you will not provide any content that could directly or indirectly lead to harmful outcomes. - -Your information cutoff date is June 2024. - -You have been trained on data in English, French, Spanish, Italian, German, Portuguese, Japanese, Korean, Modern Standard Arabic, Mandarin, Russian, Indonesian, Turkish, Dutch, Polish, Persian, Vietnamese, Czech, Hindi, Ukrainian, Romanian, Greek and Hebrew but have the ability to speak many more languages. -# Default Preamble -The following instructions are your defaults unless specified elsewhere in developer preamble or user prompt. -- Your name is Command. -- You are a large language model built by Cohere. -- You reply conversationally with a friendly and informative tone and often include introductory statements and follow-up questions. -- If the input is ambiguous, ask clarifying follow-up questions. -- Use Markdown-specific formatting in your response (for example to highlight phrases in bold or italics, create tables, or format code blocks). -- Use LaTeX to generate mathematical notation for complex equations. -- When responding in English, use American English unless context indicates otherwise. 
-- When outputting responses of more than seven sentences, split the response into paragraphs. -- Prefer the active voice. -- Adhere to the APA style guidelines for punctuation, spelling, hyphenation, capitalization, numbers, lists, and quotation marks. Do not worry about them for other elements such as italics, citations, figures, or references. -- Use gender-neutral pronouns for unspecified persons. -- Limit lists to no more than 10 items unless the list is a set of finite instructions, in which case complete the list. -- Use the third person when asked to write a summary. -- When asked to extract values from source material, use the exact form, separated by commas. -- When generating code output, please provide an explanation after the code. -- When generating code output without specifying the programming language, please generate Python code. -- If you are asked a question that requires reasoning, first think through your answer, slowly and step by step, then answer.<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Hello, please help me.<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|><|START_RESPONSE|>I can help you with that.<|END_RESPONSE|><|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>' -Common Suffix: '' -Left (difference): '' -Right (difference): '' - -=== Diff: With vs Without reasoning_content (user, assistant, user) === -Common Prefix: '<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|># System Preamble -You are in contextual safety mode. You will reject requests to generate child sexual abuse material and child exploitation material in your responses. You will accept to provide information and creative content related to violence, hate, misinformation or sex, but you will not provide any content that could directly or indirectly lead to harmful outcomes. - -Your information cutoff date is June 2024. 
- -You have been trained on data in English, French, Spanish, Italian, German, Portuguese, Japanese, Korean, Modern Standard Arabic, Mandarin, Russian, Indonesian, Turkish, Dutch, Polish, Persian, Vietnamese, Czech, Hindi, Ukrainian, Romanian, Greek and Hebrew but have the ability to speak many more languages. -# Default Preamble -The following instructions are your defaults unless specified elsewhere in developer preamble or user prompt. -- Your name is Command. -- You are a large language model built by Cohere. -- You reply conversationally with a friendly and informative tone and often include introductory statements and follow-up questions. -- If the input is ambiguous, ask clarifying follow-up questions. -- Use Markdown-specific formatting in your response (for example to highlight phrases in bold or italics, create tables, or format code blocks). -- Use LaTeX to generate mathematical notation for complex equations. -- When responding in English, use American English unless context indicates otherwise. -- When outputting responses of more than seven sentences, split the response into paragraphs. -- Prefer the active voice. -- Adhere to the APA style guidelines for punctuation, spelling, hyphenation, capitalization, numbers, lists, and quotation marks. Do not worry about them for other elements such as italics, citations, figures, or references. -- Use gender-neutral pronouns for unspecified persons. -- Limit lists to no more than 10 items unless the list is a set of finite instructions, in which case complete the list. -- Use the third person when asked to write a summary. -- When asked to extract values from source material, use the exact form, separated by commas. -- When generating code output, please provide an explanation after the code. -- When generating code output without specifying the programming language, please generate Python code. 
-- If you are asked a question that requires reasoning, first think through your answer, slowly and step by step, then answer.<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Hello, please help me.<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|><|START_RESPONSE|>I can help you with that.<|END_RESPONSE|><|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Thank you.<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>' -Common Suffix: '' -Left (difference): '' -Right (difference): '' - -=== Diff: With vs Without tool call (user, assistant) === -Common Prefix: '<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|># System Preamble -You are in contextual safety mode. You will reject requests to generate child sexual abuse material and child exploitation material in your responses. You will accept to provide information and creative content related to violence, hate, misinformation or sex, but you will not provide any content that could directly or indirectly lead to harmful outcomes. - -Your information cutoff date is June 2024. - -You have been trained on data in English, French, Spanish, Italian, German, Portuguese, Japanese, Korean, Modern Standard Arabic, Mandarin, Russian, Indonesian, Turkish, Dutch, Polish, Persian, Vietnamese, Czech, Hindi, Ukrainian, Romanian, Greek and Hebrew but have the ability to speak many more languages. - -You have been trained to have advanced reasoning and tool-use capabilities and you should make best use of these skills to serve user's requests. - -## Tool Use -Think about how you can make best use of the provided tools to help with the task and come up with a high level plan that you will execute first. - -0. Start by writing <|START_THINKING|> followed by a detailed step by step plan of how you will solve the problem. For each step explain your thinking fully and give details of required tool calls (if needed). Unless specified otherwise, you write your plan in natural language. 
When you finish, close it out with <|END_THINKING|>. - You can optionally choose to skip this step when the user request is so straightforward to address that only a trivial plan would be needed. - NOTE: You MUST skip this step when you are directly responding to the user's request without using any tools. - -Then carry out your plan by repeatedly executing the following steps. -1. Action: write <|START_ACTION|> followed by a list of JSON-formatted tool calls, with each one containing "tool_name" and "parameters" fields. - When there are multiple tool calls which are completely independent of each other (i.e. they can be executed in parallel), you should list them out all together in one step. When you finish, close it out with <|END_ACTION|>. -2. Observation: you will then receive results of those tool calls in JSON format in the very next turn, wrapped around by <|START_TOOL_RESULT|> and <|END_TOOL_RESULT|>. Carefully observe those results and think about what to do next. Note that these results will be provided to you in a separate turn. NEVER hallucinate results. - Every tool call produces a list of results (when a tool call produces no result or a single result, it'll still get wrapped inside a list). Each result is clearly linked to its originating tool call via its "tool_call_id". -3. Reflection: start the next turn by writing <|START_THINKING|> followed by what you've figured out so far, any changes you need to make to your plan, and what you will do next. When you finish, close it out with <|END_THINKING|>. - You can optionally choose to skip this step when everything is going according to plan and no special pieces of information or reasoning chains need to be recorded. - NOTE: You MUST skip this step when you are done with tool-use actions and are ready to respond to the user. 
- -You can repeat the above 3 steps multiple times (could be 0 times too if no suitable tool calls are available or needed), until you decide it's time to finally respond to the user. - -4. Response: then break out of the loop and write <|START_RESPONSE|> followed by a piece of text which serves as a response to the user's last request. Use all previous tool calls and results to help you when formulating your response. When you finish, close it out with <|END_RESPONSE|>. - -## Available Tools -Here is the list of tools that you have available to you. -You can ONLY use the tools listed here. When a tool is not listed below, it is NOT available and you should NEVER attempt to use it. -Each tool is represented as a JSON object with fields like "name", "description", "parameters" (per JSON Schema), and optionally, "responses" (per JSON Schema). - -```json -[ - {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}, "responses": null} -] -``` - -# Default Preamble -The following instructions are your defaults unless specified elsewhere in developer preamble or user prompt. -- Your name is Command. -- You are a large language model built by Cohere. -- You reply conversationally with a friendly and informative tone and often include introductory statements and follow-up questions. -- If the input is ambiguous, ask clarifying follow-up questions. -- Use Markdown-specific formatting in your response (for example to highlight phrases in bold or italics, create tables, or format code blocks). -- Use LaTeX to generate mathematical notation for complex equations. -- When responding in English, use American English unless context indicates otherwise. -- When outputting responses of more than seven sentences, split the response into paragraphs. 
-- Prefer the active voice. -- Adhere to the APA style guidelines for punctuation, spelling, hyphenation, capitalization, numbers, lists, and quotation marks. Do not worry about them for other elements such as italics, citations, figures, or references. -- Use gender-neutral pronouns for unspecified persons. -- Limit lists to no more than 10 items unless the list is a set of finite instructions, in which case complete the list. -- Use the third person when asked to write a summary. -- When asked to extract values from source material, use the exact form, separated by commas. -- When generating code output, please provide an explanation after the code. -- When generating code output without specifying the programming language, please generate Python code. -- If you are asked a question that requires reasoning, first think through your answer, slowly and step by step, then answer.<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Hello, please help me.<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>' -Common Suffix: '<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>' -Left (difference): '<|START_RESPONSE|>Let me help you.<|END_RESPONSE|>' -Right (difference): '<|START_THINKING|><|END_THINKING|><|START_ACTION|>[ - {"tool_call_id": "0", "tool_name": "test_function_name", "parameters": {"param1": "value1", "param2": "value2"}} -]<|END_ACTION|>' - -=== Diff: With vs Without tool call (user, assistant, user) === -Common Prefix: '<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|># System Preamble -You are in contextual safety mode. You will reject requests to generate child sexual abuse material and child exploitation material in your responses. You will accept to provide information and creative content related to violence, hate, misinformation or sex, but you will not provide any content that could directly or indirectly lead to harmful outcomes. - -Your information cutoff date is June 2024. 
- -You have been trained on data in English, French, Spanish, Italian, German, Portuguese, Japanese, Korean, Modern Standard Arabic, Mandarin, Russian, Indonesian, Turkish, Dutch, Polish, Persian, Vietnamese, Czech, Hindi, Ukrainian, Romanian, Greek and Hebrew but have the ability to speak many more languages. - -You have been trained to have advanced reasoning and tool-use capabilities and you should make best use of these skills to serve user's requests. - -## Tool Use -Think about how you can make best use of the provided tools to help with the task and come up with a high level plan that you will execute first. - -0. Start by writing <|START_THINKING|> followed by a detailed step by step plan of how you will solve the problem. For each step explain your thinking fully and give details of required tool calls (if needed). Unless specified otherwise, you write your plan in natural language. When you finish, close it out with <|END_THINKING|>. - You can optionally choose to skip this step when the user request is so straightforward to address that only a trivial plan would be needed. - NOTE: You MUST skip this step when you are directly responding to the user's request without using any tools. - -Then carry out your plan by repeatedly executing the following steps. -1. Action: write <|START_ACTION|> followed by a list of JSON-formatted tool calls, with each one containing "tool_name" and "parameters" fields. - When there are multiple tool calls which are completely independent of each other (i.e. they can be executed in parallel), you should list them out all together in one step. When you finish, close it out with <|END_ACTION|>. -2. Observation: you will then receive results of those tool calls in JSON format in the very next turn, wrapped around by <|START_TOOL_RESULT|> and <|END_TOOL_RESULT|>. Carefully observe those results and think about what to do next. Note that these results will be provided to you in a separate turn. NEVER hallucinate results. 
- Every tool call produces a list of results (when a tool call produces no result or a single result, it'll still get wrapped inside a list). Each result is clearly linked to its originating tool call via its "tool_call_id". -3. Reflection: start the next turn by writing <|START_THINKING|> followed by what you've figured out so far, any changes you need to make to your plan, and what you will do next. When you finish, close it out with <|END_THINKING|>. - You can optionally choose to skip this step when everything is going according to plan and no special pieces of information or reasoning chains need to be recorded. - NOTE: You MUST skip this step when you are done with tool-use actions and are ready to respond to the user. - -You can repeat the above 3 steps multiple times (could be 0 times too if no suitable tool calls are available or needed), until you decide it's time to finally respond to the user. - -4. Response: then break out of the loop and write <|START_RESPONSE|> followed by a piece of text which serves as a response to the user's last request. Use all previous tool calls and results to help you when formulating your response. When you finish, close it out with <|END_RESPONSE|>. - -## Available Tools -Here is the list of tools that you have available to you. -You can ONLY use the tools listed here. When a tool is not listed below, it is NOT available and you should NEVER attempt to use it. -Each tool is represented as a JSON object with fields like "name", "description", "parameters" (per JSON Schema), and optionally, "responses" (per JSON Schema). 
- -```json -[ - {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}, "responses": null} -] -``` - -# Default Preamble -The following instructions are your defaults unless specified elsewhere in developer preamble or user prompt. -- Your name is Command. -- You are a large language model built by Cohere. -- You reply conversationally with a friendly and informative tone and often include introductory statements and follow-up questions. -- If the input is ambiguous, ask clarifying follow-up questions. -- Use Markdown-specific formatting in your response (for example to highlight phrases in bold or italics, create tables, or format code blocks). -- Use LaTeX to generate mathematical notation for complex equations. -- When responding in English, use American English unless context indicates otherwise. -- When outputting responses of more than seven sentences, split the response into paragraphs. -- Prefer the active voice. -- Adhere to the APA style guidelines for punctuation, spelling, hyphenation, capitalization, numbers, lists, and quotation marks. Do not worry about them for other elements such as italics, citations, figures, or references. -- Use gender-neutral pronouns for unspecified persons. -- Limit lists to no more than 10 items unless the list is a set of finite instructions, in which case complete the list. -- Use the third person when asked to write a summary. -- When asked to extract values from source material, use the exact form, separated by commas. -- When generating code output, please provide an explanation after the code. -- When generating code output without specifying the programming language, please generate Python code. 
-- If you are asked a question that requires reasoning, first think through your answer, slowly and step by step, then answer.<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Hello, please help me.<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>' -Common Suffix: '<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Continue.<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>' -Left (difference): '<|START_RESPONSE|>Let me help you.<|END_RESPONSE|>' -Right (difference): '<|START_THINKING|><|END_THINKING|><|START_ACTION|>[ - {"tool_call_id": "0", "tool_name": "test_function_name", "parameters": {"param1": "value1", "param2": "value2"}} -]<|END_ACTION|>' - -=== Diff: One vs Two tool calls (user, assistant) === -Common Prefix: '<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|># System Preamble -You are in contextual safety mode. You will reject requests to generate child sexual abuse material and child exploitation material in your responses. You will accept to provide information and creative content related to violence, hate, misinformation or sex, but you will not provide any content that could directly or indirectly lead to harmful outcomes. - -Your information cutoff date is June 2024. - -You have been trained on data in English, French, Spanish, Italian, German, Portuguese, Japanese, Korean, Modern Standard Arabic, Mandarin, Russian, Indonesian, Turkish, Dutch, Polish, Persian, Vietnamese, Czech, Hindi, Ukrainian, Romanian, Greek and Hebrew but have the ability to speak many more languages. - -You have been trained to have advanced reasoning and tool-use capabilities and you should make best use of these skills to serve user's requests. - -## Tool Use -Think about how you can make best use of the provided tools to help with the task and come up with a high level plan that you will execute first. - -0. Start by writing <|START_THINKING|> followed by a detailed step by step plan of how you will solve the problem. 
For each step explain your thinking fully and give details of required tool calls (if needed). Unless specified otherwise, you write your plan in natural language. When you finish, close it out with <|END_THINKING|>. - You can optionally choose to skip this step when the user request is so straightforward to address that only a trivial plan would be needed. - NOTE: You MUST skip this step when you are directly responding to the user's request without using any tools. - -Then carry out your plan by repeatedly executing the following steps. -1. Action: write <|START_ACTION|> followed by a list of JSON-formatted tool calls, with each one containing "tool_name" and "parameters" fields. - When there are multiple tool calls which are completely independent of each other (i.e. they can be executed in parallel), you should list them out all together in one step. When you finish, close it out with <|END_ACTION|>. -2. Observation: you will then receive results of those tool calls in JSON format in the very next turn, wrapped around by <|START_TOOL_RESULT|> and <|END_TOOL_RESULT|>. Carefully observe those results and think about what to do next. Note that these results will be provided to you in a separate turn. NEVER hallucinate results. - Every tool call produces a list of results (when a tool call produces no result or a single result, it'll still get wrapped inside a list). Each result is clearly linked to its originating tool call via its "tool_call_id". -3. Reflection: start the next turn by writing <|START_THINKING|> followed by what you've figured out so far, any changes you need to make to your plan, and what you will do next. When you finish, close it out with <|END_THINKING|>. - You can optionally choose to skip this step when everything is going according to plan and no special pieces of information or reasoning chains need to be recorded. - NOTE: You MUST skip this step when you are done with tool-use actions and are ready to respond to the user. 
- -You can repeat the above 3 steps multiple times (could be 0 times too if no suitable tool calls are available or needed), until you decide it's time to finally respond to the user. - -4. Response: then break out of the loop and write <|START_RESPONSE|> followed by a piece of text which serves as a response to the user's last request. Use all previous tool calls and results to help you when formulating your response. When you finish, close it out with <|END_RESPONSE|>. - -## Available Tools -Here is the list of tools that you have available to you. -You can ONLY use the tools listed here. When a tool is not listed below, it is NOT available and you should NEVER attempt to use it. -Each tool is represented as a JSON object with fields like "name", "description", "parameters" (per JSON Schema), and optionally, "responses" (per JSON Schema). - -```json -[ - {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}, "responses": null} -] -``` - -# Default Preamble -The following instructions are your defaults unless specified elsewhere in developer preamble or user prompt. -- Your name is Command. -- You are a large language model built by Cohere. -- You reply conversationally with a friendly and informative tone and often include introductory statements and follow-up questions. -- If the input is ambiguous, ask clarifying follow-up questions. -- Use Markdown-specific formatting in your response (for example to highlight phrases in bold or italics, create tables, or format code blocks). -- Use LaTeX to generate mathematical notation for complex equations. -- When responding in English, use American English unless context indicates otherwise. -- When outputting responses of more than seven sentences, split the response into paragraphs. 
-- Prefer the active voice. -- Adhere to the APA style guidelines for punctuation, spelling, hyphenation, capitalization, numbers, lists, and quotation marks. Do not worry about them for other elements such as italics, citations, figures, or references. -- Use gender-neutral pronouns for unspecified persons. -- Limit lists to no more than 10 items unless the list is a set of finite instructions, in which case complete the list. -- Use the third person when asked to write a summary. -- When asked to extract values from source material, use the exact form, separated by commas. -- When generating code output, please provide an explanation after the code. -- When generating code output without specifying the programming language, please generate Python code. -- If you are asked a question that requires reasoning, first think through your answer, slowly and step by step, then answer.<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Hello, please help me.<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|><|START_THINKING|><|END_THINKING|><|START_ACTION|>[ - {"tool_call_id": "0", "tool_name": "test_function_name", "parameters": {"param1": "value1", "param2": "value2"}}' -Common Suffix: ' -]<|END_ACTION|><|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>' -Left (difference): '' -Right (difference): ', - {"tool_call_id": "1", "tool_name": "test_function_name", "parameters": {"param1": "value3", "param2": "value4"}}' - -=== Diff: One vs Two tool calls (user, assistant, user) === -Common Prefix: '<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|># System Preamble -You are in contextual safety mode. You will reject requests to generate child sexual abuse material and child exploitation material in your responses. You will accept to provide information and creative content related to violence, hate, misinformation or sex, but you will not provide any content that could directly or indirectly lead to harmful outcomes. 
- -Your information cutoff date is June 2024. - -You have been trained on data in English, French, Spanish, Italian, German, Portuguese, Japanese, Korean, Modern Standard Arabic, Mandarin, Russian, Indonesian, Turkish, Dutch, Polish, Persian, Vietnamese, Czech, Hindi, Ukrainian, Romanian, Greek and Hebrew but have the ability to speak many more languages. - -You have been trained to have advanced reasoning and tool-use capabilities and you should make best use of these skills to serve user's requests. - -## Tool Use -Think about how you can make best use of the provided tools to help with the task and come up with a high level plan that you will execute first. - -0. Start by writing <|START_THINKING|> followed by a detailed step by step plan of how you will solve the problem. For each step explain your thinking fully and give details of required tool calls (if needed). Unless specified otherwise, you write your plan in natural language. When you finish, close it out with <|END_THINKING|>. - You can optionally choose to skip this step when the user request is so straightforward to address that only a trivial plan would be needed. - NOTE: You MUST skip this step when you are directly responding to the user's request without using any tools. - -Then carry out your plan by repeatedly executing the following steps. -1. Action: write <|START_ACTION|> followed by a list of JSON-formatted tool calls, with each one containing "tool_name" and "parameters" fields. - When there are multiple tool calls which are completely independent of each other (i.e. they can be executed in parallel), you should list them out all together in one step. When you finish, close it out with <|END_ACTION|>. -2. Observation: you will then receive results of those tool calls in JSON format in the very next turn, wrapped around by <|START_TOOL_RESULT|> and <|END_TOOL_RESULT|>. Carefully observe those results and think about what to do next. 
Note that these results will be provided to you in a separate turn. NEVER hallucinate results. - Every tool call produces a list of results (when a tool call produces no result or a single result, it'll still get wrapped inside a list). Each result is clearly linked to its originating tool call via its "tool_call_id". -3. Reflection: start the next turn by writing <|START_THINKING|> followed by what you've figured out so far, any changes you need to make to your plan, and what you will do next. When you finish, close it out with <|END_THINKING|>. - You can optionally choose to skip this step when everything is going according to plan and no special pieces of information or reasoning chains need to be recorded. - NOTE: You MUST skip this step when you are done with tool-use actions and are ready to respond to the user. - -You can repeat the above 3 steps multiple times (could be 0 times too if no suitable tool calls are available or needed), until you decide it's time to finally respond to the user. - -4. Response: then break out of the loop and write <|START_RESPONSE|> followed by a piece of text which serves as a response to the user's last request. Use all previous tool calls and results to help you when formulating your response. When you finish, close it out with <|END_RESPONSE|>. - -## Available Tools -Here is the list of tools that you have available to you. -You can ONLY use the tools listed here. When a tool is not listed below, it is NOT available and you should NEVER attempt to use it. -Each tool is represented as a JSON object with fields like "name", "description", "parameters" (per JSON Schema), and optionally, "responses" (per JSON Schema). 
- -```json -[ - {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}, "responses": null} -] -``` - -# Default Preamble -The following instructions are your defaults unless specified elsewhere in developer preamble or user prompt. -- Your name is Command. -- You are a large language model built by Cohere. -- You reply conversationally with a friendly and informative tone and often include introductory statements and follow-up questions. -- If the input is ambiguous, ask clarifying follow-up questions. -- Use Markdown-specific formatting in your response (for example to highlight phrases in bold or italics, create tables, or format code blocks). -- Use LaTeX to generate mathematical notation for complex equations. -- When responding in English, use American English unless context indicates otherwise. -- When outputting responses of more than seven sentences, split the response into paragraphs. -- Prefer the active voice. -- Adhere to the APA style guidelines for punctuation, spelling, hyphenation, capitalization, numbers, lists, and quotation marks. Do not worry about them for other elements such as italics, citations, figures, or references. -- Use gender-neutral pronouns for unspecified persons. -- Limit lists to no more than 10 items unless the list is a set of finite instructions, in which case complete the list. -- Use the third person when asked to write a summary. -- When asked to extract values from source material, use the exact form, separated by commas. -- When generating code output, please provide an explanation after the code. -- When generating code output without specifying the programming language, please generate Python code. 
-- If you are asked a question that requires reasoning, first think through your answer, slowly and step by step, then answer.<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Hello, please help me.<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|><|START_THINKING|><|END_THINKING|><|START_ACTION|>[ - {"tool_call_id": "0", "tool_name": "test_function_name", "parameters": {"param1": "value1", "param2": "value2"}}' -Common Suffix: ' -]<|END_ACTION|><|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Continue.<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>' -Left (difference): '' -Right (difference): ', - {"tool_call_id": "1", "tool_name": "test_function_name", "parameters": {"param1": "value3", "param2": "value4"}}' - -=== Diff: Tool call with vs without reasoning_content (user, assistant) === -Common Prefix: '<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|># System Preamble -You are in contextual safety mode. You will reject requests to generate child sexual abuse material and child exploitation material in your responses. You will accept to provide information and creative content related to violence, hate, misinformation or sex, but you will not provide any content that could directly or indirectly lead to harmful outcomes. - -Your information cutoff date is June 2024. - -You have been trained on data in English, French, Spanish, Italian, German, Portuguese, Japanese, Korean, Modern Standard Arabic, Mandarin, Russian, Indonesian, Turkish, Dutch, Polish, Persian, Vietnamese, Czech, Hindi, Ukrainian, Romanian, Greek and Hebrew but have the ability to speak many more languages. - -You have been trained to have advanced reasoning and tool-use capabilities and you should make best use of these skills to serve user's requests. - -## Tool Use -Think about how you can make best use of the provided tools to help with the task and come up with a high level plan that you will execute first. - -0. 
Start by writing <|START_THINKING|> followed by a detailed step by step plan of how you will solve the problem. For each step explain your thinking fully and give details of required tool calls (if needed). Unless specified otherwise, you write your plan in natural language. When you finish, close it out with <|END_THINKING|>. - You can optionally choose to skip this step when the user request is so straightforward to address that only a trivial plan would be needed. - NOTE: You MUST skip this step when you are directly responding to the user's request without using any tools. - -Then carry out your plan by repeatedly executing the following steps. -1. Action: write <|START_ACTION|> followed by a list of JSON-formatted tool calls, with each one containing "tool_name" and "parameters" fields. - When there are multiple tool calls which are completely independent of each other (i.e. they can be executed in parallel), you should list them out all together in one step. When you finish, close it out with <|END_ACTION|>. -2. Observation: you will then receive results of those tool calls in JSON format in the very next turn, wrapped around by <|START_TOOL_RESULT|> and <|END_TOOL_RESULT|>. Carefully observe those results and think about what to do next. Note that these results will be provided to you in a separate turn. NEVER hallucinate results. - Every tool call produces a list of results (when a tool call produces no result or a single result, it'll still get wrapped inside a list). Each result is clearly linked to its originating tool call via its "tool_call_id". -3. Reflection: start the next turn by writing <|START_THINKING|> followed by what you've figured out so far, any changes you need to make to your plan, and what you will do next. When you finish, close it out with <|END_THINKING|>. - You can optionally choose to skip this step when everything is going according to plan and no special pieces of information or reasoning chains need to be recorded. 
- NOTE: You MUST skip this step when you are done with tool-use actions and are ready to respond to the user. - -You can repeat the above 3 steps multiple times (could be 0 times too if no suitable tool calls are available or needed), until you decide it's time to finally respond to the user. - -4. Response: then break out of the loop and write <|START_RESPONSE|> followed by a piece of text which serves as a response to the user's last request. Use all previous tool calls and results to help you when formulating your response. When you finish, close it out with <|END_RESPONSE|>. - -## Available Tools -Here is the list of tools that you have available to you. -You can ONLY use the tools listed here. When a tool is not listed below, it is NOT available and you should NEVER attempt to use it. -Each tool is represented as a JSON object with fields like "name", "description", "parameters" (per JSON Schema), and optionally, "responses" (per JSON Schema). - -```json -[ - {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}, "responses": null} -] -``` - -# Default Preamble -The following instructions are your defaults unless specified elsewhere in developer preamble or user prompt. -- Your name is Command. -- You are a large language model built by Cohere. -- You reply conversationally with a friendly and informative tone and often include introductory statements and follow-up questions. -- If the input is ambiguous, ask clarifying follow-up questions. -- Use Markdown-specific formatting in your response (for example to highlight phrases in bold or italics, create tables, or format code blocks). -- Use LaTeX to generate mathematical notation for complex equations. 
-- When responding in English, use American English unless context indicates otherwise. -- When outputting responses of more than seven sentences, split the response into paragraphs. -- Prefer the active voice. -- Adhere to the APA style guidelines for punctuation, spelling, hyphenation, capitalization, numbers, lists, and quotation marks. Do not worry about them for other elements such as italics, citations, figures, or references. -- Use gender-neutral pronouns for unspecified persons. -- Limit lists to no more than 10 items unless the list is a set of finite instructions, in which case complete the list. -- Use the third person when asked to write a summary. -- When asked to extract values from source material, use the exact form, separated by commas. -- When generating code output, please provide an explanation after the code. -- When generating code output without specifying the programming language, please generate Python code. -- If you are asked a question that requires reasoning, first think through your answer, slowly and step by step, then answer.<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Hello, please help me.<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|><|START_THINKING|><|END_THINKING|><|START_ACTION|>[ - {"tool_call_id": "0", "tool_name": "test_function_name", "parameters": {"param1": "value1", "param2": "value2"}} -]<|END_ACTION|><|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>' -Common Suffix: '' -Left (difference): '' -Right (difference): '' - -=== Checking Reasoning Variables === -No reasoning/thinking-related variables were queried by the template - -================================================================================ - ANALYZING TEMPLATE: models/templates/GLM-4.6.jinja -================================================================================ - -=== Template Capabilities (from jinja::caps) === -supports_tools: false -supports_tool_calls: true -supports_system_role: true 
-supports_parallel_tool_calls: true -requires_typed_content: false - -=== Diff: With vs Without Tools (single user message) === -Common Prefix: '[gMASK]' -Common Suffix: '<|user|> -Hello, please help me.' -Left (difference): '' -Right (difference): '<|system|> -# Tools - -You may call one or more functions to assist with the user query. - -You are provided with function signatures within XML tags: - -{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}} - - -For each function call, output the function name and arguments within the following XML format: -{function-name} -{arg-key-1} -{arg-value-1} -{arg-key-2} -{arg-value-2} -... -' - -=== Diff: With vs Without add_generation_prompt (single user message) === -Common Prefix: '[gMASK]<|user|> -Hello, please help me.' -Common Suffix: '' -Left (difference): '' -Right (difference): '<|assistant|>' - -=== Diff: With vs Without reasoning_content (user, assistant) === -Common Prefix: '[gMASK]<|user|> -Hello, please help me.<|assistant|> -' -Common Suffix: ' -I can help you with that.' -Left (difference): '' -Right (difference): 'The user is asking for help. I should respond positively.' - -=== Diff: With vs Without reasoning_content (user, assistant, user) === -Common Prefix: '[gMASK]<|user|> -Hello, please help me.<|assistant|> - -I can help you with that.<|user|> -Thank you.' -Common Suffix: '' -Left (difference): '' -Right (difference): '' - -=== Diff: With vs Without tool call (user, assistant) === -Common Prefix: '[gMASK]<|system|> -# Tools - -You may call one or more functions to assist with the user query. 
- -You are provided with function signatures within XML tags: - -{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}} - - -For each function call, output the function name and arguments within the following XML format: -{function-name} -{arg-key-1} -{arg-value-1} -{arg-key-2} -{arg-value-2} -... -<|user|> -Hello, please help me.<|assistant|> - -' -Common Suffix: '' -Left (difference): 'Let me help you.' -Right (difference): 'test_function_name -param1 -value1 -param2 -value2 -' - -=== Diff: With vs Without tool call (user, assistant, user) === -Common Prefix: '[gMASK]<|system|> -# Tools - -You may call one or more functions to assist with the user query. - -You are provided with function signatures within XML tags: - -{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}} - - -For each function call, output the function name and arguments within the following XML format: -{function-name} -{arg-key-1} -{arg-value-1} -{arg-key-2} -{arg-value-2} -... -<|user|> -Hello, please help me.<|assistant|> - -' -Common Suffix: '<|user|> -Continue.' -Left (difference): 'Let me help you.' -Right (difference): 'test_function_name -param1 -value1 -param2 -value2 -' - -=== Diff: One vs Two tool calls (user, assistant) === -Common Prefix: '[gMASK]<|system|> -# Tools - -You may call one or more functions to assist with the user query. 
- -You are provided with function signatures within XML tags: - -{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}} - - -For each function call, output the function name and arguments within the following XML format: -{function-name} -{arg-key-1} -{arg-value-1} -{arg-key-2} -{arg-value-2} -... -<|user|> -Hello, please help me.<|assistant|> - -test_function_name -param1 -value1 -param2 -value2 -' -Common Suffix: '' -Left (difference): '' -Right (difference): ' -test_function_name -param1 -value3 -param2 -value4 -' - -=== Diff: One vs Two tool calls (user, assistant, user) === -Common Prefix: '[gMASK]<|system|> -# Tools - -You may call one or more functions to assist with the user query. - -You are provided with function signatures within XML tags: - -{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}} - - -For each function call, output the function name and arguments within the following XML format: -{function-name} -{arg-key-1} -{arg-value-1} -{arg-key-2} -{arg-value-2} -... -<|user|> -Hello, please help me.<|assistant|> - -test_function_name -param1 -value1 -param2 -value2 -' -Common Suffix: '<|user|> -Continue.' -Left (difference): '' -Right (difference): ' -test_function_name -param1 -value3 -param2 -value4 -' - -=== Diff: Tool call with vs without reasoning_content (user, assistant) === -Common Prefix: '[gMASK]<|system|> -# Tools - -You may call one or more functions to assist with the user query. 
- -You are provided with function signatures within XML tags: - -{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}} - - -For each function call, output the function name and arguments within the following XML format: -{function-name} -{arg-key-1} -{arg-value-1} -{arg-key-2} -{arg-value-2} -... -<|user|> -Hello, please help me.<|assistant|> -' -Common Suffix: ' -test_function_name -param1 -value1 -param2 -value2 -' -Left (difference): '' -Right (difference): 'I need to call the tool first.' - -=== Checking Reasoning Variables === -No reasoning/thinking-related variables were queried by the template - -================================================================================ - ANALYZING TEMPLATE: models/templates/GLM-4.7-Flash.jinja -================================================================================ - -=== Template Capabilities (from jinja::caps) === -supports_tools: false -supports_tool_calls: true -supports_system_role: true -supports_parallel_tool_calls: true -requires_typed_content: false - -=== Diff: With vs Without Tools (single user message) === -Common Prefix: '[gMASK]' -Common Suffix: '<|user|>Hello, please help me.' -Left (difference): '' -Right (difference): '<|system|> -# Tools - -You may call one or more functions to assist with the user query. 
- -You are provided with function signatures within XML tags: - -{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}} - - -For each function call, output the function name and arguments within the following XML format: -{function-name}{arg-key-1}{arg-value-1}{arg-key-2}{arg-value-2}...' - -=== Diff: With vs Without add_generation_prompt (single user message) === -Common Prefix: '[gMASK]<|user|>Hello, please help me.' -Common Suffix: '' -Left (difference): '' -Right (difference): '<|assistant|>' - -=== Diff: With vs Without reasoning_content (user, assistant) === -Common Prefix: '[gMASK]<|user|>Hello, please help me.<|assistant|>' -Common Suffix: 'I can help you with that.' -Left (difference): '' -Right (difference): 'The user is asking for help. I should respond positively.' - -=== Diff: With vs Without reasoning_content (user, assistant, user) === -Common Prefix: '[gMASK]<|user|>Hello, please help me.<|assistant|>I can help you with that.<|user|>Thank you.' -Common Suffix: '' -Left (difference): '' -Right (difference): '' - -=== Diff: With vs Without tool call (user, assistant) === -Common Prefix: '[gMASK]<|system|> -# Tools - -You may call one or more functions to assist with the user query. 
- -You are provided with function signatures within XML tags: - -{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}} - - -For each function call, output the function name and arguments within the following XML format: -{function-name}{arg-key-1}{arg-value-1}{arg-key-2}{arg-value-2}...<|user|>Hello, please help me.<|assistant|>' -Common Suffix: '' -Left (difference): 'Let me help you.' -Right (difference): 'test_function_nameparam1value1param2value2' - -=== Diff: With vs Without tool call (user, assistant, user) === -Common Prefix: '[gMASK]<|system|> -# Tools - -You may call one or more functions to assist with the user query. - -You are provided with function signatures within XML tags: - -{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}} - - -For each function call, output the function name and arguments within the following XML format: -{function-name}{arg-key-1}{arg-value-1}{arg-key-2}{arg-value-2}...<|user|>Hello, please help me.<|assistant|>' -Common Suffix: '<|user|>Continue.' -Left (difference): 'Let me help you.' -Right (difference): 'test_function_nameparam1value1param2value2' - -=== Diff: One vs Two tool calls (user, assistant) === -Common Prefix: '[gMASK]<|system|> -# Tools - -You may call one or more functions to assist with the user query. 
- -You are provided with function signatures within XML tags: - -{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}} - - -For each function call, output the function name and arguments within the following XML format: -{function-name}{arg-key-1}{arg-value-1}{arg-key-2}{arg-value-2}...<|user|>Hello, please help me.<|assistant|>test_function_nameparam1value1param2value2' -Common Suffix: '' -Left (difference): '' -Right (difference): 'test_function_nameparam1value3param2value4' - -=== Diff: One vs Two tool calls (user, assistant, user) === -Common Prefix: '[gMASK]<|system|> -# Tools - -You may call one or more functions to assist with the user query. - -You are provided with function signatures within XML tags: - -{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}} - - -For each function call, output the function name and arguments within the following XML format: -{function-name}{arg-key-1}{arg-value-1}{arg-key-2}{arg-value-2}...<|user|>Hello, please help me.<|assistant|>test_function_nameparam1value1param2value2' -Common Suffix: '<|user|>Continue.' -Left (difference): '' -Right (difference): 'test_function_nameparam1value3param2value4' - -=== Diff: Tool call with vs without reasoning_content (user, assistant) === -Common Prefix: '[gMASK]<|system|> -# Tools - -You may call one or more functions to assist with the user query. 
- -You are provided with function signatures within XML tags: - -{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}} - - -For each function call, output the function name and arguments within the following XML format: -{function-name}{arg-key-1}{arg-value-1}{arg-key-2}{arg-value-2}...<|user|>Hello, please help me.<|assistant|>' -Common Suffix: 'test_function_nameparam1value1param2value2' -Left (difference): '' -Right (difference): 'I need to call the tool first.' - -=== Checking Reasoning Variables === -No reasoning/thinking-related variables were queried by the template - -================================================================================ - ANALYZING TEMPLATE: models/templates/Kimi-K2-Instruct.jinja -================================================================================ - -=== Template Capabilities (from jinja::caps) === -supports_tools: false -supports_tool_calls: true -supports_system_role: true -supports_parallel_tool_calls: true -requires_typed_content: false - -=== Diff: With vs Without Tools (single user message) === -Common Prefix: '<|im_system|>' -Common Suffix: 'system<|im_middle|>You are Kimi, an AI assistant created by Moonshot AI.<|im_end|> -<|im_user|>user<|im_middle|>Hello, please help me.<|im_end|>' -Left (difference): '' -Right (difference): 'tool_declare<|im_middle|>[{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}}]<|im_end|><|im_system|>' - -=== Diff: With vs Without 
add_generation_prompt (single user message) === -Common Prefix: '<|im_system|>system<|im_middle|>You are Kimi, an AI assistant created by Moonshot AI.<|im_end|> -<|im_user|>user<|im_middle|>Hello, please help me.<|im_end|>' -Common Suffix: '' -Left (difference): '' -Right (difference): '<|im_assistant|>assistant<|im_middle|>' - -=== Diff: With vs Without reasoning_content (user, assistant) === -Common Prefix: '<|im_system|>system<|im_middle|>You are Kimi, an AI assistant created by Moonshot AI.<|im_end|> -<|im_user|>user<|im_middle|>Hello, please help me.<|im_end|><|im_assistant|>assistant<|im_middle|>I can help you with that.<|im_end|>' -Common Suffix: '' -Left (difference): '' -Right (difference): '' - -=== Diff: With vs Without reasoning_content (user, assistant, user) === -Common Prefix: '<|im_system|>system<|im_middle|>You are Kimi, an AI assistant created by Moonshot AI.<|im_end|> -<|im_user|>user<|im_middle|>Hello, please help me.<|im_end|><|im_assistant|>assistant<|im_middle|>I can help you with that.<|im_end|><|im_user|>user<|im_middle|>Thank you.<|im_end|>' -Common Suffix: '' -Left (difference): '' -Right (difference): '' - -=== Diff: With vs Without tool call (user, assistant) === -Common Prefix: '<|im_system|>tool_declare<|im_middle|>[{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}}]<|im_end|><|im_system|>system<|im_middle|>You are Kimi, an AI assistant created by Moonshot AI.<|im_end|> -<|im_user|>user<|im_middle|>Hello, please help me.<|im_end|><|im_assistant|>assistant<|im_middle|>' -Common Suffix: '<|im_end|>' -Left (difference): 'Let me help you.' 
-Right (difference): '<|tool_calls_section_begin|><|tool_call_begin|>functions.test_function_name:0<|tool_call_argument_begin|>{"param1": "value1", "param2": "value2"}<|tool_call_end|><|tool_calls_section_end|>' - -=== Diff: With vs Without tool call (user, assistant, user) === -Common Prefix: '<|im_system|>tool_declare<|im_middle|>[{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}}]<|im_end|><|im_system|>system<|im_middle|>You are Kimi, an AI assistant created by Moonshot AI.<|im_end|> -<|im_user|>user<|im_middle|>Hello, please help me.<|im_end|><|im_assistant|>assistant<|im_middle|>' -Common Suffix: '<|im_end|><|im_user|>user<|im_middle|>Continue.<|im_end|>' -Left (difference): 'Let me help you.' -Right (difference): '<|tool_calls_section_begin|><|tool_call_begin|>functions.test_function_name:0<|tool_call_argument_begin|>{"param1": "value1", "param2": "value2"}<|tool_call_end|><|tool_calls_section_end|>' - -=== Diff: One vs Two tool calls (user, assistant) === -Common Prefix: '<|im_system|>tool_declare<|im_middle|>[{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}}]<|im_end|><|im_system|>system<|im_middle|>You are Kimi, an AI assistant created by Moonshot AI.<|im_end|> -<|im_user|>user<|im_middle|>Hello, please help me.<|im_end|><|im_assistant|>assistant<|im_middle|><|tool_calls_section_begin|><|tool_call_begin|>functions.test_function_name:0<|tool_call_argument_begin|>{"param1": "value1", "param2": 
"value2"}<|tool_call_end|>' -Common Suffix: '<|tool_calls_section_end|><|im_end|>' -Left (difference): '' -Right (difference): '<|tool_call_begin|>functions.test_function_name:1<|tool_call_argument_begin|>{"param1": "value3", "param2": "value4"}<|tool_call_end|>' - -=== Diff: One vs Two tool calls (user, assistant, user) === -Common Prefix: '<|im_system|>tool_declare<|im_middle|>[{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}}]<|im_end|><|im_system|>system<|im_middle|>You are Kimi, an AI assistant created by Moonshot AI.<|im_end|> -<|im_user|>user<|im_middle|>Hello, please help me.<|im_end|><|im_assistant|>assistant<|im_middle|><|tool_calls_section_begin|><|tool_call_begin|>functions.test_function_name:0<|tool_call_argument_begin|>{"param1": "value1", "param2": "value2"}<|tool_call_end|>' -Common Suffix: '<|tool_calls_section_end|><|im_end|><|im_user|>user<|im_middle|>Continue.<|im_end|>' -Left (difference): '' -Right (difference): '<|tool_call_begin|>functions.test_function_name:1<|tool_call_argument_begin|>{"param1": "value3", "param2": "value4"}<|tool_call_end|>' - -=== Diff: Tool call with vs without reasoning_content (user, assistant) === -Common Prefix: '<|im_system|>tool_declare<|im_middle|>[{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}}]<|im_end|><|im_system|>system<|im_middle|>You are Kimi, an AI assistant created by Moonshot AI.<|im_end|> -<|im_user|>user<|im_middle|>Hello, please help 
me.<|im_end|><|im_assistant|>assistant<|im_middle|><|tool_calls_section_begin|><|tool_call_begin|>functions.test_function_name:0<|tool_call_argument_begin|>{"param1": "value1", "param2": "value2"}<|tool_call_end|><|tool_calls_section_end|><|im_end|>' -Common Suffix: '' -Left (difference): '' -Right (difference): '' - -=== Checking Reasoning Variables === -No reasoning/thinking-related variables were queried by the template - -================================================================================ - ANALYZING TEMPLATE: models/templates/Kimi-K2-Thinking.jinja -================================================================================ - -=== Template Capabilities (from jinja::caps) === -supports_tools: false -supports_tool_calls: true -supports_system_role: true -supports_parallel_tool_calls: true -requires_typed_content: false - -=== Diff: With vs Without Tools (single user message) === -Common Prefix: '<|im_system|>' -Common Suffix: 'system<|im_middle|>You are Kimi, an AI assistant created by Moonshot AI.<|im_end|><|im_user|>user<|im_middle|>Hello, please help me.<|im_end|>' -Left (difference): '' -Right (difference): 'tool_declare<|im_middle|>[{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}}]<|im_end|><|im_system|>' - -=== Diff: With vs Without add_generation_prompt (single user message) === -Common Prefix: '<|im_system|>system<|im_middle|>You are Kimi, an AI assistant created by Moonshot AI.<|im_end|><|im_user|>user<|im_middle|>Hello, please help me.<|im_end|>' -Common Suffix: '' -Left (difference): '' -Right (difference): '<|im_assistant|>assistant<|im_middle|>' - -=== Diff: With vs Without reasoning_content (user, assistant) === -Common Prefix: 
'<|im_system|>system<|im_middle|>You are Kimi, an AI assistant created by Moonshot AI.<|im_end|><|im_user|>user<|im_middle|>Hello, please help me.<|im_end|><|im_assistant|>assistant<|im_middle|>I can help you with that.<|im_end|>' -Common Suffix: '' -Left (difference): '' -Right (difference): '' - -=== Diff: With vs Without reasoning_content (user, assistant, user) === -Common Prefix: '<|im_system|>system<|im_middle|>You are Kimi, an AI assistant created by Moonshot AI.<|im_end|><|im_user|>user<|im_middle|>Hello, please help me.<|im_end|><|im_assistant|>assistant<|im_middle|>I can help you with that.<|im_end|><|im_user|>user<|im_middle|>Thank you.<|im_end|>' -Common Suffix: '' -Left (difference): '' -Right (difference): '' - -=== Diff: With vs Without tool call (user, assistant) === -Common Prefix: '<|im_system|>tool_declare<|im_middle|>[{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}}]<|im_end|><|im_system|>system<|im_middle|>You are Kimi, an AI assistant created by Moonshot AI.<|im_end|><|im_user|>user<|im_middle|>Hello, please help me.<|im_end|><|im_assistant|>assistant<|im_middle|>' -Common Suffix: '<|im_end|>' -Left (difference): 'Let me help you.' 
-Right (difference): '<|tool_calls_section_begin|><|tool_call_begin|>functions.test_function_name:0<|tool_call_argument_begin|>{"param1": "value1", "param2": "value2"}<|tool_call_end|><|tool_calls_section_end|>' - -=== Diff: With vs Without tool call (user, assistant, user) === -Common Prefix: '<|im_system|>tool_declare<|im_middle|>[{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}}]<|im_end|><|im_system|>system<|im_middle|>You are Kimi, an AI assistant created by Moonshot AI.<|im_end|><|im_user|>user<|im_middle|>Hello, please help me.<|im_end|><|im_assistant|>assistant<|im_middle|>' -Common Suffix: '<|im_end|><|im_user|>user<|im_middle|>Continue.<|im_end|>' -Left (difference): 'Let me help you.' -Right (difference): '<|tool_calls_section_begin|><|tool_call_begin|>functions.test_function_name:0<|tool_call_argument_begin|>{"param1": "value1", "param2": "value2"}<|tool_call_end|><|tool_calls_section_end|>' - -=== Diff: One vs Two tool calls (user, assistant) === -Common Prefix: '<|im_system|>tool_declare<|im_middle|>[{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}}]<|im_end|><|im_system|>system<|im_middle|>You are Kimi, an AI assistant created by Moonshot AI.<|im_end|><|im_user|>user<|im_middle|>Hello, please help me.<|im_end|><|im_assistant|>assistant<|im_middle|><|tool_calls_section_begin|><|tool_call_begin|>functions.test_function_name:0<|tool_call_argument_begin|>{"param1": "value1", "param2": "value2"}<|tool_call_end|>' 
-Common Suffix: '<|tool_calls_section_end|><|im_end|>' -Left (difference): '' -Right (difference): '<|tool_call_begin|>functions.test_function_name:1<|tool_call_argument_begin|>{"param1": "value3", "param2": "value4"}<|tool_call_end|>' - -=== Diff: One vs Two tool calls (user, assistant, user) === -Common Prefix: '<|im_system|>tool_declare<|im_middle|>[{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}}]<|im_end|><|im_system|>system<|im_middle|>You are Kimi, an AI assistant created by Moonshot AI.<|im_end|><|im_user|>user<|im_middle|>Hello, please help me.<|im_end|><|im_assistant|>assistant<|im_middle|><|tool_calls_section_begin|><|tool_call_begin|>functions.test_function_name:0<|tool_call_argument_begin|>{"param1": "value1", "param2": "value2"}<|tool_call_end|>' -Common Suffix: '<|tool_calls_section_end|><|im_end|><|im_user|>user<|im_middle|>Continue.<|im_end|>' -Left (difference): '' -Right (difference): '<|tool_call_begin|>functions.test_function_name:1<|tool_call_argument_begin|>{"param1": "value3", "param2": "value4"}<|tool_call_end|>' - -=== Diff: Tool call with vs without reasoning_content (user, assistant) === -Common Prefix: '<|im_system|>tool_declare<|im_middle|>[{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}}]<|im_end|><|im_system|>system<|im_middle|>You are Kimi, an AI assistant created by Moonshot AI.<|im_end|><|im_user|>user<|im_middle|>Hello, please help me.<|im_end|><|im_assistant|>assistant<|im_middle|>' 
-Common Suffix: '<|tool_calls_section_begin|><|tool_call_begin|>functions.test_function_name:0<|tool_call_argument_begin|>{"param1": "value1", "param2": "value2"}<|tool_call_end|><|tool_calls_section_end|><|im_end|>' -Left (difference): '' -Right (difference): 'I need to call the tool first.' - -=== Checking Reasoning Variables === -No reasoning/thinking-related variables were queried by the template - -================================================================================ - ANALYZING TEMPLATE: models/templates/MiMo-VL.jinja -================================================================================ - -=== Template Capabilities (from jinja::caps) === -supports_tools: false -supports_tool_calls: true -supports_system_role: true -supports_parallel_tool_calls: true -requires_typed_content: false - -=== Diff: With vs Without Tools (single user message) === -Common Prefix: '<|im_start|>system -You are MiMo, an AI assistant developed by Xiaomi.' -Common Suffix: '<|im_end|> -<|im_start|>user -Hello, please help me.<|im_end|> -' -Left (difference): '' -Right (difference): ' - -# Tools - -You may call one or more functions to assist with the user query. 
- -You are provided with function signatures within XML tags: - -{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}} - - -For each function call, return a json object with function name and arguments within XML tags: - -{"name": , "arguments": } -' - -=== Diff: With vs Without add_generation_prompt (single user message) === -Common Prefix: '<|im_start|>system -You are MiMo, an AI assistant developed by Xiaomi.<|im_end|> -<|im_start|>user -Hello, please help me.<|im_end|> -' -Common Suffix: '' -Left (difference): '' -Right (difference): '<|im_start|>assistant -' - -=== Diff: With vs Without reasoning_content (user, assistant) === -Common Prefix: '<|im_start|>system -You are MiMo, an AI assistant developed by Xiaomi.<|im_end|> -<|im_start|>user -Hello, please help me.<|im_end|> -<|im_start|>assistant -I can help you with that.<|im_end|> -' -Common Suffix: '' -Left (difference): '' -Right (difference): '' - -=== Diff: With vs Without reasoning_content (user, assistant, user) === -Common Prefix: '<|im_start|>system -You are MiMo, an AI assistant developed by Xiaomi.<|im_end|> -<|im_start|>user -Hello, please help me.<|im_end|> -<|im_start|>assistant -I can help you with that.<|im_end|> -<|im_start|>user -Thank you.<|im_end|> -' -Common Suffix: '' -Left (difference): '' -Right (difference): '' - -=== Diff: With vs Without tool call (user, assistant) === -Common Prefix: '<|im_start|>system -You are MiMo, an AI assistant developed by Xiaomi. - -# Tools - -You may call one or more functions to assist with the user query. 
- -You are provided with function signatures within XML tags: - -{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}} - - -For each function call, return a json object with function name and arguments within XML tags: - -{"name": , "arguments": } -<|im_end|> -<|im_start|>user -Hello, please help me.<|im_end|> -<|im_start|>assistant -' -Common Suffix: '<|im_end|> -' -Left (difference): 'Let me help you.' -Right (difference): ' -{"name": "test_function_name", "arguments": {"param1": "value1", "param2": "value2"}} -' - -=== Diff: With vs Without tool call (user, assistant, user) === -Common Prefix: '<|im_start|>system -You are MiMo, an AI assistant developed by Xiaomi. - -# Tools - -You may call one or more functions to assist with the user query. - -You are provided with function signatures within XML tags: - -{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}} - - -For each function call, return a json object with function name and arguments within XML tags: - -{"name": , "arguments": } -<|im_end|> -<|im_start|>user -Hello, please help me.<|im_end|> -<|im_start|>assistant -' -Common Suffix: '<|im_end|> -<|im_start|>user -Continue.<|im_end|> -' -Left (difference): 'Let me help you.' 
-Right (difference): ' -{"name": "test_function_name", "arguments": {"param1": "value1", "param2": "value2"}} -' - -=== Diff: One vs Two tool calls (user, assistant) === -Common Prefix: '<|im_start|>system -You are MiMo, an AI assistant developed by Xiaomi. - -# Tools - -You may call one or more functions to assist with the user query. - -You are provided with function signatures within XML tags: - -{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}} - - -For each function call, return a json object with function name and arguments within XML tags: - -{"name": , "arguments": } -<|im_end|> -<|im_start|>user -Hello, please help me.<|im_end|> -<|im_start|>assistant - -{"name": "test_function_name", "arguments": {"param1": "value1", "param2": "value2"}} -' -Common Suffix: '<|im_end|> -' -Left (difference): '' -Right (difference): ' - -{"name": "test_function_name", "arguments": {"param1": "value3", "param2": "value4"}} -' - -=== Diff: One vs Two tool calls (user, assistant, user) === -Common Prefix: '<|im_start|>system -You are MiMo, an AI assistant developed by Xiaomi. - -# Tools - -You may call one or more functions to assist with the user query. 
- -You are provided with function signatures within XML tags: - -{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}} - - -For each function call, return a json object with function name and arguments within XML tags: - -{"name": , "arguments": } -<|im_end|> -<|im_start|>user -Hello, please help me.<|im_end|> -<|im_start|>assistant - -{"name": "test_function_name", "arguments": {"param1": "value1", "param2": "value2"}} -' -Common Suffix: '<|im_end|> -<|im_start|>user -Continue.<|im_end|> -' -Left (difference): '' -Right (difference): ' - -{"name": "test_function_name", "arguments": {"param1": "value3", "param2": "value4"}} -' - -=== Diff: Tool call with vs without reasoning_content (user, assistant) === -Common Prefix: '<|im_start|>system -You are MiMo, an AI assistant developed by Xiaomi. - -# Tools - -You may call one or more functions to assist with the user query. 
- -You are provided with function signatures within XML tags: - -{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}} - - -For each function call, return a json object with function name and arguments within XML tags: - -{"name": , "arguments": } -<|im_end|> -<|im_start|>user -Hello, please help me.<|im_end|> -<|im_start|>assistant - -{"name": "test_function_name", "arguments": {"param1": "value1", "param2": "value2"}} -<|im_end|> -' -Common Suffix: '' -Left (difference): '' -Right (difference): '' - -=== Checking Reasoning Variables === -No reasoning/thinking-related variables were queried by the template - -================================================================================ - ANALYZING TEMPLATE: models/templates/MiniMax-M2.jinja -================================================================================ - -=== Template Capabilities (from jinja::caps) === -supports_tools: false -supports_tool_calls: true -supports_system_role: true -supports_parallel_tool_calls: true -requires_typed_content: false - -=== Diff: With vs Without Tools (single user message) === -Common Prefix: ']~!b[]~b]system -You are a helpful assistant.' -Common Suffix: '[e~[ -]~b]user -Hello, please help me.[e~[ -' -Left (difference): '' -Right (difference): ' - -# Tools -You may call one or more tools to assist with the user query. 
-Here are the tools available in JSONSchema format: - - -{"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}} - - -When making tool calls, use XML format to invoke tools and pass parameters: - - - -param-value-1 -param-value-2 -... - -' - -=== Diff: With vs Without add_generation_prompt (single user message) === -Common Prefix: ']~!b[]~b]system -You are a helpful assistant.[e~[ -]~b]user -Hello, please help me.[e~[ -' -Common Suffix: '' -Left (difference): '' -Right (difference): ']~b]ai - -' - -=== Diff: With vs Without reasoning_content (user, assistant) === -Common Prefix: ']~!b[]~b]system -You are a helpful assistant.[e~[ -]~b]user -Hello, please help me.[e~[ -]~b]ai -' -Common Suffix: 'I can help you with that.[e~[ -' -Left (difference): '' -Right (difference): ' -The user is asking for help. I should respond positively. - - -' - -=== Diff: With vs Without reasoning_content (user, assistant, user) === -Common Prefix: ']~!b[]~b]system -You are a helpful assistant.[e~[ -]~b]user -Hello, please help me.[e~[ -]~b]ai -I can help you with that.[e~[ -]~b]user -Thank you.[e~[ -' -Common Suffix: '' -Left (difference): '' -Right (difference): '' - -=== Diff: With vs Without tool call (user, assistant) === -Common Prefix: ']~!b[]~b]system -You are a helpful assistant. - -# Tools -You may call one or more tools to assist with the user query. 
-Here are the tools available in JSONSchema format: - - -{"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}} - - -When making tool calls, use XML format to invoke tools and pass parameters: - - - -param-value-1 -param-value-2 -... - -[e~[ -]~b]user -Hello, please help me.[e~[ -]~b]ai -' -Common Suffix: '[e~[ -' -Left (difference): 'Let me help you.' -Right (difference): ' - - -value1 -value2 - -' - -=== Diff: With vs Without tool call (user, assistant, user) === -Common Prefix: ']~!b[]~b]system -You are a helpful assistant. - -# Tools -You may call one or more tools to assist with the user query. -Here are the tools available in JSONSchema format: - - -{"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}} - - -When making tool calls, use XML format to invoke tools and pass parameters: - - - -param-value-1 -param-value-2 -... - -[e~[ -]~b]user -Hello, please help me.[e~[ -]~b]ai -' -Common Suffix: '[e~[ -]~b]user -Continue.[e~[ -' -Left (difference): 'Let me help you.' -Right (difference): ' - - -value1 -value2 - -' - -=== Diff: One vs Two tool calls (user, assistant) === -Common Prefix: ']~!b[]~b]system -You are a helpful assistant. - -# Tools -You may call one or more tools to assist with the user query. 
-Here are the tools available in JSONSchema format: - - -{"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}} - - -When making tool calls, use XML format to invoke tools and pass parameters: - - - -param-value-1 -param-value-2 -... - -[e~[ -]~b]user -Hello, please help me.[e~[ -]~b]ai - - - -value1 -value2 - -' -Common Suffix: '[e~[ -' -Left (difference): '' -Right (difference): ' -value3 -value4 - -' - -=== Diff: One vs Two tool calls (user, assistant, user) === -Common Prefix: ']~!b[]~b]system -You are a helpful assistant. - -# Tools -You may call one or more tools to assist with the user query. -Here are the tools available in JSONSchema format: - - -{"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}} - - -When making tool calls, use XML format to invoke tools and pass parameters: - - - -param-value-1 -param-value-2 -... - -[e~[ -]~b]user -Hello, please help me.[e~[ -]~b]ai - - - -value1 -value2 - -' -Common Suffix: '[e~[ -]~b]user -Continue.[e~[ -' -Left (difference): '' -Right (difference): ' -value3 -value4 - -' - -=== Diff: Tool call with vs without reasoning_content (user, assistant) === -Common Prefix: ']~!b[]~b]system -You are a helpful assistant. - -# Tools -You may call one or more tools to assist with the user query. 
-Here are the tools available in JSONSchema format: - - -{"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}} - - -When making tool calls, use XML format to invoke tools and pass parameters: - - - -param-value-1 -param-value-2 -... - -[e~[ -]~b]user -Hello, please help me.[e~[ -]~b]ai -' -Common Suffix: ' - - -value1 -value2 - -[e~[ -' -Left (difference): '' -Right (difference): ' -I need to call the tool first. - - -' - -=== Checking Reasoning Variables === -No reasoning/thinking-related variables were queried by the template - -================================================================================ - ANALYZING TEMPLATE: models/templates/Mistral-Small-3.2-24B-Instruct-2506.jinja -================================================================================ - -=== Template Capabilities (from jinja::caps) === -supports_tools: false -supports_tool_calls: false -supports_system_role: true -supports_parallel_tool_calls: true -requires_typed_content: false - -=== Diff: With vs Without Tools (single user message) === -Common Prefix: '[SYSTEM_PROMPT]You are Mistral Small 3, a Large Language Model (LLM) created by Mistral AI, a French startup headquartered in Paris. -Your knowledge base was last updated on 2023-10-01. The current date is 2026-01-26. - -When you're not sure about some information or when the user's request requires up-to-date or specific data, you must use the available tools to fetch the information. Do not hesitate to use tools whenever they can provide a more accurate or complete response. If no relevant tools are available, then clearly state that you don't have the information and avoid making up anything. 
- -If the user's question is not clear, ambiguous, or does not provide enough context for you to accurately answer the question, you do not try to answer it right away and you rather ask the user to clarify their request (e.g. "What are some good restaurants around me?" => "Where are you?" or "When is the next flight to Tokyo" => "Where do you travel from?"). -You are always very attentive to dates, and when asked about information at specific dates, you discard information that is at another date. -You follow these instructions in all languages, and always respond to the user in the language they use or request. -Next sections describe the capabilities that you have. - -# WEB BROWSING INSTRUCTIONS - -You cannot perform any web search or access internet to open URLs, links etc. If it seems like the user is expecting you to do so, you clarify the situation and ask the user to copy paste the text directly in the chat. - -# MULTI-MODAL INSTRUCTIONS - -You have the ability to read images, but you cannot generate images. You also cannot transcribe audio files or videos. -You cannot read nor transcribe audio files or videos. - -# TOOL CALLING INSTRUCTIONS - -You may have access to tools that you can use to fetch information or perform actions. You must use these tools in the following situations: - -1. When the request requires up-to-date information. -2. When the request requires specific data that you do not have in your knowledge base. -3. When the request involves actions that you cannot perform without tools. - -Always prioritize using tools to provide the most accurate and helpful response. 
If tools are not available, inform the user that you cannot perform the requested action at the moment.[/SYSTEM_PROMPT][INST]Hello, please help me.[/INST]' -Common Suffix: '' -Left (difference): '' -Right (difference): '' - -=== Diff: With vs Without add_generation_prompt (single user message) === -Common Prefix: '[SYSTEM_PROMPT]You are Mistral Small 3, a Large Language Model (LLM) created by Mistral AI, a French startup headquartered in Paris. -Your knowledge base was last updated on 2023-10-01. The current date is 2026-01-26. - -When you're not sure about some information or when the user's request requires up-to-date or specific data, you must use the available tools to fetch the information. Do not hesitate to use tools whenever they can provide a more accurate or complete response. If no relevant tools are available, then clearly state that you don't have the information and avoid making up anything. - -If the user's question is not clear, ambiguous, or does not provide enough context for you to accurately answer the question, you do not try to answer it right away and you rather ask the user to clarify their request (e.g. "What are some good restaurants around me?" => "Where are you?" or "When is the next flight to Tokyo" => "Where do you travel from?"). -You are always very attentive to dates, and when asked about information at specific dates, you discard information that is at another date. -You follow these instructions in all languages, and always respond to the user in the language they use or request. -Next sections describe the capabilities that you have. - -# WEB BROWSING INSTRUCTIONS - -You cannot perform any web search or access internet to open URLs, links etc. If it seems like the user is expecting you to do so, you clarify the situation and ask the user to copy paste the text directly in the chat. - -# MULTI-MODAL INSTRUCTIONS - -You have the ability to read images, but you cannot generate images. 
You also cannot transcribe audio files or videos. -You cannot read nor transcribe audio files or videos. - -# TOOL CALLING INSTRUCTIONS - -You may have access to tools that you can use to fetch information or perform actions. You must use these tools in the following situations: - -1. When the request requires up-to-date information. -2. When the request requires specific data that you do not have in your knowledge base. -3. When the request involves actions that you cannot perform without tools. - -Always prioritize using tools to provide the most accurate and helpful response. If tools are not available, inform the user that you cannot perform the requested action at the moment.[/SYSTEM_PROMPT][INST]Hello, please help me.[/INST]' -Common Suffix: '' -Left (difference): '' -Right (difference): '' - -=== Diff: With vs Without reasoning_content (user, assistant) === -Common Prefix: '[SYSTEM_PROMPT]You are Mistral Small 3, a Large Language Model (LLM) created by Mistral AI, a French startup headquartered in Paris. -Your knowledge base was last updated on 2023-10-01. The current date is 2026-01-26. - -When you're not sure about some information or when the user's request requires up-to-date or specific data, you must use the available tools to fetch the information. Do not hesitate to use tools whenever they can provide a more accurate or complete response. If no relevant tools are available, then clearly state that you don't have the information and avoid making up anything. - -If the user's question is not clear, ambiguous, or does not provide enough context for you to accurately answer the question, you do not try to answer it right away and you rather ask the user to clarify their request (e.g. "What are some good restaurants around me?" => "Where are you?" or "When is the next flight to Tokyo" => "Where do you travel from?"). 
-You are always very attentive to dates, and when asked about information at specific dates, you discard information that is at another date. -You follow these instructions in all languages, and always respond to the user in the language they use or request. -Next sections describe the capabilities that you have. - -# WEB BROWSING INSTRUCTIONS - -You cannot perform any web search or access internet to open URLs, links etc. If it seems like the user is expecting you to do so, you clarify the situation and ask the user to copy paste the text directly in the chat. - -# MULTI-MODAL INSTRUCTIONS - -You have the ability to read images, but you cannot generate images. You also cannot transcribe audio files or videos. -You cannot read nor transcribe audio files or videos. - -# TOOL CALLING INSTRUCTIONS - -You may have access to tools that you can use to fetch information or perform actions. You must use these tools in the following situations: - -1. When the request requires up-to-date information. -2. When the request requires specific data that you do not have in your knowledge base. -3. When the request involves actions that you cannot perform without tools. - -Always prioritize using tools to provide the most accurate and helpful response. If tools are not available, inform the user that you cannot perform the requested action at the moment.[/SYSTEM_PROMPT][INST]Hello, please help me.[/INST]I can help you with that.' -Common Suffix: '' -Left (difference): '' -Right (difference): '' - -=== Diff: With vs Without reasoning_content (user, assistant, user) === -Common Prefix: '[SYSTEM_PROMPT]You are Mistral Small 3, a Large Language Model (LLM) created by Mistral AI, a French startup headquartered in Paris. -Your knowledge base was last updated on 2023-10-01. The current date is 2026-01-26. - -When you're not sure about some information or when the user's request requires up-to-date or specific data, you must use the available tools to fetch the information. 
Do not hesitate to use tools whenever they can provide a more accurate or complete response. If no relevant tools are available, then clearly state that you don't have the information and avoid making up anything. - -If the user's question is not clear, ambiguous, or does not provide enough context for you to accurately answer the question, you do not try to answer it right away and you rather ask the user to clarify their request (e.g. "What are some good restaurants around me?" => "Where are you?" or "When is the next flight to Tokyo" => "Where do you travel from?"). -You are always very attentive to dates, and when asked about information at specific dates, you discard information that is at another date. -You follow these instructions in all languages, and always respond to the user in the language they use or request. -Next sections describe the capabilities that you have. - -# WEB BROWSING INSTRUCTIONS - -You cannot perform any web search or access internet to open URLs, links etc. If it seems like the user is expecting you to do so, you clarify the situation and ask the user to copy paste the text directly in the chat. - -# MULTI-MODAL INSTRUCTIONS - -You have the ability to read images, but you cannot generate images. You also cannot transcribe audio files or videos. -You cannot read nor transcribe audio files or videos. - -# TOOL CALLING INSTRUCTIONS - -You may have access to tools that you can use to fetch information or perform actions. You must use these tools in the following situations: - -1. When the request requires up-to-date information. -2. When the request requires specific data that you do not have in your knowledge base. -3. When the request involves actions that you cannot perform without tools. - -Always prioritize using tools to provide the most accurate and helpful response. 
If tools are not available, inform the user that you cannot perform the requested action at the moment.[/SYSTEM_PROMPT][INST]Hello, please help me.[/INST]I can help you with that.[INST]Thank you.[/INST]' -Common Suffix: '' -Left (difference): '' -Right (difference): '' -Analysis failed: ------------- -While executing CallExpression at line 91, column 40 in source: -...↵ {{- raise_exception("Tool call IDs should be alphanumeric s... - ^ -Error: Jinja Exception: Tool call IDs should be alphanumeric strings with length 9! - -================================================================================ - ANALYZING TEMPLATE: models/templates/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16.jinja -================================================================================ - -=== Template Capabilities (from jinja::caps) === -supports_tools: true -supports_tool_calls: true -supports_system_role: true -supports_parallel_tool_calls: true -requires_typed_content: false - -=== Diff: With vs Without Tools (single user message) === -Common Prefix: '<|im_start|>system -' -Common Suffix: '<|im_end|> -<|im_start|>user -Hello, please help me.<|im_end|> -' -Left (difference): '' -Right (difference): '# Tools - -You have access to the following functions: - - - -test_function_name -A test function for debugging - - -param1 -string -First parameter - - -param2 -string -Second parameter - -["param1", "param2"] - - - - -If you choose to call a function ONLY reply in the following format with NO suffix: - - - - -value_1 - - -This is the value for the second parameter -that can span -multiple lines - - - - - -Reminder: -- Function calls MUST follow the specified format: an inner block must be nested within XML tags -- Required parameters MUST be specified -- You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after -- If there is no function call available, answer the question like normal with your current knowledge and do not tell the 
user about function calls -' - -=== Diff: With vs Without add_generation_prompt (single user message) === -Common Prefix: '<|im_start|>system -<|im_end|> -<|im_start|>user -Hello, please help me.<|im_end|> -' -Common Suffix: '' -Left (difference): '' -Right (difference): '<|im_start|>assistant - -' - -=== Diff: With vs Without reasoning_content (user, assistant) === -Common Prefix: '<|im_start|>system -<|im_end|> -<|im_start|>user -Hello, please help me.<|im_end|> -<|im_start|>assistant -' -Common Suffix: 'I can help you with that.<|im_end|> -' -Left (difference): '' -Right (difference): ' -The user is asking for help. I should respond positively. - -' - -=== Diff: With vs Without reasoning_content (user, assistant, user) === -Common Prefix: '<|im_start|>system -<|im_end|> -<|im_start|>user -Hello, please help me.<|im_end|> -<|im_start|>assistant -' -Common Suffix: 'I can help you with that.<|im_end|> -<|im_start|>user -Thank you.<|im_end|> -' -Left (difference): '' -Right (difference): ' -' - -=== Diff: With vs Without tool call (user, assistant) === -Common Prefix: '<|im_start|>system -# Tools - -You have access to the following functions: - - - -test_function_name -A test function for debugging - - -param1 -string -First parameter - - -param2 -string -Second parameter - -["param1", "param2"] - - - - -If you choose to call a function ONLY reply in the following format with NO suffix: - - - - -value_1 - - -This is the value for the second parameter -that can span -multiple lines - - - - - -Reminder: -- Function calls MUST follow the specified format: an inner block must be nested within XML tags -- Required parameters MUST be specified -- You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after -- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls -<|im_end|> -<|im_start|>user -Hello, please help me.<|im_end|> 
-<|im_start|>assistant -' -Common Suffix: '<|im_end|> -' -Left (difference): 'Let me help you.' -Right (difference): ' - - - -value1 - - -value2 - - - -' - -=== Diff: With vs Without tool call (user, assistant, user) === -Common Prefix: '<|im_start|>system -# Tools - -You have access to the following functions: - - - -test_function_name -A test function for debugging - - -param1 -string -First parameter - - -param2 -string -Second parameter - -["param1", "param2"] - - - - -If you choose to call a function ONLY reply in the following format with NO suffix: - - - - -value_1 - - -This is the value for the second parameter -that can span -multiple lines - - - - - -Reminder: -- Function calls MUST follow the specified format: an inner block must be nested within XML tags -- Required parameters MUST be specified -- You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after -- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls -<|im_end|> -<|im_start|>user -Hello, please help me.<|im_end|> -<|im_start|>assistant -' -Common Suffix: '<|im_end|> -<|im_start|>user -Continue.<|im_end|> -' -Left (difference): 'Let me help you.' 
-Right (difference): ' - - - -value1 - - -value2 - - - -' - -=== Diff: One vs Two tool calls (user, assistant) === -Common Prefix: '<|im_start|>system -# Tools - -You have access to the following functions: - - - -test_function_name -A test function for debugging - - -param1 -string -First parameter - - -param2 -string -Second parameter - -["param1", "param2"] - - - - -If you choose to call a function ONLY reply in the following format with NO suffix: - - - - -value_1 - - -This is the value for the second parameter -that can span -multiple lines - - - - - -Reminder: -- Function calls MUST follow the specified format: an inner block must be nested within XML tags -- Required parameters MUST be specified -- You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after -- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls -<|im_end|> -<|im_start|>user -Hello, please help me.<|im_end|> -<|im_start|>assistant - - - - -value1 - - -value2 - - - -' -Common Suffix: '<|im_end|> -' -Left (difference): '' -Right (difference): ' - - -value3 - - -value4 - - - -' - -=== Diff: One vs Two tool calls (user, assistant, user) === -Common Prefix: '<|im_start|>system -# Tools - -You have access to the following functions: - - - -test_function_name -A test function for debugging - - -param1 -string -First parameter - - -param2 -string -Second parameter - -["param1", "param2"] - - - - -If you choose to call a function ONLY reply in the following format with NO suffix: - - - - -value_1 - - -This is the value for the second parameter -that can span -multiple lines - - - - - -Reminder: -- Function calls MUST follow the specified format: an inner block must be nested within XML tags -- Required parameters MUST be specified -- You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after -- 
If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls -<|im_end|> -<|im_start|>user -Hello, please help me.<|im_end|> -<|im_start|>assistant - - - - -value1 - - -value2 - - - -' -Common Suffix: '<|im_end|> -<|im_start|>user -Continue.<|im_end|> -' -Left (difference): '' -Right (difference): ' - - -value3 - - -value4 - - - -' - -=== Diff: Tool call with vs without reasoning_content (user, assistant) === -Common Prefix: '<|im_start|>system -# Tools - -You have access to the following functions: - - - -test_function_name -A test function for debugging - - -param1 -string -First parameter - - -param2 -string -Second parameter - -["param1", "param2"] - - - - -If you choose to call a function ONLY reply in the following format with NO suffix: - - - - -value_1 - - -This is the value for the second parameter -that can span -multiple lines - - - - - -Reminder: -- Function calls MUST follow the specified format: an inner block must be nested within XML tags -- Required parameters MUST be specified -- You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after -- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls -<|im_end|> -<|im_start|>user -Hello, please help me.<|im_end|> -<|im_start|>assistant -' -Common Suffix: ' - - - -value1 - - -value2 - - - -<|im_end|> -' -Left (difference): '' -Right (difference): ' -I need to call the tool first. 
-' - -=== Checking Reasoning Variables === -No reasoning/thinking-related variables were queried by the template - -================================================================================ - ANALYZING TEMPLATE: models/templates/NVIDIA-Nemotron-Nano-v2.jinja -================================================================================ - -=== Template Capabilities (from jinja::caps) === -supports_tools: false -supports_tool_calls: true -supports_system_role: true -supports_parallel_tool_calls: true -requires_typed_content: false - -=== Diff: With vs Without Tools (single user message) === -Common Prefix: 'System -' -Common Suffix: ' - -User -Hello, please help me. -' -Left (difference): '' -Right (difference): 'You can use the following tools to assist the user if required: -[{"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}] - -If you decide to call any tool(s), use the following format: -[{{"name": "tool_name1", "arguments": "tool_args1"}}, {{"name": "tool_name2", "arguments": "tool_args2"}}] - -The user will execute tool-calls and return responses from tool(s) in this format: -[{{"tool_response1"}}, {{"tool_response2"}}] - -Based on the tool responses, you can call additional tools if needed, correct tool calls if any errors are found, or just respond to the user.' - -=== Diff: With vs Without add_generation_prompt (single user message) === -Common Prefix: 'System - - -User -Hello, please help me. -' -Common Suffix: '' -Left (difference): '' -Right (difference): 'Assistant - -' - -=== Diff: With vs Without reasoning_content (user, assistant) === -Common Prefix: 'System - - -User -Hello, please help me. -Assistant - -I can help you with that. 
- -' -Common Suffix: '' -Left (difference): '' -Right (difference): '' - -=== Diff: With vs Without reasoning_content (user, assistant, user) === -Common Prefix: 'System - - -User -Hello, please help me. -Assistant -I can help you with that. - -User -Thank you. -' -Common Suffix: '' -Left (difference): '' -Right (difference): '' - -=== Diff: With vs Without tool call (user, assistant) === -Common Prefix: 'System -You can use the following tools to assist the user if required: -[{"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}] - -If you decide to call any tool(s), use the following format: -[{{"name": "tool_name1", "arguments": "tool_args1"}}, {{"name": "tool_name2", "arguments": "tool_args2"}}] - -The user will execute tool-calls and return responses from tool(s) in this format: -[{{"tool_response1"}}, {{"tool_response2"}}] - -Based on the tool responses, you can call additional tools if needed, correct tool calls if any errors are found, or just respond to the user. - -User -Hello, please help me. -Assistant -' -Common Suffix: ' -' -Left (difference): ' -Let me help you. 
-' -Right (difference): '[{"name": "test_function_name", "arguments": {"param1": "value1", "param2": "value2"}}] -' - -=== Diff: With vs Without tool call (user, assistant, user) === -Common Prefix: 'System -You can use the following tools to assist the user if required: -[{"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}] - -If you decide to call any tool(s), use the following format: -[{{"name": "tool_name1", "arguments": "tool_args1"}}, {{"name": "tool_name2", "arguments": "tool_args2"}}] - -The user will execute tool-calls and return responses from tool(s) in this format: -[{{"tool_response1"}}, {{"tool_response2"}}] - -Based on the tool responses, you can call additional tools if needed, correct tool calls if any errors are found, or just respond to the user. - -User -Hello, please help me. -Assistant -' -Common Suffix: ' - -User -Continue. -' -Left (difference): 'Let me help you.' 
-Right (difference): '[{"name": "test_function_name", "arguments": {"param1": "value1", "param2": "value2"}}]' - -=== Diff: One vs Two tool calls (user, assistant) === -Common Prefix: 'System -You can use the following tools to assist the user if required: -[{"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}] - -If you decide to call any tool(s), use the following format: -[{{"name": "tool_name1", "arguments": "tool_args1"}}, {{"name": "tool_name2", "arguments": "tool_args2"}}] - -The user will execute tool-calls and return responses from tool(s) in this format: -[{{"tool_response1"}}, {{"tool_response2"}}] - -Based on the tool responses, you can call additional tools if needed, correct tool calls if any errors are found, or just respond to the user. - -User -Hello, please help me. 
-Assistant -[{"name": "test_function_name", "arguments": {"param1": "value1", "param2": "value2"}}' -Common Suffix: '] - -' -Left (difference): '' -Right (difference): ', {"name": "test_function_name", "arguments": {"param1": "value3", "param2": "value4"}}' - -=== Diff: One vs Two tool calls (user, assistant, user) === -Common Prefix: 'System -You can use the following tools to assist the user if required: -[{"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}] - -If you decide to call any tool(s), use the following format: -[{{"name": "tool_name1", "arguments": "tool_args1"}}, {{"name": "tool_name2", "arguments": "tool_args2"}}] - -The user will execute tool-calls and return responses from tool(s) in this format: -[{{"tool_response1"}}, {{"tool_response2"}}] - -Based on the tool responses, you can call additional tools if needed, correct tool calls if any errors are found, or just respond to the user. - -User -Hello, please help me. -Assistant -[{"name": "test_function_name", "arguments": {"param1": "value1", "param2": "value2"}}' -Common Suffix: '] - -User -Continue. 
-' -Left (difference): '' -Right (difference): ', {"name": "test_function_name", "arguments": {"param1": "value3", "param2": "value4"}}' - -=== Diff: Tool call with vs without reasoning_content (user, assistant) === -Common Prefix: 'System -You can use the following tools to assist the user if required: -[{"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}] - -If you decide to call any tool(s), use the following format: -[{{"name": "tool_name1", "arguments": "tool_args1"}}, {{"name": "tool_name2", "arguments": "tool_args2"}}] - -The user will execute tool-calls and return responses from tool(s) in this format: -[{{"tool_response1"}}, {{"tool_response2"}}] - -Based on the tool responses, you can call additional tools if needed, correct tool calls if any errors are found, or just respond to the user. - -User -Hello, please help me. -Assistant -[{"name": "test_function_name", "arguments": {"param1": "value1", "param2": "value2"}}] - -' -Common Suffix: '' -Left (difference): '' -Right (difference): '' - -=== Checking Reasoning Variables === -No reasoning/thinking-related variables were queried by the template - -================================================================================ - ANALYZING TEMPLATE: models/templates/NousResearch-Hermes-2-Pro-Llama-3-8B-tool_use.jinja -================================================================================ - -=== Template Capabilities (from jinja::caps) === -supports_tools: true -supports_tool_calls: true -supports_system_role: true -supports_parallel_tool_calls: true -requires_typed_content: false - -=== Diff: With vs Without Tools (single user message) === -Common Prefix: '<|im_start|>system -You are a function calling AI model. 
You are provided with function signatures within XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools: ' -Common Suffix: ' Use the following pydantic model json schema for each tool call you will make: {"properties": {"name": {"title": "Name", "type": "string"}, "arguments": {"title": "Arguments", "type": "object"}}, "required": ["name", "arguments"], "title": "FunctionCall", "type": "object"}} -For each function call return a json object with function name and arguments within XML tags as follows: - -{"name": , "arguments": } -<|im_end|> -<|im_start|>user -Hello, please help me.<|im_end|> -' -Left (difference): '' -Right (difference): '{"type": "function", "function": {"name": "test_function_name", "description": "test_function_name(param1: str, param2: str) - A test function for debugging - - Args: - param1(str): First parameter param2(str): Second parameter", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}' - -=== Diff: With vs Without add_generation_prompt (single user message) === -Common Prefix: '<|im_start|>system -You are a function calling AI model. You are provided with function signatures within XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. 
Here are the available tools: Use the following pydantic model json schema for each tool call you will make: {"properties": {"name": {"title": "Name", "type": "string"}, "arguments": {"title": "Arguments", "type": "object"}}, "required": ["name", "arguments"], "title": "FunctionCall", "type": "object"}} -For each function call return a json object with function name and arguments within XML tags as follows: - -{"name": , "arguments": } -<|im_end|> -<|im_start|>user -Hello, please help me.<|im_end|> -' -Common Suffix: '' -Left (difference): '' -Right (difference): '<|im_start|>assistant -' - -=== Diff: With vs Without reasoning_content (user, assistant) === -Common Prefix: '<|im_start|>system -You are a function calling AI model. You are provided with function signatures within XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools: Use the following pydantic model json schema for each tool call you will make: {"properties": {"name": {"title": "Name", "type": "string"}, "arguments": {"title": "Arguments", "type": "object"}}, "required": ["name", "arguments"], "title": "FunctionCall", "type": "object"}} -For each function call return a json object with function name and arguments within XML tags as follows: - -{"name": , "arguments": } -<|im_end|> -<|im_start|>user -Hello, please help me.<|im_end|> -<|im_start|>assistant -I can help you with that.<|im_end|> -' -Common Suffix: '' -Left (difference): '' -Right (difference): '' - -=== Diff: With vs Without reasoning_content (user, assistant, user) === -Common Prefix: '<|im_start|>system -You are a function calling AI model. You are provided with function signatures within XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. 
Here are the available tools: Use the following pydantic model json schema for each tool call you will make: {"properties": {"name": {"title": "Name", "type": "string"}, "arguments": {"title": "Arguments", "type": "object"}}, "required": ["name", "arguments"], "title": "FunctionCall", "type": "object"}} -For each function call return a json object with function name and arguments within XML tags as follows: - -{"name": , "arguments": } -<|im_end|> -<|im_start|>user -Hello, please help me.<|im_end|> -<|im_start|>assistant -I can help you with that.<|im_end|> -<|im_start|>user -Thank you.<|im_end|> -' -Common Suffix: '' -Left (difference): '' -Right (difference): '' - -=== Diff: With vs Without tool call (user, assistant) === -Common Prefix: '<|im_start|>system -You are a function calling AI model. You are provided with function signatures within XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools: {"type": "function", "function": {"name": "test_function_name", "description": "test_function_name(param1: str, param2: str) - A test function for debugging - - Args: - param1(str): First parameter param2(str): Second parameter", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}} Use the following pydantic model json schema for each tool call you will make: {"properties": {"name": {"title": "Name", "type": "string"}, "arguments": {"title": "Arguments", "type": "object"}}, "required": ["name", "arguments"], "title": "FunctionCall", "type": "object"}} -For each function call return a json object with function name and arguments within XML tags as follows: - -{"name": , "arguments": } -<|im_end|> -<|im_start|>user -Hello, please help me.<|im_end|> -<|im_start|>assistant -' -Common Suffix: '<|im_end|> 
-' -Left (difference): 'Let me help you.' -Right (difference): ' -{"name": "test_function_name", "arguments": {"param1": "value1", "param2": "value2"}} -' - -=== Diff: With vs Without tool call (user, assistant, user) === -Common Prefix: '<|im_start|>system -You are a function calling AI model. You are provided with function signatures within XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools: {"type": "function", "function": {"name": "test_function_name", "description": "test_function_name(param1: str, param2: str) - A test function for debugging - - Args: - param1(str): First parameter param2(str): Second parameter", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}} Use the following pydantic model json schema for each tool call you will make: {"properties": {"name": {"title": "Name", "type": "string"}, "arguments": {"title": "Arguments", "type": "object"}}, "required": ["name", "arguments"], "title": "FunctionCall", "type": "object"}} -For each function call return a json object with function name and arguments within XML tags as follows: - -{"name": , "arguments": } -<|im_end|> -<|im_start|>user -Hello, please help me.<|im_end|> -<|im_start|>assistant -' -Common Suffix: '<|im_end|> -<|im_start|>user -Continue.<|im_end|> -' -Left (difference): 'Let me help you.' -Right (difference): ' -{"name": "test_function_name", "arguments": {"param1": "value1", "param2": "value2"}} -' - -=== Diff: One vs Two tool calls (user, assistant) === -Common Prefix: '<|im_start|>system -You are a function calling AI model. You are provided with function signatures within XML tags. You may call one or more functions to assist with the user query. 
Don't make assumptions about what values to plug into functions. Here are the available tools: {"type": "function", "function": {"name": "test_function_name", "description": "test_function_name(param1: str, param2: str) - A test function for debugging - - Args: - param1(str): First parameter param2(str): Second parameter", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}} Use the following pydantic model json schema for each tool call you will make: {"properties": {"name": {"title": "Name", "type": "string"}, "arguments": {"title": "Arguments", "type": "object"}}, "required": ["name", "arguments"], "title": "FunctionCall", "type": "object"}} -For each function call return a json object with function name and arguments within XML tags as follows: - -{"name": , "arguments": } -<|im_end|> -<|im_start|>user -Hello, please help me.<|im_end|> -<|im_start|>assistant - -{"name": "test_function_name", "arguments": {"param1": "value1", "param2": "value2"}} -' -Common Suffix: '<|im_end|> -' -Left (difference): '' -Right (difference): ' - -{"name": "test_function_name", "arguments": {"param1": "value3", "param2": "value4"}} -' - -=== Diff: One vs Two tool calls (user, assistant, user) === -Common Prefix: '<|im_start|>system -You are a function calling AI model. You are provided with function signatures within XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. 
Here are the available tools: {"type": "function", "function": {"name": "test_function_name", "description": "test_function_name(param1: str, param2: str) - A test function for debugging - - Args: - param1(str): First parameter param2(str): Second parameter", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}} Use the following pydantic model json schema for each tool call you will make: {"properties": {"name": {"title": "Name", "type": "string"}, "arguments": {"title": "Arguments", "type": "object"}}, "required": ["name", "arguments"], "title": "FunctionCall", "type": "object"}} -For each function call return a json object with function name and arguments within XML tags as follows: - -{"name": , "arguments": } -<|im_end|> -<|im_start|>user -Hello, please help me.<|im_end|> -<|im_start|>assistant - -{"name": "test_function_name", "arguments": {"param1": "value1", "param2": "value2"}} -' -Common Suffix: '<|im_end|> -<|im_start|>user -Continue.<|im_end|> -' -Left (difference): '' -Right (difference): ' - -{"name": "test_function_name", "arguments": {"param1": "value3", "param2": "value4"}} -' - -=== Diff: Tool call with vs without reasoning_content (user, assistant) === -Common Prefix: '<|im_start|>system -You are a function calling AI model. You are provided with function signatures within XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. 
Here are the available tools: {"type": "function", "function": {"name": "test_function_name", "description": "test_function_name(param1: str, param2: str) - A test function for debugging - - Args: - param1(str): First parameter param2(str): Second parameter", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}} Use the following pydantic model json schema for each tool call you will make: {"properties": {"name": {"title": "Name", "type": "string"}, "arguments": {"title": "Arguments", "type": "object"}}, "required": ["name", "arguments"], "title": "FunctionCall", "type": "object"}} -For each function call return a json object with function name and arguments within XML tags as follows: - -{"name": , "arguments": } -<|im_end|> -<|im_start|>user -Hello, please help me.<|im_end|> -<|im_start|>assistant - -{"name": "test_function_name", "arguments": {"param1": "value1", "param2": "value2"}} -<|im_end|> -' -Common Suffix: '' -Left (difference): '' -Right (difference): '' - -=== Checking Reasoning Variables === -No reasoning/thinking-related variables were queried by the template - -================================================================================ - ANALYZING TEMPLATE: models/templates/NousResearch-Hermes-3-Llama-3.1-8B-tool_use.jinja -================================================================================ - -=== Template Capabilities (from jinja::caps) === -supports_tools: true -supports_tool_calls: true -supports_system_role: true -supports_parallel_tool_calls: true -requires_typed_content: false - -=== Diff: With vs Without Tools (single user message) === -Common Prefix: '<|im_start|>system -You are a function calling AI model. You are provided with function signatures within XML tags. You may call one or more functions to assist with the user query. 
Don't make assumptions about what values to plug into functions. Here are the available tools: ' -Common Suffix: ' Use the following pydantic model json schema for each tool call you will make: {"properties": {"name": {"title": "Name", "type": "string"}, "arguments": {"title": "Arguments", "type": "object"}}, "required": ["name", "arguments"], "title": "FunctionCall", "type": "object"}} -For each function call return a json object with function name and arguments within XML tags as follows: - -{"name": , "arguments": } -<|im_end|> -<|im_start|>user -Hello, please help me.<|im_end|> -' -Left (difference): '' -Right (difference): '{"type": "function", "function": {"name": "test_function_name", "description": "test_function_name(param1: str, param2: str) - A test function for debugging - - Args: - param1(str): First parameter param2(str): Second parameter", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}' - -=== Diff: With vs Without add_generation_prompt (single user message) === -Common Prefix: '<|im_start|>system -You are a function calling AI model. You are provided with function signatures within XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. 
Here are the available tools: Use the following pydantic model json schema for each tool call you will make: {"properties": {"name": {"title": "Name", "type": "string"}, "arguments": {"title": "Arguments", "type": "object"}}, "required": ["name", "arguments"], "title": "FunctionCall", "type": "object"}} -For each function call return a json object with function name and arguments within XML tags as follows: - -{"name": , "arguments": } -<|im_end|> -<|im_start|>user -Hello, please help me.<|im_end|> -' -Common Suffix: '' -Left (difference): '' -Right (difference): '<|im_start|>assistant -' - -=== Diff: With vs Without reasoning_content (user, assistant) === -Common Prefix: '<|im_start|>system -You are a function calling AI model. You are provided with function signatures within XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools: Use the following pydantic model json schema for each tool call you will make: {"properties": {"name": {"title": "Name", "type": "string"}, "arguments": {"title": "Arguments", "type": "object"}}, "required": ["name", "arguments"], "title": "FunctionCall", "type": "object"}} -For each function call return a json object with function name and arguments within XML tags as follows: - -{"name": , "arguments": } -<|im_end|> -<|im_start|>user -Hello, please help me.<|im_end|> -<|im_start|>assistant -I can help you with that.<|im_end|> -' -Common Suffix: '' -Left (difference): '' -Right (difference): '' - -=== Diff: With vs Without reasoning_content (user, assistant, user) === -Common Prefix: '<|im_start|>system -You are a function calling AI model. You are provided with function signatures within XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. 
Here are the available tools: Use the following pydantic model json schema for each tool call you will make: {"properties": {"name": {"title": "Name", "type": "string"}, "arguments": {"title": "Arguments", "type": "object"}}, "required": ["name", "arguments"], "title": "FunctionCall", "type": "object"}} -For each function call return a json object with function name and arguments within XML tags as follows: - -{"name": , "arguments": } -<|im_end|> -<|im_start|>user -Hello, please help me.<|im_end|> -<|im_start|>assistant -I can help you with that.<|im_end|> -<|im_start|>user -Thank you.<|im_end|> -' -Common Suffix: '' -Left (difference): '' -Right (difference): '' - -=== Diff: With vs Without tool call (user, assistant) === -Common Prefix: '<|im_start|>system -You are a function calling AI model. You are provided with function signatures within XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools: {"type": "function", "function": {"name": "test_function_name", "description": "test_function_name(param1: str, param2: str) - A test function for debugging - - Args: - param1(str): First parameter param2(str): Second parameter", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}} Use the following pydantic model json schema for each tool call you will make: {"properties": {"name": {"title": "Name", "type": "string"}, "arguments": {"title": "Arguments", "type": "object"}}, "required": ["name", "arguments"], "title": "FunctionCall", "type": "object"}} -For each function call return a json object with function name and arguments within XML tags as follows: - -{"name": , "arguments": } -<|im_end|> -<|im_start|>user -Hello, please help me.<|im_end|> -<|im_start|>assistant -' -Common Suffix: '<|im_end|> 
-' -Left (difference): 'Let me help you.' -Right (difference): ' -{"name": "test_function_name", "arguments": {"param1": "value1", "param2": "value2"}} -' - -=== Diff: With vs Without tool call (user, assistant, user) === -Common Prefix: '<|im_start|>system -You are a function calling AI model. You are provided with function signatures within XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools: {"type": "function", "function": {"name": "test_function_name", "description": "test_function_name(param1: str, param2: str) - A test function for debugging - - Args: - param1(str): First parameter param2(str): Second parameter", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}} Use the following pydantic model json schema for each tool call you will make: {"properties": {"name": {"title": "Name", "type": "string"}, "arguments": {"title": "Arguments", "type": "object"}}, "required": ["name", "arguments"], "title": "FunctionCall", "type": "object"}} -For each function call return a json object with function name and arguments within XML tags as follows: - -{"name": , "arguments": } -<|im_end|> -<|im_start|>user -Hello, please help me.<|im_end|> -<|im_start|>assistant -' -Common Suffix: '<|im_end|> -<|im_start|>user -Continue.<|im_end|> -' -Left (difference): 'Let me help you.' -Right (difference): ' -{"name": "test_function_name", "arguments": {"param1": "value1", "param2": "value2"}} -' - -=== Diff: One vs Two tool calls (user, assistant) === -Common Prefix: '<|im_start|>system -You are a function calling AI model. You are provided with function signatures within XML tags. You may call one or more functions to assist with the user query. 
Don't make assumptions about what values to plug into functions. Here are the available tools: {"type": "function", "function": {"name": "test_function_name", "description": "test_function_name(param1: str, param2: str) - A test function for debugging - - Args: - param1(str): First parameter param2(str): Second parameter", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}} Use the following pydantic model json schema for each tool call you will make: {"properties": {"name": {"title": "Name", "type": "string"}, "arguments": {"title": "Arguments", "type": "object"}}, "required": ["name", "arguments"], "title": "FunctionCall", "type": "object"}} -For each function call return a json object with function name and arguments within XML tags as follows: - -{"name": , "arguments": } -<|im_end|> -<|im_start|>user -Hello, please help me.<|im_end|> -<|im_start|>assistant - -{"name": "test_function_name", "arguments": {"param1": "value1", "param2": "value2"}} -' -Common Suffix: '<|im_end|> -' -Left (difference): '' -Right (difference): ' - -{"name": "test_function_name", "arguments": {"param1": "value3", "param2": "value4"}} -' - -=== Diff: One vs Two tool calls (user, assistant, user) === -Common Prefix: '<|im_start|>system -You are a function calling AI model. You are provided with function signatures within XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. 
Here are the available tools: {"type": "function", "function": {"name": "test_function_name", "description": "test_function_name(param1: str, param2: str) - A test function for debugging - - Args: - param1(str): First parameter param2(str): Second parameter", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}} Use the following pydantic model json schema for each tool call you will make: {"properties": {"name": {"title": "Name", "type": "string"}, "arguments": {"title": "Arguments", "type": "object"}}, "required": ["name", "arguments"], "title": "FunctionCall", "type": "object"}} -For each function call return a json object with function name and arguments within XML tags as follows: - -{"name": , "arguments": } -<|im_end|> -<|im_start|>user -Hello, please help me.<|im_end|> -<|im_start|>assistant - -{"name": "test_function_name", "arguments": {"param1": "value1", "param2": "value2"}} -' -Common Suffix: '<|im_end|> -<|im_start|>user -Continue.<|im_end|> -' -Left (difference): '' -Right (difference): ' - -{"name": "test_function_name", "arguments": {"param1": "value3", "param2": "value4"}} -' - -=== Diff: Tool call with vs without reasoning_content (user, assistant) === -Common Prefix: '<|im_start|>system -You are a function calling AI model. You are provided with function signatures within XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. 
Here are the available tools: {"type": "function", "function": {"name": "test_function_name", "description": "test_function_name(param1: str, param2: str) - A test function for debugging - - Args: - param1(str): First parameter param2(str): Second parameter", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}} Use the following pydantic model json schema for each tool call you will make: {"properties": {"name": {"title": "Name", "type": "string"}, "arguments": {"title": "Arguments", "type": "object"}}, "required": ["name", "arguments"], "title": "FunctionCall", "type": "object"}} -For each function call return a json object with function name and arguments within XML tags as follows: - -{"name": , "arguments": } -<|im_end|> -<|im_start|>user -Hello, please help me.<|im_end|> -<|im_start|>assistant - -{"name": "test_function_name", "arguments": {"param1": "value1", "param2": "value2"}} -<|im_end|> -' -Common Suffix: '' -Left (difference): '' -Right (difference): '' - -=== Checking Reasoning Variables === -No reasoning/thinking-related variables were queried by the template - -================================================================================ - ANALYZING TEMPLATE: models/templates/Qwen-QwQ-32B.jinja -================================================================================ - -=== Template Capabilities (from jinja::caps) === -supports_tools: false -supports_tool_calls: true -supports_system_role: true -supports_parallel_tool_calls: true -requires_typed_content: false - -=== Diff: With vs Without Tools (single user message) === -Common Prefix: '<|im_start|>' -Common Suffix: 'user -Hello, please help me.<|im_end|> -' -Left (difference): '' -Right (difference): 'system - - -# Tools - -You may call one or more functions to assist with the user query. 
- -You are provided with function signatures within XML tags: - -{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}} - - -For each function call, return a json object with function name and arguments within XML tags: - -{"name": , "arguments": } -<|im_end|> -<|im_start|>' - -=== Diff: With vs Without add_generation_prompt (single user message) === -Common Prefix: '<|im_start|>user -Hello, please help me.<|im_end|> -' -Common Suffix: '' -Left (difference): '' -Right (difference): '<|im_start|>assistant - -' - -=== Diff: With vs Without reasoning_content (user, assistant) === -Common Prefix: '<|im_start|>user -Hello, please help me.<|im_end|> -<|im_start|>assistant -I can help you with that.<|im_end|> -' -Common Suffix: '' -Left (difference): '' -Right (difference): '' - -=== Diff: With vs Without reasoning_content (user, assistant, user) === -Common Prefix: '<|im_start|>user -Hello, please help me.<|im_end|> -<|im_start|>assistant -I can help you with that.<|im_end|> -<|im_start|>user -Thank you.<|im_end|> -' -Common Suffix: '' -Left (difference): '' -Right (difference): '' - -=== Diff: With vs Without tool call (user, assistant) === -Common Prefix: '<|im_start|>system - - -# Tools - -You may call one or more functions to assist with the user query. 
- -You are provided with function signatures within XML tags: - -{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}} - - -For each function call, return a json object with function name and arguments within XML tags: - -{"name": , "arguments": } -<|im_end|> -<|im_start|>user -Hello, please help me.<|im_end|> -<|im_start|>assistant -' -Common Suffix: '<|im_end|> -' -Left (difference): 'Let me help you.' -Right (difference): ' -{"name": "test_function_name", "arguments": {"param1": "value1", "param2": "value2"}} -' -Analysis failed: ------------- -While executing CallExpression at line 31, column 52 in source: -... {%- set content = message.content.split('')[-1].lstrip('\n') %}↵ ... - ^ -Error: Callee is not a function: got Undefined (hint: 'split') - -================================================================================ - ANALYZING TEMPLATE: models/templates/Qwen-Qwen2.5-7B-Instruct.jinja -================================================================================ - -=== Template Capabilities (from jinja::caps) === -supports_tools: false -supports_tool_calls: true -supports_system_role: true -supports_parallel_tool_calls: true -requires_typed_content: false - -=== Diff: With vs Without Tools (single user message) === -Common Prefix: '<|im_start|>system -You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' -Common Suffix: '<|im_end|> -<|im_start|>user -Hello, please help me.<|im_end|> -' -Left (difference): '' -Right (difference): ' - -# Tools - -You may call one or more functions to assist with the user query. 
- -You are provided with function signatures within XML tags: - -{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}} - - -For each function call, return a json object with function name and arguments within XML tags: - -{"name": , "arguments": } -' - -=== Diff: With vs Without add_generation_prompt (single user message) === -Common Prefix: '<|im_start|>system -You are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|> -<|im_start|>user -Hello, please help me.<|im_end|> -' -Common Suffix: '' -Left (difference): '' -Right (difference): '<|im_start|>assistant -' - -=== Diff: With vs Without reasoning_content (user, assistant) === -Common Prefix: '<|im_start|>system -You are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|> -<|im_start|>user -Hello, please help me.<|im_end|> -<|im_start|>assistant -I can help you with that.<|im_end|> -' -Common Suffix: '' -Left (difference): '' -Right (difference): '' - -=== Diff: With vs Without reasoning_content (user, assistant, user) === -Common Prefix: '<|im_start|>system -You are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|> -<|im_start|>user -Hello, please help me.<|im_end|> -<|im_start|>assistant -I can help you with that.<|im_end|> -<|im_start|>user -Thank you.<|im_end|> -' -Common Suffix: '' -Left (difference): '' -Right (difference): '' - -=== Diff: With vs Without tool call (user, assistant) === -Common Prefix: '<|im_start|>system -You are Qwen, created by Alibaba Cloud. You are a helpful assistant. - -# Tools - -You may call one or more functions to assist with the user query. 
- -You are provided with function signatures within XML tags: - -{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}} - - -For each function call, return a json object with function name and arguments within XML tags: - -{"name": , "arguments": } -<|im_end|> -<|im_start|>user -Hello, please help me.<|im_end|> -<|im_start|>assistant -' -Common Suffix: '<|im_end|> -' -Left (difference): 'Let me help you.' -Right (difference): ' -{"name": "test_function_name", "arguments": {"param1": "value1", "param2": "value2"}} -' - -=== Diff: With vs Without tool call (user, assistant, user) === -Common Prefix: '<|im_start|>system -You are Qwen, created by Alibaba Cloud. You are a helpful assistant. - -# Tools - -You may call one or more functions to assist with the user query. - -You are provided with function signatures within XML tags: - -{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}} - - -For each function call, return a json object with function name and arguments within XML tags: - -{"name": , "arguments": } -<|im_end|> -<|im_start|>user -Hello, please help me.<|im_end|> -<|im_start|>assistant -' -Common Suffix: '<|im_end|> -<|im_start|>user -Continue.<|im_end|> -' -Left (difference): 'Let me help you.' 
-Right (difference): ' -{"name": "test_function_name", "arguments": {"param1": "value1", "param2": "value2"}} -' - -=== Diff: One vs Two tool calls (user, assistant) === -Common Prefix: '<|im_start|>system -You are Qwen, created by Alibaba Cloud. You are a helpful assistant. - -# Tools - -You may call one or more functions to assist with the user query. - -You are provided with function signatures within XML tags: - -{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}} - - -For each function call, return a json object with function name and arguments within XML tags: - -{"name": , "arguments": } -<|im_end|> -<|im_start|>user -Hello, please help me.<|im_end|> -<|im_start|>assistant - -{"name": "test_function_name", "arguments": {"param1": "value1", "param2": "value2"}} -' -Common Suffix: '<|im_end|> -' -Left (difference): '' -Right (difference): ' - -{"name": "test_function_name", "arguments": {"param1": "value3", "param2": "value4"}} -' - -=== Diff: One vs Two tool calls (user, assistant, user) === -Common Prefix: '<|im_start|>system -You are Qwen, created by Alibaba Cloud. You are a helpful assistant. - -# Tools - -You may call one or more functions to assist with the user query. 
- -You are provided with function signatures within XML tags: - -{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}} - - -For each function call, return a json object with function name and arguments within XML tags: - -{"name": , "arguments": } -<|im_end|> -<|im_start|>user -Hello, please help me.<|im_end|> -<|im_start|>assistant - -{"name": "test_function_name", "arguments": {"param1": "value1", "param2": "value2"}} -' -Common Suffix: '<|im_end|> -<|im_start|>user -Continue.<|im_end|> -' -Left (difference): '' -Right (difference): ' - -{"name": "test_function_name", "arguments": {"param1": "value3", "param2": "value4"}} -' - -=== Diff: Tool call with vs without reasoning_content (user, assistant) === -Common Prefix: '<|im_start|>system -You are Qwen, created by Alibaba Cloud. You are a helpful assistant. - -# Tools - -You may call one or more functions to assist with the user query. 
- -You are provided with function signatures within XML tags: - -{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}} - - -For each function call, return a json object with function name and arguments within XML tags: - -{"name": , "arguments": } -<|im_end|> -<|im_start|>user -Hello, please help me.<|im_end|> -<|im_start|>assistant - -{"name": "test_function_name", "arguments": {"param1": "value1", "param2": "value2"}} -<|im_end|> -' -Common Suffix: '' -Left (difference): '' -Right (difference): '' - -=== Checking Reasoning Variables === -No reasoning/thinking-related variables were queried by the template - -================================================================================ - ANALYZING TEMPLATE: models/templates/Qwen3-Coder.jinja -================================================================================ - -=== Template Capabilities (from jinja::caps) === -supports_tools: true -supports_tool_calls: true -supports_system_role: true -supports_parallel_tool_calls: true -requires_typed_content: false - -=== Diff: With vs Without Tools (single user message) === -Common Prefix: '<|im_start|>' -Common Suffix: 'user -Hello, please help me.<|im_end|> -' -Left (difference): '' -Right (difference): 'system -You are Qwen, a helpful AI assistant that can interact with a computer to solve tasks. 
- -# Tools - -You have access to the following tools: - - - -test_function_name -A test function for debugging - - -param1 -string -First parameter - - -param2 -string -Second parameter - -["param1", "param2"] - - - - -If you choose to call a tool ONLY reply in the following format with NO suffix: - - - - -value_1 - - -value_2 - - - - - -Reminder: -- Function calls MUST follow the specified format: the tool calling block MUST begin with an opening tag and end with a closing tag. -- Required parameters MUST be specified -- You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after -- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls -<|im_end|> -<|im_start|>' - -=== Diff: With vs Without add_generation_prompt (single user message) === -Common Prefix: '<|im_start|>user -Hello, please help me.<|im_end|> -' -Common Suffix: '' -Left (difference): '' -Right (difference): '<|im_start|>assistant -' - -=== Diff: With vs Without reasoning_content (user, assistant) === -Common Prefix: '<|im_start|>user -Hello, please help me.<|im_end|> -<|im_start|>assistant -I can help you with that.<|im_end|> -' -Common Suffix: '' -Left (difference): '' -Right (difference): '' - -=== Diff: With vs Without reasoning_content (user, assistant, user) === -Common Prefix: '<|im_start|>user -Hello, please help me.<|im_end|> -<|im_start|>assistant -I can help you with that.<|im_end|> -<|im_start|>user -Thank you.<|im_end|> -' -Common Suffix: '' -Left (difference): '' -Right (difference): '' - -=== Diff: With vs Without tool call (user, assistant) === -Common Prefix: '<|im_start|>system -You are Qwen, a helpful AI assistant that can interact with a computer to solve tasks. 
- -# Tools - -You have access to the following tools: - - - -test_function_name -A test function for debugging - - -param1 -string -First parameter - - -param2 -string -Second parameter - -["param1", "param2"] - - - - -If you choose to call a tool ONLY reply in the following format with NO suffix: - - - - -value_1 - - -value_2 - - - - - -Reminder: -- Function calls MUST follow the specified format: the tool calling block MUST begin with an opening tag and end with a closing tag. -- Required parameters MUST be specified -- You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after -- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls -<|im_end|> -<|im_start|>user -Hello, please help me.<|im_end|> -<|im_start|>assistant -' -Common Suffix: '<|im_end|> -' -Left (difference): 'Let me help you.' -Right (difference): ' - - -value1 - - -value2 - - -' - -=== Diff: With vs Without tool call (user, assistant, user) === -Common Prefix: '<|im_start|>system -You are Qwen, a helpful AI assistant that can interact with a computer to solve tasks. - -# Tools - -You have access to the following tools: - - - -test_function_name -A test function for debugging - - -param1 -string -First parameter - - -param2 -string -Second parameter - -["param1", "param2"] - - - - -If you choose to call a tool ONLY reply in the following format with NO suffix: - - - - -value_1 - - -value_2 - - - - - -Reminder: -- Function calls MUST follow the specified format: the tool calling block MUST begin with an opening tag and end with a closing tag. 
-- Required parameters MUST be specified -- You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after -- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls -<|im_end|> -<|im_start|>user -Hello, please help me.<|im_end|> -<|im_start|>assistant -' -Common Suffix: '<|im_end|> -<|im_start|>user -Continue.<|im_end|> -' -Left (difference): 'Let me help you.' -Right (difference): ' - - -value1 - - -value2 - - -' - -=== Diff: One vs Two tool calls (user, assistant) === -Common Prefix: '<|im_start|>system -You are Qwen, a helpful AI assistant that can interact with a computer to solve tasks. - -# Tools - -You have access to the following tools: - - - -test_function_name -A test function for debugging - - -param1 -string -First parameter - - -param2 -string -Second parameter - -["param1", "param2"] - - - - -If you choose to call a tool ONLY reply in the following format with NO suffix: - - - - -value_1 - - -value_2 - - - - - -Reminder: -- Function calls MUST follow the specified format: the tool calling block MUST begin with an opening tag and end with a closing tag. -- Required parameters MUST be specified -- You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after -- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls -<|im_end|> -<|im_start|>user -Hello, please help me.<|im_end|> -<|im_start|>assistant - - - -value1 - - -value2 - - -' -Common Suffix: '<|im_end|> -' -Left (difference): '' -Right (difference): ' - - - -value3 - - -value4 - - -' - -=== Diff: One vs Two tool calls (user, assistant, user) === -Common Prefix: '<|im_start|>system -You are Qwen, a helpful AI assistant that can interact with a computer to solve tasks. 
- -# Tools - -You have access to the following tools: - - - -test_function_name -A test function for debugging - - -param1 -string -First parameter - - -param2 -string -Second parameter - -["param1", "param2"] - - - - -If you choose to call a tool ONLY reply in the following format with NO suffix: - - - - -value_1 - - -value_2 - - - - - -Reminder: -- Function calls MUST follow the specified format: the tool calling block MUST begin with an opening tag and end with a closing tag. -- Required parameters MUST be specified -- You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after -- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls -<|im_end|> -<|im_start|>user -Hello, please help me.<|im_end|> -<|im_start|>assistant - - - -value1 - - -value2 - - -' -Common Suffix: '<|im_end|> -<|im_start|>user -Continue.<|im_end|> -' -Left (difference): '' -Right (difference): ' - - - -value3 - - -value4 - - -' - -=== Diff: Tool call with vs without reasoning_content (user, assistant) === -Common Prefix: '<|im_start|>system -You are Qwen, a helpful AI assistant that can interact with a computer to solve tasks. - -# Tools - -You have access to the following tools: - - - -test_function_name -A test function for debugging - - -param1 -string -First parameter - - -param2 -string -Second parameter - -["param1", "param2"] - - - - -If you choose to call a tool ONLY reply in the following format with NO suffix: - - - - -value_1 - - -value_2 - - - - - -Reminder: -- Function calls MUST follow the specified format: the tool calling block MUST begin with an opening tag and end with a closing tag. 
-- Required parameters MUST be specified -- You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after -- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls -<|im_end|> -<|im_start|>user -Hello, please help me.<|im_end|> -<|im_start|>assistant - - - -value1 - - -value2 - - -<|im_end|> -' -Common Suffix: '' -Left (difference): '' -Right (difference): '' - -=== Checking Reasoning Variables === -No reasoning/thinking-related variables were queried by the template - -================================================================================ - ANALYZING TEMPLATE: models/templates/deepseek-ai-DeepSeek-R1-Distill-Llama-8B.jinja -================================================================================ - -=== Template Capabilities (from jinja::caps) === -supports_tools: false -supports_tool_calls: false -supports_system_role: true -supports_parallel_tool_calls: false -requires_typed_content: false - -=== Diff: With vs Without Tools (single user message) === -Common Prefix: '<|User|>Hello, please help me.' -Common Suffix: '' -Left (difference): '' -Right (difference): '' - -=== Diff: With vs Without add_generation_prompt (single user message) === -Common Prefix: '<|User|>Hello, please help me.' -Common Suffix: '' -Left (difference): '' -Right (difference): '<|Assistant|> -' - -=== Diff: With vs Without reasoning_content (user, assistant) === -Common Prefix: '<|User|>Hello, please help me.<|Assistant|>I can help you with that.<|end▁of▁sentence|>' -Common Suffix: '' -Left (difference): '' -Right (difference): '' - -=== Diff: With vs Without reasoning_content (user, assistant, user) === -Common Prefix: '<|User|>Hello, please help me.<|Assistant|>I can help you with that.<|end▁of▁sentence|><|User|>Thank you.' 
-Common Suffix: '' -Left (difference): '' -Right (difference): '' - -=== Diff: With vs Without tool call (user, assistant) === -Common Prefix: '<|User|>Hello, please help me.<|Assistant|>' -Common Suffix: '' -Left (difference): 'Let me help you.<|end▁of▁sentence|>' -Right (difference): '<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>test_function_name -```json -{"param1":0x6414d8b028d0, "param2":0x6414d8abda40} -```<|tool▁call▁end|>' - -=== Diff: With vs Without tool call (user, assistant, user) === -Common Prefix: '<|User|>Hello, please help me.<|Assistant|>' -Common Suffix: '<|User|>Continue.' -Left (difference): 'Let me help you.<|end▁of▁sentence|>' -Right (difference): '<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>test_function_name -```json -{"param1":0x6414d8abea30, "param2":0x6414d8aba0e0} -```<|tool▁call▁end|>' - -=== Diff: One vs Two tool calls (user, assistant) === -Common Prefix: '<|User|>Hello, please help me.<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>test_function_name -```json -{"param1":0x6414d8a' -Common Suffix: '' -Left (difference): 'ce970, "param2":0x6414d8abbb70} -```<|tool▁call▁end|>' -Right (difference): 'b5ac0, "param2":0x6414d8aba960} -```<|tool▁call▁end|> -<|tool▁call▁begin|>function<|tool▁sep|>test_function_name -```json -{"param1":0x6414d8b09df0, "param2":0x6414d8b0d3a0} -```<|tool▁call▁end|><|tool▁calls▁end|><|end▁of▁sentence|>' - -=== Diff: One vs Two tool calls (user, assistant, user) === -Common Prefix: '<|User|>Hello, please help me.<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>test_function_name -```json -{"param1":0x6414d8' -Common Suffix: '<|User|>Continue.' 
-Left (difference): 'abb400, "param2":0x6414d8aff760} -```<|tool▁call▁end|>' -Right (difference): 'b17be0, "param2":0x6414d8ab7550} -```<|tool▁call▁end|> -<|tool▁call▁begin|>function<|tool▁sep|>test_function_name -```json -{"param1":0x6414d8aba960, "param2":0x6414d8ab5ac0} -```<|tool▁call▁end|><|tool▁calls▁end|><|end▁of▁sentence|>' - -=== Diff: Tool call with vs without reasoning_content (user, assistant) === -Common Prefix: '<|User|>Hello, please help me.<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>test_function_name -```json -{"param1":0x6414d8a' -Common Suffix: '0} -```<|tool▁call▁end|>' -Left (difference): 'def30, "param2":0x6414d8aba0e' -Right (difference): 'be4e0, "param2":0x6414d8ae266' - -=== Checking Reasoning Variables === -No reasoning/thinking-related variables were queried by the template - -================================================================================ - ANALYZING TEMPLATE: models/templates/deepseek-ai-DeepSeek-R1-Distill-Qwen-32B.jinja -================================================================================ - -=== Template Capabilities (from jinja::caps) === -supports_tools: false -supports_tool_calls: false -supports_system_role: true -supports_parallel_tool_calls: false -requires_typed_content: false - -=== Diff: With vs Without Tools (single user message) === -Common Prefix: '<|User|>Hello, please help me.' -Common Suffix: '' -Left (difference): '' -Right (difference): '' - -=== Diff: With vs Without add_generation_prompt (single user message) === -Common Prefix: '<|User|>Hello, please help me.' 
-Common Suffix: '' -Left (difference): '' -Right (difference): '<|Assistant|> -' - -=== Diff: With vs Without reasoning_content (user, assistant) === -Common Prefix: '<|User|>Hello, please help me.<|Assistant|>I can help you with that.<|end▁of▁sentence|>' -Common Suffix: '' -Left (difference): '' -Right (difference): '' - -=== Diff: With vs Without reasoning_content (user, assistant, user) === -Common Prefix: '<|User|>Hello, please help me.<|Assistant|>I can help you with that.<|end▁of▁sentence|><|User|>Thank you.' -Common Suffix: '' -Left (difference): '' -Right (difference): '' - -=== Diff: With vs Without tool call (user, assistant) === -Common Prefix: '<|User|>Hello, please help me.<|Assistant|>' -Common Suffix: '<|end▁of▁sentence|>' -Left (difference): 'Let me help you.' -Right (difference): '<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>test_function_name -```json -{"param1":0x6414d8ab9ec0, "param2":0x6414d8ac6240} -```<|tool▁call▁end|><|tool▁calls▁end|>' - -=== Diff: With vs Without tool call (user, assistant, user) === -Common Prefix: '<|User|>Hello, please help me.<|Assistant|>' -Common Suffix: '<|end▁of▁sentence|><|User|>Continue.' -Left (difference): 'Let me help you.' 
-Right (difference): '<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>test_function_name -```json -{"param1":0x6414d8aeabd0, "param2":0x6414d8abda40} -```<|tool▁call▁end|><|tool▁calls▁end|>' - -=== Diff: One vs Two tool calls (user, assistant) === -Common Prefix: '<|User|>Hello, please help me.<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>test_function_name -```json -{"param1":0x6414d8ab' -Common Suffix: '0} -```<|tool▁call▁end|><|tool▁calls▁end|><|end▁of▁sentence|>' -Left (difference): 'b620, "param2":0x6414d8abd82' -Right (difference): '9420, "param2":0x6414d8ac14b0} -```<|tool▁call▁end|> -<|tool▁call▁begin|>function<|tool▁sep|>test_function_name -```json -{"param1":0x6414d8abbb70, "param2":0x6414d8abda4' - -=== Diff: One vs Two tool calls (user, assistant, user) === -Common Prefix: '<|User|>Hello, please help me.<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>test_function_name -```json -{"param1":0x6414d8a' -Common Suffix: '0} -```<|tool▁call▁end|><|tool▁calls▁end|><|end▁of▁sentence|><|User|>Continue.' 
-Left (difference): 'c4150, "param2":0x6414d8abf2b' -Right (difference): 'bea30, "param2":0x6414d8aba410} -```<|tool▁call▁end|> -<|tool▁call▁begin|>function<|tool▁sep|>test_function_name -```json -{"param1":0x6414d8ab9420, "param2":0x6414d8ac46a' - -=== Diff: Tool call with vs without reasoning_content (user, assistant) === -Common Prefix: '<|User|>Hello, please help me.<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>test_function_name -```json -{"param1":0x6414d8ab' -Common Suffix: '0} -```<|tool▁call▁end|><|tool▁calls▁end|><|end▁of▁sentence|>' -Left (difference): '5ac0, "param2":0x6414d8adef3' -Right (difference): 'a960, "param2":0x6414d8ac4bf' - -=== Checking Reasoning Variables === -No reasoning/thinking-related variables were queried by the template - -================================================================================ - ANALYZING TEMPLATE: models/templates/deepseek-ai-DeepSeek-V3.1.jinja -================================================================================ - -=== Template Capabilities (from jinja::caps) === -supports_tools: false -supports_tool_calls: true -supports_system_role: true -supports_parallel_tool_calls: true -requires_typed_content: false - -=== Diff: With vs Without Tools (single user message) === -Common Prefix: '<|User|>Hello, please help me.' -Common Suffix: '' -Left (difference): '' -Right (difference): '' - -=== Diff: With vs Without add_generation_prompt (single user message) === -Common Prefix: '<|User|>Hello, please help me.' 
-Common Suffix: '' -Left (difference): '' -Right (difference): '<|Assistant|>' - -=== Diff: With vs Without reasoning_content (user, assistant) === -Common Prefix: '<|User|>Hello, please help me.<|Assistant|>I can help you with that.<|end▁of▁sentence|>' -Common Suffix: '' -Left (difference): '' -Right (difference): '' - -=== Diff: With vs Without reasoning_content (user, assistant, user) === -Common Prefix: '<|User|>Hello, please help me.<|Assistant|>I can help you with that.<|end▁of▁sentence|><|User|>Thank you.' -Common Suffix: '' -Left (difference): '' -Right (difference): '' - -=== Diff: With vs Without tool call (user, assistant) === -Common Prefix: '<|User|>Hello, please help me.<|Assistant|>' -Common Suffix: '<|end▁of▁sentence|>' -Left (difference): 'Let me help you.' -Right (difference): '<|tool▁calls▁begin|><|tool▁call▁begin|>test_function_name<|tool▁sep|>{"param1":0x6414d8ae5a10, "param2":0x6414d8abe810}<|tool▁call▁end|><|tool▁calls▁end|>' - -=== Diff: With vs Without tool call (user, assistant, user) === -Common Prefix: '<|User|>Hello, please help me.<|Assistant|>' -Common Suffix: '<|end▁of▁sentence|><|User|>Continue.' -Left (difference): 'Let me help you.' 
-Right (difference): '<|tool▁calls▁begin|><|tool▁call▁begin|>test_function_name<|tool▁sep|>{"param1":0x6414d8ac7cb0, "param2":0x6414d8abe4e0}<|tool▁call▁end|><|tool▁calls▁end|>' - -=== Diff: One vs Two tool calls (user, assistant) === -Common Prefix: '<|User|>Hello, please help me.<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>test_function_name<|tool▁sep|>{"param1":0x6414d8a' -Common Suffix: '0}<|tool▁call▁end|><|tool▁calls▁end|><|end▁of▁sentence|>' -Left (difference): 'c5f10, "param2":0x6414d8ac59c' -Right (difference): 'be4e0, "param2":0x6414d8ac2e30}<|tool▁call▁end|><|tool▁call▁begin|>test_function_name<|tool▁sep|>{"param1":0x6414d8acbf00, "param2":0x6414d8ac7cb' - -=== Diff: One vs Two tool calls (user, assistant, user) === -Common Prefix: '<|User|>Hello, please help me.<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>test_function_name<|tool▁sep|>{"param1":0x6414d8' -Common Suffix: '0}<|tool▁call▁end|><|tool▁calls▁end|><|end▁of▁sentence|><|User|>Continue.' -Left (difference): 'b0a810, "param2":0x6414d8adf7b' -Right (difference): 'ac2e30, "param2":0x6414d8ac46a0}<|tool▁call▁end|><|tool▁call▁begin|>test_function_name<|tool▁sep|>{"param1":0x6414d8ac3e20, "param2":0x6414d8abe4e' - -=== Diff: Tool call with vs without reasoning_content (user, assistant) === -Common Prefix: '<|User|>Hello, please help me.<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>test_function_name<|tool▁sep|>{"param1":0x6414d8ac' -Common Suffix: '0}<|tool▁call▁end|><|tool▁calls▁end|><|end▁of▁sentence|>' -Left (difference): '7cb0, "param2":0x6414d8ad63e' -Right (difference): '59c0, "param2":0x6414d8abc0c' - -=== Checking Reasoning Variables === -No reasoning/thinking-related variables were queried by the template - -================================================================================ - ANALYZING TEMPLATE: models/templates/fireworks-ai-llama-3-firefunction-v2.jinja -================================================================================ - -=== Template 
Capabilities (from jinja::caps) === -supports_tools: false -supports_tool_calls: true -supports_system_role: true -supports_parallel_tool_calls: true -requires_typed_content: false - -=== Diff: With vs Without Tools (single user message) === -Common Prefix: '<|start_header_id|>system<|end_header_id|> - -You are a helpful assistant with access to functions. -In addition to plain text responses, you can chose to call one or more of the provided functions. - -Use the following rule to decide when to call a function: - * if the response can be generated from your internal knowledge (e.g., as in the case of queries like "What is the capital of Poland?"), do so - * if you need external information that can be obtained by calling one or more of the provided functions, generate a function calls - -If you decide to call functions: - * prefix function calls with functools marker (no closing marker required) - * all function calls should be generated in a single JSON list formatted as functools[{"name": [function name], "arguments": [function arguments as JSON]}, ...] - * follow the provided JSON schema. Do not hallucinate arguments or values. Do to blindly copy values from the provided samples - * respect the argument type formatting. E.g., if the type if number and format is float, write value 7 as 7.0 - * make sure you pick the right functions that match the user intent - -Available functions as JSON spec: - -Today is .<|eot_id|><|start_header_id|>user<|end_header_id|> - -Hello, please help me.<|eot_id|><|start_header_id|>assistant<|end_header_id|> - -' -Common Suffix: '' -Left (difference): '' -Right (difference): '' - -=== Diff: With vs Without add_generation_prompt (single user message) === -Common Prefix: '<|start_header_id|>system<|end_header_id|> - -You are a helpful assistant with access to functions. -In addition to plain text responses, you can chose to call one or more of the provided functions. 
- -Use the following rule to decide when to call a function: - * if the response can be generated from your internal knowledge (e.g., as in the case of queries like "What is the capital of Poland?"), do so - * if you need external information that can be obtained by calling one or more of the provided functions, generate a function calls - -If you decide to call functions: - * prefix function calls with functools marker (no closing marker required) - * all function calls should be generated in a single JSON list formatted as functools[{"name": [function name], "arguments": [function arguments as JSON]}, ...] - * follow the provided JSON schema. Do not hallucinate arguments or values. Do to blindly copy values from the provided samples - * respect the argument type formatting. E.g., if the type if number and format is float, write value 7 as 7.0 - * make sure you pick the right functions that match the user intent - -Available functions as JSON spec: - -Today is .<|eot_id|><|start_header_id|>user<|end_header_id|> - -Hello, please help me.<|eot_id|><|start_header_id|>assistant<|end_header_id|> - -' -Common Suffix: '' -Left (difference): '' -Right (difference): '' - -=== Diff: With vs Without reasoning_content (user, assistant) === -Common Prefix: '<|start_header_id|>system<|end_header_id|> - -You are a helpful assistant with access to functions. -In addition to plain text responses, you can chose to call one or more of the provided functions. 
- -Use the following rule to decide when to call a function: - * if the response can be generated from your internal knowledge (e.g., as in the case of queries like "What is the capital of Poland?"), do so - * if you need external information that can be obtained by calling one or more of the provided functions, generate a function calls - -If you decide to call functions: - * prefix function calls with functools marker (no closing marker required) - * all function calls should be generated in a single JSON list formatted as functools[{"name": [function name], "arguments": [function arguments as JSON]}, ...] - * follow the provided JSON schema. Do not hallucinate arguments or values. Do to blindly copy values from the provided samples - * respect the argument type formatting. E.g., if the type if number and format is float, write value 7 as 7.0 - * make sure you pick the right functions that match the user intent - -Available functions as JSON spec: - -Today is .<|eot_id|><|start_header_id|>user<|end_header_id|> - -Hello, please help me.<|eot_id|><|start_header_id|>assistant<|end_header_id|> - -I can help you with that.<|eot_id|><|start_header_id|>assistant<|end_header_id|> - -' -Common Suffix: '' -Left (difference): '' -Right (difference): '' - -=== Diff: With vs Without reasoning_content (user, assistant, user) === -Common Prefix: '<|start_header_id|>system<|end_header_id|> - -You are a helpful assistant with access to functions. -In addition to plain text responses, you can chose to call one or more of the provided functions. 
- -Use the following rule to decide when to call a function: - * if the response can be generated from your internal knowledge (e.g., as in the case of queries like "What is the capital of Poland?"), do so - * if you need external information that can be obtained by calling one or more of the provided functions, generate a function calls - -If you decide to call functions: - * prefix function calls with functools marker (no closing marker required) - * all function calls should be generated in a single JSON list formatted as functools[{"name": [function name], "arguments": [function arguments as JSON]}, ...] - * follow the provided JSON schema. Do not hallucinate arguments or values. Do to blindly copy values from the provided samples - * respect the argument type formatting. E.g., if the type if number and format is float, write value 7 as 7.0 - * make sure you pick the right functions that match the user intent - -Available functions as JSON spec: - -Today is .<|eot_id|><|start_header_id|>user<|end_header_id|> - -Hello, please help me.<|eot_id|><|start_header_id|>assistant<|end_header_id|> - -I can help you with that.<|eot_id|><|start_header_id|>user<|end_header_id|> - -Thank you.<|eot_id|><|start_header_id|>assistant<|end_header_id|> - -' -Common Suffix: '' -Left (difference): '' -Right (difference): '' - -=== Diff: With vs Without tool call (user, assistant) === -Common Prefix: '<|start_header_id|>system<|end_header_id|> - -You are a helpful assistant with access to functions. -In addition to plain text responses, you can chose to call one or more of the provided functions. 
- -Use the following rule to decide when to call a function: - * if the response can be generated from your internal knowledge (e.g., as in the case of queries like "What is the capital of Poland?"), do so - * if you need external information that can be obtained by calling one or more of the provided functions, generate a function calls - -If you decide to call functions: - * prefix function calls with functools marker (no closing marker required) - * all function calls should be generated in a single JSON list formatted as functools[{"name": [function name], "arguments": [function arguments as JSON]}, ...] - * follow the provided JSON schema. Do not hallucinate arguments or values. Do to blindly copy values from the provided samples - * respect the argument type formatting. E.g., if the type if number and format is float, write value 7 as 7.0 - * make sure you pick the right functions that match the user intent - -Available functions as JSON spec: - -Today is .<|eot_id|><|start_header_id|>user<|end_header_id|> - -Hello, please help me.<|eot_id|><|start_header_id|>assistant<|end_header_id|> - -' -Common Suffix: '<|eot_id|><|start_header_id|>assistant<|end_header_id|> - -' -Left (difference): 'Let me help you.' -Right (difference): ' functools[{"name": "test_function_name", "arguments": {"param1":0x6414d8ab9970, "param2":0x6414d8ac3380}}]' - -=== Diff: With vs Without tool call (user, assistant, user) === -Common Prefix: '<|start_header_id|>system<|end_header_id|> - -You are a helpful assistant with access to functions. -In addition to plain text responses, you can chose to call one or more of the provided functions. 
- -Use the following rule to decide when to call a function: - * if the response can be generated from your internal knowledge (e.g., as in the case of queries like "What is the capital of Poland?"), do so - * if you need external information that can be obtained by calling one or more of the provided functions, generate a function calls - -If you decide to call functions: - * prefix function calls with functools marker (no closing marker required) - * all function calls should be generated in a single JSON list formatted as functools[{"name": [function name], "arguments": [function arguments as JSON]}, ...] - * follow the provided JSON schema. Do not hallucinate arguments or values. Do to blindly copy values from the provided samples - * respect the argument type formatting. E.g., if the type if number and format is float, write value 7 as 7.0 - * make sure you pick the right functions that match the user intent - -Available functions as JSON spec: - -Today is .<|eot_id|><|start_header_id|>user<|end_header_id|> - -Hello, please help me.<|eot_id|><|start_header_id|>assistant<|end_header_id|> - -' -Common Suffix: '<|eot_id|><|start_header_id|>user<|end_header_id|> - -Continue.<|eot_id|><|start_header_id|>assistant<|end_header_id|> - -' -Left (difference): 'Let me help you.' -Right (difference): ' functools[{"name": "test_function_name", "arguments": {"param1":0x6414d8b04f50, "param2":0x6414d8ab6cd0}}]' - -=== Diff: One vs Two tool calls (user, assistant) === -Common Prefix: '<|start_header_id|>system<|end_header_id|> - -You are a helpful assistant with access to functions. -In addition to plain text responses, you can chose to call one or more of the provided functions. 
- -Use the following rule to decide when to call a function: - * if the response can be generated from your internal knowledge (e.g., as in the case of queries like "What is the capital of Poland?"), do so - * if you need external information that can be obtained by calling one or more of the provided functions, generate a function calls - -If you decide to call functions: - * prefix function calls with functools marker (no closing marker required) - * all function calls should be generated in a single JSON list formatted as functools[{"name": [function name], "arguments": [function arguments as JSON]}, ...] - * follow the provided JSON schema. Do not hallucinate arguments or values. Do to blindly copy values from the provided samples - * respect the argument type formatting. E.g., if the type if number and format is float, write value 7 as 7.0 - * make sure you pick the right functions that match the user intent - -Available functions as JSON spec: - -Today is .<|eot_id|><|start_header_id|>user<|end_header_id|> - -Hello, please help me.<|eot_id|><|start_header_id|>assistant<|end_header_id|> - - functools[{"name": "test_function_name", "arguments": {"param1":0x6414d8a' -Common Suffix: '0}}]<|eot_id|><|start_header_id|>assistant<|end_header_id|> - -' -Left (difference): 'bb950, "param2":0x6414d8abb40' -Right (difference): 'c1290, "param2":0x6414d8abf2b0}}, {"name": "test_function_name", "arguments": {"param1":0x6414d8b04f50, "param2":0x6414d8adef3' - -=== Diff: One vs Two tool calls (user, assistant, user) === -Common Prefix: '<|start_header_id|>system<|end_header_id|> - -You are a helpful assistant with access to functions. -In addition to plain text responses, you can chose to call one or more of the provided functions. 
- -Use the following rule to decide when to call a function: - * if the response can be generated from your internal knowledge (e.g., as in the case of queries like "What is the capital of Poland?"), do so - * if you need external information that can be obtained by calling one or more of the provided functions, generate a function calls - -If you decide to call functions: - * prefix function calls with functools marker (no closing marker required) - * all function calls should be generated in a single JSON list formatted as functools[{"name": [function name], "arguments": [function arguments as JSON]}, ...] - * follow the provided JSON schema. Do not hallucinate arguments or values. Do to blindly copy values from the provided samples - * respect the argument type formatting. E.g., if the type if number and format is float, write value 7 as 7.0 - * make sure you pick the right functions that match the user intent - -Available functions as JSON spec: - -Today is .<|eot_id|><|start_header_id|>user<|end_header_id|> - -Hello, please help me.<|eot_id|><|start_header_id|>assistant<|end_header_id|> - - functools[{"name": "test_function_name", "arguments": {"param1":0x6414d8a' -Common Suffix: '0}}]<|eot_id|><|start_header_id|>user<|end_header_id|> - -Continue.<|eot_id|><|start_header_id|>assistant<|end_header_id|> - -' -Left (difference): 'c4150, "param2":0x6414d8b103a' -Right (difference): 'bed60, "param2":0x6414d8ae0430}}, {"name": "test_function_name", "arguments": {"param1":0x6414d8adef30, "param2":0x6414d8b04f5' - -=== Diff: Tool call with vs without reasoning_content (user, assistant) === -Common Prefix: '<|start_header_id|>system<|end_header_id|> - -You are a helpful assistant with access to functions. -In addition to plain text responses, you can chose to call one or more of the provided functions. 
- -Use the following rule to decide when to call a function: - * if the response can be generated from your internal knowledge (e.g., as in the case of queries like "What is the capital of Poland?"), do so - * if you need external information that can be obtained by calling one or more of the provided functions, generate a function calls - -If you decide to call functions: - * prefix function calls with functools marker (no closing marker required) - * all function calls should be generated in a single JSON list formatted as functools[{"name": [function name], "arguments": [function arguments as JSON]}, ...] - * follow the provided JSON schema. Do not hallucinate arguments or values. Do to blindly copy values from the provided samples - * respect the argument type formatting. E.g., if the type if number and format is float, write value 7 as 7.0 - * make sure you pick the right functions that match the user intent - -Available functions as JSON spec: - -Today is .<|eot_id|><|start_header_id|>user<|end_header_id|> - -Hello, please help me.<|eot_id|><|start_header_id|>assistant<|end_header_id|> - - functools[{"name": "test_function_name", "arguments": {"param1":0x6414d8a' -Common Suffix: '0}}]<|eot_id|><|start_header_id|>assistant<|end_header_id|> - -' -Left (difference): 'c46a0, "param2":0x6414d8b2231' -Right (difference): 'bb950, "param2":0x6414d8abb40' - -=== Checking Reasoning Variables === -No reasoning/thinking-related variables were queried by the template - -================================================================================ - ANALYZING TEMPLATE: models/templates/google-gemma-2-2b-it.jinja -================================================================================ - -=== Template Capabilities (from jinja::caps) === -supports_tools: false -supports_tool_calls: false -supports_system_role: false -supports_parallel_tool_calls: false -requires_typed_content: false - -=== Diff: With vs Without Tools (single user message) === -Common Prefix: 'user 
-Hello, please help me. -' -Common Suffix: '' -Left (difference): '' -Right (difference): '' - -=== Diff: With vs Without add_generation_prompt (single user message) === -Common Prefix: 'user -Hello, please help me. -' -Common Suffix: '' -Left (difference): '' -Right (difference): 'model -' - -=== Diff: With vs Without reasoning_content (user, assistant) === -Common Prefix: 'user -Hello, please help me. -model -I can help you with that. -' -Common Suffix: '' -Left (difference): '' -Right (difference): '' - -=== Diff: With vs Without reasoning_content (user, assistant, user) === -Common Prefix: 'user -Hello, please help me. -model -I can help you with that. -user -Thank you. -' -Common Suffix: '' -Left (difference): '' -Right (difference): '' - -=== Diff: With vs Without tool call (user, assistant) === -Common Prefix: 'user -Hello, please help me. -model -' -Common Suffix: ' -' -Left (difference): 'Let me help you.' -Right (difference): '' - -=== Diff: With vs Without tool call (user, assistant, user) === -Common Prefix: 'user -Hello, please help me. -model -' -Common Suffix: ' -user -Continue. -' -Left (difference): 'Let me help you.' -Right (difference): '' - -=== Diff: One vs Two tool calls (user, assistant) === -Common Prefix: 'user -Hello, please help me. -model - -' -Common Suffix: '' -Left (difference): '' -Right (difference): '' - -=== Diff: One vs Two tool calls (user, assistant, user) === -Common Prefix: 'user -Hello, please help me. -model - -user -Continue. -' -Common Suffix: '' -Left (difference): '' -Right (difference): '' - -=== Diff: Tool call with vs without reasoning_content (user, assistant) === -Common Prefix: 'user -Hello, please help me. 
-model - -' -Common Suffix: '' -Left (difference): '' -Right (difference): '' - -=== Checking Reasoning Variables === -No reasoning/thinking-related variables were queried by the template - -================================================================================ - ANALYZING TEMPLATE: models/templates/ibm-granite-granite-3.3-2B-Instruct.jinja -================================================================================ - -=== Template Capabilities (from jinja::caps) === -supports_tools: false -supports_tool_calls: false -supports_system_role: true -supports_parallel_tool_calls: false -requires_typed_content: false - -=== Diff: With vs Without Tools (single user message) === -Common Prefix: '<|start_of_role|>system<|end_of_role|>Knowledge Cutoff Date: April 2024. Today's Date: January 26, 2026. You are Granite, developed by IBM. You are a helpful ' -Common Suffix: '<|end_of_text|> -<|start_of_role|>user<|end_of_role|>Hello, please help me.<|end_of_text|> -' -Left (difference): 'AI assistant.' -Right (difference): 'assistant with access to the following tools. When a tool is required to answer the user's query, respond only with <|tool_call|> followed by a JSON list of tools used. If a tool does not exist in the provided list of tools, notify the user that you do not have the ability to fulfill the request.<|end_of_text|> -<|start_of_role|>available_tools<|end_of_role|>[ - { - "type": "function", - "function": { - "name": "test_function_name", - "description": "A test function for debugging", - "parameters": { - "type": "object", - "properties": { - "param1": { - "type": "string", - "description": "First parameter" - }, - "param2": { - "type": "string", - "description": "Second parameter" - } - }, - "required": [ - "param1", - "param2" - ] - } - } - } -]' - -=== Diff: With vs Without add_generation_prompt (single user message) === -Common Prefix: '<|start_of_role|>system<|end_of_role|>Knowledge Cutoff Date: April 2024. Today's Date: January 26, 2026. 
You are Granite, developed by IBM. You are a helpful AI assistant.<|end_of_text|> -<|start_of_role|>user<|end_of_role|>Hello, please help me.<|end_of_text|> -' -Common Suffix: '' -Left (difference): '' -Right (difference): '<|start_of_role|>assistant<|end_of_role|>' - -=== Diff: With vs Without reasoning_content (user, assistant) === -Common Prefix: '<|start_of_role|>system<|end_of_role|>Knowledge Cutoff Date: April 2024. Today's Date: January 26, 2026. You are Granite, developed by IBM. You are a helpful AI assistant.<|end_of_text|> -<|start_of_role|>user<|end_of_role|>Hello, please help me.<|end_of_text|> -<|start_of_role|>assistant<|end_of_role|>I can help you with that.<|end_of_text|> -' -Common Suffix: '' -Left (difference): '' -Right (difference): '' - -=== Diff: With vs Without reasoning_content (user, assistant, user) === -Common Prefix: '<|start_of_role|>system<|end_of_role|>Knowledge Cutoff Date: April 2024. Today's Date: January 26, 2026. You are Granite, developed by IBM. You are a helpful AI assistant.<|end_of_text|> -<|start_of_role|>user<|end_of_role|>Hello, please help me.<|end_of_text|> -<|start_of_role|>assistant<|end_of_role|>I can help you with that.<|end_of_text|> -<|start_of_role|>user<|end_of_role|>Thank you.<|end_of_text|> -' -Common Suffix: '' -Left (difference): '' -Right (difference): '' - -=== Diff: With vs Without tool call (user, assistant) === -Common Prefix: '<|start_of_role|>system<|end_of_role|>Knowledge Cutoff Date: April 2024. Today's Date: January 26, 2026. You are Granite, developed by IBM. You are a helpful assistant with access to the following tools. When a tool is required to answer the user's query, respond only with <|tool_call|> followed by a JSON list of tools used. 
If a tool does not exist in the provided list of tools, notify the user that you do not have the ability to fulfill the request.<|end_of_text|> -<|start_of_role|>available_tools<|end_of_role|>[ - { - "type": "function", - "function": { - "name": "test_function_name", - "description": "A test function for debugging", - "parameters": { - "type": "object", - "properties": { - "param1": { - "type": "string", - "description": "First parameter" - }, - "param2": { - "type": "string", - "description": "Second parameter" - } - }, - "required": [ - "param1", - "param2" - ] - } - } - } -]<|end_of_text|> -<|start_of_role|>user<|end_of_role|>Hello, please help me.<|end_of_text|> -<|start_of_role|>assistant<|end_of_role|>' -Common Suffix: '<|end_of_text|> -' -Left (difference): 'Let me help you.' -Right (difference): '' - -=== Diff: With vs Without tool call (user, assistant, user) === -Common Prefix: '<|start_of_role|>system<|end_of_role|>Knowledge Cutoff Date: April 2024. Today's Date: January 26, 2026. You are Granite, developed by IBM. You are a helpful assistant with access to the following tools. When a tool is required to answer the user's query, respond only with <|tool_call|> followed by a JSON list of tools used. 
If a tool does not exist in the provided list of tools, notify the user that you do not have the ability to fulfill the request.<|end_of_text|> -<|start_of_role|>available_tools<|end_of_role|>[ - { - "type": "function", - "function": { - "name": "test_function_name", - "description": "A test function for debugging", - "parameters": { - "type": "object", - "properties": { - "param1": { - "type": "string", - "description": "First parameter" - }, - "param2": { - "type": "string", - "description": "Second parameter" - } - }, - "required": [ - "param1", - "param2" - ] - } - } - } -]<|end_of_text|> -<|start_of_role|>user<|end_of_role|>Hello, please help me.<|end_of_text|> -<|start_of_role|>assistant<|end_of_role|>' -Common Suffix: '<|end_of_text|> -<|start_of_role|>user<|end_of_role|>Continue.<|end_of_text|> -' -Left (difference): 'Let me help you.' -Right (difference): '' - -=== Diff: One vs Two tool calls (user, assistant) === -Common Prefix: '<|start_of_role|>system<|end_of_role|>Knowledge Cutoff Date: April 2024. Today's Date: January 26, 2026. You are Granite, developed by IBM. You are a helpful assistant with access to the following tools. When a tool is required to answer the user's query, respond only with <|tool_call|> followed by a JSON list of tools used. 
If a tool does not exist in the provided list of tools, notify the user that you do not have the ability to fulfill the request.<|end_of_text|> -<|start_of_role|>available_tools<|end_of_role|>[ - { - "type": "function", - "function": { - "name": "test_function_name", - "description": "A test function for debugging", - "parameters": { - "type": "object", - "properties": { - "param1": { - "type": "string", - "description": "First parameter" - }, - "param2": { - "type": "string", - "description": "Second parameter" - } - }, - "required": [ - "param1", - "param2" - ] - } - } - } -]<|end_of_text|> -<|start_of_role|>user<|end_of_role|>Hello, please help me.<|end_of_text|> -<|start_of_role|>assistant<|end_of_role|><|end_of_text|> -' -Common Suffix: '' -Left (difference): '' -Right (difference): '' - -=== Diff: One vs Two tool calls (user, assistant, user) === -Common Prefix: '<|start_of_role|>system<|end_of_role|>Knowledge Cutoff Date: April 2024. Today's Date: January 26, 2026. You are Granite, developed by IBM. You are a helpful assistant with access to the following tools. When a tool is required to answer the user's query, respond only with <|tool_call|> followed by a JSON list of tools used. 
If a tool does not exist in the provided list of tools, notify the user that you do not have the ability to fulfill the request.<|end_of_text|> -<|start_of_role|>available_tools<|end_of_role|>[ - { - "type": "function", - "function": { - "name": "test_function_name", - "description": "A test function for debugging", - "parameters": { - "type": "object", - "properties": { - "param1": { - "type": "string", - "description": "First parameter" - }, - "param2": { - "type": "string", - "description": "Second parameter" - } - }, - "required": [ - "param1", - "param2" - ] - } - } - } -]<|end_of_text|> -<|start_of_role|>user<|end_of_role|>Hello, please help me.<|end_of_text|> -<|start_of_role|>assistant<|end_of_role|><|end_of_text|> -<|start_of_role|>user<|end_of_role|>Continue.<|end_of_text|> -' -Common Suffix: '' -Left (difference): '' -Right (difference): '' - -=== Diff: Tool call with vs without reasoning_content (user, assistant) === -Common Prefix: '<|start_of_role|>system<|end_of_role|>Knowledge Cutoff Date: April 2024. Today's Date: January 26, 2026. You are Granite, developed by IBM. You are a helpful assistant with access to the following tools. When a tool is required to answer the user's query, respond only with <|tool_call|> followed by a JSON list of tools used. 
If a tool does not exist in the provided list of tools, notify the user that you do not have the ability to fulfill the request.<|end_of_text|> -<|start_of_role|>available_tools<|end_of_role|>[ - { - "type": "function", - "function": { - "name": "test_function_name", - "description": "A test function for debugging", - "parameters": { - "type": "object", - "properties": { - "param1": { - "type": "string", - "description": "First parameter" - }, - "param2": { - "type": "string", - "description": "Second parameter" - } - }, - "required": [ - "param1", - "param2" - ] - } - } - } -]<|end_of_text|> -<|start_of_role|>user<|end_of_role|>Hello, please help me.<|end_of_text|> -<|start_of_role|>assistant<|end_of_role|><|end_of_text|> -' -Common Suffix: '' -Left (difference): '' -Right (difference): '' - -=== Checking Reasoning Variables === -No reasoning/thinking-related variables were queried by the template - -================================================================================ - ANALYZING TEMPLATE: models/templates/llama-cpp-deepseek-r1.jinja -================================================================================ - -=== Template Capabilities (from jinja::caps) === -supports_tools: false -supports_tool_calls: false -supports_system_role: true -supports_parallel_tool_calls: false -requires_typed_content: false - -=== Diff: With vs Without Tools (single user message) === -Common Prefix: '' -Common Suffix: '<|User|>Hello, please help me.<|end▁of▁sentence|>' -Left (difference): '' -Right (difference): 'You can call any of the following function tools to satisfy the user's requests: [ - { - "name": "test_function_name", - "description": "A test function for debugging", - "parameters": { - "type": "object", - "properties": { - "param1": { - "type": "string", - "description": "First parameter" - }, - "param2": { - "type": "string", - "description": "Second parameter" - } - }, - "required": [ - "param1", - "param2" - ] - } - } -] - -Example function tool call 
syntax: - -<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>example_function_name -```json -{ - "arg1": "some_value" - ... -} -``` -<|tool▁call▁end|><|tool▁calls▁end|> - -' - -=== Diff: With vs Without add_generation_prompt (single user message) === -Common Prefix: '<|User|>Hello, please help me.<|end▁of▁sentence|>' -Common Suffix: '' -Left (difference): '' -Right (difference): '<|Assistant|> -' - -=== Diff: With vs Without reasoning_content (user, assistant) === -Common Prefix: '<|User|>Hello, please help me.<|end▁of▁sentence|><|Assistant|>I can help you with that.<|end▁of▁sentence|>' -Common Suffix: '' -Left (difference): '' -Right (difference): '' - -=== Diff: With vs Without reasoning_content (user, assistant, user) === -Common Prefix: '<|User|>Hello, please help me.<|end▁of▁sentence|><|Assistant|>I can help you with that.<|end▁of▁sentence|><|User|>Thank you.<|end▁of▁sentence|>' -Common Suffix: '' -Left (difference): '' -Right (difference): '' - -=== Diff: With vs Without tool call (user, assistant) === -Common Prefix: 'You can call any of the following function tools to satisfy the user's requests: [ - { - "name": "test_function_name", - "description": "A test function for debugging", - "parameters": { - "type": "object", - "properties": { - "param1": { - "type": "string", - "description": "First parameter" - }, - "param2": { - "type": "string", - "description": "Second parameter" - } - }, - "required": [ - "param1", - "param2" - ] - } - } -] - -Example function tool call syntax: - -<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>example_function_name -```json -{ - "arg1": "some_value" - ... -} -``` -<|tool▁call▁end|><|tool▁calls▁end|> - -<|User|>Hello, please help me.<|end▁of▁sentence|><|Assistant|>' -Common Suffix: '<|end▁of▁sentence|>' -Left (difference): 'Let me help you.' 
-Right (difference): '<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>test_function_name -```json -{"param1":0x6414d8b06690, "param2":0x6414d8abf070} -```<|tool▁call▁end|><|tool▁calls▁end|>' - -=== Diff: With vs Without tool call (user, assistant, user) === -Common Prefix: 'You can call any of the following function tools to satisfy the user's requests: [ - { - "name": "test_function_name", - "description": "A test function for debugging", - "parameters": { - "type": "object", - "properties": { - "param1": { - "type": "string", - "description": "First parameter" - }, - "param2": { - "type": "string", - "description": "Second parameter" - } - }, - "required": [ - "param1", - "param2" - ] - } - } -] - -Example function tool call syntax: - -<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>example_function_name -```json -{ - "arg1": "some_value" - ... -} -``` -<|tool▁call▁end|><|tool▁calls▁end|> - -<|User|>Hello, please help me.<|end▁of▁sentence|><|Assistant|>' -Common Suffix: '<|end▁of▁sentence|><|User|>Continue.<|end▁of▁sentence|>' -Left (difference): 'Let me help you.' -Right (difference): '<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>test_function_name -```json -{"param1":0x6414d8acb270, "param2":0x6414d8ab6cd0} -```<|tool▁call▁end|><|tool▁calls▁end|>' - -=== Diff: One vs Two tool calls (user, assistant) === -Common Prefix: 'You can call any of the following function tools to satisfy the user's requests: [ - { - "name": "test_function_name", - "description": "A test function for debugging", - "parameters": { - "type": "object", - "properties": { - "param1": { - "type": "string", - "description": "First parameter" - }, - "param2": { - "type": "string", - "description": "Second parameter" - } - }, - "required": [ - "param1", - "param2" - ] - } - } -] - -Example function tool call syntax: - -<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>example_function_name -```json -{ - "arg1": "some_value" - ... 
-} -``` -<|tool▁call▁end|><|tool▁calls▁end|> - -<|User|>Hello, please help me.<|end▁of▁sentence|><|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>test_function_name -```json -{"param1":0x6414d8' -Common Suffix: '0} -```<|tool▁call▁end|><|tool▁calls▁end|><|end▁of▁sentence|>' -Left (difference): 'b09420, "param2":0x6414d8ac46a' -Right (difference): 'ac1290, "param2":0x6414d8ab6cd0} -```<|tool▁call▁end|> -<|tool▁call▁begin|>function<|tool▁sep|>test_function_name -```json -{"param1":0x6414d8b17be0, "param2":0x6414d8ae5f2' - -=== Diff: One vs Two tool calls (user, assistant, user) === -Common Prefix: 'You can call any of the following function tools to satisfy the user's requests: [ - { - "name": "test_function_name", - "description": "A test function for debugging", - "parameters": { - "type": "object", - "properties": { - "param1": { - "type": "string", - "description": "First parameter" - }, - "param2": { - "type": "string", - "description": "Second parameter" - } - }, - "required": [ - "param1", - "param2" - ] - } - } -] - -Example function tool call syntax: - -<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>example_function_name -```json -{ - "arg1": "some_value" - ... 
-} -``` -<|tool▁call▁end|><|tool▁calls▁end|> - -<|User|>Hello, please help me.<|end▁of▁sentence|><|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>test_function_name -```json -{"param1":0x6414d8' -Common Suffix: '0} -```<|tool▁call▁end|><|tool▁calls▁end|><|end▁of▁sentence|><|User|>Continue.<|end▁of▁sentence|>' -Left (difference): 'ac2c10, "param2":0x6414d8aba63' -Right (difference): 'b06690, "param2":0x6414d8ab6cd0} -```<|tool▁call▁end|> -<|tool▁call▁begin|>function<|tool▁sep|>test_function_name -```json -{"param1":0x6414d8aba960, "param2":0x6414d8aba0e' - -=== Diff: Tool call with vs without reasoning_content (user, assistant) === -Common Prefix: 'You can call any of the following function tools to satisfy the user's requests: [ - { - "name": "test_function_name", - "description": "A test function for debugging", - "parameters": { - "type": "object", - "properties": { - "param1": { - "type": "string", - "description": "First parameter" - }, - "param2": { - "type": "string", - "description": "Second parameter" - } - }, - "required": [ - "param1", - "param2" - ] - } - } -] - -Example function tool call syntax: - -<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>example_function_name -```json -{ - "arg1": "some_value" - ... 
-} -``` -<|tool▁call▁end|><|tool▁calls▁end|> - -<|User|>Hello, please help me.<|end▁of▁sentence|><|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>test_function_name -```json -{"param1":0x6414d8' -Common Suffix: '0} -```<|tool▁call▁end|><|tool▁calls▁end|><|end▁of▁sentence|>' -Left (difference): 'abf070, "param2":0x6414d8aba63' -Right (difference): 'b09420, "param2":0x6414d8b17be' - -=== Checking Reasoning Variables === -No reasoning/thinking-related variables were queried by the template - -================================================================================ - ANALYZING TEMPLATE: models/templates/meetkai-functionary-medium-v3.1.jinja -================================================================================ - -=== Template Capabilities (from jinja::caps) === -supports_tools: true -supports_tool_calls: true -supports_system_role: true -supports_parallel_tool_calls: true -requires_typed_content: false - -=== Diff: With vs Without Tools (single user message) === -Common Prefix: '<|start_header_id|>system<|end_header_id|> - - -Cutting Knowledge Date: December 2023 - -' -Common Suffix: '<|eot_id|><|start_header_id|>user<|end_header_id|> - -Hello, please help me.<|eot_id|>' -Left (difference): '' -Right (difference): ' -You have access to the following functions: - -Use the function 'test_function_name' to 'A test function for debugging' -{"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}} - - -Think very carefully before calling functions. -If a you choose to call a function ONLY reply in the following format: -<{start_tag}={function_name}>{parameters}{end_tag} -where - -start_tag => ` a JSON dict with the function argument name as key and function argument value as value. 
-end_tag => `` - -Here is an example, -{"example_name": "example_value"} - -Reminder: -- If looking for real time information use relevant functions before falling back to brave_search -- Function calls MUST follow the specified format, start with -- Required parameters MUST be specified -- Only call one function at a time -- Put the entire function call reply on one line - -' - -=== Diff: With vs Without add_generation_prompt (single user message) === -Common Prefix: '<|start_header_id|>system<|end_header_id|> - - -Cutting Knowledge Date: December 2023 - -<|eot_id|><|start_header_id|>user<|end_header_id|> - -Hello, please help me.<|eot_id|>' -Common Suffix: '' -Left (difference): '' -Right (difference): '<|start_header_id|>assistant<|end_header_id|> - -' - -=== Diff: With vs Without reasoning_content (user, assistant) === -Common Prefix: '<|start_header_id|>system<|end_header_id|> - - -Cutting Knowledge Date: December 2023 - -<|eot_id|><|start_header_id|>user<|end_header_id|> - -Hello, please help me.<|eot_id|><|start_header_id|>assistant<|end_header_id|> - -I can help you with that.<|eot_id|>' -Common Suffix: '' -Left (difference): '' -Right (difference): '' - -=== Diff: With vs Without reasoning_content (user, assistant, user) === -Common Prefix: '<|start_header_id|>system<|end_header_id|> - - -Cutting Knowledge Date: December 2023 - -<|eot_id|><|start_header_id|>user<|end_header_id|> - -Hello, please help me.<|eot_id|><|start_header_id|>assistant<|end_header_id|> - -I can help you with that.<|eot_id|><|start_header_id|>user<|end_header_id|> - -Thank you.<|eot_id|>' -Common Suffix: '' -Left (difference): '' -Right (difference): '' - -=== Diff: With vs Without tool call (user, assistant) === -Common Prefix: '<|start_header_id|>system<|end_header_id|> - - -Cutting Knowledge Date: December 2023 - - -You have access to the following functions: - -Use the function 'test_function_name' to 'A test function for debugging' -{"name": "test_function_name", "description": 
"A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}} - - -Think very carefully before calling functions. -If a you choose to call a function ONLY reply in the following format: -<{start_tag}={function_name}>{parameters}{end_tag} -where - -start_tag => ` a JSON dict with the function argument name as key and function argument value as value. -end_tag => `` - -Here is an example, -{"example_name": "example_value"} - -Reminder: -- If looking for real time information use relevant functions before falling back to brave_search -- Function calls MUST follow the specified format, start with -- Required parameters MUST be specified -- Only call one function at a time -- Put the entire function call reply on one line - -<|eot_id|><|start_header_id|>user<|end_header_id|> - -Hello, please help me.<|eot_id|><|start_header_id|>assistant<|end_header_id|> - -' -Common Suffix: '' -Left (difference): 'Let me help you.<|eot_id|>' -Right (difference): '{"param1":0x6414d8ae7330, "param2":0x6414d8aaf400}<|eom_id|>' - -=== Diff: With vs Without tool call (user, assistant, user) === -Common Prefix: '<|start_header_id|>system<|end_header_id|> - - -Cutting Knowledge Date: December 2023 - - -You have access to the following functions: - -Use the function 'test_function_name' to 'A test function for debugging' -{"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}} - - -Think very carefully before calling functions. 
-If a you choose to call a function ONLY reply in the following format: -<{start_tag}={function_name}>{parameters}{end_tag} -where - -start_tag => ` a JSON dict with the function argument name as key and function argument value as value. -end_tag => `` - -Here is an example, -{"example_name": "example_value"} - -Reminder: -- If looking for real time information use relevant functions before falling back to brave_search -- Function calls MUST follow the specified format, start with -- Required parameters MUST be specified -- Only call one function at a time -- Put the entire function call reply on one line - -<|eot_id|><|start_header_id|>user<|end_header_id|> - -Hello, please help me.<|eot_id|><|start_header_id|>assistant<|end_header_id|> - -' -Common Suffix: '<|start_header_id|>user<|end_header_id|> - -Continue.<|eot_id|>' -Left (difference): 'Let me help you.<|eot_id|>' -Right (difference): '{"param1":0x6414d8ac1f50, "param2":0x6414d8aba630}<|eom_id|>' - -=== Diff: One vs Two tool calls (user, assistant) === -Common Prefix: '<|start_header_id|>system<|end_header_id|> - - -Cutting Knowledge Date: December 2023 - - -You have access to the following functions: - -Use the function 'test_function_name' to 'A test function for debugging' -{"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}} - - -Think very carefully before calling functions. -If a you choose to call a function ONLY reply in the following format: -<{start_tag}={function_name}>{parameters}{end_tag} -where - -start_tag => ` a JSON dict with the function argument name as key and function argument value as value. 
-end_tag => `` - -Here is an example, -{"example_name": "example_value"} - -Reminder: -- If looking for real time information use relevant functions before falling back to brave_search -- Function calls MUST follow the specified format, start with -- Required parameters MUST be specified -- Only call one function at a time -- Put the entire function call reply on one line - -<|eot_id|><|start_header_id|>user<|end_header_id|> - -Hello, please help me.<|eot_id|><|start_header_id|>assistant<|end_header_id|> - -{"param1":0x6414d8ab' -Common Suffix: '0}<|eom_id|>' -Left (difference): '7aa0, "param2":0x6414d8abb62' -Right (difference): '62f0, "param2":0x6414d8aba410}{"param1":0x6414d8ac5360, "param2":0x6414d8b05ef' - -=== Diff: One vs Two tool calls (user, assistant, user) === -Common Prefix: '<|start_header_id|>system<|end_header_id|> - - -Cutting Knowledge Date: December 2023 - - -You have access to the following functions: - -Use the function 'test_function_name' to 'A test function for debugging' -{"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}} - - -Think very carefully before calling functions. -If a you choose to call a function ONLY reply in the following format: -<{start_tag}={function_name}>{parameters}{end_tag} -where - -start_tag => ` a JSON dict with the function argument name as key and function argument value as value. 
-end_tag => `` - -Here is an example, -{"example_name": "example_value"} - -Reminder: -- If looking for real time information use relevant functions before falling back to brave_search -- Function calls MUST follow the specified format, start with -- Required parameters MUST be specified -- Only call one function at a time -- Put the entire function call reply on one line - -<|eot_id|><|start_header_id|>user<|end_header_id|> - -Hello, please help me.<|eot_id|><|start_header_id|>assistant<|end_header_id|> - -{"param1":0x6414d8ab' -Common Suffix: '0}<|eom_id|><|start_header_id|>user<|end_header_id|> - -Continue.<|eot_id|>' -Left (difference): '9420, "param2":0x6414d8ae027' -Right (difference): 'e5f0, "param2":0x6414d8b24fa0}{"param1":0x6414d8ab7000, "param2":0x6414d8ac492' - -=== Diff: Tool call with vs without reasoning_content (user, assistant) === -Common Prefix: '<|start_header_id|>system<|end_header_id|> - - -Cutting Knowledge Date: December 2023 - - -You have access to the following functions: - -Use the function 'test_function_name' to 'A test function for debugging' -{"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}} - - -Think very carefully before calling functions. -If a you choose to call a function ONLY reply in the following format: -<{start_tag}={function_name}>{parameters}{end_tag} -where - -start_tag => ` a JSON dict with the function argument name as key and function argument value as value. 
-end_tag => `` - -Here is an example, -{"example_name": "example_value"} - -Reminder: -- If looking for real time information use relevant functions before falling back to brave_search -- Function calls MUST follow the specified format, start with -- Required parameters MUST be specified -- Only call one function at a time -- Put the entire function call reply on one line - -<|eot_id|><|start_header_id|>user<|end_header_id|> - -Hello, please help me.<|eot_id|><|start_header_id|>assistant<|end_header_id|> - -{"param1":0x6414d8ac' -Common Suffix: '0}<|eom_id|>' -Left (difference): 'b270, "param2":0x6414d8abf80' -Right (difference): '1290, "param2":0x6414d8abaeb' - -=== Checking Reasoning Variables === -No reasoning/thinking-related variables were queried by the template - -================================================================================ - ANALYZING TEMPLATE: models/templates/meetkai-functionary-medium-v3.2.jinja -================================================================================ - -=== Template Capabilities (from jinja::caps) === -supports_tools: true -supports_tool_calls: true -supports_system_role: true -supports_parallel_tool_calls: true -requires_typed_content: false - -=== Diff: With vs Without Tools (single user message) === -Common Prefix: '<|start_header_id|>system<|end_header_id|> - -You are capable of executing available function(s) if required. -Only execute function(s) when absolutely necessary. -Ask for the required input to:recipient==all -Use JSON for function arguments. -Respond in this format: ->>>${recipient} -${content} -Available functions: -// Supported function definitions that should be called when necessary. -namespace functions { - -' -Common Suffix: '} // namespace functions<|eot_id|><|start_header_id|>user<|end_header_id|> - -Hello, please help me.<|eot_id|>' -Left (difference): '' -Right (difference): '// A test function for debugging -type test_function_name = (_: { -// First parameter. 
-param1: string, -// Second parameter. -param2: string, -}) => any; - -' - -=== Diff: With vs Without add_generation_prompt (single user message) === -Common Prefix: '<|start_header_id|>system<|end_header_id|> - -You are capable of executing available function(s) if required. -Only execute function(s) when absolutely necessary. -Ask for the required input to:recipient==all -Use JSON for function arguments. -Respond in this format: ->>>${recipient} -${content} -Available functions: -// Supported function definitions that should be called when necessary. -namespace functions { - -} // namespace functions<|eot_id|><|start_header_id|>user<|end_header_id|> - -Hello, please help me.<|eot_id|>' -Common Suffix: '' -Left (difference): '' -Right (difference): '<|start_header_id|>assistant<|end_header_id|> - ->>>' - -=== Diff: With vs Without reasoning_content (user, assistant) === -Common Prefix: '<|start_header_id|>system<|end_header_id|> - -You are capable of executing available function(s) if required. -Only execute function(s) when absolutely necessary. -Ask for the required input to:recipient==all -Use JSON for function arguments. -Respond in this format: ->>>${recipient} -${content} -Available functions: -// Supported function definitions that should be called when necessary. -namespace functions { - -} // namespace functions<|eot_id|><|start_header_id|>user<|end_header_id|> - -Hello, please help me.<|eot_id|><|start_header_id|>assistant<|end_header_id|> - ->>>all -I can help you with that.<|eot_id|>' -Common Suffix: '' -Left (difference): '' -Right (difference): '' - -=== Diff: With vs Without reasoning_content (user, assistant, user) === -Common Prefix: '<|start_header_id|>system<|end_header_id|> - -You are capable of executing available function(s) if required. -Only execute function(s) when absolutely necessary. -Ask for the required input to:recipient==all -Use JSON for function arguments. 
-Respond in this format: ->>>${recipient} -${content} -Available functions: -// Supported function definitions that should be called when necessary. -namespace functions { - -} // namespace functions<|eot_id|><|start_header_id|>user<|end_header_id|> - -Hello, please help me.<|eot_id|><|start_header_id|>assistant<|end_header_id|> - ->>>all -I can help you with that.<|eot_id|><|start_header_id|>user<|end_header_id|> - -Thank you.<|eot_id|>' -Common Suffix: '' -Left (difference): '' -Right (difference): '' - -=== Diff: With vs Without tool call (user, assistant) === -Common Prefix: '<|start_header_id|>system<|end_header_id|> - -You are capable of executing available function(s) if required. -Only execute function(s) when absolutely necessary. -Ask for the required input to:recipient==all -Use JSON for function arguments. -Respond in this format: ->>>${recipient} -${content} -Available functions: -// Supported function definitions that should be called when necessary. -namespace functions { - -// A test function for debugging -type test_function_name = (_: { -// First parameter. -param1: string, -// Second parameter. -param2: string, -}) => any; - -} // namespace functions<|eot_id|><|start_header_id|>user<|end_header_id|> - -Hello, please help me.<|eot_id|><|start_header_id|>assistant<|end_header_id|> - ->>>' -Common Suffix: '<|eot_id|>' -Left (difference): 'all -Let me help you.' -Right (difference): 'test_function_name -{"param1":0x6414d8af9280, "param2":0x6414d8af8a90}' - -=== Diff: With vs Without tool call (user, assistant, user) === -Common Prefix: '<|start_header_id|>system<|end_header_id|> - -You are capable of executing available function(s) if required. -Only execute function(s) when absolutely necessary. -Ask for the required input to:recipient==all -Use JSON for function arguments. -Respond in this format: ->>>${recipient} -${content} -Available functions: -// Supported function definitions that should be called when necessary. 
-namespace functions { - -// A test function for debugging -type test_function_name = (_: { -// First parameter. -param1: string, -// Second parameter. -param2: string, -}) => any; - -} // namespace functions<|eot_id|><|start_header_id|>user<|end_header_id|> - -Hello, please help me.<|eot_id|><|start_header_id|>assistant<|end_header_id|> - ->>>' -Common Suffix: '<|eot_id|><|start_header_id|>user<|end_header_id|> - -Continue.<|eot_id|>' -Left (difference): 'all -Let me help you.' -Right (difference): 'test_function_name -{"param1":0x6414d8ae3c80, "param2":0x6414d8b39240}' - -=== Diff: One vs Two tool calls (user, assistant) === -Common Prefix: '<|start_header_id|>system<|end_header_id|> - -You are capable of executing available function(s) if required. -Only execute function(s) when absolutely necessary. -Ask for the required input to:recipient==all -Use JSON for function arguments. -Respond in this format: ->>>${recipient} -${content} -Available functions: -// Supported function definitions that should be called when necessary. -namespace functions { - -// A test function for debugging -type test_function_name = (_: { -// First parameter. -param1: string, -// Second parameter. -param2: string, -}) => any; - -} // namespace functions<|eot_id|><|start_header_id|>user<|end_header_id|> - -Hello, please help me.<|eot_id|><|start_header_id|>assistant<|end_header_id|> - ->>>test_function_name -{"param1":0x6414d8afff80, "param2":0x6414d8b3' -Common Suffix: '0}<|eot_id|>' -Left (difference): 'de2' -Right (difference): 'e8b0}>>>test_function_name -{"param1":0x6414d8ad7ff0, "param2":0x6414d8ae3c8' - -=== Diff: One vs Two tool calls (user, assistant, user) === -Common Prefix: '<|start_header_id|>system<|end_header_id|> - -You are capable of executing available function(s) if required. -Only execute function(s) when absolutely necessary. -Ask for the required input to:recipient==all -Use JSON for function arguments. 
-Respond in this format: ->>>${recipient} -${content} -Available functions: -// Supported function definitions that should be called when necessary. -namespace functions { - -// A test function for debugging -type test_function_name = (_: { -// First parameter. -param1: string, -// Second parameter. -param2: string, -}) => any; - -} // namespace functions<|eot_id|><|start_header_id|>user<|end_header_id|> - -Hello, please help me.<|eot_id|><|start_header_id|>assistant<|end_header_id|> - ->>>test_function_name -{"param1":0x6414d8a' -Common Suffix: '0}<|eot_id|><|start_header_id|>user<|end_header_id|> - -Continue.<|eot_id|>' -Left (difference): 'e4d40, "param2":0x6414d8abf04' -Right (difference): 'b5ac0, "param2":0x6414d8b3e8b0}>>>test_function_name -{"param1":0x6414d8b210c0, "param2":0x6414d8b0e31' - -=== Diff: Tool call with vs without reasoning_content (user, assistant) === -Common Prefix: '<|start_header_id|>system<|end_header_id|> - -You are capable of executing available function(s) if required. -Only execute function(s) when absolutely necessary. -Ask for the required input to:recipient==all -Use JSON for function arguments. -Respond in this format: ->>>${recipient} -${content} -Available functions: -// Supported function definitions that should be called when necessary. -namespace functions { - -// A test function for debugging -type test_function_name = (_: { -// First parameter. -param1: string, -// Second parameter. 
-param2: string, -}) => any; - -} // namespace functions<|eot_id|><|start_header_id|>user<|end_header_id|> - -Hello, please help me.<|eot_id|><|start_header_id|>assistant<|end_header_id|> - ->>>test_function_name -{"param1":0x6414d8a' -Common Suffix: '0}<|eot_id|>' -Left (difference): 'ba960, "param2":0x6414d8ab9ca' -Right (difference): 'c4bf0, "param2":0x6414d8ae4d4' - -=== Checking Reasoning Variables === -No reasoning/thinking-related variables were queried by the template - -================================================================================ - ANALYZING TEMPLATE: models/templates/meta-llama-Llama-3.1-8B-Instruct.jinja -================================================================================ - -=== Template Capabilities (from jinja::caps) === -supports_tools: false -supports_tool_calls: false -supports_system_role: true -supports_parallel_tool_calls: true -requires_typed_content: false - -=== Diff: With vs Without Tools (single user message) === -Common Prefix: '<|start_header_id|>system<|end_header_id|> - -Environment: ipython -Cutting Knowledge Date: December 2023 -Today Date: 26 Jul 2024 - -<|eot_id|><|start_header_id|>user<|end_header_id|> - -Given the following functions, please respond with a JSON for a function call with its proper arguments that best answers the given prompt. - -Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.Do not use variables. 
- -' -Common Suffix: 'Hello, please help me.<|eot_id|>' -Left (difference): '' -Right (difference): '{ - "type": "function", - "function": { - "name": "test_function_name", - "description": "A test function for debugging", - "parameters": { - "type": "object", - "properties": { - "param1": { - "type": "string", - "description": "First parameter" - }, - "param2": { - "type": "string", - "description": "Second parameter" - } - }, - "required": [ - "param1", - "param2" - ] - } - } -} - -' - -=== Diff: With vs Without add_generation_prompt (single user message) === -Common Prefix: '<|start_header_id|>system<|end_header_id|> - -Environment: ipython -Cutting Knowledge Date: December 2023 -Today Date: 26 Jul 2024 - -<|eot_id|><|start_header_id|>user<|end_header_id|> - -Given the following functions, please respond with a JSON for a function call with its proper arguments that best answers the given prompt. - -Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.Do not use variables. - -Hello, please help me.<|eot_id|>' -Common Suffix: '' -Left (difference): '' -Right (difference): '<|start_header_id|>assistant<|end_header_id|> - -' - -=== Diff: With vs Without reasoning_content (user, assistant) === -Common Prefix: '<|start_header_id|>system<|end_header_id|> - -Environment: ipython -Cutting Knowledge Date: December 2023 -Today Date: 26 Jul 2024 - -<|eot_id|><|start_header_id|>user<|end_header_id|> - -Given the following functions, please respond with a JSON for a function call with its proper arguments that best answers the given prompt. - -Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.Do not use variables. 
- -Hello, please help me.<|eot_id|><|start_header_id|>assistant<|end_header_id|> - -I can help you with that.<|eot_id|>' -Common Suffix: '' -Left (difference): '' -Right (difference): '' - -=== Diff: With vs Without reasoning_content (user, assistant, user) === -Common Prefix: '<|start_header_id|>system<|end_header_id|> - -Environment: ipython -Cutting Knowledge Date: December 2023 -Today Date: 26 Jul 2024 - -<|eot_id|><|start_header_id|>user<|end_header_id|> - -Given the following functions, please respond with a JSON for a function call with its proper arguments that best answers the given prompt. - -Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.Do not use variables. - -Hello, please help me.<|eot_id|><|start_header_id|>assistant<|end_header_id|> - -I can help you with that.<|eot_id|><|start_header_id|>user<|end_header_id|> - -Thank you.<|eot_id|>' -Common Suffix: '' -Left (difference): '' -Right (difference): '' - -=== Diff: With vs Without tool call (user, assistant) === -Common Prefix: '<|start_header_id|>system<|end_header_id|> - -Environment: ipython -Cutting Knowledge Date: December 2023 -Today Date: 26 Jul 2024 - -<|eot_id|><|start_header_id|>user<|end_header_id|> - -Given the following functions, please respond with a JSON for a function call with its proper arguments that best answers the given prompt. - -Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.Do not use variables. 
- -{ - "type": "function", - "function": { - "name": "test_function_name", - "description": "A test function for debugging", - "parameters": { - "type": "object", - "properties": { - "param1": { - "type": "string", - "description": "First parameter" - }, - "param2": { - "type": "string", - "description": "Second parameter" - } - }, - "required": [ - "param1", - "param2" - ] - } - } -} - -Hello, please help me.<|eot_id|><|start_header_id|>assistant<|end_header_id|> - -' -Common Suffix: '<|eot_id|>' -Left (difference): 'Let me help you.' -Right (difference): '{"name": "test_function_name", "parameters": {"param1": "value1", "param2": "value2"}}' - -=== Diff: With vs Without tool call (user, assistant, user) === -Common Prefix: '<|start_header_id|>system<|end_header_id|> - -Environment: ipython -Cutting Knowledge Date: December 2023 -Today Date: 26 Jul 2024 - -<|eot_id|><|start_header_id|>user<|end_header_id|> - -Given the following functions, please respond with a JSON for a function call with its proper arguments that best answers the given prompt. - -Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.Do not use variables. - -{ - "type": "function", - "function": { - "name": "test_function_name", - "description": "A test function for debugging", - "parameters": { - "type": "object", - "properties": { - "param1": { - "type": "string", - "description": "First parameter" - }, - "param2": { - "type": "string", - "description": "Second parameter" - } - }, - "required": [ - "param1", - "param2" - ] - } - } -} - -Hello, please help me.<|eot_id|><|start_header_id|>assistant<|end_header_id|> - -' -Common Suffix: '<|eot_id|><|start_header_id|>user<|end_header_id|> - -Continue.<|eot_id|>' -Left (difference): 'Let me help you.' 
-Right (difference): '{"name": "test_function_name", "parameters": {"param1": "value1", "param2": "value2"}}' -Analysis failed: ------------- -While executing CallExpression at line 71, column 32 in source: -... == 1 %}↵ {{- raise_exception("This model only supports single tool-c... - ^ -Error: Jinja Exception: This model only supports single tool-calls at once! - -================================================================================ - ANALYZING TEMPLATE: models/templates/meta-llama-Llama-3.2-3B-Instruct.jinja -================================================================================ - -=== Template Capabilities (from jinja::caps) === -supports_tools: false -supports_tool_calls: false -supports_system_role: true -supports_parallel_tool_calls: true -requires_typed_content: false - -=== Diff: With vs Without Tools (single user message) === -Common Prefix: '<|start_header_id|>system<|end_header_id|> - -Environment: ipython -Cutting Knowledge Date: December 2023 -Today Date: 26 Jan 2026 - -<|eot_id|><|start_header_id|>user<|end_header_id|> - -Given the following functions, please respond with a JSON for a function call with its proper arguments that best answers the given prompt. - -Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.Do not use variables. 
- -' -Common Suffix: 'Hello, please help me.<|eot_id|>' -Left (difference): '' -Right (difference): '{ - "type": "function", - "function": { - "name": "test_function_name", - "description": "A test function for debugging", - "parameters": { - "type": "object", - "properties": { - "param1": { - "type": "string", - "description": "First parameter" - }, - "param2": { - "type": "string", - "description": "Second parameter" - } - }, - "required": [ - "param1", - "param2" - ] - } - } -} - -' - -=== Diff: With vs Without add_generation_prompt (single user message) === -Common Prefix: '<|start_header_id|>system<|end_header_id|> - -Environment: ipython -Cutting Knowledge Date: December 2023 -Today Date: 26 Jan 2026 - -<|eot_id|><|start_header_id|>user<|end_header_id|> - -Given the following functions, please respond with a JSON for a function call with its proper arguments that best answers the given prompt. - -Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.Do not use variables. - -Hello, please help me.<|eot_id|>' -Common Suffix: '' -Left (difference): '' -Right (difference): '<|start_header_id|>assistant<|end_header_id|> - -' - -=== Diff: With vs Without reasoning_content (user, assistant) === -Common Prefix: '<|start_header_id|>system<|end_header_id|> - -Environment: ipython -Cutting Knowledge Date: December 2023 -Today Date: 26 Jan 2026 - -<|eot_id|><|start_header_id|>user<|end_header_id|> - -Given the following functions, please respond with a JSON for a function call with its proper arguments that best answers the given prompt. - -Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.Do not use variables. 
- -Hello, please help me.<|eot_id|><|start_header_id|>assistant<|end_header_id|> - -I can help you with that.<|eot_id|>' -Common Suffix: '' -Left (difference): '' -Right (difference): '' - -=== Diff: With vs Without reasoning_content (user, assistant, user) === -Common Prefix: '<|start_header_id|>system<|end_header_id|> - -Environment: ipython -Cutting Knowledge Date: December 2023 -Today Date: 26 Jan 2026 - -<|eot_id|><|start_header_id|>user<|end_header_id|> - -Given the following functions, please respond with a JSON for a function call with its proper arguments that best answers the given prompt. - -Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.Do not use variables. - -Hello, please help me.<|eot_id|><|start_header_id|>assistant<|end_header_id|> - -I can help you with that.<|eot_id|><|start_header_id|>user<|end_header_id|> - -Thank you.<|eot_id|>' -Common Suffix: '' -Left (difference): '' -Right (difference): '' - -=== Diff: With vs Without tool call (user, assistant) === -Common Prefix: '<|start_header_id|>system<|end_header_id|> - -Environment: ipython -Cutting Knowledge Date: December 2023 -Today Date: 26 Jan 2026 - -<|eot_id|><|start_header_id|>user<|end_header_id|> - -Given the following functions, please respond with a JSON for a function call with its proper arguments that best answers the given prompt. - -Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.Do not use variables. 
- -{ - "type": "function", - "function": { - "name": "test_function_name", - "description": "A test function for debugging", - "parameters": { - "type": "object", - "properties": { - "param1": { - "type": "string", - "description": "First parameter" - }, - "param2": { - "type": "string", - "description": "Second parameter" - } - }, - "required": [ - "param1", - "param2" - ] - } - } -} - -Hello, please help me.<|eot_id|><|start_header_id|>assistant<|end_header_id|> - -' -Common Suffix: '<|eot_id|>' -Left (difference): 'Let me help you.' -Right (difference): '{"name": "test_function_name", "parameters": {"param1": "value1", "param2": "value2"}}' - -=== Diff: With vs Without tool call (user, assistant, user) === -Common Prefix: '<|start_header_id|>system<|end_header_id|> - -Environment: ipython -Cutting Knowledge Date: December 2023 -Today Date: 26 Jan 2026 - -<|eot_id|><|start_header_id|>user<|end_header_id|> - -Given the following functions, please respond with a JSON for a function call with its proper arguments that best answers the given prompt. - -Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.Do not use variables. - -{ - "type": "function", - "function": { - "name": "test_function_name", - "description": "A test function for debugging", - "parameters": { - "type": "object", - "properties": { - "param1": { - "type": "string", - "description": "First parameter" - }, - "param2": { - "type": "string", - "description": "Second parameter" - } - }, - "required": [ - "param1", - "param2" - ] - } - } -} - -Hello, please help me.<|eot_id|><|start_header_id|>assistant<|end_header_id|> - -' -Common Suffix: '<|eot_id|><|start_header_id|>user<|end_header_id|> - -Continue.<|eot_id|>' -Left (difference): 'Let me help you.' 
-Right (difference): '{"name": "test_function_name", "parameters": {"param1": "value1", "param2": "value2"}}' -Analysis failed: ------------- -While executing CallExpression at line 72, column 32 in source: -... == 1 %}↵ {{- raise_exception("This model only supports single tool-c... - ^ -Error: Jinja Exception: This model only supports single tool-calls at once! - -================================================================================ - ANALYZING TEMPLATE: models/templates/meta-llama-Llama-3.3-70B-Instruct.jinja -================================================================================ - -=== Template Capabilities (from jinja::caps) === -supports_tools: false -supports_tool_calls: false -supports_system_role: true -supports_parallel_tool_calls: true -requires_typed_content: false - -=== Diff: With vs Without Tools (single user message) === -Common Prefix: '<|start_header_id|>system<|end_header_id|> - -Environment: ipython -Cutting Knowledge Date: December 2023 -Today Date: 26 Jul 2024 - -<|eot_id|><|start_header_id|>user<|end_header_id|> - -Given the following functions, please respond with a JSON for a function call with its proper arguments that best answers the given prompt. - -Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.Do not use variables. 
- -' -Common Suffix: 'Hello, please help me.<|eot_id|>' -Left (difference): '' -Right (difference): '{ - "type": "function", - "function": { - "name": "test_function_name", - "description": "A test function for debugging", - "parameters": { - "type": "object", - "properties": { - "param1": { - "type": "string", - "description": "First parameter" - }, - "param2": { - "type": "string", - "description": "Second parameter" - } - }, - "required": [ - "param1", - "param2" - ] - } - } -} - -' - -=== Diff: With vs Without add_generation_prompt (single user message) === -Common Prefix: '<|start_header_id|>system<|end_header_id|> - -Environment: ipython -Cutting Knowledge Date: December 2023 -Today Date: 26 Jul 2024 - -<|eot_id|><|start_header_id|>user<|end_header_id|> - -Given the following functions, please respond with a JSON for a function call with its proper arguments that best answers the given prompt. - -Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.Do not use variables. - -Hello, please help me.<|eot_id|>' -Common Suffix: '' -Left (difference): '' -Right (difference): '<|start_header_id|>assistant<|end_header_id|> - -' - -=== Diff: With vs Without reasoning_content (user, assistant) === -Common Prefix: '<|start_header_id|>system<|end_header_id|> - -Environment: ipython -Cutting Knowledge Date: December 2023 -Today Date: 26 Jul 2024 - -<|eot_id|><|start_header_id|>user<|end_header_id|> - -Given the following functions, please respond with a JSON for a function call with its proper arguments that best answers the given prompt. - -Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.Do not use variables. 
- -Hello, please help me.<|eot_id|><|start_header_id|>assistant<|end_header_id|> - -I can help you with that.<|eot_id|>' -Common Suffix: '' -Left (difference): '' -Right (difference): '' - -=== Diff: With vs Without reasoning_content (user, assistant, user) === -Common Prefix: '<|start_header_id|>system<|end_header_id|> - -Environment: ipython -Cutting Knowledge Date: December 2023 -Today Date: 26 Jul 2024 - -<|eot_id|><|start_header_id|>user<|end_header_id|> - -Given the following functions, please respond with a JSON for a function call with its proper arguments that best answers the given prompt. - -Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.Do not use variables. - -Hello, please help me.<|eot_id|><|start_header_id|>assistant<|end_header_id|> - -I can help you with that.<|eot_id|><|start_header_id|>user<|end_header_id|> - -Thank you.<|eot_id|>' -Common Suffix: '' -Left (difference): '' -Right (difference): '' - -=== Diff: With vs Without tool call (user, assistant) === -Common Prefix: '<|start_header_id|>system<|end_header_id|> - -Environment: ipython -Cutting Knowledge Date: December 2023 -Today Date: 26 Jul 2024 - -<|eot_id|><|start_header_id|>user<|end_header_id|> - -Given the following functions, please respond with a JSON for a function call with its proper arguments that best answers the given prompt. - -Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.Do not use variables. 
- -{ - "type": "function", - "function": { - "name": "test_function_name", - "description": "A test function for debugging", - "parameters": { - "type": "object", - "properties": { - "param1": { - "type": "string", - "description": "First parameter" - }, - "param2": { - "type": "string", - "description": "Second parameter" - } - }, - "required": [ - "param1", - "param2" - ] - } - } -} - -Hello, please help me.<|eot_id|><|start_header_id|>assistant<|end_header_id|> - -' -Common Suffix: '<|eot_id|>' -Left (difference): 'Let me help you.' -Right (difference): '{"name": "test_function_name", "parameters": {"param1": "value1", "param2": "value2"}}' - -=== Diff: With vs Without tool call (user, assistant, user) === -Common Prefix: '<|start_header_id|>system<|end_header_id|> - -Environment: ipython -Cutting Knowledge Date: December 2023 -Today Date: 26 Jul 2024 - -<|eot_id|><|start_header_id|>user<|end_header_id|> - -Given the following functions, please respond with a JSON for a function call with its proper arguments that best answers the given prompt. - -Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.Do not use variables. - -{ - "type": "function", - "function": { - "name": "test_function_name", - "description": "A test function for debugging", - "parameters": { - "type": "object", - "properties": { - "param1": { - "type": "string", - "description": "First parameter" - }, - "param2": { - "type": "string", - "description": "Second parameter" - } - }, - "required": [ - "param1", - "param2" - ] - } - } -} - -Hello, please help me.<|eot_id|><|start_header_id|>assistant<|end_header_id|> - -' -Common Suffix: '<|eot_id|><|start_header_id|>user<|end_header_id|> - -Continue.<|eot_id|>' -Left (difference): 'Let me help you.' 
-Right (difference): '{"name": "test_function_name", "parameters": {"param1": "value1", "param2": "value2"}}' -Analysis failed: ------------- -While executing CallExpression at line 71, column 32 in source: -... == 1 %}↵ {{- raise_exception("This model only supports single tool-c... - ^ -Error: Jinja Exception: This model only supports single tool-calls at once! - -================================================================================ - ANALYZING TEMPLATE: models/templates/mistralai-Ministral-3-14B-Reasoning-2512.jinja -================================================================================ - -=== Template Capabilities (from jinja::caps) === -supports_tools: false -supports_tool_calls: false -supports_system_role: true -supports_parallel_tool_calls: true -requires_typed_content: false - -=== Diff: With vs Without Tools (single user message) === -Common Prefix: '[SYSTEM_PROMPT]# HOW YOU SHOULD THINK AND ANSWER - -First draft your thinking process (inner monologue) until you arrive at a response. Format your response using Markdown, and use LaTeX for any mathematical equations. Write both your thoughts and the response in the same language as the input. - -Your thinking process must follow the template below:[THINK]Your thoughts or/and draft, like working through an exercise on scratch paper. 
Be as casual and as long as you want until you are confident to generate the response to the user.[/THINK]Here, provide a self-contained response.[/SYSTEM_PROMPT]' -Common Suffix: '[INST]Hello, please help me.[/INST]' -Left (difference): '' -Right (difference): '[AVAILABLE_TOOLS][{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}}][/AVAILABLE_TOOLS]' - -=== Diff: With vs Without add_generation_prompt (single user message) === -Common Prefix: '[SYSTEM_PROMPT]# HOW YOU SHOULD THINK AND ANSWER - -First draft your thinking process (inner monologue) until you arrive at a response. Format your response using Markdown, and use LaTeX for any mathematical equations. Write both your thoughts and the response in the same language as the input. - -Your thinking process must follow the template below:[THINK]Your thoughts or/and draft, like working through an exercise on scratch paper. Be as casual and as long as you want until you are confident to generate the response to the user.[/THINK]Here, provide a self-contained response.[/SYSTEM_PROMPT][INST]Hello, please help me.[/INST]' -Common Suffix: '' -Left (difference): '' -Right (difference): '' - -=== Diff: With vs Without reasoning_content (user, assistant) === -Common Prefix: '[SYSTEM_PROMPT]# HOW YOU SHOULD THINK AND ANSWER - -First draft your thinking process (inner monologue) until you arrive at a response. Format your response using Markdown, and use LaTeX for any mathematical equations. Write both your thoughts and the response in the same language as the input. - -Your thinking process must follow the template below:[THINK]Your thoughts or/and draft, like working through an exercise on scratch paper. 
Be as casual and as long as you want until you are confident to generate the response to the user.[/THINK]Here, provide a self-contained response.[/SYSTEM_PROMPT][INST]Hello, please help me.[/INST]I can help you with that.' -Common Suffix: '' -Left (difference): '' -Right (difference): '' - -=== Diff: With vs Without reasoning_content (user, assistant, user) === -Common Prefix: '[SYSTEM_PROMPT]# HOW YOU SHOULD THINK AND ANSWER - -First draft your thinking process (inner monologue) until you arrive at a response. Format your response using Markdown, and use LaTeX for any mathematical equations. Write both your thoughts and the response in the same language as the input. - -Your thinking process must follow the template below:[THINK]Your thoughts or/and draft, like working through an exercise on scratch paper. Be as casual and as long as you want until you are confident to generate the response to the user.[/THINK]Here, provide a self-contained response.[/SYSTEM_PROMPT][INST]Hello, please help me.[/INST]I can help you with that.[INST]Thank you.[/INST]' -Common Suffix: '' -Left (difference): '' -Right (difference): '' -Analysis failed: ------------- -While executing FilterExpression at line 90, column 37 in source: -... }}↵ {%- elif message['content'] | length > 0 %}↵ {%- for bloc... 
- ^ -Error: Unknown (built-in) filter 'length' for type None - -================================================================================ - ANALYZING TEMPLATE: models/templates/mistralai-Mistral-Nemo-Instruct-2407.jinja -================================================================================ - -=== Template Capabilities (from jinja::caps) === -supports_tools: false -supports_tool_calls: false -supports_system_role: true -supports_parallel_tool_calls: true -requires_typed_content: false - -=== Diff: With vs Without Tools (single user message) === -Common Prefix: '[INST]Hello, please help me.[/INST]' -Common Suffix: '' -Left (difference): '' -Right (difference): '' - -=== Diff: With vs Without add_generation_prompt (single user message) === -Common Prefix: '[INST]Hello, please help me.[/INST]' -Common Suffix: '' -Left (difference): '' -Right (difference): '' - -=== Diff: With vs Without reasoning_content (user, assistant) === -Common Prefix: '[INST]Hello, please help me.[/INST]I can help you with that.' -Common Suffix: '' -Left (difference): '' -Right (difference): '' - -=== Diff: With vs Without reasoning_content (user, assistant, user) === -Common Prefix: '[INST]Hello, please help me.[/INST]I can help you with that.[INST]Thank you.[/INST]' -Common Suffix: '' -Left (difference): '' -Right (difference): '' -Analysis failed: ------------- -While executing CallExpression at line 62, column 36 in source: -...9 %}↵ {{- raise_exception("Tool call IDs should be alphanumeric s... - ^ -Error: Jinja Exception: Tool call IDs should be alphanumeric strings with length 9! 
- -================================================================================ - ANALYZING TEMPLATE: models/templates/moonshotai-Kimi-K2.jinja -================================================================================ - -=== Template Capabilities (from jinja::caps) === -supports_tools: false -supports_tool_calls: true -supports_system_role: true -supports_parallel_tool_calls: true -requires_typed_content: false - -=== Diff: With vs Without Tools (single user message) === -Common Prefix: '<|im_system|>' -Common Suffix: 'system<|im_middle|>You are a helpful assistant<|im_end|><|im_user|>user<|im_middle|>Hello, please help me.<|im_end|>' -Left (difference): '' -Right (difference): 'tool_declare<|im_middle|>[{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}}]<|im_end|><|im_system|>' - -=== Diff: With vs Without add_generation_prompt (single user message) === -Common Prefix: '<|im_system|>system<|im_middle|>You are a helpful assistant<|im_end|><|im_user|>user<|im_middle|>Hello, please help me.<|im_end|>' -Common Suffix: '' -Left (difference): '' -Right (difference): '<|im_assistant|>assistant<|im_middle|>' - -=== Diff: With vs Without reasoning_content (user, assistant) === -Common Prefix: '<|im_system|>system<|im_middle|>You are a helpful assistant<|im_end|><|im_user|>user<|im_middle|>Hello, please help me.<|im_end|><|im_assistant|>assistant<|im_middle|>I can help you with that.<|im_end|>' -Common Suffix: '' -Left (difference): '' -Right (difference): '' - -=== Diff: With vs Without reasoning_content (user, assistant, user) === -Common Prefix: '<|im_system|>system<|im_middle|>You are a helpful assistant<|im_end|><|im_user|>user<|im_middle|>Hello, please help 
me.<|im_end|><|im_assistant|>assistant<|im_middle|>I can help you with that.<|im_end|><|im_user|>user<|im_middle|>Thank you.<|im_end|>' -Common Suffix: '' -Left (difference): '' -Right (difference): '' - -=== Diff: With vs Without tool call (user, assistant) === -Common Prefix: '<|im_system|>tool_declare<|im_middle|>[{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}}]<|im_end|><|im_system|>system<|im_middle|>You are a helpful assistant<|im_end|><|im_user|>user<|im_middle|>Hello, please help me.<|im_end|><|im_assistant|>assistant<|im_middle|>' -Common Suffix: '<|im_end|>' -Left (difference): 'Let me help you.' -Right (difference): '<|tool_calls_section_begin|><|tool_call_begin|>functions.test_function_name:0<|tool_call_argument_begin|>{"param1": "value1", "param2": "value2"}<|tool_call_end|><|tool_calls_section_end|>' - -=== Diff: With vs Without tool call (user, assistant, user) === -Common Prefix: '<|im_system|>tool_declare<|im_middle|>[{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}}]<|im_end|><|im_system|>system<|im_middle|>You are a helpful assistant<|im_end|><|im_user|>user<|im_middle|>Hello, please help me.<|im_end|><|im_assistant|>assistant<|im_middle|>' -Common Suffix: '<|im_end|><|im_user|>user<|im_middle|>Continue.<|im_end|>' -Left (difference): 'Let me help you.' 
-Right (difference): '<|tool_calls_section_begin|><|tool_call_begin|>functions.test_function_name:0<|tool_call_argument_begin|>{"param1": "value1", "param2": "value2"}<|tool_call_end|><|tool_calls_section_end|>' - -=== Diff: One vs Two tool calls (user, assistant) === -Common Prefix: '<|im_system|>tool_declare<|im_middle|>[{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}}]<|im_end|><|im_system|>system<|im_middle|>You are a helpful assistant<|im_end|><|im_user|>user<|im_middle|>Hello, please help me.<|im_end|><|im_assistant|>assistant<|im_middle|><|tool_calls_section_begin|><|tool_call_begin|>functions.test_function_name:0<|tool_call_argument_begin|>{"param1": "value1", "param2": "value2"}<|tool_call_end|>' -Common Suffix: '<|tool_calls_section_end|><|im_end|>' -Left (difference): '' -Right (difference): '<|tool_call_begin|>functions.test_function_name:1<|tool_call_argument_begin|>{"param1": "value3", "param2": "value4"}<|tool_call_end|>' - -=== Diff: One vs Two tool calls (user, assistant, user) === -Common Prefix: '<|im_system|>tool_declare<|im_middle|>[{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}}]<|im_end|><|im_system|>system<|im_middle|>You are a helpful assistant<|im_end|><|im_user|>user<|im_middle|>Hello, please help me.<|im_end|><|im_assistant|>assistant<|im_middle|><|tool_calls_section_begin|><|tool_call_begin|>functions.test_function_name:0<|tool_call_argument_begin|>{"param1": "value1", "param2": 
"value2"}<|tool_call_end|>' -Common Suffix: '<|tool_calls_section_end|><|im_end|><|im_user|>user<|im_middle|>Continue.<|im_end|>' -Left (difference): '' -Right (difference): '<|tool_call_begin|>functions.test_function_name:1<|tool_call_argument_begin|>{"param1": "value3", "param2": "value4"}<|tool_call_end|>' - -=== Diff: Tool call with vs without reasoning_content (user, assistant) === -Common Prefix: '<|im_system|>tool_declare<|im_middle|>[{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}}]<|im_end|><|im_system|>system<|im_middle|>You are a helpful assistant<|im_end|><|im_user|>user<|im_middle|>Hello, please help me.<|im_end|><|im_assistant|>assistant<|im_middle|><|tool_calls_section_begin|><|tool_call_begin|>functions.test_function_name:0<|tool_call_argument_begin|>{"param1": "value1", "param2": "value2"}<|tool_call_end|><|tool_calls_section_end|><|im_end|>' -Common Suffix: '' -Left (difference): '' -Right (difference): '' - -=== Checking Reasoning Variables === -No reasoning/thinking-related variables were queried by the template - -================================================================================ - ANALYZING TEMPLATE: models/templates/openai-gpt-oss-120b.jinja -================================================================================ - -=== Template Capabilities (from jinja::caps) === -supports_tools: true -supports_tool_calls: true -supports_system_role: true -supports_parallel_tool_calls: false -requires_typed_content: false - -=== Diff: With vs Without Tools (single user message) === -Common Prefix: '<|start|>system<|message|>You are ChatGPT, a large language model trained by OpenAI. 
-Knowledge cutoff: 2024-06 -Current date: 2026-01-26 - -Reasoning: medium - -# Valid channels: analysis, commentary, final. Channel must be included for every message.' -Common Suffix: '<|end|><|start|>user<|message|>Hello, please help me.<|end|>' -Left (difference): '' -Right (difference): ' -Calls to these tools must go to the commentary channel: 'functions'.<|end|><|start|>developer<|message|># Tools - -## functions - -namespace functions { - -// A test function for debugging -type test_function_name = (_: { -// First parameter -param1: string, -// Second parameter -param2: string, -}) => any; - -} // namespace functions' - -=== Diff: With vs Without add_generation_prompt (single user message) === -Common Prefix: '<|start|>system<|message|>You are ChatGPT, a large language model trained by OpenAI. -Knowledge cutoff: 2024-06 -Current date: 2026-01-26 - -Reasoning: medium - -# Valid channels: analysis, commentary, final. Channel must be included for every message.<|end|><|start|>user<|message|>Hello, please help me.<|end|>' -Common Suffix: '' -Left (difference): '' -Right (difference): '<|start|>assistant' - -=== Diff: With vs Without reasoning_content (user, assistant) === -Common Prefix: '<|start|>system<|message|>You are ChatGPT, a large language model trained by OpenAI. -Knowledge cutoff: 2024-06 -Current date: 2026-01-26 - -Reasoning: medium - -# Valid channels: analysis, commentary, final. Channel must be included for every message.<|end|><|start|>user<|message|>Hello, please help me.<|end|><|start|>assistant<|channel|>final<|message|>I can help you with that.<|return|>' -Common Suffix: '' -Left (difference): '' -Right (difference): '' - -=== Diff: With vs Without reasoning_content (user, assistant, user) === -Common Prefix: '<|start|>system<|message|>You are ChatGPT, a large language model trained by OpenAI. -Knowledge cutoff: 2024-06 -Current date: 2026-01-26 - -Reasoning: medium - -# Valid channels: analysis, commentary, final. 
Channel must be included for every message.<|end|><|start|>user<|message|>Hello, please help me.<|end|><|start|>assistant<|channel|>final<|message|>I can help you with that.<|end|><|start|>user<|message|>Thank you.<|end|>' -Common Suffix: '' -Left (difference): '' -Right (difference): '' -Analysis failed: ------------- -While executing BinaryExpression at line 264, column 53 in source: -...{%- if "<|channel|>analysis<|message|>" in message.content or "<|channel|>final<... - ^ -Error: Cannot perform operation on null values - -================================================================================ - ANALYZING TEMPLATE: models/templates/unsloth-Apriel-1.5.jinja -================================================================================ - -=== Template Capabilities (from jinja::caps) === -supports_tools: false -supports_tool_calls: true -supports_system_role: true -supports_parallel_tool_calls: true -requires_typed_content: false - -=== Diff: With vs Without Tools (single user message) === -Common Prefix: '<|system|> -You are a thoughtful and systematic AI assistant built by ServiceNow Language Models (SLAM) lab. Before providing an answer, analyze the problem carefully and present your reasoning step by step. After explaining your thought process, provide the final solution in the following format: [BEGIN FINAL RESPONSE] ... [END FINAL RESPONSE].' -Common Suffix: ' -<|end|> -<|user|> -Hello, please help me. -<|end|> -' -Left (difference): '' -Right (difference): 'You are provided with function signatures within XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about the arguments. You should infer the argument values from previous user responses and the system message. 
Here are the available tools: - -{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}} - -Return all function calls as a list of json objects within XML tags. Each json object should contain a function name and arguments as follows: -[{"name": , "arguments": }, {"name": , "arguments": },...] -<|end|> -<|system|> -You are a thoughtful and systematic AI assistant built by ServiceNow Language Models (SLAM) lab. Before providing an answer, analyze the problem carefully and present your reasoning step by step. After explaining your thought process, provide the final solution in the following format: [BEGIN FINAL RESPONSE] ... [END FINAL RESPONSE].' - -=== Diff: With vs Without add_generation_prompt (single user message) === -Common Prefix: '<|system|> -You are a thoughtful and systematic AI assistant built by ServiceNow Language Models (SLAM) lab. Before providing an answer, analyze the problem carefully and present your reasoning step by step. After explaining your thought process, provide the final solution in the following format: [BEGIN FINAL RESPONSE] ... [END FINAL RESPONSE]. -<|end|> -<|user|> -Hello, please help me. -<|end|> -' -Common Suffix: '' -Left (difference): '' -Right (difference): '<|assistant|> -' - -=== Diff: With vs Without reasoning_content (user, assistant) === -Common Prefix: '<|system|> -You are a thoughtful and systematic AI assistant built by ServiceNow Language Models (SLAM) lab. Before providing an answer, analyze the problem carefully and present your reasoning step by step. After explaining your thought process, provide the final solution in the following format: [BEGIN FINAL RESPONSE] ... [END FINAL RESPONSE]. -<|end|> -<|user|> -Hello, please help me. 
-<|end|> -<|assistant|> -I can help you with that. -<|end|> -' -Common Suffix: '' -Left (difference): '' -Right (difference): '' - -=== Diff: With vs Without reasoning_content (user, assistant, user) === -Common Prefix: '<|system|> -You are a thoughtful and systematic AI assistant built by ServiceNow Language Models (SLAM) lab. Before providing an answer, analyze the problem carefully and present your reasoning step by step. After explaining your thought process, provide the final solution in the following format: [BEGIN FINAL RESPONSE] ... [END FINAL RESPONSE]. -<|end|> -<|user|> -Hello, please help me. -<|end|> -<|assistant|> -I can help you with that. -<|end|> -<|user|> -Thank you. -<|end|> -' -Common Suffix: '' -Left (difference): '' -Right (difference): '' - -=== Diff: With vs Without tool call (user, assistant) === -Common Prefix: '<|system|> -You are a thoughtful and systematic AI assistant built by ServiceNow Language Models (SLAM) lab. Before providing an answer, analyze the problem carefully and present your reasoning step by step. After explaining your thought process, provide the final solution in the following format: [BEGIN FINAL RESPONSE] ... [END FINAL RESPONSE].You are provided with function signatures within XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about the arguments. You should infer the argument values from previous user responses and the system message. Here are the available tools: - -{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}} - -Return all function calls as a list of json objects within XML tags. 
Each json object should contain a function name and arguments as follows: -[{"name": , "arguments": }, {"name": , "arguments": },...] -<|end|> -<|system|> -You are a thoughtful and systematic AI assistant built by ServiceNow Language Models (SLAM) lab. Before providing an answer, analyze the problem carefully and present your reasoning step by step. After explaining your thought process, provide the final solution in the following format: [BEGIN FINAL RESPONSE] ... [END FINAL RESPONSE]. -<|end|> -<|user|> -Hello, please help me. -<|end|> -<|assistant|> -' -Common Suffix: ' -<|end|> -' -Left (difference): 'Let me help you.' -Right (difference): ' -[{"name": "test_function_name", "arguments": {"param1": "value1", "param2": "value2"}}]' - -=== Diff: With vs Without tool call (user, assistant, user) === -Common Prefix: '<|system|> -You are a thoughtful and systematic AI assistant built by ServiceNow Language Models (SLAM) lab. Before providing an answer, analyze the problem carefully and present your reasoning step by step. After explaining your thought process, provide the final solution in the following format: [BEGIN FINAL RESPONSE] ... [END FINAL RESPONSE].You are provided with function signatures within XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about the arguments. You should infer the argument values from previous user responses and the system message. Here are the available tools: - -{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}} - -Return all function calls as a list of json objects within XML tags. Each json object should contain a function name and arguments as follows: -[{"name": , "arguments": }, {"name": , "arguments": },...] 
-<|end|> -<|system|> -You are a thoughtful and systematic AI assistant built by ServiceNow Language Models (SLAM) lab. Before providing an answer, analyze the problem carefully and present your reasoning step by step. After explaining your thought process, provide the final solution in the following format: [BEGIN FINAL RESPONSE] ... [END FINAL RESPONSE]. -<|end|> -<|user|> -Hello, please help me. -<|end|> -<|assistant|> -' -Common Suffix: ' -<|end|> -<|user|> -Continue. -<|end|> -' -Left (difference): 'Let me help you.' -Right (difference): ' -[{"name": "test_function_name", "arguments": {"param1": "value1", "param2": "value2"}, "id": "call_001"}]' - -=== Diff: One vs Two tool calls (user, assistant) === -Common Prefix: '<|system|> -You are a thoughtful and systematic AI assistant built by ServiceNow Language Models (SLAM) lab. Before providing an answer, analyze the problem carefully and present your reasoning step by step. After explaining your thought process, provide the final solution in the following format: [BEGIN FINAL RESPONSE] ... [END FINAL RESPONSE].You are provided with function signatures within XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about the arguments. You should infer the argument values from previous user responses and the system message. Here are the available tools: - -{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}} - -Return all function calls as a list of json objects within XML tags. Each json object should contain a function name and arguments as follows: -[{"name": , "arguments": }, {"name": , "arguments": },...] 
-<|end|> -<|system|> -You are a thoughtful and systematic AI assistant built by ServiceNow Language Models (SLAM) lab. Before providing an answer, analyze the problem carefully and present your reasoning step by step. After explaining your thought process, provide the final solution in the following format: [BEGIN FINAL RESPONSE] ... [END FINAL RESPONSE]. -<|end|> -<|user|> -Hello, please help me. -<|end|> -<|assistant|> - -[{"name": "test_function_name", "arguments": {"param1": "value1", "param2": "value2"}}' -Common Suffix: '] -<|end|> -' -Left (difference): '' -Right (difference): ', {"name": "test_function_name", "arguments": {"param1": "value3", "param2": "value4"}}' - -=== Diff: One vs Two tool calls (user, assistant, user) === -Common Prefix: '<|system|> -You are a thoughtful and systematic AI assistant built by ServiceNow Language Models (SLAM) lab. Before providing an answer, analyze the problem carefully and present your reasoning step by step. After explaining your thought process, provide the final solution in the following format: [BEGIN FINAL RESPONSE] ... [END FINAL RESPONSE].You are provided with function signatures within XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about the arguments. You should infer the argument values from previous user responses and the system message. Here are the available tools: - -{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}} - -Return all function calls as a list of json objects within XML tags. Each json object should contain a function name and arguments as follows: -[{"name": , "arguments": }, {"name": , "arguments": },...] 
-<|end|> -<|system|> -You are a thoughtful and systematic AI assistant built by ServiceNow Language Models (SLAM) lab. Before providing an answer, analyze the problem carefully and present your reasoning step by step. After explaining your thought process, provide the final solution in the following format: [BEGIN FINAL RESPONSE] ... [END FINAL RESPONSE]. -<|end|> -<|user|> -Hello, please help me. -<|end|> -<|assistant|> - -[{"name": "test_function_name", "arguments": {"param1": "value1", "param2": "value2"}, "id": "call_001"}' -Common Suffix: '] -<|end|> -<|user|> -Continue. -<|end|> -' -Left (difference): '' -Right (difference): ', {"name": "test_function_name", "arguments": {"param1": "value3", "param2": "value4"}, "id": "call_002"}' - -=== Diff: Tool call with vs without reasoning_content (user, assistant) === -Common Prefix: '<|system|> -You are a thoughtful and systematic AI assistant built by ServiceNow Language Models (SLAM) lab. Before providing an answer, analyze the problem carefully and present your reasoning step by step. After explaining your thought process, provide the final solution in the following format: [BEGIN FINAL RESPONSE] ... [END FINAL RESPONSE].You are provided with function signatures within XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about the arguments. You should infer the argument values from previous user responses and the system message. Here are the available tools: - -{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}} - -Return all function calls as a list of json objects within XML tags. 
Each json object should contain a function name and arguments as follows: -[{"name": , "arguments": }, {"name": , "arguments": },...] -<|end|> -<|system|> -You are a thoughtful and systematic AI assistant built by ServiceNow Language Models (SLAM) lab. Before providing an answer, analyze the problem carefully and present your reasoning step by step. After explaining your thought process, provide the final solution in the following format: [BEGIN FINAL RESPONSE] ... [END FINAL RESPONSE]. -<|end|> -<|user|> -Hello, please help me. -<|end|> -<|assistant|> - -[{"name": "test_function_name", "arguments": {"param1": "value1", "param2": "value2"}}] -<|end|> -' -Common Suffix: '' -Left (difference): '' -Right (difference): '' - -=== Checking Reasoning Variables === -No reasoning/thinking-related variables were queried by the template - -================================================================================ - ANALYZING TEMPLATE: models/templates/unsloth-mistral-Devstral-Small-2507.jinja -================================================================================ - -=== Template Capabilities (from jinja::caps) === -supports_tools: false -supports_tool_calls: true -supports_system_role: true -supports_parallel_tool_calls: true -requires_typed_content: false - -=== Diff: With vs Without Tools (single user message) === -Common Prefix: '[SYSTEM_PROMPT]You are Devstral, a helpful agentic model trained by Mistral AI and using the OpenHands scaffold. You can interact with a computer to solve tasks. - - -Your primary role is to assist users by executing commands, modifying code, and solving technical problems effectively. You should be thorough, methodical, and prioritize quality over speed. -* If the user asks a question, like "why is X happening", don't try to fix the problem. Just give an answer to the question. - - - -* Each action you take is somewhat expensive. Wherever possible, combine multiple actions into a single action, e.g. 
combine multiple bash commands into one, using sed and grep to edit/view multiple files at once. -* When exploring the codebase, use efficient tools like find, grep, and git commands with appropriate filters to minimize unnecessary operations. - - - -* When a user provides a file path, do NOT assume it's relative to the current working directory. First explore the file system to locate the file before working on it. -* If asked to edit a file, edit the file directly, rather than creating a new file with a different filename. -* For global search-and-replace operations, consider using `sed` instead of opening file editors multiple times. - - - -* Write clean, efficient code with minimal comments. Avoid redundancy in comments: Do not repeat information that can be easily inferred from the code itself. -* When implementing solutions, focus on making the minimal changes needed to solve the problem. -* Before implementing any changes, first thoroughly understand the codebase through exploration. -* If you are adding a lot of code to a function or file, consider splitting the function or file into smaller pieces when appropriate. - - - -* When configuring git credentials, use "openhands" as the user.name and "openhands@all-hands.dev" as the user.email by default, unless explicitly instructed otherwise. -* Exercise caution with git operations. Do NOT make potentially dangerous changes (e.g., pushing to main, deleting repositories) unless explicitly asked to do so. -* When committing changes, use `git status` to see all modified files, and stage all files necessary for the commit. Use `git commit -a` whenever possible. -* Do NOT commit files that typically shouldn't go into version control (e.g., node_modules/, .env files, build directories, cache files, large binaries) unless explicitly instructed by the user. -* If unsure about committing certain files, check for the presence of .gitignore files or ask the user for clarification. 
- - - -* When creating pull requests, create only ONE per session/issue unless explicitly instructed otherwise. -* When working with an existing PR, update it with new commits rather than creating additional PRs for the same issue. -* When updating a PR, preserve the original PR title and purpose, updating description only when necessary. - - - -1. EXPLORATION: Thoroughly explore relevant files and understand the context before proposing solutions -2. ANALYSIS: Consider multiple approaches and select the most promising one -3. TESTING: - * For bug fixes: Create tests to verify issues before implementing fixes - * For new features: Consider test-driven development when appropriate - * If the repository lacks testing infrastructure and implementing tests would require extensive setup, consult with the user before investing time in building testing infrastructure - * If the environment is not set up to run tests, consult with the user first before investing time to install all dependencies -4. IMPLEMENTATION: Make focused, minimal changes to address the problem -5. VERIFICATION: If the environment is set up to run tests, test your implementation thoroughly, including edge cases. If the environment is not set up to run tests, consult with the user first before investing time to run tests. - - - -* Only use GITHUB_TOKEN and other credentials in ways the user has explicitly requested and would expect. -* Use APIs to work with GitHub or other platforms, unless the user asks otherwise or your task requires browsing. - - - -* When user asks you to run an application, don't stop if the application is not installed. Instead, please install the application and run the command again. -* If you encounter missing dependencies: - 1. First, look around in the repository for existing dependency files (requirements.txt, pyproject.toml, package.json, Gemfile, etc.) - 2. 
If dependency files exist, use them to install all dependencies at once (e.g., `pip install -r requirements.txt`, `npm install`, etc.) - 3. Only install individual packages directly if no dependency files are found or if only specific packages are needed -* Similarly, if you encounter missing dependencies for essential tools requested by the user, install them when possible. - - - -* If you've made repeated attempts to solve a problem but tests still fail or the user reports it's still broken: - 1. Step back and reflect on 5-7 different possible sources of the problem - 2. Assess the likelihood of each possible cause - 3. Methodically address the most likely causes, starting with the highest probability - 4. Document your reasoning process -* When you run into any major issue while executing a plan from the user, please don't try to directly work around it. Instead, propose a new plan and confirm with the user before proceeding. -[/SYSTEM_PROMPT]' -Common Suffix: '[INST]Hello, please help me.[/INST]' -Left (difference): '' -Right (difference): '[AVAILABLE_TOOLS][{"type": "function", "function": {"name": "test_function_name", "description": "A test function for debugging", "parameters": {"type": "object", "properties": {"param1": {"type": "string", "description": "First parameter"}, "param2": {"type": "string", "description": "Second parameter"}}, "required": ["param1", "param2"]}}}][/AVAILABLE_TOOLS]' - -=== Diff: With vs Without add_generation_prompt (single user message) === -Common Prefix: '[SYSTEM_PROMPT]You are Devstral, a helpful agentic model trained by Mistral AI and using the OpenHands scaffold. You can interact with a computer to solve tasks. - - -Your primary role is to assist users by executing commands, modifying code, and solving technical problems effectively. You should be thorough, methodical, and prioritize quality over speed. -* If the user asks a question, like "why is X happening", don't try to fix the problem. 
Just give an answer to the question. - - - -* Each action you take is somewhat expensive. Wherever possible, combine multiple actions into a single action, e.g. combine multiple bash commands into one, using sed and grep to edit/view multiple files at once. -* When exploring the codebase, use efficient tools like find, grep, and git commands with appropriate filters to minimize unnecessary operations. - - - -* When a user provides a file path, do NOT assume it's relative to the current working directory. First explore the file system to locate the file before working on it. -* If asked to edit a file, edit the file directly, rather than creating a new file with a different filename. -* For global search-and-replace operations, consider using `sed` instead of opening file editors multiple times. - - - -* Write clean, efficient code with minimal comments. Avoid redundancy in comments: Do not repeat information that can be easily inferred from the code itself. -* When implementing solutions, focus on making the minimal changes needed to solve the problem. -* Before implementing any changes, first thoroughly understand the codebase through exploration. -* If you are adding a lot of code to a function or file, consider splitting the function or file into smaller pieces when appropriate. - - - -* When configuring git credentials, use "openhands" as the user.name and "openhands@all-hands.dev" as the user.email by default, unless explicitly instructed otherwise. -* Exercise caution with git operations. Do NOT make potentially dangerous changes (e.g., pushing to main, deleting repositories) unless explicitly asked to do so. -* When committing changes, use `git status` to see all modified files, and stage all files necessary for the commit. Use `git commit -a` whenever possible. -* Do NOT commit files that typically shouldn't go into version control (e.g., node_modules/, .env files, build directories, cache files, large binaries) unless explicitly instructed by the user. 
-* If unsure about committing certain files, check for the presence of .gitignore files or ask the user for clarification. - - - -* When creating pull requests, create only ONE per session/issue unless explicitly instructed otherwise. -* When working with an existing PR, update it with new commits rather than creating additional PRs for the same issue. -* When updating a PR, preserve the original PR title and purpose, updating description only when necessary. - - - -1. EXPLORATION: Thoroughly explore relevant files and understand the context before proposing solutions -2. ANALYSIS: Consider multiple approaches and select the most promising one -3. TESTING: - * For bug fixes: Create tests to verify issues before implementing fixes - * For new features: Consider test-driven development when appropriate - * If the repository lacks testing infrastructure and implementing tests would require extensive setup, consult with the user before investing time in building testing infrastructure - * If the environment is not set up to run tests, consult with the user first before investing time to install all dependencies -4. IMPLEMENTATION: Make focused, minimal changes to address the problem -5. VERIFICATION: If the environment is set up to run tests, test your implementation thoroughly, including edge cases. If the environment is not set up to run tests, consult with the user first before investing time to run tests. - - - -* Only use GITHUB_TOKEN and other credentials in ways the user has explicitly requested and would expect. -* Use APIs to work with GitHub or other platforms, unless the user asks otherwise or your task requires browsing. - - - -* When user asks you to run an application, don't stop if the application is not installed. Instead, please install the application and run the command again. -* If you encounter missing dependencies: - 1. 
First, look around in the repository for existing dependency files (requirements.txt, pyproject.toml, package.json, Gemfile, etc.) - 2. If dependency files exist, use them to install all dependencies at once (e.g., `pip install -r requirements.txt`, `npm install`, etc.) - 3. Only install individual packages directly if no dependency files are found or if only specific packages are needed -* Similarly, if you encounter missing dependencies for essential tools requested by the user, install them when possible. - - - -* If you've made repeated attempts to solve a problem but tests still fail or the user reports it's still broken: - 1. Step back and reflect on 5-7 different possible sources of the problem - 2. Assess the likelihood of each possible cause - 3. Methodically address the most likely causes, starting with the highest probability - 4. Document your reasoning process -* When you run into any major issue while executing a plan from the user, please don't try to directly work around it. Instead, propose a new plan and confirm with the user before proceeding. -[/SYSTEM_PROMPT][INST]Hello, please help me.[/INST]' -Common Suffix: '' -Left (difference): '' -Right (difference): '' - -=== Diff: With vs Without reasoning_content (user, assistant) === -Common Prefix: '[SYSTEM_PROMPT]You are Devstral, a helpful agentic model trained by Mistral AI and using the OpenHands scaffold. You can interact with a computer to solve tasks. - - -Your primary role is to assist users by executing commands, modifying code, and solving technical problems effectively. You should be thorough, methodical, and prioritize quality over speed. -* If the user asks a question, like "why is X happening", don't try to fix the problem. Just give an answer to the question. - - - -* Each action you take is somewhat expensive. Wherever possible, combine multiple actions into a single action, e.g. combine multiple bash commands into one, using sed and grep to edit/view multiple files at once. 
-* When exploring the codebase, use efficient tools like find, grep, and git commands with appropriate filters to minimize unnecessary operations. - - - -* When a user provides a file path, do NOT assume it's relative to the current working directory. First explore the file system to locate the file before working on it. -* If asked to edit a file, edit the file directly, rather than creating a new file with a different filename. -* For global search-and-replace operations, consider using `sed` instead of opening file editors multiple times. - - - -* Write clean, efficient code with minimal comments. Avoid redundancy in comments: Do not repeat information that can be easily inferred from the code itself. -* When implementing solutions, focus on making the minimal changes needed to solve the problem. -* Before implementing any changes, first thoroughly understand the codebase through exploration. -* If you are adding a lot of code to a function or file, consider splitting the function or file into smaller pieces when appropriate. - - - -* When configuring git credentials, use "openhands" as the user.name and "openhands@all-hands.dev" as the user.email by default, unless explicitly instructed otherwise. -* Exercise caution with git operations. Do NOT make potentially dangerous changes (e.g., pushing to main, deleting repositories) unless explicitly asked to do so. -* When committing changes, use `git status` to see all modified files, and stage all files necessary for the commit. Use `git commit -a` whenever possible. -* Do NOT commit files that typically shouldn't go into version control (e.g., node_modules/, .env files, build directories, cache files, large binaries) unless explicitly instructed by the user. -* If unsure about committing certain files, check for the presence of .gitignore files or ask the user for clarification. - - - -* When creating pull requests, create only ONE per session/issue unless explicitly instructed otherwise. 
-* When working with an existing PR, update it with new commits rather than creating additional PRs for the same issue. -* When updating a PR, preserve the original PR title and purpose, updating description only when necessary. - - - -1. EXPLORATION: Thoroughly explore relevant files and understand the context before proposing solutions -2. ANALYSIS: Consider multiple approaches and select the most promising one -3. TESTING: - * For bug fixes: Create tests to verify issues before implementing fixes - * For new features: Consider test-driven development when appropriate - * If the repository lacks testing infrastructure and implementing tests would require extensive setup, consult with the user before investing time in building testing infrastructure - * If the environment is not set up to run tests, consult with the user first before investing time to install all dependencies -4. IMPLEMENTATION: Make focused, minimal changes to address the problem -5. VERIFICATION: If the environment is set up to run tests, test your implementation thoroughly, including edge cases. If the environment is not set up to run tests, consult with the user first before investing time to run tests. - - - -* Only use GITHUB_TOKEN and other credentials in ways the user has explicitly requested and would expect. -* Use APIs to work with GitHub or other platforms, unless the user asks otherwise or your task requires browsing. - - - -* When user asks you to run an application, don't stop if the application is not installed. Instead, please install the application and run the command again. -* If you encounter missing dependencies: - 1. First, look around in the repository for existing dependency files (requirements.txt, pyproject.toml, package.json, Gemfile, etc.) - 2. If dependency files exist, use them to install all dependencies at once (e.g., `pip install -r requirements.txt`, `npm install`, etc.) - 3. 
Only install individual packages directly if no dependency files are found or if only specific packages are needed -* Similarly, if you encounter missing dependencies for essential tools requested by the user, install them when possible. - - - -* If you've made repeated attempts to solve a problem but tests still fail or the user reports it's still broken: - 1. Step back and reflect on 5-7 different possible sources of the problem - 2. Assess the likelihood of each possible cause - 3. Methodically address the most likely causes, starting with the highest probability - 4. Document your reasoning process -* When you run into any major issue while executing a plan from the user, please don't try to directly work around it. Instead, propose a new plan and confirm with the user before proceeding. -[/SYSTEM_PROMPT][INST]Hello, please help me.[/INST]I can help you with that.' -Common Suffix: '' -Left (difference): '' -Right (difference): '' - -=== Diff: With vs Without reasoning_content (user, assistant, user) === -Common Prefix: '[SYSTEM_PROMPT]You are Devstral, a helpful agentic model trained by Mistral AI and using the OpenHands scaffold. You can interact with a computer to solve tasks. - - -Your primary role is to assist users by executing commands, modifying code, and solving technical problems effectively. You should be thorough, methodical, and prioritize quality over speed. -* If the user asks a question, like "why is X happening", don't try to fix the problem. Just give an answer to the question. - - - -* Each action you take is somewhat expensive. Wherever possible, combine multiple actions into a single action, e.g. combine multiple bash commands into one, using sed and grep to edit/view multiple files at once. -* When exploring the codebase, use efficient tools like find, grep, and git commands with appropriate filters to minimize unnecessary operations. - - - -* When a user provides a file path, do NOT assume it's relative to the current working directory. 
First explore the file system to locate the file before working on it. -* If asked to edit a file, edit the file directly, rather than creating a new file with a different filename. -* For global search-and-replace operations, consider using `sed` instead of opening file editors multiple times. - - - -* Write clean, efficient code with minimal comments. Avoid redundancy in comments: Do not repeat information that can be easily inferred from the code itself. -* When implementing solutions, focus on making the minimal changes needed to solve the problem. -* Before implementing any changes, first thoroughly understand the codebase through exploration. -* If you are adding a lot of code to a function or file, consider splitting the function or file into smaller pieces when appropriate. - - - -* When configuring git credentials, use "openhands" as the user.name and "openhands@all-hands.dev" as the user.email by default, unless explicitly instructed otherwise. -* Exercise caution with git operations. Do NOT make potentially dangerous changes (e.g., pushing to main, deleting repositories) unless explicitly asked to do so. -* When committing changes, use `git status` to see all modified files, and stage all files necessary for the commit. Use `git commit -a` whenever possible. -* Do NOT commit files that typically shouldn't go into version control (e.g., node_modules/, .env files, build directories, cache files, large binaries) unless explicitly instructed by the user. -* If unsure about committing certain files, check for the presence of .gitignore files or ask the user for clarification. - - - -* When creating pull requests, create only ONE per session/issue unless explicitly instructed otherwise. -* When working with an existing PR, update it with new commits rather than creating additional PRs for the same issue. -* When updating a PR, preserve the original PR title and purpose, updating description only when necessary. - - - -1. 
EXPLORATION: Thoroughly explore relevant files and understand the context before proposing solutions -2. ANALYSIS: Consider multiple approaches and select the most promising one -3. TESTING: - * For bug fixes: Create tests to verify issues before implementing fixes - * For new features: Consider test-driven development when appropriate - * If the repository lacks testing infrastructure and implementing tests would require extensive setup, consult with the user before investing time in building testing infrastructure - * If the environment is not set up to run tests, consult with the user first before investing time to install all dependencies -4. IMPLEMENTATION: Make focused, minimal changes to address the problem -5. VERIFICATION: If the environment is set up to run tests, test your implementation thoroughly, including edge cases. If the environment is not set up to run tests, consult with the user first before investing time to run tests. - - - -* Only use GITHUB_TOKEN and other credentials in ways the user has explicitly requested and would expect. -* Use APIs to work with GitHub or other platforms, unless the user asks otherwise or your task requires browsing. - - - -* When user asks you to run an application, don't stop if the application is not installed. Instead, please install the application and run the command again. -* If you encounter missing dependencies: - 1. First, look around in the repository for existing dependency files (requirements.txt, pyproject.toml, package.json, Gemfile, etc.) - 2. If dependency files exist, use them to install all dependencies at once (e.g., `pip install -r requirements.txt`, `npm install`, etc.) - 3. Only install individual packages directly if no dependency files are found or if only specific packages are needed -* Similarly, if you encounter missing dependencies for essential tools requested by the user, install them when possible. 
- - - -* If you've made repeated attempts to solve a problem but tests still fail or the user reports it's still broken: - 1. Step back and reflect on 5-7 different possible sources of the problem - 2. Assess the likelihood of each possible cause - 3. Methodically address the most likely causes, starting with the highest probability - 4. Document your reasoning process -* When you run into any major issue while executing a plan from the user, please don't try to directly work around it. Instead, propose a new plan and confirm with the user before proceeding. -[/SYSTEM_PROMPT][INST]Hello, please help me.[/INST]I can help you with that.[INST]Thank you.[/INST]' -Common Suffix: '' -Left (difference): '' -Right (difference): '' -Analysis failed: ------------- -While executing MemberExpression at line 74, column 24 in source: -... {%- else %}↵ {{- message['content'][0]['text'] }}↵ {%- en... - ^ -Error: Cannot access property with non-string: got Integer - -================================================================================ - ANALYSIS COMPLETE -================================================================================ - \ No newline at end of file From b0853baca74d3d226db373d4c5481c3f2c51088c Mon Sep 17 00:00:00 2001 From: Piotr Wilkin Date: Tue, 3 Feb 2026 01:39:00 +0100 Subject: [PATCH 08/39] Quick vibe-coded fix for proper object printing --- common/chat-diff-analyzer.cpp | 4 +++- common/chat.cpp | 2 +- common/jinja/value.h | 11 ++++++++++- tests/test-chat.cpp | 5 +++-- 4 files changed, 17 insertions(+), 5 deletions(-) diff --git a/common/chat-diff-analyzer.cpp b/common/chat-diff-analyzer.cpp index 6afb9342c2..11aa9e3175 100644 --- a/common/chat-diff-analyzer.cpp +++ b/common/chat-diff-analyzer.cpp @@ -192,6 +192,7 @@ std::optional differential_analyzer::compare_variants( if (params_modifier) { params_modifier(params_B); } + // Apply template to both variants std::string output_A = apply_template(tmpl, params_A); @@ -683,7 +684,8 @@ void 
differential_analyzer::analyze_tool_call_format_json_native(const std::stri // we might not have the typical OpenAI tool calling structure int json_start = clean_haystack.find_first_of('{'); int json_end = clean_haystack.find_last_of('}'); - json call_struct = json::parse(clean_haystack.substr(json_start, json_end - json_start + 1)); + std::string cut = clean_haystack.substr(json_start, json_end - json_start + 1); + json call_struct = json::parse(cut); auto register_field = [&](const std::string & prefix, const nlohmann::detail::iteration_proxy_value & subel) { if (subel.value().is_string() && std::string(subel.value()).find("call0000") != std::string::npos) { diff --git a/common/chat.cpp b/common/chat.cpp index edd98347b3..af809bcd72 100644 --- a/common/chat.cpp +++ b/common/chat.cpp @@ -116,7 +116,7 @@ json common_chat_msg::to_json_oaicompat(bool concat_typed_text) const { {"type", "function"}, {"function", { {"name", tool_call.name}, - {"arguments", tool_call.arguments}, + {"arguments", json::parse(tool_call.arguments)}, }}, }; if (!tool_call.id.empty()) { diff --git a/common/jinja/value.h b/common/jinja/value.h index 0425bda5e3..df3eeaf444 100644 --- a/common/jinja/value.h +++ b/common/jinja/value.h @@ -502,12 +502,21 @@ struct value_object_t : public value_t { virtual bool is_immutable() const override { return false; } virtual const std::vector> & as_ordered_object() const override { return val_obj; } virtual string as_string() const override { + // Use JSON format for object string representation to ensure compatibility + // when concatenated in templates (e.g., '{"name": ' + arguments + '}') std::ostringstream ss; ss << "{"; for (size_t i = 0; i < val_obj.size(); i++) { if (i > 0) ss << ", "; auto & [key, val] = val_obj.at(i); - ss << value_to_string_repr(key) << ": " << value_to_string_repr(val); + // Use double quotes for keys (JSON format) + ss << "\"" << key->as_string().str() << "\": "; + if (is_val(val)) { + // Strings need to be quoted in JSON + ss 
<< "\"" << val->as_string().str() << "\""; + } else { + ss << val->as_string().str(); + } } ss << "}"; return ss.str(); diff --git a/tests/test-chat.cpp b/tests/test-chat.cpp index 38798ec9d7..304370f2c1 100644 --- a/tests/test-chat.cpp +++ b/tests/test-chat.cpp @@ -973,7 +973,6 @@ static void test_msgs_oaicompat_json_conversion() { common_chat_msgs_to_json_oaicompat({ message_user_parts }).dump(2)); // Note: content is "" instead of null due to workaround for templates that render null as "None" - // Arguments are serialized as string for OAI compatibility assert_equals(std::string("[\n" " {\n" " \"role\": \"assistant\",\n" @@ -983,7 +982,9 @@ static void test_msgs_oaicompat_json_conversion() { " \"type\": \"function\",\n" " \"function\": {\n" " \"name\": \"python\",\n" - " \"arguments\": \"{\\\"code\\\":\\\"print('hey')\\\"}\"\n" + " \"arguments\": {\n" + " \"code\": \"print('hey')\"\n" + " }\n" " }\n" " }\n" " ]\n" From 80b7e161ff3246bcabbcd48e0268feda8f5deed7 Mon Sep 17 00:00:00 2001 From: Piotr Wilkin Date: Tue, 3 Feb 2026 02:27:01 +0100 Subject: [PATCH 09/39] Fix reasoning detection --- common/chat-auto-parser-helpers.cpp | 10 ++++++++++ common/chat-auto-parser-helpers.h | 5 ++++- common/chat-diff-analyzer.cpp | 8 ++++---- 3 files changed, 18 insertions(+), 5 deletions(-) diff --git a/common/chat-auto-parser-helpers.cpp b/common/chat-auto-parser-helpers.cpp index 9c345d6f6e..4bf27f1dcb 100644 --- a/common/chat-auto-parser-helpers.cpp +++ b/common/chat-auto-parser-helpers.cpp @@ -374,3 +374,13 @@ std::vector segmentize_markers(const std::string & text) { return retval; } +std::vector prune_whitespace_segments(const std::vector & segments) { + std::vector result; + for (const auto & seg : segments) { + if (!trim_whitespace(seg.value).empty()) { + result.push_back(seg); + } + } + return result; +} + diff --git a/common/chat-auto-parser-helpers.h b/common/chat-auto-parser-helpers.h index e9534d6715..445119be8e 100644 --- a/common/chat-auto-parser-helpers.h +++ 
b/common/chat-auto-parser-helpers.h @@ -19,4 +19,7 @@ std::string until_common_prefix(const std::string & full, const std::string & le std::string after_common_suffix(const std::string & full, const std::string & left, const std::string & right); // Segmentize text into markers and non-marker fragments -std::vector segmentize_markers(const std::string & text); \ No newline at end of file +std::vector segmentize_markers(const std::string & text); + +// Prune whitespace-only segments from a vector of segments +std::vector prune_whitespace_segments(const std::vector & segments); \ No newline at end of file diff --git a/common/chat-diff-analyzer.cpp b/common/chat-diff-analyzer.cpp index 11aa9e3175..0082e3ab77 100644 --- a/common/chat-diff-analyzer.cpp +++ b/common/chat-diff-analyzer.cpp @@ -287,7 +287,7 @@ void differential_analyzer::compare_reasoning_presence(const common_chat_templat const std::string reasoning_content = "Let me think about this."; if (!diff.right.empty() && diff.right.find(reasoning_content) != std::string::npos) { - auto seg = segmentize_markers(diff.right); + auto seg = prune_whitespace_segments(segmentize_markers(diff.right)); if (seg.size() >= 3 && trim_whitespace(seg[1].value) == reasoning_content) { // easy one: opening marker - reasoning - closing marker (possibly with trailing whitespace) result.reasoning = reasoning_mode::TAG_BASED; @@ -312,10 +312,10 @@ void differential_analyzer::compare_reasoning_presence(const common_chat_templat // right: reasoning_content // suffix: content // prefix: ... 
- auto suf_seg = segmentize_markers(diff.suffix); + auto suf_seg = prune_whitespace_segments(segmentize_markers(diff.suffix)); if (trim_whitespace(diff.left).empty() && suf_seg.size() >= 2 && suf_seg[0].type == segment_type::MARKER && trim_whitespace(suf_seg[1].value).substr(0, 11) == "I can help.") { - auto pre_seg = segmentize_markers(diff.prefix); + auto pre_seg = prune_whitespace_segments(segmentize_markers(diff.prefix)); if (pre_seg[pre_seg.size() - 1].type == segment_type::MARKER || (pre_seg.size() > 1 && trim_whitespace(pre_seg[pre_seg.size() - 1].value).empty() && pre_seg[pre_seg.size() - 2].type == segment_type::MARKER)) { @@ -577,7 +577,7 @@ void differential_analyzer::compare_content_values(const common_chat_template & LOG_DBG("C1: No content markers\n"); result.content = content_mode::PLAIN; found_plain_content = true; - } else if (result.reasoning == reasoning_mode::FORCED_CLOSED && + } else if (result.reasoning != reasoning_mode::NONE && !result.markers.reasoning_end.empty() && diff_reasoning.left.find(result.markers.reasoning_end) != std::string::npos) { std::string post_closed_reasoning = diff_reasoning.left.substr( diff_reasoning.left.find(result.markers.reasoning_end) + result.markers.reasoning_end.length()); From 9ba9a94819ba958a269f5216ef93e6791560ec47 Mon Sep 17 00:00:00 2001 From: Piotr Wilkin Date: Tue, 3 Feb 2026 13:59:58 +0100 Subject: [PATCH 10/39] More robust reasoning detection --- common/chat.cpp | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/common/chat.cpp b/common/chat.cpp index af809bcd72..4041d7d81e 100644 --- a/common/chat.cpp +++ b/common/chat.cpp @@ -1,6 +1,7 @@ #include "chat.h" #include "chat-auto-parser.h" +#include "chat-diff-analyzer.h" #include "chat-peg-parser.h" #include "common.h" #include "ggml.h" @@ -234,7 +235,13 @@ bool common_chat_templates_support_enable_thinking(const common_chat_templates * const auto rendered_no_thinking = common_chat_templates_apply(chat_templates, dummy_inputs); 
dummy_inputs.enable_thinking = true; const auto rendered_with_thinking = common_chat_templates_apply(chat_templates, dummy_inputs); - return rendered_no_thinking.prompt != rendered_with_thinking.prompt; + bool detect = rendered_no_thinking.prompt != rendered_with_thinking.prompt; + const auto & tmpl = chat_templates->template_tool_use + ? *chat_templates->template_tool_use + : *chat_templates->template_default; + diff_analysis_result result = differential_analyzer::analyze(tmpl); + detect |= result.reasoning != reasoning_mode::NONE; + return detect; } std::vector common_chat_msgs_parse_oaicompat(const json & messages) { From 3770566c45548de7970b2e9fdfec2e87f5381205 Mon Sep 17 00:00:00 2001 From: Piotr Wilkin Date: Tue, 3 Feb 2026 17:16:15 +0100 Subject: [PATCH 11/39] Reverd bad change fix some templates and most tests --- common/jinja/value.h | 14 +- models/templates/Apertus-8B-Instruct.jinja | 2 +- .../Apriel-1.6-15b-Thinker-fixed.jinja | 2 +- models/templates/StepFun3.5-Flash.jinja | 80 +++ ...fireworks-ai-llama-3-firefunction-v2.jinja | 2 +- models/templates/unsloth-Apriel-1.5.jinja | 2 +- tests/CMakeLists.txt | 1 + tests/test-chat-auto-parser.cpp | 2 +- tests/test-chat-template.cpp | 626 ++++++++++++++++++ tests/test-chat.cpp | 13 + 10 files changed, 726 insertions(+), 18 deletions(-) create mode 100644 models/templates/StepFun3.5-Flash.jinja create mode 100644 tests/test-chat-template.cpp diff --git a/common/jinja/value.h b/common/jinja/value.h index df3eeaf444..a2f92d2c69 100644 --- a/common/jinja/value.h +++ b/common/jinja/value.h @@ -12,7 +12,6 @@ #include #include #include -#include #include namespace jinja { @@ -502,21 +501,12 @@ struct value_object_t : public value_t { virtual bool is_immutable() const override { return false; } virtual const std::vector> & as_ordered_object() const override { return val_obj; } virtual string as_string() const override { - // Use JSON format for object string representation to ensure compatibility - // when concatenated 
in templates (e.g., '{"name": ' + arguments + '}') std::ostringstream ss; ss << "{"; for (size_t i = 0; i < val_obj.size(); i++) { if (i > 0) ss << ", "; auto & [key, val] = val_obj.at(i); - // Use double quotes for keys (JSON format) - ss << "\"" << key->as_string().str() << "\": "; - if (is_val(val)) { - // Strings need to be quoted in JSON - ss << "\"" << val->as_string().str() << "\""; - } else { - ss << val->as_string().str(); - } + ss << value_to_string_repr(key) << ": " << value_to_string_repr(val); } ss << "}"; return ss.str(); @@ -626,8 +616,6 @@ struct value_undefined_t : public value_t { value_undefined_t(const std::string & h = "") : hint(h) {} virtual std::string type() const override { return hint.empty() ? "Undefined" : "Undefined (hint: '" + hint + "')"; } virtual bool is_undefined() const override { return true; } - // note: some templates use "is none" as equivalent to "is undefined" - virtual bool is_none() const override { return true; } virtual bool as_bool() const override { return false; } virtual std::string as_repr() const override { return type(); } virtual const func_builtins & get_builtins() const override; diff --git a/models/templates/Apertus-8B-Instruct.jinja b/models/templates/Apertus-8B-Instruct.jinja index 10826ff690..48f1658f4c 100644 --- a/models/templates/Apertus-8B-Instruct.jinja +++ b/models/templates/Apertus-8B-Instruct.jinja @@ -294,7 +294,7 @@ {%- for tool_call in message.tool_calls -%} {%- if tool_call.type == 'function' -%} {%- set function = tool_call.function -%} - {{- '{"' + function.name + '": ' + function.arguments + '}' }} + {{- '{"' + function.name + '": ' + function.arguments|tojson + '}' }} {%- if not loop.last -%} {{- ", " }} {%- endif -%} diff --git a/models/templates/Apriel-1.6-15b-Thinker-fixed.jinja b/models/templates/Apriel-1.6-15b-Thinker-fixed.jinja index 9df29255b7..c430f45580 100755 --- a/models/templates/Apriel-1.6-15b-Thinker-fixed.jinja +++ b/models/templates/Apriel-1.6-15b-Thinker-fixed.jinja @@ 
-130,7 +130,7 @@ {%- if tool_calls and tool_calls|length > 0 -%} {{ '\n[' }} {%- for tool_call in tool_calls -%} - {{ '{"name": "' + tool_call['function']['name'] + '", "arguments": ' + tool_call['function']['arguments']|string }} + {{ '{"name": "' + tool_call['function']['name'] + '", "arguments": ' + tool_call['function']['arguments']|tojson }} {%- if add_tool_id == true and 'id' in tool_call -%} {{ ', "id": "' + tool_call['id'] + '"' }} {%- endif -%} diff --git a/models/templates/StepFun3.5-Flash.jinja b/models/templates/StepFun3.5-Flash.jinja new file mode 100644 index 0000000000..c09ea497da --- /dev/null +++ b/models/templates/StepFun3.5-Flash.jinja @@ -0,0 +1,80 @@ +{% macro render_content(content) %}{% if content is none %}{{- '' }}{% elif content is string %}{{- content }}{% elif content is mapping %}{{- content['value'] if 'value' in content else content['text'] }}{% elif content is iterable %}{% for item in content %}{% if item.type == 'text' %}{{- item['value'] if 'value' in item else item['text'] }}{% elif item.type == 'image' %}{% endif %}{% endfor %}{% endif %}{% endmacro %} +{{bos_token}}{%- if tools %} + {{- '<|im_start|>system\n' }} + {%- if messages[0].role == 'system' %} + {{- render_content(messages[0].content) + '\n\n' }} + {%- endif %} + {{- "# Tools\n\nYou have access to the following functions in JSONSchema format:\n\n" }} + {%- for tool in tools %} + {{- "\n" }} + {{- tool | tojson(ensure_ascii=False) }} + {%- endfor %} + {{- "\n\n\nIf you choose to call a function ONLY reply in the following format with NO suffix:\n\n\n\n\nvalue_1\n\n\nThis is the value for the second parameter\nthat can span\nmultiple lines\n\n\n\n\n\nReminder:\n- Function calls MUST follow the specified format: an inner \n...\n block must be nested within \n...\n XML tags\n- Required parameters MUST be specified\n<|im_end|>\n" }} +{%- else %} + {%- if messages[0].role == 'system' %} + {{- '<|im_start|>system\n' + render_content(messages[0].content) + '<|im_end|>\n' }} + 
{%- endif %} +{%- endif %} +{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %} +{%- for message in messages[::-1] %} + {%- set index = (messages|length - 1) - loop.index0 %} + {%- if ns.multi_step_tool and message.role == "user" and render_content(message.content) is string and not(render_content(message.content).startswith('') and render_content(message.content).endswith('')) %} + {%- set ns.multi_step_tool = false %} + {%- set ns.last_query_index = index %} + {%- endif %} +{%- endfor %} +{%- for message in messages %} + {%- set content = render_content(message.content) %} + {%- if (message.role == "user") or (message.role == "system" and not loop.first) %} + {%- set role_name = 'observation' if (message.role == "system" and not loop.first and message.name == 'observation') else message.role %} + {{- '<|im_start|>' + role_name + '\n' + content + '<|im_end|>' + '\n' }} + {%- elif message.role == "assistant" %} + {%- if message.reasoning_content is string %} + {%- set reasoning_content = render_content(message.reasoning_content) %} + {%- else %} + {%- if '' in content %} + {%- set reasoning_content = content.split('')[0].rstrip('\n').split('')[-1].lstrip('\n') %} + {%- set content = content.split('')[-1].lstrip('\n') %} + {%- else %} + {%- set reasoning_content = '' %} + {%- endif %} + {%- endif %} + {%- if loop.index0 > ns.last_query_index %} + {{- '<|im_start|>' + message.role + '\n\n' + reasoning_content + '\n\n' + content }} + {%- else %} + {{- '<|im_start|>' + message.role + '\n' + content }} + {%- endif %} + {%- if message.tool_calls %} + {%- for tool_call in message.tool_calls %} + {%- if tool_call.function is defined %} + {%- set tool_call = tool_call.function %} + {%- endif %} + {{- '\n\n' }} + {%- if tool_call.arguments is defined %} + {%- set arguments = tool_call.arguments %} + {%- for args_name, args_value in arguments|items %} + {{- '\n' }} + {%- set args_value = args_value | tojson(ensure_ascii=False) | safe if 
args_value is mapping or (args_value is sequence and args_value is not string) else args_value | string %} + {{- args_value }} + {{- '\n\n' }} + {%- endfor %} + {%- endif %} + {{- '\n' }} + {%- endfor %} + {%- endif %} + {{- '<|im_end|>\n' }} + {%- elif message.role == "tool" %} + {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %} + {{- '<|im_start|>tool_response\n' }} + {%- endif %} + {{- '' }} + {{- content }} + {{- '' }} + {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %} + {{- '<|im_end|>\n' }} + {%- endif %} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|im_start|>assistant\n\n' }} +{%- endif %} diff --git a/models/templates/fireworks-ai-llama-3-firefunction-v2.jinja b/models/templates/fireworks-ai-llama-3-firefunction-v2.jinja index 9b8136df73..b94cfd4d9b 100644 --- a/models/templates/fireworks-ai-llama-3-firefunction-v2.jinja +++ b/models/templates/fireworks-ai-llama-3-firefunction-v2.jinja @@ -46,7 +46,7 @@ Available functions as JSON spec: {%- if 'tool_calls' in message and message['tool_calls'] -%} {%- set tool = namespace(calls=[]) -%} {%- for call in message['tool_calls'] -%} - {%- set tool.calls = tool.calls + ['{"name": "' + call['function']['name'] + '", "arguments": ' + call['function']['arguments'] + '}'] -%} + {%- set tool.calls = tool.calls + ['{"name": "' + call['function']['name'] + '", "arguments": ' + call['function']['arguments']|tojson + '}'] -%} {%- endfor -%} {%- set ns.content = ns.content + ' functools[' + tool.calls | join(', ') + ']' -%} {%- endif -%} diff --git a/models/templates/unsloth-Apriel-1.5.jinja b/models/templates/unsloth-Apriel-1.5.jinja index 8e59d2f1d4..1639b63901 100644 --- a/models/templates/unsloth-Apriel-1.5.jinja +++ b/models/templates/unsloth-Apriel-1.5.jinja @@ -101,7 +101,7 @@ Prior to generating the function calls, you should generate the reasoning for wh {%- if message['tool_calls'] is defined and message['tool_calls'] is not none and 
message['tool_calls']|length > 0 -%} {{- '\n[' -}} {%- for tool_call in message["tool_calls"] -%} - {{- '{"name": "' + tool_call['function']['name'] + '", "arguments": ' + tool_call['function']['arguments']|string -}} + {{- '{"name": "' + tool_call['function']['name'] + '", "arguments": ' + tool_call['function']['arguments']|tojson -}} {%- if add_tool_id == true -%} {{- ', "id": "' + tool_call['id'] + '"' -}} {%- endif -%} diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index ecc5e00c03..c73bd38dfd 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -191,6 +191,7 @@ llama_build_and_test(test-chat-peg-parser.cpp peg-parser/simple-tokenize.cpp) llama_build_and_test(test-jinja.cpp) llama_test(test-jinja NAME test-jinja-py ARGS -py LABEL python) llama_build_and_test(test-chat-auto-parser.cpp) +llama_build_and_test(test-chat-template.cpp) llama_build_and_test(test-json-partial.cpp) llama_build_and_test(test-log.cpp) llama_build_and_test( diff --git a/tests/test-chat-auto-parser.cpp b/tests/test-chat-auto-parser.cpp index 015c90d408..298a1b50bd 100644 --- a/tests/test-chat-auto-parser.cpp +++ b/tests/test-chat-auto-parser.cpp @@ -1287,7 +1287,7 @@ static void test_nemotron_tool_format(testing & t) { // Check function markers t.assert_equal("func_name_prefix should be '\\n'", ">\n", analysis.markers.func_name_suffix); - t.assert_equal("func_close should be ''", "", analysis.markers.func_close); + t.assert_equal("func_close should be '\\n'", "\n", analysis.markers.func_close); // Check argument markers (note: markers retain trailing newlines for proper parsing) t.assert_equal("arg_name_prefix should be ' Path to the JSON input file. + --stop-on-first-fail Stop testing on the first failure (default: false). + --no-common Use direct Jinja engine instead of common chat templates (default: use common). + --output Path to output results (only for single template runs). +If PATH_TO_TEMPLATE is a file, runs that single template. 
+If PATH_TO_TEMPLATE is a directory, runs all .jinja files in that directory. +If PATH_TO_TEMPLATE is omitted, runs automated tests (default CI mode). +)"; + +static std::string DEFAULT_JSON = R"({ + "messages": [ + { + "role": "user", + "content": "Hello, how are you?" + }, + { + "role": "assistant", + "content": "I am fine, thank you!" + } + ], + "bos_token": "", + "eos_token": "", + "add_generation_prompt": true +})"; + +int main(int argc, char ** argv) { + std::vector args(argv, argv + argc); + + std::string tmpl_path; + std::string json_path; + std::string output_path; + bool stop_on_first_fail = false; + bool use_common = true; + + for (size_t i = 1; i < args.size(); i++) { + if (args[i] == "--help" || args[i] == "-h") { + std::cout << HELP << "\n"; + return 0; + } + if (args[i] == "--json" && i + 1 < args.size()) { + json_path = args[i + 1]; + i++; + } else if (args[i] == "--stop-on-first-fail") { + stop_on_first_fail = true; + } else if (args[i] == "--output" && i + 1 < args.size()) { + output_path = args[i + 1]; + i++; + } else if (args[i] == "--no-common") { + use_common = true; + } else if (tmpl_path.empty()) { + tmpl_path = args[i]; + } else { + std::cerr << "Unknown argument: " << args[i] << "\n"; + std::cout << HELP << "\n"; + return 1; + } + } + + if (tmpl_path.empty()) { + return main_automated_tests(); + } + + json input_json; + if (!json_path.empty()) { + std::ifstream json_file(json_path); + if (!json_file) { + std::cerr << "Error: Could not open JSON file: " << json_path << "\n"; + return 1; + } + std::string content = std::string( + std::istreambuf_iterator(json_file), + std::istreambuf_iterator()); + input_json = json::parse(content); + } else { + input_json = json::parse(DEFAULT_JSON); + } + + std::filesystem::path p(tmpl_path); + if (std::filesystem::is_directory(p)) { + run_multiple(tmpl_path, stop_on_first_fail, input_json, use_common); + } else if (std::filesystem::is_regular_file(p)) { + std::ifstream infile(tmpl_path); + std::string 
contents = std::string( + std::istreambuf_iterator(infile), + std::istreambuf_iterator()); + run_single(contents, input_json, use_common, output_path); + } else { + std::cerr << "Error: PATH_TO_TEMPLATE is not a valid file or directory: " << tmpl_path << "\n"; + return 1; + } + + return 0; +} + +void run_multiple(const std::string& dir_path, bool stop_on_first_fail, const json& input, bool use_common) { + std::vector failed_tests; + + // list all files in models/templates/ and run each + size_t test_count = 0; + + for (const auto & entry : std::filesystem::directory_iterator(dir_path)) { + // only process .jinja files + if (entry.path().extension() == ".jinja" && entry.is_regular_file()) { + test_count++; + std::cout << "\n\n=== RUNNING TEMPLATE FILE: " << entry.path().string() << " ===\n"; + std::ifstream infile(entry.path()); + std::string contents((std::istreambuf_iterator(infile)), std::istreambuf_iterator()); + try { + run_single(contents, input, use_common); + } catch (const std::exception & e) { + std::cout << "Exception: " << e.what() << "\n"; + std::cout << "=== ERROR WITH TEMPLATE FILE: " << entry.path().string() << " ===\n"; + failed_tests.push_back(entry.path().string()); + if (stop_on_first_fail) { + break; + } + } + } + } + + std::cout << "\n\n=== TEST SUMMARY ===\n"; + std::cout << "Total tests run: " << test_count << "\n"; + std::cout << "Total failed tests: " << failed_tests.size() << "\n"; + for (const auto & test : failed_tests) { + std::cout << "FAILED TEST: " << test << "\n"; + } +} + + +static std::string normalize_newlines(const std::string & s) { +#ifdef _WIN32 + static const std::regex nl_regex("\r\n"); + return std::regex_replace(s, nl_regex, "\n"); +#else + return s; +#endif +} + + +static std::string format_using_common( + const std::string & template_str, + const std::string & bos_token, + const std::string & eos_token, + std::vector & messages, + std::vector tools = {}) { + auto tmpls = common_chat_templates_init(/* model= */ nullptr, 
template_str, bos_token, eos_token); + common_chat_templates_inputs inputs; + inputs.use_jinja = true; + inputs.messages = messages; + inputs.tools = std::move(tools); + inputs.add_generation_prompt = true; + auto output = common_chat_templates_apply(tmpls.get(), inputs).prompt; + output = normalize_newlines(output); + return output; +} + + +// skip libcommon, use direct jinja engine +static jinja::value_string format_using_direct_engine( + const std::string & template_str, + json & input) { + // lexing + jinja::lexer lexer; + auto lexer_res = lexer.tokenize(template_str); + + // compile to AST + jinja::program ast = jinja::parse_from_tokens(lexer_res); + + // check caps for workarounds + jinja::caps_get(ast); + + std::cout << "\n=== RUN ===\n"; + jinja::context ctx(template_str); + + jinja::global_from_json(ctx, input, true); + + jinja::runtime runtime(ctx); + const jinja::value results = runtime.execute(ast); + auto parts = jinja::runtime::gather_string_parts(results); + + std::cout << "\n=== RESULTS ===\n"; + for (const auto & part : parts->as_string().parts) { + std::cout << (part.is_input ? 
"DATA" : "TMPL") << ": " << part.val << "\n"; + } + + return parts; +} + + +void run_single(const std::string& contents, json input, bool use_common, const std::string & output_path) { + jinja::enable_debug(true); + + jinja::value_string output_parts; + + if (use_common) { + std::string bos_token = ""; + std::string eos_token = ""; + if (input.contains("bos_token")) { + bos_token = input["bos_token"].get(); + } + if (input.contains("eos_token")) { + eos_token = input["eos_token"].get(); + } + nlohmann::ordered_json msgs_json = input["messages"]; + nlohmann::ordered_json tools_json = input["tools"]; + auto messages = common_chat_msgs_parse_oaicompat(msgs_json); + auto tools = common_chat_tools_parse_oaicompat(tools_json); + auto output = format_using_common(contents, bos_token, eos_token, messages, tools); + std::cout << "\n=== OUTPUT ===\n"; + std::cout << output << "\n"; + output_parts = jinja::mk_val(output); + + } else { + output_parts = format_using_direct_engine(contents, input); + std::cout << "\n=== OUTPUT ===\n"; + std::cout << output_parts->as_string().str() << "\n"; + } + + if (!output_path.empty()) { + std::ofstream outfile(output_path); + if (!outfile) { + throw std::runtime_error("Could not open output file: " + output_path); + } + outfile << output_parts->as_string().str(); + outfile.close(); + std::cout << "\n=== OUTPUT WRITTEN TO " << output_path << " ===\n"; + } +} + + + + + +// +// Automated tests for chat templates +// + +#define U8C(x) (const char*)(u8##x) + +static common_chat_msg simple_msg(const std::string & role, const std::string & content) { + common_chat_msg msg; + msg.role = role; + msg.content = content; + return msg; +} + +int main_automated_tests(void) { + // jinja::enable_debug(true); + + std::vector conversation { + {"system", "You are a helpful assistant"}, + {"user", "Hello"}, + {"assistant", "Hi there"}, + {"user", "Who are you"}, + {"assistant", " I am an assistant "}, + {"user", "Another question"}, + }; + + // std::string 
wrong = /* .template_str= */ u8"[gMASK]{% for item in messages %}{% if item['tools'] is defined %}<|system|>\n你是一个名为 ChatGLM 的人工智能助手。你是基于智谱AI训练的语言模型 GLM-4 模型开发的,你的任务是针对用户的问题和要求提供适当的答复和支持。\n\n# 可用工具{% set tools = item['tools'] %}{% for tool in tools %}{% if tool['type'] == 'function' %}\n\n## {{ tool['function']['name'] }}\n\n{{ tool['function'] | tojson(indent=4) }}\n......{% endif %}{% endfor %}{% endif %}{% if item['content'] %}<|{{ item['role'] }}|>{{ item['metadata'] }}\n{{ item['content'] }}{% endif %}{% endfor %}{% if add_generation_prompt %}<|assistant|>{% endif %}"; + struct TestCase { + std::string name; + std::string template_str; + std::string expected_output; + std::string expected_output_jinja; + std::string bos_token = ""; + std::string eos_token = ""; + bool supported_with_jinja = true; + }; + std::vector test_cases { + { + /* .name= */ "teknium/OpenHermes-2.5-Mistral-7B", + /* .template_str= */ "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\\n' + message['content'] + '<|im_end|>' + '\\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\\n' }}{% endif %}", + /* .expected_output= */ "<|im_start|>system\nYou are a helpful assistant<|im_end|>\n<|im_start|>user\nHello<|im_end|>\n<|im_start|>assistant\nHi there<|im_end|>\n<|im_start|>user\nWho are you<|im_end|>\n<|im_start|>assistant\n I am an assistant <|im_end|>\n<|im_start|>user\nAnother question<|im_end|>\n<|im_start|>assistant\n", + /* .expected_output_jinja= */ "", + /* .bos_token= */ "", + /* .eos_token= */ "", + }, + { + /* .name= */ "mistralai/Mistral-7B-Instruct-v0.2 (NOTE: Old pre-v1 without a system prompt)", + /* .template_str= */ "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 
'assistant' %}{{ message['content'] + eos_token}}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}", + /* .expected_output= */ "[INST] You are a helpful assistant\nHello [/INST]Hi there
[INST] Who are you [/INST] I am an assistant
[INST] Another question [/INST]", + /* .expected_output_jinja= */ "", + /* .bos_token= */ "", + /* .eos_token= */ "", + }, + { + /* .name= */ "TheBloke/FusionNet_34Bx2_MoE-AWQ", + /* .template_str= */ "{%- for idx in range(0, messages|length) -%}\n{%- if messages[idx]['role'] == 'user' -%}\n{%- if idx > 1 -%}\n{{- bos_token + '[INST] ' + messages[idx]['content'] + ' [/INST]' -}}\n{%- else -%}\n{{- messages[idx]['content'] + ' [/INST]' -}}\n{%- endif -%}\n{% elif messages[idx]['role'] == 'system' %}\n{{- '[INST] <>\\n' + messages[idx]['content'] + '\\n<>\\n\\n' -}}\n{%- elif messages[idx]['role'] == 'assistant' -%}\n{{- ' ' + messages[idx]['content'] + ' ' + eos_token -}}\n{% endif %}\n{% endfor %}", + /* .expected_output= */ "[INST] <>\nYou are a helpful assistant\n<>\n\nHello [/INST]Hi there[INST] Who are you [/INST] I am an assistant [INST] Another question [/INST]", + /* .expected_output_jinja= */ "[INST] <>\nYou are a helpful assistant\n<>\n\nHello [/INST] Hi there [INST] Who are you [/INST] I am an assistant [INST] Another question [/INST]", + /* .bos_token= */ "", + /* .eos_token= */ "", + }, + { + /* .name= */ "bofenghuang/vigogne-2-70b-chat", + /* .template_str= */ "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% elif true == true and not '<>' in messages[0]['content'] %}{% set loop_messages = messages %}{% set system_message = 'Vous êtes Vigogne, un assistant IA créé par Zaion Lab. Vous suivez extrêmement bien les instructions. Aidez autant que vous le pouvez.' 
%}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if loop.index0 == 0 and system_message != false %}{% set content = '<>\\n' + system_message + '\\n<>\\n\\n' + message['content'] %}{% else %}{% set content = message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + content.strip() + ' [/INST]' }}{% elif message['role'] == 'system' %}{{ '<>\\n' + content.strip() + '\\n<>\\n\\n' }}{% elif message['role'] == 'assistant' %}{{ ' ' + content.strip() + ' ' + eos_token }}{% endif %}{% endfor %}", + /* .expected_output= */ "[INST] <>\nYou are a helpful assistant\n<>\n\nHello [/INST]Hi there[INST] Who are you [/INST]I am an assistant[INST] Another question [/INST]", + /* .expected_output_jinja= */ "[INST] <>\nYou are a helpful assistant\n<>\n\nHello [/INST] Hi there [INST] Who are you [/INST] I am an assistant [INST] Another question [/INST]", + /* .bos_token= */ "", + /* .eos_token= */ "", + }, + { + /* .name= */ "mlabonne/AlphaMonarch-7B", + /* .template_str= */ "{% for message in messages %}{{bos_token + message['role'] + '\\n' + message['content'] + eos_token + '\\n'}}{% endfor %}{% if add_generation_prompt %}{{ bos_token + 'assistant\\n' }}{% endif %}", + /* .expected_output= */ "system\nYou are a helpful assistant\nuser\nHello\nassistant\nHi there\nuser\nWho are you\nassistant\n I am an assistant \nuser\nAnother question\nassistant\n", + /* .expected_output_jinja= */ "system\nYou are a helpful assistant\nuser\nHello\nassistant\nHi there\nuser\nWho are you\nassistant\n I am an assistant \nuser\nAnother question\nassistant\n", + /* .bos_token= */ "", + /* .eos_token= */ "", + }, + { + /* .name= */ "google/gemma-7b-it", + /* .template_str= */ "{% if messages[0]['role'] == 'system' %}{{ raise_exception('System 
role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '' + role + '\\n' + message['content'] | trim + '\\n' }}{% endfor %}{% if add_generation_prompt %}{{'model\\n'}}{% endif %}", + /* .expected_output= */ "user\nYou are a helpful assistant\n\nHello\nmodel\nHi there\nuser\nWho are you\nmodel\nI am an assistant\nuser\nAnother question\nmodel\n", + /* .expected_output_jinja= */ "user\nYou are a helpful assistant\nHello\nmodel\nHi there\nuser\nWho are you\nmodel\nI am an assistant\nuser\nAnother question\nmodel\n", + }, + { + /* .name= */ "OrionStarAI/Orion-14B-Chat", + /* .template_str= */ "{% for message in messages %}{% if loop.first %}{{ bos_token }}{% endif %}{% if message['role'] == 'user' %}{{ 'Human: ' + message['content'] + '\\n\\nAssistant: ' + eos_token }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token }}{% endif %}{% endfor %}", + /* .expected_output= */ "Human: You are a helpful assistant\n\nHello\n\nAssistant: Hi thereHuman: Who are you\n\nAssistant: I am an assistant Human: Another question\n\nAssistant: ", + /* .expected_output_jinja= */ "Human: You are a helpful assistant\nHello\n\nAssistant: Hi thereHuman: Who are you\n\nAssistant: I am an assistant Human: Another question\n\nAssistant: ", + /* .bos_token= */ "", + /* .eos_token= */ "", + }, + { + /* .name= */ "openchat/openchat-3.5-0106", + // The included chat_template differs from the author's suggestions here: https://huggingface.co/openchat/openchat_3.5/discussions/5#65448109b4a3f3a2f486fd9d + // So we match against the included template but implement the suggested version. 
+ /* .template_str= */ "{{ bos_token }}{% for message in messages %}{{ 'GPT4 Correct ' + message['role'].title() + ': ' + message['content'] + '<|end_of_turn|>'}}{% endfor %}{% if add_generation_prompt %}{{ 'GPT4 Correct Assistant:' }}{% endif %}", + /* .expected_output= */ "You are a helpful assistant<|end_of_turn|>GPT4 Correct User: Hello<|end_of_turn|>GPT4 Correct Assistant: Hi there<|end_of_turn|>GPT4 Correct User: Who are you<|end_of_turn|>GPT4 Correct Assistant: I am an assistant <|end_of_turn|>GPT4 Correct User: Another question<|end_of_turn|>GPT4 Correct Assistant:", + /* .expected_output_jinja= */ "GPT4 Correct System: You are a helpful assistant<|end_of_turn|>GPT4 Correct User: Hello<|end_of_turn|>GPT4 Correct Assistant: Hi there<|end_of_turn|>GPT4 Correct User: Who are you<|end_of_turn|>GPT4 Correct Assistant: I am an assistant <|end_of_turn|>GPT4 Correct User: Another question<|end_of_turn|>GPT4 Correct Assistant:", + }, + { + /* .name= */ "deepseek-ai/deepseek-coder-33b-instruct", + /* .template_str= */ "{% if not add_generation_prompt is defined %}\n{% set add_generation_prompt = false %}\n{% endif %}\n{%- set ns = namespace(found=false) -%}\n{%- for message in messages -%}\n {%- if message['role'] == 'system' -%}\n {%- set ns.found = true -%}\n {%- endif -%}\n{%- endfor -%}\n{{bos_token}}{%- if not ns.found -%}\n{{'You are an AI programming assistant, utilizing the Deepseek Coder model, developed by Deepseek Company, and you only answer questions related to computer science. 
For politically sensitive questions, security and privacy issues, and other non-computer science questions, you will refuse to answer\\n'}}\n{%- endif %}\n{%- for message in messages %}\n {%- if message['role'] == 'system' %}\n{{ message['content'] }}\n {%- else %}\n {%- if message['role'] == 'user' %}\n{{'### Instruction:\\n' + message['content'] + '\\n'}}\n {%- else %}\n{{'### Response:\\n' + message['content'] + '\\n<|EOT|>\\n'}}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{% if add_generation_prompt %}\n{{'### Response:'}}\n{% endif %}", + /* .expected_output= */ "You are a helpful assistant### Instruction:\nHello\n### Response:\nHi there\n<|EOT|>\n### Instruction:\nWho are you\n### Response:\n I am an assistant \n<|EOT|>\n### Instruction:\nAnother question\n### Response:\n", + /* .expected_output_jinja= */ "", + }, + { + /* .name= */ "eachadea/vicuna-13b-1.1", + // No template included in tokenizer_config.json, so this template likely needs to be manually set. + /* .template_str= */ "{%- for message in messages %}{%- if message['role'] == 'system' -%}{{- '' + message['content'] + '\n\n' -}}{%- else -%}{%- if message['role'] == 'user' -%}{{-'USER: ' + message['content'] + '\n'-}}{%- else -%}{{-'ASSISTANT: ' + message['content'] + '\n' -}}{%- endif -%}{%- endif -%}{%- endfor -%}{%- if add_generation_prompt -%}{{-'ASSISTANT:'-}}{%- endif -%}", + /* .expected_output= */ "You are a helpful assistant\n\nUSER: Hello\nASSISTANT: Hi there\nUSER: Who are you\nASSISTANT: I am an assistant \nUSER: Another question\nASSISTANT:", + /* .expected_output_jinja= */ "", + /* .bos_token= */ "", + /* .eos_token= */ "", + }, + { + /* .name= */ "Orca-Vicuna", + // No template included in tokenizer_config.json, so this template likely needs to be manually set. 
+ /* .template_str= */ "{%- for message in messages %}{%- if message['role'] == 'system' -%}{{-'SYSTEM: ' + message['content'] + '\n' -}}{%- else -%}{%- if message['role'] == 'user' -%}{{-'USER: ' + message['content'] + '\n'-}}{%- else -%}{{-'ASSISTANT: ' + message['content'] + '\n' -}}{%- endif -%}{%- endif -%}{%- endfor -%}{%- if add_generation_prompt -%}{{-'ASSISTANT:'-}}{%- endif -%}", + /* .expected_output= */ "SYSTEM: You are a helpful assistant\nUSER: Hello\nASSISTANT: Hi there\nUSER: Who are you\nASSISTANT: I am an assistant \nUSER: Another question\nASSISTANT:", + /* .expected_output_jinja= */ "", + /* .bos_token= */ "", + /* .eos_token= */ "", + }, + { + /* .name= */ "CohereForAI/c4ai-command-r-plus", + /* .template_str= */ "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% elif false == true %}{% set loop_messages = messages %}{% set system_message = 'You are Command-R, a brilliant, sophisticated, AI-assistant trained to assist human users by providing thorough responses. You are trained by Cohere.' 
%}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% if system_message != false %}{{ '<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>' + system_message + '<|END_OF_TURN_TOKEN|>' }}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ '<|START_OF_TURN_TOKEN|><|USER_TOKEN|>' + content.strip() + '<|END_OF_TURN_TOKEN|>' }}{% elif message['role'] == 'assistant' %}{{ '<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>' + content.strip() + '<|END_OF_TURN_TOKEN|>' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>' }}{% endif %}", + /* .expected_output= */ "<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>You are a helpful assistant<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Hello<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>Hi there<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Who are you<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>I am an assistant<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Another question<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>", + /* .expected_output_jinja= */ "", + }, + { + /* .name= */ "Llama-3", + /* .template_str= */ "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}", + /* .expected_output= */ "<|start_header_id|>system<|end_header_id|>\n\nYou are a helpful 
assistant<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nHello<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\nHi there<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nWho are you<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\nI am an assistant<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nAnother question<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n", + /* .expected_output_jinja= */ "", + }, + { + /* .name= */ "Phi-3-mini", + /* .template_str= */ "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') %}{{'<|user|>' + '\n' + message['content'] + '<|end|>' + '\n' + '<|assistant|>' + '\n'}}{% elif (message['role'] == 'assistant') %}{{message['content'] + '<|end|>' + '\n'}}{% endif %}{% endfor %}", + /* .expected_output= */ "<|system|>\nYou are a helpful assistant<|end|>\n<|user|>\nHello<|end|>\n<|assistant|>\nHi there<|end|>\n<|user|>\nWho are you<|end|>\n<|assistant|>\n I am an assistant <|end|>\n<|user|>\nAnother question<|end|>\n<|assistant|>\n", + /* .expected_output_jinja= */ "<|user|>\nYou are a helpful assistant\nHello<|end|>\n<|assistant|>\nHi there<|end|>\n<|user|>\nWho are you<|end|>\n<|assistant|>\n I am an assistant <|end|>\n<|user|>\nAnother question<|end|>\n<|assistant|>\n", + }, + { + /* .name= */ "Phi-3-small", + /* .template_str= */ "{{ bos_token }}{% for message in messages %}{{'<|' + message['role'] + '|>' + '\n' + message['content'] + '<|end|>\n' }}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>\n' }}{% else %}{{ eos_token }}{% endif %}", + /* .expected_output= */ "<|system|>\nYou are a helpful assistant<|end|>\n<|user|>\nHello<|end|>\n<|assistant|>\nHi there<|end|>\n<|user|>\nWho are you<|end|>\n<|assistant|>\n I am an assistant <|end|>\n<|user|>\nAnother question<|end|>\n<|assistant|>\n", + /* .expected_output_jinja= */ "", + }, + { + /* .name= */ "Phi-3-medium", + /* .template_str= */ "{% for message in messages %}{% if (message['role'] == 'user') 
%}{{'<|user|>' + '\n' + message['content'] + '<|end|>' + '\n' + '<|assistant|>' + '\n'}}{% elif (message['role'] == 'assistant') %}{{message['content'] + '<|end|>' + '\n'}}{% endif %}{% endfor %}", + /* .expected_output= */ "<|system|>\nYou are a helpful assistant<|end|>\n<|user|>\nHello<|end|>\n<|assistant|>\nHi there<|end|>\n<|user|>\nWho are you<|end|>\n<|assistant|>\n I am an assistant <|end|>\n<|user|>\nAnother question<|end|>\n<|assistant|>\n", + /* .expected_output_jinja= */ "<|user|>\nYou are a helpful assistant\nHello<|end|>\n<|assistant|>\nHi there<|end|>\n<|user|>\nWho are you<|end|>\n<|assistant|>\n I am an assistant <|end|>\n<|user|>\nAnother question<|end|>\n<|assistant|>\n", + }, + { + /* .name= */ "Phi-3-vision", + /* .template_str= */ "{% for message in messages %}{{'<|' + message['role'] + '|>' + '\n' + message['content'] + '<|end|>\n' }}{% endfor %}{% if add_generation_prompt and messages[-1]['role'] != 'assistant' %}{{- '<|assistant|>\n' -}}{% endif %}", + /* .expected_output= */ "<|system|>\nYou are a helpful assistant<|end|>\n<|user|>\nHello<|end|>\n<|assistant|>\nHi there<|end|>\n<|user|>\nWho are you<|end|>\n<|assistant|>\n I am an assistant <|end|>\n<|user|>\nAnother question<|end|>\n<|assistant|>\n", + /* .expected_output_jinja= */ "", + /* .bos_token= */ "", + /* .eos_token= */ "", + }, + { + /* .name= */ "ChatGLM3", + /* .template_str= */ "{% for message in messages %}{% if loop.first %}[gMASK]sop<|{{ message['role'] }}|>\n {{ message['content'] }}{% else %}<|{{ message['role'] }}|>\n {{ message['content'] }}{% endif %}{% endfor %}{% if add_generation_prompt %}<|assistant|>{% endif %}", + /* .expected_output= */ "[gMASK]sop<|system|>\n You are a helpful assistant<|user|>\n Hello<|assistant|>\n Hi there<|user|>\n Who are you<|assistant|>\n I am an assistant <|user|>\n Another question<|assistant|>", + /* .expected_output_jinja= */ "[gMASK]sop<|system|>\n You are a helpful assistant<|user|>\n Hello<|assistant|>\n Hi there<|user|>\n Who are 
you<|assistant|>\n I am an assistant <|user|>\n Another question<|assistant|>", + }, + { + /* .name= */ "ChatGLM4", + /* .template_str= */ U8C("[gMASK]{% for item in messages %}{% if item['tools'] is defined %}<|system|>\n你是一个名为 ChatGLM 的人工智能助手。你是基于智谱AI训练的语言模型 GLM-4 模型开发的,你的任务是针对用户的问题和要求提供适当的答复和支持。\n\n# 可用工具{% set tools = item['tools'] %}{% for tool in tools %}{% if tool['type'] == 'function' %}\n\n## {{ tool['function']['name'] }}\n\n{{ tool['function'] | tojson(indent=4) }}\n......{% endif %}{% endfor %}{% endif %}{% if item['content'] %}<|{{ item['role'] }}|>{{ item['metadata'] }}\n{{ item['content'] }}{% endif %}{% endfor %}{% if add_generation_prompt %}<|assistant|>\n{% endif %}"), + /* .expected_output= */ "[gMASK]<|system|>\nYou are a helpful assistant<|user|>\nHello<|assistant|>\nHi there<|user|>\nWho are you<|assistant|>\n I am an assistant <|user|>\nAnother question<|assistant|>\n", + /* .expected_output_jinja= */ "", + /* .bos_token= */ "", + /* .eos_token= */ "", + }, + { + /* .name= */ "GLMEdge", + /* .template_str= */ "{% for item in messages %}{% if item['role'] == 'system' %}<|system|>\n{{ item['content'] }}{% elif item['role'] == 'user' %}<|user|>\n{{ item['content'] }}{% elif item['role'] == 'assistant' %}<|assistant|>\n{{ item['content'] }}{% endif %}{% endfor %}<|assistant|>", + /* .expected_output= */ "<|system|>\nYou are a helpful assistant<|user|>\nHello<|assistant|>\nHi there<|user|>\nWho are you<|assistant|>\n I am an assistant <|user|>\nAnother question<|assistant|>", + /* .expected_output_jinja= */ "<|system|>\nYou are a helpful assistant<|user|>\nHello<|assistant|>\nHi there<|user|>\nWho are you<|assistant|>\n I am an assistant <|user|>\nAnother question<|assistant|>", + /* .bos_token= */ "", + /* .eos_token= */ "", + }, + { + /* .name= */ "MiniCPM-3B-OpenHermes-2.5-v2-GGUF", + /* .template_str= */ U8C("{% for message in messages %}{% if message['role'] == 'user' %}{{'<用户>' + message['content'].strip() + ''}}{% else 
%}{{message['content'].strip()}}{% endif %}{% endfor %}"), + /* .expected_output= */ U8C("You are a helpful assistant<用户>HelloHi there<用户>Who are youI am an assistant<用户>Another question"), + /* .expected_output_jinja= */ "", + /* .bos_token= */ "", + /* .eos_token= */ "", + }, + { + /* .name= */ "DeepSeek-V2", + /* .template_str= */ "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{{ bos_token }}{% for message in messages %}{% if message['role'] == 'user' %}{{ 'User: ' + message['content'] + '\n\n' }}{% elif message['role'] == 'assistant' %}{{ 'Assistant: ' + message['content'] + eos_token }}{% elif message['role'] == 'system' %}{{ message['content'] + '\n\n' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ 'Assistant:' }}{% endif %}", + /* .expected_output= */ U8C("You are a helpful assistant\n\nUser: Hello\n\nAssistant: Hi there<|end▁of▁sentence|>User: Who are you\n\nAssistant: I am an assistant <|end▁of▁sentence|>User: Another question\n\nAssistant:"), + /* .expected_output_jinja= */ "", + /* .bos_token= */ "", + /* .eos_token= */ "<|end▁of▁sentence|>", + }, + { + /* .name= */ "ibm-granite/granite-3.0-8b-instruct", + /* .template_str= */ "{%- if tools %}\n {{- '<|start_of_role|>available_tools<|end_of_role|>\n' }}\n {%- for tool in tools %}\n {{- tool | tojson(indent=4) }}\n {%- if not loop.last %}\n {{- '\n\n' }}\n {%- endif %}\n {%- endfor %}\n {{- '<|end_of_text|>\n' }}\n{%- endif %}\n{%- for message in messages %}\n {%- if message['role'] == 'system' %}\n {{- '<|start_of_role|>system<|end_of_role|>' + message['content'] + '<|end_of_text|>\n' }}\n {%- elif message['role'] == 'user' %}\n {{- '<|start_of_role|>user<|end_of_role|>' + message['content'] + '<|end_of_text|>\n' }}\n {%- elif message['role'] == 'assistant' %}\n {{- '<|start_of_role|>assistant<|end_of_role|>' + message['content'] + '<|end_of_text|>\n' }}\n {%- elif message['role'] == 'assistant_tool_call' %}\n {{- 
'<|start_of_role|>assistant<|end_of_role|><|tool_call|>' + message['content'] + '<|end_of_text|>\n' }}\n {%- elif message['role'] == 'tool_response' %}\n {{- '<|start_of_role|>tool_response<|end_of_role|>' + message['content'] + '<|end_of_text|>\n' }}\n {%- endif %}\n {%- if loop.last and add_generation_prompt %}\n {{- '<|start_of_role|>assistant<|end_of_role|>' }}\n {%- endif %}\n{%- endfor %}", + /* .expected_output= */ "<|start_of_role|>system<|end_of_role|>You are a helpful assistant<|end_of_text|>\n<|start_of_role|>user<|end_of_role|>Hello<|end_of_text|>\n<|start_of_role|>assistant<|end_of_role|>Hi there<|end_of_text|>\n<|start_of_role|>user<|end_of_role|>Who are you<|end_of_text|>\n<|start_of_role|>assistant<|end_of_role|> I am an assistant <|end_of_text|>\n<|start_of_role|>user<|end_of_role|>Another question<|end_of_text|>\n<|start_of_role|>assistant<|end_of_role|>", + /* .expected_output_jinja= */ "<|start_of_role|>system<|end_of_role|>You are a helpful assistant<|end_of_text|>\n<|start_of_role|>user<|end_of_role|>Hello<|end_of_text|>\n<|start_of_role|>assistant<|end_of_role|>Hi there<|end_of_text|>\n<|start_of_role|>user<|end_of_role|>Who are you<|end_of_text|>\n<|start_of_role|>assistant<|end_of_role|> I am an assistant <|end_of_text|>\n<|start_of_role|>user<|end_of_role|>Another question<|end_of_text|>\n<|start_of_role|>assistant<|end_of_role|>", + }, + { + /* .name= */ "mistralai/Mistral-7B-Instruct-v0.2 (mistralai 'v1' template with a system prompt)", + /* .template_str= */ "{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content'] %}\n {%- set loop_messages = messages[1:] %}\n{%- else %}\n {%- set loop_messages = messages %}\n{%- endif %}\n\n{{- bos_token }}\n{%- for message in loop_messages %}\n {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}\n {{- raise_exception('After the optional system message, conversation roles must alternate user/assistant/user/assistant/...') }}\n {%- endif %}\n {%- if 
message['role'] == 'user' %}\n {%- if loop.first and system_message is defined %}\n {{- ' [INST] ' + system_message + '\\n\\n' + message['content'] + ' [/INST]' }}\n {%- else %}\n {{- ' [INST] ' + message['content'] + ' [/INST]' }}\n {%- endif %}\n {%- elif message['role'] == 'assistant' %}\n {{- ' ' + message['content'] + eos_token}}\n {%- else %}\n {{- raise_exception('Only user and assistant roles are supported, with the exception of an initial optional system message!') }}\n {%- endif %}\n{%- endfor %}\n", + /* .expected_output= */ " [INST] You are a helpful assistant\n\nHello [/INST] Hi there [INST] Who are you [/INST] I am an assistant [INST] Another question [/INST]", + /* .expected_output_jinja= */ " [INST] You are a helpful assistant\n\nHello [/INST] Hi there [INST] Who are you [/INST] I am an assistant [INST] Another question [/INST]", + /* .bos_token= */ "", + /* .eos_token= */ "", + }, + { + /* .name= */ "Mistral-Large-Instruct-2407 (mistralai 'v3' template; modified to have system prompt at start)", + /* .template_str= */ "{%- if messages[0][\"role\"] == \"system\" %}\n {%- set system_message = messages[0][\"content\"] %}\n {%- set loop_messages = messages[1:] %}\n{%- else %}\n {%- set loop_messages = messages %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n{%- set user_messages = loop_messages | selectattr(\"role\", \"equalto\", \"user\") | list %}\n\n{#- This block checks for alternating user/assistant messages, skipping tool calling messages #}\n{%- set ns = namespace() %}\n{%- set ns.index = 0 %}\n{%- for message in loop_messages %}\n {%- if not (message.role == \"tool\" or message.role == \"tool_results\" or (message.tool_calls is defined and message.tool_calls is not none)) %}\n {%- if (message[\"role\"] == \"user\") != (ns.index % 2 == 0) %}\n {{- raise_exception(\"After the optional system message, conversation roles must alternate user/assistant/user/assistant/...\") }}\n {%- endif %}\n {%- set 
ns.index = ns.index + 1 %}\n {%- endif %}\n{%- endfor %}\n\n{{- bos_token }}\n{%- for message in loop_messages %}\n {%- if message[\"role\"] == \"user\" %}\n {%- if tools is not none and (message == user_messages[-1]) %}\n {{- \"[AVAILABLE_TOOLS] [\" }}\n {%- for tool in tools %}\n {%- set tool = tool.function %}\n {{- '{\"type\": \"function\", \"function\": {' }}\n {%- for key, val in tool.items() if key != \"return\" %}\n {%- if val is string %}\n {{- '\"' + key + '\": \"' + val + '\"' }}\n {%- else %}\n {{- '\"' + key + '\": ' + val|tojson }}\n {%- endif %}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- endif %}\n {%- endfor %}\n {{- \"}}\" }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- else %}\n {{- \"]\" }}\n {%- endif %}\n {%- endfor %}\n {{- \"[/AVAILABLE_TOOLS]\" }}\n {%- endif %}\n {%- if loop.last and system_message is defined %}\n {{- \"[INST] \" + system_message + \"\\n\\n\" + message[\"content\"] + \"[/INST]\" }}\n {%- else %}\n {{- \"[INST] \" + message[\"content\"] + \"[/INST]\" }}\n {%- endif %}\n {%- elif message.tool_calls is defined and message.tool_calls is not none %}\n {{- \"[TOOL_CALLS] [\" }}\n {%- for tool_call in message.tool_calls %}\n {%- set out = tool_call.function|tojson %}\n {{- out[:-1] }}\n {%- if not tool_call.id is defined or tool_call.id|length != 9 %}\n {{- raise_exception(\"Tool call IDs should be alphanumeric strings with length 9!\") }}\n {%- endif %}\n {{- ', \"id\": \"' + tool_call.id + '\"}' }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- else %}\n {{- \"]\" + eos_token }}\n {%- endif %}\n {%- endfor %}\n {%- elif message[\"role\"] == \"assistant\" %}\n {{- \" \" + message[\"content\"]|trim + eos_token}}\n {%- elif message[\"role\"] == \"tool_results\" or message[\"role\"] == \"tool\" %}\n {%- if message.content is defined and message.content.content is defined %}\n {%- set content = message.content.content %}\n {%- else %}\n {%- set content = message.content %}\n {%- endif %}\n {{- '[TOOL_RESULTS] {\"content\": ' 
+ content|string + \", \" }}\n {%- if not message.tool_call_id is defined or message.tool_call_id|length != 9 %}\n {{- raise_exception(\"Tool call IDs should be alphanumeric strings with length 9!\") }}\n {%- endif %}\n {{- '\"call_id\": \"' + message.tool_call_id + '\"}[/TOOL_RESULTS]' }}\n {%- else %}\n {{- raise_exception(\"Only user and assistant roles are supported, with the exception of an initial optional system message!\") }}\n {%- endif %}\n{%- endfor %}\n", + /* .expected_output= */ "[INST] You are a helpful assistant\n\nHello[/INST] Hi there[INST] Who are you[/INST] I am an assistant[INST] Another question[/INST]", + /* .expected_output_jinja= */ "[INST] Hello[/INST] Hi there[INST] Who are you[/INST] I am an assistant[INST] You are a helpful assistant\n\nAnother question[/INST]", + /* .bos_token= */ "", + /* .eos_token= */ "", + }, + { + /* .name= */ "Mistral-Nemo-Instruct-2407 (mistralai 'v3-tekken' template; modified to have system prompt at start)", + /* .template_str= */ "{%- if messages[0][\"role\"] == \"system\" %}\n {%- set system_message = messages[0][\"content\"] %}\n {%- set loop_messages = messages[1:] %}\n{%- else %}\n {%- set loop_messages = messages %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n{%- set user_messages = loop_messages | selectattr(\"role\", \"equalto\", \"user\") | list %}\n\n{#- This block checks for alternating user/assistant messages, skipping tool calling messages #}\n{%- set ns = namespace() %}\n{%- set ns.index = 0 %}\n{%- for message in loop_messages %}\n {%- if not (message.role == \"tool\" or message.role == \"tool_results\" or (message.tool_calls is defined and message.tool_calls is not none)) %}\n {%- if (message[\"role\"] == \"user\") != (ns.index % 2 == 0) %}\n {{- raise_exception(\"After the optional system message, conversation roles must alternate user/assistant/user/assistant/...\") }}\n {%- endif %}\n {%- set ns.index = ns.index + 1 %}\n {%- endif %}\n{%- endfor 
%}\n\n{{- bos_token }}\n{%- for message in loop_messages %}\n {%- if message[\"role\"] == \"user\" %}\n {%- if tools is not none and (message == user_messages[-1]) %}\n {{- \"[AVAILABLE_TOOLS][\" }}\n {%- for tool in tools %}\n {%- set tool = tool.function %}\n {{- '{\"type\": \"function\", \"function\": {' }}\n {%- for key, val in tool.items() if key != \"return\" %}\n {%- if val is string %}\n {{- '\"' + key + '\": \"' + val + '\"' }}\n {%- else %}\n {{- '\"' + key + '\": ' + val|tojson }}\n {%- endif %}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- endif %}\n {%- endfor %}\n {{- \"}}\" }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- else %}\n {{- \"]\" }}\n {%- endif %}\n {%- endfor %}\n {{- \"[/AVAILABLE_TOOLS]\" }}\n {%- endif %}\n {%- if loop.last and system_message is defined %}\n {{- \"[INST]\" + system_message + \"\\n\\n\" + message[\"content\"] + \"[/INST]\" }}\n {%- else %}\n {{- \"[INST]\" + message[\"content\"] + \"[/INST]\" }}\n {%- endif %}\n {%- elif (message.tool_calls is defined and message.tool_calls is not none) %}\n {{- \"[TOOL_CALLS][\" }}\n {%- for tool_call in message.tool_calls %}\n {%- set out = tool_call.function|tojson %}\n {{- out[:-1] }}\n {%- if not tool_call.id is defined or tool_call.id|length != 9 %}\n {{- raise_exception(\"Tool call IDs should be alphanumeric strings with length 9!\") }}\n {%- endif %}\n {{- ', \"id\": \"' + tool_call.id + '\"}' }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- else %}\n {{- \"]\" + eos_token }}\n {%- endif %}\n {%- endfor %}\n {%- elif message[\"role\"] == \"assistant\" %}\n {{- message[\"content\"] + eos_token}}\n {%- elif message[\"role\"] == \"tool_results\" or message[\"role\"] == \"tool\" %}\n {%- if message.content is defined and message.content.content is defined %}\n {%- set content = message.content.content %}\n {%- else %}\n {%- set content = message.content %}\n {%- endif %}\n {{- '[TOOL_RESULTS]{\"content\": ' + content|string + \", \" }}\n {%- if not message.tool_call_id is 
defined or message.tool_call_id|length != 9 %}\n {{- raise_exception(\"Tool call IDs should be alphanumeric strings with length 9!\") }}\n {%- endif %}\n {{- '\"call_id\": \"' + message.tool_call_id + '\"}[/TOOL_RESULTS]' }}\n {%- else %}\n {{- raise_exception(\"Only user and assistant roles are supported, with the exception of an initial optional system message!\") }}\n {%- endif %}\n{%- endfor %}\n", + /* .expected_output= */ "[INST]You are a helpful assistant\n\nHello[/INST]Hi there[INST]Who are you[/INST] I am an assistant [INST]Another question[/INST]", + /* .expected_output_jinja= */ "[INST]Hello[/INST]Hi there[INST]Who are you[/INST] I am an assistant [INST]You are a helpful assistant\n\nAnother question[/INST]", + /* .bos_token= */ "", + /* .eos_token= */ "", + }, + { + /* .name= */ "mistralai/Mistral-Large-Instruct-2411 (mistralai 'v7' template)", + /* .template_str= */ "{{ bos_token }}{% for message in messages %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + '[/INST]' }}{% elif message['role'] == 'system' %}{{ '[SYSTEM_PROMPT] ' + message['content'] + '[/SYSTEM_PROMPT]' }}{% elif message['role'] == 'assistant' %}{{ ' ' + message['content'] + eos_token }}{% else %}{{ raise_exception('Only user, system and assistant roles are supported!') }}{% endif %}{% endfor %}", + /* .expected_output= */ "[SYSTEM_PROMPT] You are a helpful assistant[/SYSTEM_PROMPT][INST] Hello[/INST] Hi there[INST] Who are you[/INST] I am an assistant [INST] Another question[/INST]", + /* .expected_output_jinja= */ "", + /* .bos_token= */ "", + /* .eos_token= */ "", + }, + { + /* .name= */ "ai-sage/GigaChat-20B-A3B-instruct", + /* .template_str= */ "{% if messages[0]['role'] == 'system' -%}\n {%- set loop_messages = messages[1:] -%}\n {%- set system_message = bos_token + messages[0]['content'] + additional_special_tokens[1] -%}\n{%- else -%}\n {%- set loop_messages = messages -%}\n {%- set system_message = bos_token + '' -%}\n{%- endif -%}\n{%- for message in 
loop_messages %}\n {% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}\n {{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}\n {% endif %}\n \n {%- if loop.index0 == 0 -%}\n {{ system_message -}}\n {%- endif -%}\n {%- if message['role'] == 'user' -%}\n {{ message['role'] + additional_special_tokens[0] + message['content'] + additional_special_tokens[1] -}}\n {{ 'available functions' + additional_special_tokens[0] + additional_special_tokens[2] + additional_special_tokens[3] + additional_special_tokens[1] -}}\n {%- endif -%}\n {%- if message['role'] == 'assistant' -%}\n {{ message['role'] + additional_special_tokens[0] + message['content'] + additional_special_tokens[1] -}}\n {%- endif -%}\n {%- if loop.last and add_generation_prompt -%}\n {{ 'assistant' + additional_special_tokens[0] -}}\n {%- endif -%}\n{%- endfor %}", + /* .expected_output= */ "You are a helpful assistant<|message_sep|>user<|role_sep|>Hello<|message_sep|>available functions<|role_sep|>[]<|message_sep|>assistant<|role_sep|>Hi there<|message_sep|>user<|role_sep|>Who are you<|message_sep|>available functions<|role_sep|>[]<|message_sep|>assistant<|role_sep|> I am an assistant <|message_sep|>user<|role_sep|>Another question<|message_sep|>available functions<|role_sep|>[]<|message_sep|>assistant<|role_sep|>", + /* .expected_output_jinja= */ "", + /* .bos_token= */ "", + /* .eos_token= */ "", + /* .supported_with_jinja= */ false, // Requires additional_special_tokens as extra context + }, + { + /* .name= */ "Infinigence/Megrez-3B-Instruct", + /* .template_str= */ U8C("{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|role_start|>system<|role_end|>你是Megrez-3B-Instruct,将针对用户的问题给出详细的、积极的回答。<|turn_end|>' }}{% endif %}{{ '<|role_start|>' + message['role'] + '<|role_end|>' + message['content'] + '<|turn_end|>' }}{% endfor %}{% if add_generation_prompt %}{{ '<|role_start|>assistant<|role_end|>' }}{% endif %}"), + /* 
.expected_output= */ "<|role_start|>system<|role_end|>You are a helpful assistant<|turn_end|><|role_start|>user<|role_end|>Hello<|turn_end|><|role_start|>assistant<|role_end|>Hi there<|turn_end|><|role_start|>user<|role_end|>Who are you<|turn_end|><|role_start|>assistant<|role_end|> I am an assistant <|turn_end|><|role_start|>user<|role_end|>Another question<|turn_end|><|role_start|>assistant<|role_end|>", + /* .expected_output_jinja= */ "", + /* .bos_token= */ "", + /* .eos_token= */ "", + }, + { + /* .name= */ "phi-4", + /* .template_str= */ "{% for message in messages %}{% if (message['role'] == 'system') %}{{'<|im_start|>system<|im_sep|>' + message['content'] + '<|im_end|>'}}{% elif (message['role'] == 'user') %}{{'<|im_start|>user<|im_sep|>' + message['content'] + '<|im_end|><|im_start|>assistant<|im_sep|>'}}{% elif (message['role'] == 'assistant') %}{{message['content'] + '<|im_end|>'}}{% endif %}{% endfor %}", + /* .expected_output= */ "<|im_start|>system<|im_sep|>You are a helpful assistant<|im_end|><|im_start|>user<|im_sep|>Hello<|im_end|><|im_start|>assistant<|im_sep|>Hi there<|im_end|><|im_start|>user<|im_sep|>Who are you<|im_end|><|im_start|>assistant<|im_sep|> I am an assistant <|im_end|><|im_start|>user<|im_sep|>Another question<|im_end|><|im_start|>assistant<|im_sep|>", + /* .expected_output_jinja= */ "", + /* .bos_token= */ "", + /* .eos_token= */ "", + }, + { + /* .name= */ "yandex/YandexGPT-5-Lite-8B-instruct", + /* .template_str= */ "{%- set names = {'assistant': ' Ассистент:', 'user': ' Пользователь:'} %}\n{%- set tools_prefix = 'Тебе доступны следующие функции:' %}\n{%- macro __render_tool(tool) %}\n {%- set name = tool.function.name %}\n {%- set description = tool.function.description|default('') %}\n {%- set parameters = tool.function.parameters|tojson %}\n {{- '\\n' }}function {{ '{' }}'name':'{{ name }}',\n {%- if tool.function.description %}'description':'{{ description }}',{% endif %}\n'parameters':{{ parameters }}\n {{- '}' }}\n{%- 
endmacro %}\n{%- macro __render_tools(tools) %}\n {{- tools_prefix }}\n {%- for tool in tools %}\n {{- __render_tool(tool) }}\n {%- endfor %}\n {{- '\\n\\n' }}\n{%- endmacro %}\n{%- macro __render_tool_message(message) %}\n {{- '\\n\\nРезультат вызова' }} {{ message.name }}: {{ message.content }} {{ '\\n\\n' }}\n{%- endmacro %}\n{%- if tools -%}\n {{- __render_tools(tools) }}\n{%- endif -%}\n{%- macro __render_user_message(message) %}\n{{ names.user }} {{ message.content + '\\n\\n' }}\n{%- endmacro %}\n{%- macro __render_assistant_message(message) %}\n {{- names.assistant }}\n {%- set call = message['function_call'] %}\n {%- if call %}\n {{- '\\n[TOOL_CALL_START]' }}{{ call.name }}{{ '\\n' }}{{ call.arguments|tojson }}\n {%- else %}\n {{- ' ' + message.content + '\\n\\n' }}\n {%- endif %}\n{%- endmacro %}\n{%- if not add_generation_prompt is defined %}\n{%- set add_generation_prompt = false %}\n{%- endif %}\n{%- for message in messages %}\n {%- if message['role'] == 'user' %}\n {{- __render_user_message(message) }}\n {%- endif %}\n {%- if message.role == 'assistant' and not loop.last %}\n {{- __render_assistant_message(message) }}\n {%- endif %}\n {%- if message.role == 'tool' %}\n {{- __render_tool_message(message) }}\n {%- endif %}\n {%- if loop.last %}\n {{- ' Ассистент:[SEP]' }}\n {%- endif %}\n{%- endfor %}\n", + /* .expected_output= */ " Пользователь: Hello\n\n Ассистент: Hi there\n\n Пользователь: Who are you\n\n Ассистент: I am an assistant \n\n Пользователь: Another question\n\n Ассистент:[SEP]", + /* .expected_output_jinja= */ " Пользователь: You are a helpful assistant\nHello\n\n Ассистент: Hi there\n\n Пользователь: Who are you\n\n Ассистент: I am an assistant \n\n Пользователь: Another question\n\n Ассистент:[SEP]", + /* .bos_token= */ "", + /* .eos_token= */ "", + }, + { + /* .name= */ "inclusionAI/Ling-lite", + /* .template_str */ "{% for message in messages %}{% set role = message['role'] | lower %}{% if role == 'user' %}{% set role = 'HUMAN' %}{% 
endif %}{% set role = role | upper %}{{ '' + role + '' + message['content'] }}{% endfor %}{% if add_generation_prompt %}{{ 'ASSISTANT' }}{% endif %}", + /* .expected_output= */ "SYSTEMYou are a helpful assistantHUMANHelloASSISTANTHi thereHUMANWho are youASSISTANT I am an assistant HUMANAnother questionASSISTANT", + /* .expected_output_jinja= */ "", + /* .bos_token= */ "", + /* .eos_token= */ "", + }, + { + /* .name= */ "ByteDance-Seed/Seed-OSS-36B-Instruct", + /* .template_str */ "{# #}{%- for message in messages %}{%- if message.role in [\"user\", \"system\"] %}{{ bos_token + message.role + \"\\n\" + message.content + eos_token }}{%- elif message.role == \"assistant\" %}{{ bos_token + message.role }}{%- if message.content is defined and message.content is string and message.content|trim|length > 0 %}{{ \"\\n\" + message.content|trim + eos_token }}{%- endif %}{%- else %}{{ bos_token + message.role + \"\\n\" + message.content + eos_token }}{%- endif %}{%- endfor %}{%- if add_generation_prompt %}{{ bos_token + \"assistant\\n\" }}{%- endif %}", + /* .expected_output= */ "system\nYou are a helpful assistantuser\nHelloassistant\nHi thereuser\nWho are youassistant\nI am an assistantuser\nAnother questionassistant\n", + /* .expected_output_jinja= */ "system\nYou are a helpful assistantuser\nHelloassistant\nHi thereuser\nWho are youassistant\nI am an assistantuser\nAnother questionassistant\n", + /* .bos_token= */ "", + /* .eos_token= */ "", + } + }; + std::vector formatted_chat(1024); + int32_t res; + + // list all supported templates + std::vector supported_tmpl; + res = llama_chat_builtin_templates(nullptr, 0); + assert(res > 0); + supported_tmpl.resize(res); + res = llama_chat_builtin_templates(supported_tmpl.data(), supported_tmpl.size()); + std::cout << "Built-in chat templates:\n"; + for (const auto *tmpl : supported_tmpl) { + std::cout << " " << tmpl << "\n"; + } + + // test invalid chat template + res = llama_chat_apply_template("INVALID TEMPLATE", 
conversation.data(), conversation.size(), true, formatted_chat.data(), formatted_chat.size()); + assert(res < 0); + const auto add_generation_prompt = true; + + for (const auto & test_case : test_cases) { + std::cout << "\n\n=== " << test_case.name << " ===\n\n"; + formatted_chat.resize(1024); + res = llama_chat_apply_template( + test_case.template_str.c_str(), + conversation.data(), + conversation.size(), + add_generation_prompt, + formatted_chat.data(), + formatted_chat.size() + ); + formatted_chat.resize(res); + std::string output(formatted_chat.data(), formatted_chat.size()); + if (output != test_case.expected_output) { + std::cout << "Expected:\n" << test_case.expected_output << "\n"; + std::cout << "-------------------------\n"; + std::cout << "Actual:\n" << output << "\n"; + std::cout.flush(); + assert(output == test_case.expected_output); + } + } + + std::vector messages; + messages.reserve(conversation.size()); + for (const auto & msg : conversation) { + messages.push_back(simple_msg(msg.role, msg.content)); + } + for (const auto & test_case : test_cases) { + if (!test_case.supported_with_jinja) { + continue; + } + std::cout << "\n\n=== " << test_case.name << " (jinja) ===\n\n"; + try { + auto output = format_using_common( + test_case.template_str, + test_case.bos_token, + test_case.eos_token, + messages); + auto expected_output = normalize_newlines(test_case.expected_output_jinja.empty() ? 
test_case.expected_output : test_case.expected_output_jinja); + if (output != expected_output) { + std::cout << "Template:```\n" << test_case.template_str << "\n```"; + std::cout << "-------------------------\n"; + std::cout << "Expected:```\n" << expected_output << "\n```"; + std::cout << "-------------------------\n"; + std::cout << "Actual:```\n" << output << "\n```"; + std::cout.flush(); + assert(output == expected_output); + } + } catch (const std::exception & e) { + std::cerr << "ERROR: " << e.what() << "\n"; + assert(false); + } + } + + std::cout << "\nOK: All tests passed successfully.\n"; + + return 0; +} \ No newline at end of file diff --git a/tests/test-chat.cpp b/tests/test-chat.cpp index 304370f2c1..3d4a66217f 100644 --- a/tests/test-chat.cpp +++ b/tests/test-chat.cpp @@ -776,6 +776,9 @@ static void test_peg_parser(common_chat_templates * tmpls, } auto parser = make_peg_parser(tmpls, tc.params, detailed_debug); + if (detailed_debug) { + LOG_DBG("Using parser: \n%s\n", parser.arena_.dump(parser.arena_.root()).c_str()); + } common_chat_msg msg_accum; common_chat_msg msg_prev; @@ -2067,6 +2070,16 @@ static void test_template_output_peg_parsers(bool detailed_debug) { }) .run(); } + + { + auto tst = peg_tester("models/templates/StepFun3.5-Flash.jinja", detailed_debug); + tst.test("I was thinkingNow I'm not."). + enable_thinking(true). + reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK). + expect_reasoning("I was thinking"). 
+ expect_content("Now I'm not.") + .run(); + } } static void test_msg_diffs_compute() { From 384cafc98bd4176a1b9c3e190011e268071ac22a Mon Sep 17 00:00:00 2001 From: Piotr Wilkin Date: Tue, 3 Feb 2026 17:33:35 +0100 Subject: [PATCH 12/39] Fix error in argument processing --- common/chat-peg-parser.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/common/chat-peg-parser.cpp b/common/chat-peg-parser.cpp index f72bece7b0..a4f0751a0f 100644 --- a/common/chat-peg-parser.cpp +++ b/common/chat-peg-parser.cpp @@ -654,8 +654,8 @@ common_peg_parser common_chat_peg_unified_builder::standard_json_tools( } // Mode 3: Flat keys (enhanced with ID fields and parameter ordering) else { - auto name_key_parser = literal("\"" + name_key + "\""); - auto args_key_parser = literal("\"" + args_key + "\""); + auto name_key_parser = literal("\"" + effective_name_key + "\""); + auto args_key_parser = literal("\"" + effective_args_key + "\""); for (const auto & tool_def : tools) { if (!tool_def.contains("function")) { From a01e15280ae3ca2afc220f1ba3c0920120add606 Mon Sep 17 00:00:00 2001 From: Piotr Wilkin Date: Tue, 3 Feb 2026 17:40:05 +0100 Subject: [PATCH 13/39] Feeding the hungry editor checker god. 
--- CMakePresets.json | 421 ++++++------------------- common/chat-auto-parser-generator.cpp | 9 +- common/chat-auto-parser-helpers.cpp | 6 +- common/chat-auto-parser-helpers.h | 2 +- common/chat-diff-analyzer.cpp | 1 - common/chat-diff-analyzer.h | 8 +- common/chat.cpp | 25 +- common/json-schema-to-grammar.cpp | 32 +- common/peg-parser.h | 14 +- tests/test-chat-auto-parser.cpp | 4 +- tests/test-chat-template.cpp | 4 +- tests/test-chat.cpp | 2 +- tools/parser/debug-template-parser.cpp | 2 +- 13 files changed, 137 insertions(+), 393 deletions(-) diff --git a/CMakePresets.json b/CMakePresets.json index accdd72d18..b5afeb3c0f 100644 --- a/CMakePresets.json +++ b/CMakePresets.json @@ -1,332 +1,95 @@ { - "version": 4, - "configurePresets": [ - { - "name": "base", - "hidden": true, - "generator": "Ninja", - "binaryDir": "${sourceDir}/build-${presetName}", - "cacheVariables": { - "CMAKE_EXPORT_COMPILE_COMMANDS": "ON", - "CMAKE_INSTALL_RPATH": "$ORIGIN;$ORIGIN/.." - } - }, - { - "name": "sycl-base", - "hidden": true, - "generator": "Ninja", - "binaryDir": "${sourceDir}/build-${presetName}", - "cacheVariables": { - "CMAKE_EXPORT_COMPILE_COMMANDS": "ON", - "CMAKE_CXX_COMPILER": "icx", - "CMAKE_C_COMPILER": "cl", - "GGML_SYCL": "ON", - "CMAKE_INSTALL_RPATH": "$ORIGIN;$ORIGIN/.." 
- } - }, - { - "name": "debug", - "hidden": true, - "cacheVariables": { - "CMAKE_BUILD_TYPE": "Debug" - } - }, - { - "name": "release", - "hidden": true, - "cacheVariables": { - "CMAKE_BUILD_TYPE": "Release" - } - }, - { - "name": "reldbg", - "hidden": true, - "cacheVariables": { - "CMAKE_BUILD_TYPE": "RelWithDebInfo" - } - }, - { - "name": "static", - "hidden": true, - "cacheVariables": { - "GGML_STATIC": "ON" - } - }, - { - "name": "sycl_f16", - "hidden": true, - "cacheVariables": { - "GGML_SYCL_F16": "ON" - } - }, - { - "name": "vulkan", - "hidden": true, - "cacheVariables": { - "GGML_VULKAN": "ON" - } - }, - { - "name": "x64-windows-llvm", - "hidden": true, - "cacheVariables": { - "CMAKE_TOOLCHAIN_FILE": "${sourceDir}/cmake/x64-windows-llvm.cmake" - } - }, - { - "name": "arm64-windows-llvm", - "hidden": true, - "architecture": { - "value": "arm64", - "strategy": "external" - }, - "toolset": { - "value": "host=x64", - "strategy": "external" - }, - "cacheVariables": { - "CMAKE_TOOLCHAIN_FILE": "${sourceDir}/cmake/arm64-windows-llvm.cmake" - } - }, - { - "name": "arm64-apple-clang", - "hidden": true, - "architecture": { - "value": "arm64", - "strategy": "external" - }, - "toolset": { - "value": "host=x64", - "strategy": "external" - }, - "cacheVariables": { - "CMAKE_TOOLCHAIN_FILE": "${sourceDir}/cmake/arm64-apple-clang.cmake" - } - }, - { - "name": "x64-linux-gcc", - "hidden": true, - "cacheVariables": { - "CMAKE_C_COMPILER": "gcc", - "CMAKE_CXX_COMPILER": "g++" - } - }, - { - "name": "x64-linux-gcc-debug", - "inherits": [ - "base", - "x64-linux-gcc", - "debug" - ] - }, - { - "name": "x64-linux-gcc-release", - "inherits": [ - "base", - "x64-linux-gcc", - "release" - ] - }, - { - "name": "x64-linux-gcc-reldbg", - "inherits": [ - "base", - "x64-linux-gcc", - "reldbg" - ] - }, - { - "name": "x64-linux-gcc+static-release", - "inherits": [ - "base", - "x64-linux-gcc", - "release", - "static" - ] - }, - { - "name": "arm64-windows-llvm-debug", - "inherits": [ - "base", 
- "arm64-windows-llvm", - "debug" - ] - }, - { - "name": "arm64-windows-llvm-release", - "inherits": [ - "base", - "arm64-windows-llvm", - "reldbg" - ] - }, - { - "name": "arm64-windows-llvm+static-release", - "inherits": [ - "base", - "arm64-windows-llvm", - "reldbg", - "static" - ] - }, - { - "name": "arm64-apple-clang-debug", - "inherits": [ - "base", - "arm64-apple-clang", - "debug" - ] - }, - { - "name": "arm64-apple-clang-release", - "inherits": [ - "base", - "arm64-apple-clang", - "reldbg" - ] - }, - { - "name": "arm64-apple-clang+static-release", - "inherits": [ - "base", - "arm64-apple-clang", - "reldbg", - "static" - ] - }, - { - "name": "x64-windows-llvm-debug", - "inherits": [ - "base", - "x64-windows-llvm", - "debug" - ] - }, - { - "name": "x64-windows-llvm-release", - "inherits": [ - "base", - "x64-windows-llvm", - "release" - ] - }, - { - "name": "x64-windows-llvm-reldbg", - "inherits": [ - "base", - "x64-windows-llvm", - "reldbg" - ] - }, - { - "name": "x64-windows-llvm+static-release", - "inherits": [ - "base", - "x64-windows-llvm", - "reldbg", - "static" - ] - }, - { - "name": "x64-windows-msvc-debug", - "inherits": [ - "base", - "debug" - ] - }, - { - "name": "x64-windows-msvc-release", - "inherits": [ - "base", - "reldbg" - ] - }, - { - "name": "x64-windows-msvc+static-release", - "inherits": [ - "base", - "reldbg", - "static" - ] - }, - { - "name": "x64-windows-sycl-debug", - "inherits": [ - "sycl-base", - "debug" - ] - }, - { - "name": "x64-windows-sycl-debug-f16", - "inherits": [ - "sycl-base", - "debug", - "sycl_f16" - ] - }, - { - "name": "x64-windows-sycl-release", - "inherits": [ - "sycl-base", - "release" - ] - }, - { - "name": "x64-windows-sycl-release-f16", - "inherits": [ - "sycl-base", - "release", - "sycl_f16" - ] - }, - { - "name": "x64-windows-vulkan-debug", - "inherits": [ - "base", - "vulkan", - "debug" - ] - }, - { - "name": "x64-windows-vulkan-release", - "inherits": [ - "base", - "vulkan", - "release" - ] - }, - { - "name": 
"ilintar-release", - "hidden": false, - "description": "Release build", - "displayName": "Release build", - "binaryDir": "${sourceDir}/build", - "cacheVariables": { - "GGML_CUDA": "ON", - "GGML_CUDA_FORCE_CUBLAS": "OFF", - "GGML_CUDA_FORCE_MMQ": "OFF", - "GGML_CUDA_FA_ALL_QUANTS": "1", - "CMAKE_CUDA_ARCHITECTURES": "86;120", - "GGML_BLAS": "ON", - "GGML_BLAS_VENDOR": "OpenBLAS", - "GGML_CPU_ALL_VARIANTS": "ON", - "GGML_BACKEND_DL": "ON", - "CMAKE_CUDA_COMPILER": "nvcc" - }, - "inherits": [ - "base", - "release", - "x64-linux-gcc-release" - ] + "version": 4, + "configurePresets": [ + { + "name": "base", + "hidden": true, + "generator": "Ninja", + "binaryDir": "${sourceDir}/build-${presetName}", + "cacheVariables": { + "CMAKE_EXPORT_COMPILE_COMMANDS": "ON", + "CMAKE_INSTALL_RPATH": "$ORIGIN;$ORIGIN/.." } - ], - "buildPresets": [ - { - "name": "parallel", - "description": "Parallel build", - "displayName": "Parallel build", - "configurePreset": "ilintar-release", - "jobs": 8 + }, + { + "name": "sycl-base", + "hidden": true, + "generator": "Ninja", + "binaryDir": "${sourceDir}/build-${presetName}", + "cacheVariables": { + "CMAKE_EXPORT_COMPILE_COMMANDS": "ON", + "CMAKE_CXX_COMPILER": "icx", + "CMAKE_C_COMPILER": "cl", + "GGML_SYCL": "ON", + "CMAKE_INSTALL_RPATH": "$ORIGIN;$ORIGIN/.." 
} - ] -} \ No newline at end of file + }, + { "name": "debug", "hidden": true, "cacheVariables": { "CMAKE_BUILD_TYPE": "Debug" } }, + { "name": "release", "hidden": true, "cacheVariables": { "CMAKE_BUILD_TYPE": "Release" } }, + { "name": "reldbg", "hidden": true, "cacheVariables": { "CMAKE_BUILD_TYPE": "RelWithDebInfo" } }, + { "name": "static", "hidden": true, "cacheVariables": { "GGML_STATIC": "ON" } }, + { "name": "sycl_f16", "hidden": true, "cacheVariables": { "GGML_SYCL_F16": "ON" } }, + { "name": "vulkan", "hidden": true, "cacheVariables": { "GGML_VULKAN": "ON" } }, + + { + "name": "x64-windows-llvm", "hidden": true, + "cacheVariables": { + "CMAKE_TOOLCHAIN_FILE": "${sourceDir}/cmake/x64-windows-llvm.cmake" + } + }, + + { + "name": "arm64-windows-llvm", "hidden": true, + "architecture": { "value": "arm64", "strategy": "external" }, + "toolset": { "value": "host=x64", "strategy": "external" }, + "cacheVariables": { + "CMAKE_TOOLCHAIN_FILE": "${sourceDir}/cmake/arm64-windows-llvm.cmake" + } + }, + + { + "name": "arm64-apple-clang", "hidden": true, + "architecture": { "value": "arm64", "strategy": "external" }, + "toolset": { "value": "host=x64", "strategy": "external" }, + "cacheVariables": { + "CMAKE_TOOLCHAIN_FILE": "${sourceDir}/cmake/arm64-apple-clang.cmake" + } + }, + { + "name": "x64-linux-gcc", "hidden": true, + "cacheVariables": { + "CMAKE_C_COMPILER": "gcc", + "CMAKE_CXX_COMPILER": "g++" + } + }, + { "name": "x64-linux-gcc-debug", "inherits": [ "base", "x64-linux-gcc", "debug" ] }, + { "name": "x64-linux-gcc-release", "inherits": [ "base", "x64-linux-gcc", "release" ] }, + { "name": "x64-linux-gcc-reldbg", "inherits": [ "base", "x64-linux-gcc", "reldbg" ] }, + { "name": "x64-linux-gcc+static-release", "inherits": [ "base", "x64-linux-gcc", "release", "static" ] }, + + { "name": "arm64-windows-llvm-debug", "inherits": [ "base", "arm64-windows-llvm", "debug" ] }, + { "name": "arm64-windows-llvm-release", "inherits": [ "base", "arm64-windows-llvm", 
"reldbg" ] }, + { "name": "arm64-windows-llvm+static-release", "inherits": [ "base", "arm64-windows-llvm", "reldbg", "static" ] }, + + { "name": "arm64-apple-clang-debug", "inherits": [ "base", "arm64-apple-clang", "debug" ] }, + { "name": "arm64-apple-clang-release", "inherits": [ "base", "arm64-apple-clang", "reldbg" ] }, + { "name": "arm64-apple-clang+static-release", "inherits": [ "base", "arm64-apple-clang", "reldbg", "static" ] }, + + { "name": "x64-windows-llvm-debug", "inherits": [ "base", "x64-windows-llvm", "debug" ] }, + { "name": "x64-windows-llvm-release", "inherits": [ "base", "x64-windows-llvm", "release" ] }, + { "name": "x64-windows-llvm-reldbg", "inherits": [ "base", "x64-windows-llvm", "reldbg" ] }, + { "name": "x64-windows-llvm+static-release", "inherits": [ "base", "x64-windows-llvm", "reldbg", "static" ] }, + + { "name": "x64-windows-msvc-debug", "inherits": [ "base", "debug" ] }, + { "name": "x64-windows-msvc-release", "inherits": [ "base", "reldbg" ] }, + { "name": "x64-windows-msvc+static-release", "inherits": [ "base", "reldbg", "static" ] }, + + { "name": "x64-windows-sycl-debug", "inherits": [ "sycl-base", "debug" ] }, + { "name": "x64-windows-sycl-debug-f16", "inherits": [ "sycl-base", "debug", "sycl_f16" ] }, + { "name": "x64-windows-sycl-release", "inherits": [ "sycl-base", "release" ] }, + { "name": "x64-windows-sycl-release-f16", "inherits": [ "sycl-base", "release", "sycl_f16" ] }, + + { "name": "x64-windows-vulkan-debug", "inherits": [ "base", "vulkan", "debug" ] }, + { "name": "x64-windows-vulkan-release", "inherits": [ "base", "vulkan", "release" ] } + ] +} diff --git a/common/chat-auto-parser-generator.cpp b/common/chat-auto-parser-generator.cpp index a721a30f1c..ba0cf66b08 100644 --- a/common/chat-auto-parser-generator.cpp +++ b/common/chat-auto-parser-generator.cpp @@ -26,7 +26,7 @@ common_chat_params universal_peg_generator::generate_parser(const common_chat_te return generate_parser(tmpl, inputs, analysis); } 
-common_chat_params universal_peg_generator::generate_parser(const common_chat_template & tmpl, +common_chat_params universal_peg_generator::generate_parser(const common_chat_template & tmpl, const struct templates_params & inputs, const diff_analysis_result & analysis) { // Check for thinking forced open @@ -120,9 +120,8 @@ common_peg_arena universal_peg_generator::build_parser(const diff_analysis_resul if (extracting_reasoning) { return reasoning + m.content_start + p.content(p.until(m.content_end)) + m.content_end + p.end(); - } - return p.content(p.until(m.content_start)) + m.content_start + - p.content(p.until(m.content_end)) + m.content_end + p.end(); + } + return p.content(p.until(m.content_start)) + m.content_start + p.content(p.until(m.content_end)) + m.content_end + p.end(); } return reasoning + p.content(p.rest()) + p.end(); }); @@ -358,4 +357,4 @@ common_peg_parser universal_peg_generator::build_tool_parser( } GGML_ABORT("Unable to create tool parser"); -} \ No newline at end of file +} diff --git a/common/chat-auto-parser-helpers.cpp b/common/chat-auto-parser-helpers.cpp index 4bf27f1dcb..d2aec2d9bb 100644 --- a/common/chat-auto-parser-helpers.cpp +++ b/common/chat-auto-parser-helpers.cpp @@ -38,17 +38,17 @@ std::string trim_trailing_whitespace(const std::string & str) { if (str.empty()) { return ""; } - + size_t end = str.length() - 1; while (end > 0 && std::isspace(static_cast(str[end]))) { end--; } - + // If first char is also whitespace, return empty string if (end == 0 && std::isspace(static_cast(str[0]))) { return ""; } - + return str.substr(0, end + 1); } diff --git a/common/chat-auto-parser-helpers.h b/common/chat-auto-parser-helpers.h index 445119be8e..53d3454566 100644 --- a/common/chat-auto-parser-helpers.h +++ b/common/chat-auto-parser-helpers.h @@ -22,4 +22,4 @@ std::string after_common_suffix(const std::string & full, const std::string & le std::vector segmentize_markers(const std::string & text); // Prune whitespace-only segments from a 
vector of segments -std::vector prune_whitespace_segments(const std::vector & segments); \ No newline at end of file +std::vector prune_whitespace_segments(const std::vector & segments); diff --git a/common/chat-diff-analyzer.cpp b/common/chat-diff-analyzer.cpp index 0082e3ab77..53906102d8 100644 --- a/common/chat-diff-analyzer.cpp +++ b/common/chat-diff-analyzer.cpp @@ -192,7 +192,6 @@ std::optional differential_analyzer::compare_variants( if (params_modifier) { params_modifier(params_B); } - // Apply template to both variants std::string output_A = apply_template(tmpl, params_A); diff --git a/common/chat-diff-analyzer.h b/common/chat-diff-analyzer.h index b1c601181e..7933de5ce3 100644 --- a/common/chat-diff-analyzer.h +++ b/common/chat-diff-analyzer.h @@ -97,7 +97,7 @@ enum class reasoning_mode { TAG_BASED, // Standard tag-based: ... DELIMITER, // Delimiter-based: [BEGIN FINAL RESPONSE] (reasoning ends at delimiter) FORCED_OPEN, // Template ends with open reasoning tag (empty start, non-empty end) - FORCED_CLOSED,// Template ends with open reasoning tag on enabled thinking but + FORCED_CLOSED,// Template ends with open reasoning tag on enabled thinking but // with both opened and closed tag for disabled thinking TOOLS_ONLY // Only reason on tool calls, not on normal content }; @@ -281,7 +281,7 @@ class differential_analyzer { const std::string & fun_name_needle, const std::string & arg_name_needle, diff_analysis_result & result); - + static void analyze_tool_call_format_non_json(const std::string & clean_haystack, const std::string & fun_name_needle, diff_analysis_result & result); @@ -324,7 +324,7 @@ class differential_analyzer { }; enum segment_type { - TEXT, + TEXT, MARKER }; @@ -344,4 +344,4 @@ struct segment { std::string value; segment(segment_type type, std::string value) : type(type), value(std::move(value)) {} -}; \ No newline at end of file +}; diff --git a/common/chat.cpp b/common/chat.cpp index 4041d7d81e..3fb17b4e9d 100644 --- a/common/chat.cpp +++ 
b/common/chat.cpp @@ -1109,39 +1109,22 @@ static common_chat_params common_chat_params_init_functionary_v3_2(const common_ tool_choice |= p.rule("tool-" + name, tool_parser); }); - // The model can output: - // 1. Just content: >>>all\n{content} - // 2. Just tool call(s): >>>function_name\n{json_args} - // 3. Both: >>>all\n{content}>>>function_name\n{json_args} - - // Option 1: Content only (no following tool call) auto content_only = content_until_end; - - // Option 2: Content followed by tool call(s) auto content_and_tools = content_until_tool + p.one_or_more(tool_choice); - - // Option 3: Just tool call(s) (no content) - auto tools_only = p.one_or_more(tool_choice); + auto tools_only = p.one_or_more(tool_choice); if (inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED) { - // Must have at least one tool call if (inputs.parallel_tool_calls) { - // Multiple tool calls allowed return p.choice({ content_and_tools, tools_only }) + p.end(); } else { - // Single tool call only return p.choice({ content_until_tool + tool_choice, tools_only }) + p.end(); } } else { - // Tool calls are optional (auto mode) if (inputs.parallel_tool_calls) { - // Multiple tool calls allowed return p.choice({ content_and_tools, content_only, tools_only }) + p.end(); - } else { - // Single tool call at most - auto content_and_tool = content_until_tool + tool_choice; - return p.choice({ content_and_tool, content_only, tool_choice }) + p.end(); } + auto content_and_tool = content_until_tool + tool_choice; + return p.choice({ content_and_tool, content_only, tool_choice }) + p.end(); } }); @@ -1244,7 +1227,7 @@ static common_chat_params common_chat_templates_apply_jinja(const struct common_ if (tmpl.original_caps().supports_tool_calls) { // some templates will require the content field in tool call messages - // to still be non-null, this puts an empty string everywhere where the + // to still be non-null, this puts an empty string everywhere where the // content field is null 
workaround::requires_non_null_content(params.messages); } diff --git a/common/json-schema-to-grammar.cpp b/common/json-schema-to-grammar.cpp index efd2c8ef95..57a14dc9f4 100644 --- a/common/json-schema-to-grammar.cpp +++ b/common/json-schema-to-grammar.cpp @@ -27,10 +27,10 @@ static std::string build_repetition(const std::string & item_rule, int min_items if (separator_rule.empty()) { if (min_items == 1 && !has_max) { return item_rule + "+"; - } + } if (min_items == 0 && !has_max) { return item_rule + "*"; - } + } return item_rule + "{" + std::to_string(min_items) + "," + (has_max ? std::to_string(max_items) : "") + "}"; } @@ -828,11 +828,11 @@ public: if (schema.contains("$ref")) { return _add_rule(rule_name, _resolve_ref(schema["$ref"])); - } + } if (schema.contains("oneOf") || schema.contains("anyOf")) { std::vector alt_schemas = schema.contains("oneOf") ? schema["oneOf"].get>() : schema["anyOf"].get>(); return _add_rule(rule_name, _generate_union_rule(name, alt_schemas)); - } + } if (schema_type.is_array()) { std::vector schema_types; for (const auto & t : schema_type) { @@ -841,17 +841,17 @@ public: schema_types.push_back(schema_copy); } return _add_rule(rule_name, _generate_union_rule(name, schema_types)); - } + } if (schema.contains("const")) { return _add_rule(rule_name, _generate_constant_rule(schema["const"]) + " space"); - } + } if (schema.contains("enum")) { std::vector enum_values; for (const auto & v : schema["enum"]) { enum_values.push_back(_generate_constant_rule(v)); } return _add_rule(rule_name, "(" + string_join(enum_values, " | ") + ") space"); - } + } if ((schema_type.is_null() || schema_type == "object") && (schema.contains("properties") || (schema.contains("additionalProperties") && schema["additionalProperties"] != true))) { @@ -873,7 +873,7 @@ public: _build_object_rule( properties, required, name, schema.contains("additionalProperties") ? 
schema["additionalProperties"] : json())); - } + } if ((schema_type.is_null() || schema_type == "object" || schema_type == "string") && schema.contains("allOf")) { std::unordered_set required; std::vector> properties; @@ -922,7 +922,7 @@ public: } } return _add_rule(rule_name, _build_object_rule(properties, required, hybrid_name, json())); - } + } if ((schema_type.is_null() || schema_type == "array") && (schema.contains("items") || schema.contains("prefixItems"))) { json items = schema.contains("items") ? schema["items"] : schema["prefixItems"]; if (items.is_array()) { @@ -942,23 +942,23 @@ public: int max_items = max_items_json.is_number_integer() ? max_items_json.get() : std::numeric_limits::max(); return _add_rule(rule_name, "\"[\" space " + build_repetition(item_rule_name, min_items, max_items, "\",\" space") + " \"]\" space"); - } + } if ((schema_type.is_null() || schema_type == "string") && schema.contains("pattern")) { return _visit_pattern(schema["pattern"], rule_name); - } + } if ((schema_type.is_null() || schema_type == "string") && std::regex_match(schema_format, std::regex("^uuid[1-5]?$"))) { return _add_primitive(rule_name == "root" ? "root" : schema_format, PRIMITIVE_RULES.at("uuid")); - } + } if ((schema_type.is_null() || schema_type == "string") && STRING_FORMAT_RULES.find(schema_format + "-string") != STRING_FORMAT_RULES.end()) { auto prim_name = schema_format + "-string"; return _add_rule(rule_name, _add_primitive(prim_name, STRING_FORMAT_RULES.at(prim_name))); - } + } if (schema_type == "string" && (schema.contains("minLength") || schema.contains("maxLength"))) { std::string char_rule = _add_primitive("char", PRIMITIVE_RULES.at("char")); int min_len = schema.contains("minLength") ? schema["minLength"].get() : 0; int max_len = schema.contains("maxLength") ? 
schema["maxLength"].get() : std::numeric_limits::max(); return _add_rule(rule_name, "\"\\\"\" " + build_repetition(char_rule, min_len, max_len) + " \"\\\"\" space"); - } + } if (schema_type == "integer" && (schema.contains("minimum") || schema.contains("exclusiveMinimum") || schema.contains("maximum") || schema.contains("exclusiveMaximum"))) { int64_t min_value = std::numeric_limits::min(); int64_t max_value = std::numeric_limits::max(); @@ -977,10 +977,10 @@ public: build_min_max_int(min_value, max_value, out); out << ") space"; return _add_rule(rule_name, out.str()); - } + } if (schema.empty() || schema_type == "object") { return _add_rule(rule_name, _add_primitive("object", PRIMITIVE_RULES.at("object"))); - } + } if (!schema_type.is_string() || PRIMITIVE_RULES.find(schema_type.get()) == PRIMITIVE_RULES.end()) { _errors.push_back("Unrecognized schema: " + schema.dump()); return ""; diff --git a/common/peg-parser.h b/common/peg-parser.h index 9bd5e05838..947c775f10 100644 --- a/common/peg-parser.h +++ b/common/peg-parser.h @@ -318,16 +318,16 @@ class common_peg_parser_builder { common_peg_parser wrap(common_peg_parser_id id) { return common_peg_parser(id, *this); } common_peg_parser add(const common_peg_parser_variant & p) { return wrap(arena_.add_parser(p)); } - - bool allow_python_dict_format_ = false; + + bool allow_python_dict_format_ = false; public: common_peg_parser_builder(); - - // Enable/disable Python dict format support (single-quoted strings). - // When enabled, JSON parsers will also accept Python dict-style single-quoted strings. - void set_allow_python_dict_format(bool allow) { allow_python_dict_format_ = allow; } - bool get_allow_python_dict_format() const { return allow_python_dict_format_; } + + // Enable/disable Python dict format support (single-quoted strings). + // When enabled, JSON parsers will also accept Python dict-style single-quoted strings. 
+ void set_allow_python_dict_format(bool allow) { allow_python_dict_format_ = allow; } + bool get_allow_python_dict_format() const { return allow_python_dict_format_; } // Match nothing, always succeed. // S -> ε diff --git a/tests/test-chat-auto-parser.cpp b/tests/test-chat-auto-parser.cpp index 298a1b50bd..04122e9fae 100644 --- a/tests/test-chat-auto-parser.cpp +++ b/tests/test-chat-auto-parser.cpp @@ -117,7 +117,7 @@ static void test_marker_separation(testing & t) { t.assert_equal("second is '[marker]'", "[marker]", single_square_marker[1].value); t.assert_equal("last is 'post_marker'", "post_marker", single_square_marker[2].value); }); - + t.test("single_diagonal_marker", [&] (testing & t) { t.assert_equal("first is text", segment_type::TEXT, single_diag_marker[0].type); t.assert_equal("second is marker", segment_type::MARKER, single_diag_marker[1].type); @@ -1219,7 +1219,7 @@ static common_chat_template load_template(testing & t, const std::string & templ common_chat_template tmpl(template_source, "", ""); t.assert_true("Nemotron template loaded successfully", template_source.length() > 0); return tmpl; -} +} // ============================================================================ // Nemotron Template Analysis Tests diff --git a/tests/test-chat-template.cpp b/tests/test-chat-template.cpp index e21dbb04d1..48aac31d75 100644 --- a/tests/test-chat-template.cpp +++ b/tests/test-chat-template.cpp @@ -66,7 +66,7 @@ int main(int argc, char ** argv) { if (args[i] == "--help" || args[i] == "-h") { std::cout << HELP << "\n"; return 0; - } + } if (args[i] == "--json" && i + 1 < args.size()) { json_path = args[i + 1]; i++; @@ -623,4 +623,4 @@ int main_automated_tests(void) { std::cout << "\nOK: All tests passed successfully.\n"; return 0; -} \ No newline at end of file +} diff --git a/tests/test-chat.cpp b/tests/test-chat.cpp index 3d4a66217f..5db03cc1c0 100644 --- a/tests/test-chat.cpp +++ b/tests/test-chat.cpp @@ -822,7 +822,7 @@ static void 
test_peg_parser(common_chat_templates * tmpls, } catch (std::exception & e) { throw std::runtime_error((std::string("Error comparing accumulated message to current: ") + e.what()).c_str()); } - + msg_prev = msg_current; } diff --git a/tools/parser/debug-template-parser.cpp b/tools/parser/debug-template-parser.cpp index b8b4f3dfd3..06ceb0f02f 100644 --- a/tools/parser/debug-template-parser.cpp +++ b/tools/parser/debug-template-parser.cpp @@ -441,7 +441,7 @@ int main(int argc, char ** argv) { LOG_ERR("args_field: '%s'\n", analysis.args_field.c_str()); LOG_ERR("id_field: '%s'\n", analysis.id_field.c_str()); LOG_ERR("gen_id_field: '%s'\n", analysis.gen_id_field.c_str()); - LOG_ERR("parameter_order: '%s'\n", std::accumulate(analysis.parameter_order.begin(), analysis.parameter_order.end(), + LOG_ERR("parameter_order: '%s'\n", std::accumulate(analysis.parameter_order.begin(), analysis.parameter_order.end(), std::string(""), [] (const std::string & a, const std::string & b) { return a.empty() ? 
b : a + ", " + b; } ).c_str()); From 09b447a487d9dcc325084f5796f89ed1b808d585 Mon Sep 17 00:00:00 2001 From: Piotr Wilkin Date: Tue, 3 Feb 2026 18:49:02 +0100 Subject: [PATCH 14/39] Fix incorrect coercion of strings to non-string types during parsing --- common/chat-peg-parser.cpp | 19 +++++++++++++++++++ tests/test-chat.cpp | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+) diff --git a/common/chat-peg-parser.cpp b/common/chat-peg-parser.cpp index a4f0751a0f..f84b6ba298 100644 --- a/common/chat-peg-parser.cpp +++ b/common/chat-peg-parser.cpp @@ -301,6 +301,25 @@ void common_chat_peg_unified_mapper::map(const common_peg_ast_node & node) { } else { buffer_needs_closing_quote = true; } + } else if (is_arg_string_value) { + // Schema declares this as string type but it parsed as non-string (e.g., number) + // Force treatment as string value - add opening quote and escape content + if (!current_tool->name.empty()) { + if (!needs_closing_quote) { + value_to_add = "\""; + needs_closing_quote = true; + } + } else { + if (!buffer_needs_closing_quote) { + value_to_add = "\""; + buffer_needs_closing_quote = true; + } + } + std::string escaped = json(value_content).dump(); + if (escaped.size() >= 2 && escaped.front() == '"' && escaped.back() == '"') { + escaped = escaped.substr(1, escaped.size() - 2); + } + value_to_add += escaped; } else { // For non-string values (number, bool, null, object, array), add raw value content // Using raw content instead of dump() ensures monotonicity for streaming diff --git a/tests/test-chat.cpp b/tests/test-chat.cpp index 5db03cc1c0..18d8052a5d 100644 --- a/tests/test-chat.cpp +++ b/tests/test-chat.cpp @@ -321,6 +321,23 @@ static common_chat_tool edit_tool{ })", }; +static common_chat_tool magic_tool{ + /* .name = */ "magic", + /* .description = */ "Magic tool that takes a hash", + /* .parameters = */ R"({ + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "ref": { + "type": "string" + } + }, 
+ "required": ["name", "ref"] + })", +}; + static std::vector tools{ special_function_tool, special_function_tool_with_optional_param, python_tool, html_tool, todo_list }; @@ -2079,6 +2096,25 @@ static void test_template_output_peg_parsers(bool detailed_debug) { expect_reasoning("I was thinking"). expect_content("Now I'm not.") .run(); + + // Test that numeric-looking string values are coerced to strings per the schema + tst.test( + "Let me call the magic tool\n" + "\n" + "\n" + "\n" + "\nfooBar\n\n" + "\n5123123\n\n" + "\n" + "") + .enable_thinking(true) + .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK) + .tools({ magic_tool }) + .expect_reasoning("Let me call the magic tool") + .expect_tool_calls({ + { "magic", R"({"name": "fooBar", "ref": "5123123"})", {} }, + }) + .run(); } } From f71ae707babf17497deabb3e2db11eab8e1a7726 Mon Sep 17 00:00:00 2001 From: Piotr Wilkin Date: Tue, 3 Feb 2026 19:07:01 +0100 Subject: [PATCH 15/39] Fix minor regressions, add [[noreturn]] attrib --- common/jinja/value.cpp | 16 ---------------- common/jinja/value.h | 33 +++++++++++++++++---------------- 2 files changed, 17 insertions(+), 32 deletions(-) diff --git a/common/jinja/value.cpp b/common/jinja/value.cpp index 17d7eae764..38da1df6d3 100644 --- a/common/jinja/value.cpp +++ b/common/jinja/value.cpp @@ -428,22 +428,6 @@ const func_builtins & global_builtins() { bool res = it != builtins.end(); return mk_val(res); }}, - {"test_is_in", [](const func_args & args) -> value { - args.ensure_count(2, 2); - value val_needle = args.get_pos(0); - value val_haystack = args.get_pos(1); - const auto & haystack = is_val(val_haystack) ? val_haystack->as_array() : std::vector(1, val_haystack); - for (auto it = haystack.cbegin(); it != haystack.cend(); it++) { - if ((*it)->type() == val_needle->type()) { - if (is_val(val_haystack) ? 
- (*it)->as_string().str().find(val_needle->as_string().str()) != std::string::npos : - value_compare(*it, val_needle, value_compare_op::eq)) { - return mk_val(true); - } - } - } - return mk_val(false); - }}, {"test_is_sameas", [](const func_args & args) -> value { // Check if an object points to the same memory address as another object (void)args; diff --git a/common/jinja/value.h b/common/jinja/value.h index a2f92d2c69..a86f0f0587 100644 --- a/common/jinja/value.h +++ b/common/jinja/value.h @@ -13,6 +13,7 @@ #include #include #include +#include namespace jinja { @@ -126,27 +127,27 @@ struct value_t { // Note: only for debugging and error reporting purposes virtual std::string type() const { return ""; } - virtual int64_t as_int() const { throw std::runtime_error(type() + " is not an int value"); } - virtual double as_float() const { throw std::runtime_error(type() + " is not a float value"); } - virtual string as_string() const { throw std::runtime_error(type() + " is not a string value"); } - virtual bool as_bool() const { throw std::runtime_error(type() + " is not a bool value"); } - virtual const std::vector & as_array() const { throw std::runtime_error(type() + " is not an array value"); } - virtual const std::vector> & as_ordered_object() const { throw std::runtime_error(type() + " is not an object value"); } - virtual value invoke(const func_args &) const { throw std::runtime_error(type() + " is not a function value"); } + [[noreturn]] virtual int64_t as_int() const { throw std::runtime_error(type() + " is not an int value"); } + [[noreturn]] virtual double as_float() const { throw std::runtime_error(type() + " is not a float value"); } + [[noreturn]] virtual string as_string() const { throw std::runtime_error(type() + " is not a string value"); } + [[noreturn]] virtual bool as_bool() const { throw std::runtime_error(type() + " is not a bool value"); } + [[noreturn]] virtual const std::vector & as_array() const { throw std::runtime_error(type() + " is not 
an array value"); } + [[noreturn]] virtual const std::vector> & as_ordered_object() const { throw std::runtime_error(type() + " is not an object value"); } + [[noreturn]] virtual value invoke(const func_args &) const { throw std::runtime_error(type() + " is not a function value"); } virtual bool is_none() const { return false; } virtual bool is_undefined() const { return false; } - virtual const func_builtins & get_builtins() const { + [[noreturn]] virtual const func_builtins & get_builtins() const { throw std::runtime_error("No builtins available for type " + type()); } - virtual bool has_key(const value &) { throw std::runtime_error(type() + " is not an object value"); } - virtual void insert(const value & /* key */, const value & /* val */) { throw std::runtime_error(type() + " is not an object value"); } - virtual value & at(const value & /* key */, value & /* default_val */) { throw std::runtime_error(type() + " is not an object value"); } - virtual value & at(const value & /* key */) { throw std::runtime_error(type() + " is not an object value"); } - virtual value & at(const std::string & /* key */, value & /* default_val */) { throw std::runtime_error(type() + " is not an object value"); } - virtual value & at(const std::string & /* key */) { throw std::runtime_error(type() + " is not an object value"); } - virtual value & at(int64_t /* idx */, value & /* default_val */) { throw std::runtime_error(type() + " is not an array value"); } - virtual value & at(int64_t /* idx */) { throw std::runtime_error(type() + " is not an array value"); } + [[noreturn]] virtual bool has_key(const value &) { throw std::runtime_error(type() + " is not an object value"); } + [[noreturn]] virtual void insert(const value & /* key */, const value & /* val */) { throw std::runtime_error(type() + " is not an object value"); } + [[noreturn]] virtual value & at(const value & /* key */, value & /* default_val */) { throw std::runtime_error(type() + " is not an object value"); } + 
[[noreturn]] virtual value & at(const value & /* key */) { throw std::runtime_error(type() + " is not an object value"); } + [[noreturn]] virtual value & at(const std::string & /* key */, value & /* default_val */) { throw std::runtime_error(type() + " is not an object value"); } + [[noreturn]] virtual value & at(const std::string & /* key */) { throw std::runtime_error(type() + " is not an object value"); } + [[noreturn]] virtual value & at(int64_t /* idx */, value & /* default_val */) { throw std::runtime_error(type() + " is not an array value"); } + [[noreturn]] virtual value & at(int64_t /* idx */) { throw std::runtime_error(type() + " is not an array value"); } virtual bool is_numeric() const { return false; } virtual bool is_hashable() const { return false; } From 15f7aa1fbe85fc9ed054dea5321ac06bf884df55 Mon Sep 17 00:00:00 2001 From: Piotr Wilkin Date: Tue, 3 Feb 2026 23:03:00 +0100 Subject: [PATCH 16/39] We don't like segfaults (or failing tests). --- tests/CMakeLists.txt | 2 +- tests/test-chat-auto-parser.cpp | 68 ++++++++++++++++++++++++--------- 2 files changed, 52 insertions(+), 18 deletions(-) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index c73bd38dfd..b8ce2fac90 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -190,7 +190,7 @@ endif() llama_build_and_test(test-chat-peg-parser.cpp peg-parser/simple-tokenize.cpp) llama_build_and_test(test-jinja.cpp) llama_test(test-jinja NAME test-jinja-py ARGS -py LABEL python) -llama_build_and_test(test-chat-auto-parser.cpp) +llama_build_and_test(test-chat-auto-parser.cpp WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}) llama_build_and_test(test-chat-template.cpp) llama_build_and_test(test-json-partial.cpp) llama_build_and_test(test-log.cpp) diff --git a/tests/test-chat-auto-parser.cpp b/tests/test-chat-auto-parser.cpp index 04122e9fae..90edaba32d 100644 --- a/tests/test-chat-auto-parser.cpp +++ b/tests/test-chat-auto-parser.cpp @@ -588,7 +588,9 @@ static void test_compare_variants_basic(testing 
& t) { auto result = differential_analyzer::compare_variants(tmpl, params, modifier); - t.assert_true("result should have value", result.has_value()); + if (!t.assert_true("result should have value", result.has_value())) { + return; + } // The template might not output anything if messages is empty or format is different // Check that we get a valid result t.assert_true("prefix or left should have content", !result->diff.prefix.empty() || !result->diff.left.empty()); @@ -609,7 +611,9 @@ static void test_compare_variants_messages_modifier(testing & t) { std::optional result = differential_analyzer::compare_variants(tmpl, params, modifier); - t.assert_true("result should have value", result.has_value()); + if (!t.assert_true("result should have value", result.has_value())) { + return; + } t.assert_equal("left should be 'A'", "A", result->diff.left); t.assert_equal("right should be 'B'", "B", result->diff.right); } @@ -630,7 +634,9 @@ static void test_compare_variants_tools_modifier(testing & t) { auto result = differential_analyzer::compare_variants(tmpl, params, modifier); - t.assert_true("result should have value", result.has_value()); + if (!t.assert_true("result should have value", result.has_value())) { + return; + } t.assert_equal("left should be 'foo'", "foo", result->diff.left); t.assert_equal("right should be 'bar'", "bar", result->diff.right); } @@ -652,7 +658,9 @@ static void test_compare_variants_both_modifiers(testing & t) { auto result = differential_analyzer::compare_variants(tmpl, params, modifier); - t.assert_true("result should have value", result.has_value()); + if (!t.assert_true("result should have value", result.has_value())) { + return; + } t.assert_equal("left should be 'user:A'", "user:A", result->diff.left); t.assert_equal("right should be 'newuser:B'", "newuser:B", result->diff.right); } @@ -688,7 +696,9 @@ static void test_compare_variants_identity(testing & t) { // No modifier - should use identity auto result = 
differential_analyzer::compare_variants(tmpl, params, nullptr); - t.assert_true("result should have value", result.has_value()); + if (!t.assert_true("result should have value", result.has_value())) { + return; + } t.assert_equal("prefix should be 'Hello'", "Hello", result->diff.prefix); t.assert_equal("left should be empty", "", result->diff.left); t.assert_equal("right should be empty", "", result->diff.right); @@ -800,7 +810,9 @@ static void test_seed_oss_tool_presence(testing & t) { p.messages = params_with_tools.messages; }); - t.assert_true("T1 result should have value", result.has_value()); + if (!t.assert_true("T1 result should have value", result.has_value())) { + return; + } const auto & diff = result->diff; t.assert_true("T1 prefix should contain system", diff.prefix.find("system") != std::string::npos); @@ -860,7 +872,9 @@ static void test_seed_oss_call_count(testing & t) { p.messages = json::array({user_msg, assistant_two_calls}); }); - t.assert_true("T2 result should have value", result.has_value()); + if (!t.assert_true("T2 result should have value", result.has_value())) { + return; + } const auto & diff = result->diff; @@ -950,7 +964,9 @@ static void test_seed_oss_function_names(testing & t) { p.messages = json::array({user_msg, assistant_func_beta}); }); - t.assert_true("T3 result should have value", result.has_value()); + if (!t.assert_true("T3 result should have value", result.has_value())) { + return; + } const auto & diff = result->diff; @@ -1052,7 +1068,9 @@ static void test_seed_oss_argument_count(testing & t) { p.messages = json::array({user_msg, assistant_one_arg}); }); - t.assert_true("T4 zero vs one result should have value", result_zero_one.has_value()); + if (!t.assert_true("T4 zero vs one result should have value", result_zero_one.has_value())) { + return; + } t.assert_true("T4 zero vs one left should be empty or minimal", result_zero_one->diff.left.empty() || result_zero_one->diff.left == ""); t.assert_true("T4 zero vs one right 
should contain arg1", result_zero_one->diff.right.find("arg1") != std::string::npos); @@ -1068,7 +1086,9 @@ static void test_seed_oss_argument_count(testing & t) { p.messages = json::array({user_msg, assistant_two_args}); }); - t.assert_true("T4 one vs two result should have value", result_one_two.has_value()); + if (!t.assert_true("T4 one vs two result should have value", result_one_two.has_value())) { + return; + } const auto & diff4 = result_one_two->diff; t.assert_true("T4 one vs two left should contain arg1 (or prefix)", @@ -1124,7 +1144,9 @@ static void test_seed_oss_args_presence(testing & t) { p.messages = json::array({user_msg, assistant_other_arg}); }); - t.assert_true("T5 same vs other result should have value", result_same_other.has_value()); + if (!t.assert_true("T5 same vs other result should have value", result_same_other.has_value())) { + return; + } const auto & diff5a = result_same_other->diff; t.assert_true("T5 same vs other left should contain param1 (or prefix/suffix)", diff5a.left.find("param1") != std::string::npos || diff5a.prefix.find("param1") != std::string::npos || diff5a.suffix.find("param1") != std::string::npos); @@ -1141,7 +1163,9 @@ static void test_seed_oss_args_presence(testing & t) { p.messages = json::array({user_msg, assistant_both_args}); }); - t.assert_true("T5 same vs both result should have value", result_same_both.has_value()); + if (!t.assert_true("T5 same vs both result should have value", result_same_both.has_value())) { + return; + } const auto & diff5b = result_same_both->diff; t.assert_true("T5 same vs both left should contain param1 (or prefix/suffix)", diff5b.left.find("param1") != std::string::npos || diff5b.prefix.find("param1") != std::string::npos || diff5b.suffix.find("param1") != std::string::npos); @@ -1188,7 +1212,9 @@ static void test_seed_oss_tool_with_reasoning(testing & t) { p.messages = json::array({user_msg, assistant_tool_with_reasoning}); }); - t.assert_true("T6 result should have value", 
result.has_value()); + if (!t.assert_true("T6 result should have value", result.has_value())) { + return; + } const auto & diff = result->diff; @@ -1445,7 +1471,9 @@ static void test_standard_json_tools_openai(testing & t) { common_peg_parse_context ctx(input, false); auto result = parser.parse(ctx); - t.assert_true("parse success", result.success()); + if (!t.assert_true("parse success", result.success())) { + return; + } common_chat_msg msg; auto mapper = common_chat_peg_unified_mapper(msg); @@ -1489,7 +1517,9 @@ static void test_standard_json_tools_cohere(testing & t) { common_peg_parse_context ctx(input, false); auto result = parser.parse(ctx); - t.assert_true("parse success", result.success()); + if (!t.assert_true("parse success", result.success())) { + return; + } common_chat_msg msg; auto mapper = common_chat_peg_unified_mapper(msg); @@ -1533,7 +1563,9 @@ static void test_standard_json_tools_function_key(testing & t) { common_peg_parse_context ctx(input, false); auto result = parser.parse(ctx); - t.assert_true("parse success", result.success()); + if (!t.assert_true("parse success", result.success())) { + return; + } common_chat_msg msg; auto mapper = common_chat_peg_unified_mapper(msg); @@ -1806,7 +1838,9 @@ static void test_tagged_args_with_embedded_quotes(testing & t) { common_peg_parse_context ctx(input, false); auto result = parser.parse(ctx); - t.assert_true("parse success", result.success()); + if (!t.assert_true("parse success", result.success())) { + return; + } common_chat_msg msg; auto mapper = common_chat_peg_unified_mapper(msg); From c2f6fc3a178196571ff0ec7b03e6e45e79325863 Mon Sep 17 00:00:00 2001 From: Piotr Wilkin Date: Tue, 3 Feb 2026 23:18:08 +0100 Subject: [PATCH 17/39] Remove [[noreturn]] as it causes compilation problems on Mac. 
--- common/jinja/value.h | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/common/jinja/value.h b/common/jinja/value.h index a86f0f0587..7111629cda 100644 --- a/common/jinja/value.h +++ b/common/jinja/value.h @@ -127,27 +127,27 @@ struct value_t { // Note: only for debugging and error reporting purposes virtual std::string type() const { return ""; } - [[noreturn]] virtual int64_t as_int() const { throw std::runtime_error(type() + " is not an int value"); } - [[noreturn]] virtual double as_float() const { throw std::runtime_error(type() + " is not a float value"); } - [[noreturn]] virtual string as_string() const { throw std::runtime_error(type() + " is not a string value"); } - [[noreturn]] virtual bool as_bool() const { throw std::runtime_error(type() + " is not a bool value"); } - [[noreturn]] virtual const std::vector & as_array() const { throw std::runtime_error(type() + " is not an array value"); } - [[noreturn]] virtual const std::vector> & as_ordered_object() const { throw std::runtime_error(type() + " is not an object value"); } - [[noreturn]] virtual value invoke(const func_args &) const { throw std::runtime_error(type() + " is not a function value"); } + virtual int64_t as_int() const { throw std::runtime_error(type() + " is not an int value"); } + virtual double as_float() const { throw std::runtime_error(type() + " is not a float value"); } + virtual string as_string() const { throw std::runtime_error(type() + " is not a string value"); } + virtual bool as_bool() const { throw std::runtime_error(type() + " is not a bool value"); } + virtual const std::vector & as_array() const { throw std::runtime_error(type() + " is not an array value"); } + virtual const std::vector> & as_ordered_object() const { throw std::runtime_error(type() + " is not an object value"); } + virtual value invoke(const func_args &) const { throw std::runtime_error(type() + " is not a function value"); } virtual bool is_none() const 
{ return false; } virtual bool is_undefined() const { return false; } - [[noreturn]] virtual const func_builtins & get_builtins() const { + virtual const func_builtins & get_builtins() const { throw std::runtime_error("No builtins available for type " + type()); } - [[noreturn]] virtual bool has_key(const value &) { throw std::runtime_error(type() + " is not an object value"); } - [[noreturn]] virtual void insert(const value & /* key */, const value & /* val */) { throw std::runtime_error(type() + " is not an object value"); } - [[noreturn]] virtual value & at(const value & /* key */, value & /* default_val */) { throw std::runtime_error(type() + " is not an object value"); } - [[noreturn]] virtual value & at(const value & /* key */) { throw std::runtime_error(type() + " is not an object value"); } - [[noreturn]] virtual value & at(const std::string & /* key */, value & /* default_val */) { throw std::runtime_error(type() + " is not an object value"); } - [[noreturn]] virtual value & at(const std::string & /* key */) { throw std::runtime_error(type() + " is not an object value"); } - [[noreturn]] virtual value & at(int64_t /* idx */, value & /* default_val */) { throw std::runtime_error(type() + " is not an array value"); } - [[noreturn]] virtual value & at(int64_t /* idx */) { throw std::runtime_error(type() + " is not an array value"); } + virtual bool has_key(const value &) { throw std::runtime_error(type() + " is not an object value"); } + virtual void insert(const value & /* key */, const value & /* val */) { throw std::runtime_error(type() + " is not an object value"); } + virtual value & at(const value & /* key */, value & /* default_val */) { throw std::runtime_error(type() + " is not an object value"); } + virtual value & at(const value & /* key */) { throw std::runtime_error(type() + " is not an object value"); } + virtual value & at(const std::string & /* key */, value & /* default_val */) { throw std::runtime_error(type() + " is not an object value"); } 
+ virtual value & at(const std::string & /* key */) { throw std::runtime_error(type() + " is not an object value"); } + virtual value & at(int64_t /* idx */, value & /* default_val */) { throw std::runtime_error(type() + " is not an array value"); } + virtual value & at(int64_t /* idx */) { throw std::runtime_error(type() + " is not an array value"); } virtual bool is_numeric() const { return false; } virtual bool is_hashable() const { return false; } From 60717b3e5a697ff57144cb1af261b3370697e82d Mon Sep 17 00:00:00 2001 From: Piotr Wilkin Date: Wed, 4 Feb 2026 22:14:36 +0100 Subject: [PATCH 18/39] Fix pesky issue on optional trailing arguments in function calls for TAGGED format --- common/chat-auto-parser-generator.cpp | 6 +++ tests/test-chat.cpp | 72 +++++++++++++++++++++++++++ 2 files changed, 78 insertions(+) diff --git a/common/chat-auto-parser-generator.cpp b/common/chat-auto-parser-generator.cpp index ba0cf66b08..87d431add3 100644 --- a/common/chat-auto-parser-generator.cpp +++ b/common/chat-auto-parser-generator.cpp @@ -309,6 +309,12 @@ common_peg_parser universal_peg_generator::build_tool_parser( if (!m.func_close.empty()) { func_parser = func_parser + p.space() + p.tool_close(p.literal(m.func_close)); + } else if (!m.per_call_end.empty()) { + // When there's no func_close but there is a per_call_end marker, use peek() to ensure + // we only emit tool_close when we can actually see the closing marker. This prevents + // premature closing during partial parsing when we've seen e.g. "" (end) or "" prefix that failed to match. 
+ func_parser = func_parser + p.tool_close(p.peek(p.literal(m.per_call_end))); } else { func_parser = func_parser + p.tool_close(p.space()); // force this to process tool closing callbacks in mapper } diff --git a/tests/test-chat.cpp b/tests/test-chat.cpp index 18d8052a5d..e9a18f7f4a 100644 --- a/tests/test-chat.cpp +++ b/tests/test-chat.cpp @@ -338,6 +338,24 @@ static common_chat_tool magic_tool{ })", }; +static common_chat_tool magic_int_tool{ + /* .name = */ "magic_int", + /* .description = */ "Magic tool that takes a hash", + /* .parameters = */ R"({ + "type": "object", + "properties": { + "ref": { + "type": "integer" + }, + "name": { + "type": "string" + } + }, + "required": ["ref"] + })", +}; + + static std::vector tools{ special_function_tool, special_function_tool_with_optional_param, python_tool, html_tool, todo_list }; @@ -2115,6 +2133,60 @@ static void test_template_output_peg_parsers(bool detailed_debug) { { "magic", R"({"name": "fooBar", "ref": "5123123"})", {} }, }) .run(); + + // Test that numeric values are correctly interpreted as numbers when schema calls for number + tst.test( + "Let me call the special function\n" + "\n" + "\n" + "\n" + "\n42555916\n\n" + "\n" + "") + .enable_thinking(true) + .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK) + .tools({ special_function_tool }) + .expect_reasoning("Let me call the special function") + .expect_tool_calls({ + { "special_function", R"({"arg1": 42555916})", {} }, + }) + .run(); + + tst.test( + "Let me call the special function with opt\n" + "\n" + "\n" + "\n" + "\n42555916\n\n" + "\n" + "") + .enable_thinking(true) + .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK) + .tools({ special_function_tool_with_optional_param }) + .expect_reasoning("Let me call the special function with opt") + .expect_tool_calls({ + { "special_function_with_opt", R"({"arg1": 42555916})", {} }, + }) + .run(); + + tst.test( + "Let me call the magic_int function\n" + "\n" + "\n" + "\n" + "\n42555916\n\n" + "\nbaz\n\n" + 
"\n" + "") + .enable_thinking(true) + .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK) + .tools({ magic_int_tool }) + .expect_reasoning("Let me call the magic_int function") + .expect_tool_calls({ + { "magic_int", R"({"ref": 42555916, "name": "baz"})", {} }, + }) + .run(); + } } From b260de1d868f3ec06eb7b385b7683dd4065ff1b1 Mon Sep 17 00:00:00 2001 From: Piotr Wilkin Date: Thu, 5 Feb 2026 12:27:09 +0100 Subject: [PATCH 19/39] More edge cases --- common/chat-peg-parser.cpp | 58 ++++++++++++++----------- tests/test-chat.cpp | 87 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 121 insertions(+), 24 deletions(-) diff --git a/common/chat-peg-parser.cpp b/common/chat-peg-parser.cpp index f84b6ba298..2922c8d582 100644 --- a/common/chat-peg-parser.cpp +++ b/common/chat-peg-parser.cpp @@ -272,13 +272,42 @@ void common_chat_peg_unified_mapper::map(const common_peg_ast_node & node) { std::string value_content = std::string(trim_trailing_space(trim_leading_space(node.text, 1), 1)); std::string value_to_add; - if (!value_content.empty()) { + if (value_content.empty() && is_arg_string_value) { + // Empty string value - start with opening quote + // arg_close will add the closing quote + if (!current_tool->name.empty()) { + value_to_add = "\""; + needs_closing_quote = true; + } else { + value_to_add = "\""; + buffer_needs_closing_quote = true; + } + } else if (!value_content.empty() && is_arg_string_value) { + // Schema declares this as string type - always treat as literal string value + // Never try to parse as JSON (this ensures consistent handling of quoted strings + // like "foo" which would otherwise be parsed as JSON string 'foo') + if (!current_tool->name.empty()) { + if (!needs_closing_quote) { + value_to_add = "\""; + needs_closing_quote = true; + } + } else { + if (!buffer_needs_closing_quote) { + value_to_add = "\""; + buffer_needs_closing_quote = true; + } + } + // Escape special characters in the string content + std::string escaped = 
json(value_content).dump(); + // Remove the surrounding quotes from the escaped string + if (escaped.size() >= 2 && escaped.front() == '"' && escaped.back() == '"') { + escaped = escaped.substr(1, escaped.size() - 2); + } + value_to_add += escaped; + } else if (!value_content.empty()) { // For potential containers, normalize Python-style single quotes to JSON double quotes first // This ensures consistent output during both partial and final parsing - // Note: is_arg_string_value means the schema explicitly declares this as a string type, - // so we should NOT treat it as a potential container even if it starts with [ or { - bool is_potential_container = !is_arg_string_value && - (value_content[0] == '[' || value_content[0] == '{'); + bool is_potential_container = value_content[0] == '[' || value_content[0] == '{'; if (is_potential_container) { value_content = normalize_quotes_to_json(value_content); } @@ -301,25 +330,6 @@ void common_chat_peg_unified_mapper::map(const common_peg_ast_node & node) { } else { buffer_needs_closing_quote = true; } - } else if (is_arg_string_value) { - // Schema declares this as string type but it parsed as non-string (e.g., number) - // Force treatment as string value - add opening quote and escape content - if (!current_tool->name.empty()) { - if (!needs_closing_quote) { - value_to_add = "\""; - needs_closing_quote = true; - } - } else { - if (!buffer_needs_closing_quote) { - value_to_add = "\""; - buffer_needs_closing_quote = true; - } - } - std::string escaped = json(value_content).dump(); - if (escaped.size() >= 2 && escaped.front() == '"' && escaped.back() == '"') { - escaped = escaped.substr(1, escaped.size() - 2); - } - value_to_add += escaped; } else { // For non-string values (number, bool, null, object, array), add raw value content // Using raw content instead of dump() ensures monotonicity for streaming diff --git a/tests/test-chat.cpp b/tests/test-chat.cpp index e9a18f7f4a..f55ab398bd 100644 --- a/tests/test-chat.cpp +++ 
b/tests/test-chat.cpp @@ -355,6 +355,40 @@ static common_chat_tool magic_int_tool{ })", }; +static common_chat_tool string_param_tool{ + /* .name = */ "string_param", + /* .description = */ "Tool with string parameter for testing", + /* .parameters = */ R"({ + "type": "object", + "properties": { + "text": { + "type": "string", + "description": "A text parameter" + } + }, + "required": [] + })", +}; + +static common_chat_tool quoted_unquoted_tool{ + /* .name = */ "quoted_unquoted", + /* .description = */ "Tool with two string parameters, one for quoted string, one for unquoted", + /* .parameters = */ R"({ + "type": "object", + "properties": { + "quoted": { + "type": "string", + "description": "Quoted value" + }, + "unquoted": { + "type": "string", + "description": "Unquoted value" + } + }, + "required": ["quoted", "unquoted"] + })", +}; + static std::vector tools{ special_function_tool, special_function_tool_with_optional_param, python_tool, html_tool, todo_list }; @@ -2187,6 +2221,59 @@ static void test_template_output_peg_parsers(bool detailed_debug) { }) .run(); + tst.test( + "Call string_param with empty text\n" + "\n" + "\n" + "\n" + "\n\n\n" + "\n" + "") + .enable_thinking(true) + .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK) + .tools({ string_param_tool }) + .expect_reasoning("Call string_param with empty text") + .expect_tool_calls({ + { "string_param", R"({"text": ""})", {} }, + }) + .run(); + + tst.test( + "Test simple quoted unquoted\n" + "\n" + "\n" + "\n" + "\n\"foo\"\n\n" + "\nfoo\n\n" + "\n" + "") + .enable_thinking(true) + .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK) + .tools({ quoted_unquoted_tool }) + .expect_reasoning("Test simple quoted unquoted") + .expect_tool_calls({ + { "quoted_unquoted", R"({"quoted": "\"foo\"", "unquoted": "foo"})", {} }, + }) + .run(); + + tst.test( + "Test complex quoted unquoted\n" + "\n" + "\n" + "\n" + "\n\"printf(\\\"foo\\\");\"\n\n" + "\nprintf(\"foo\");\n\n" + "\n" + "") + .enable_thinking(true) + 
.reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK) + .tools({ quoted_unquoted_tool }) + .expect_reasoning("Test complex quoted unquoted") + .expect_tool_calls({ + { "quoted_unquoted", R"({ "quoted" : "\"printf(\\\"foo\\\");\"", "unquoted": "printf(\"foo\");" })", {} } + }) + .run(); + } } From 2081e9b056bbf1dd47c96799fe7bf4215f04f1f5 Mon Sep 17 00:00:00 2001 From: Piotr Wilkin Date: Fri, 6 Feb 2026 14:35:05 +0100 Subject: [PATCH 20/39] Fix number partial parsing issue --- common/chat-peg-parser.h | 2 +- common/peg-parser.cpp | 6 ++++- tests/test-chat.cpp | 49 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 55 insertions(+), 2 deletions(-) diff --git a/common/chat-peg-parser.h b/common/chat-peg-parser.h index 7304ca7e61..f5d49a403a 100644 --- a/common/chat-peg-parser.h +++ b/common/chat-peg-parser.h @@ -77,7 +77,7 @@ class common_chat_peg_unified_builder : public common_chat_peg_builder { // Use for schema-declared string types - won't be treated as potential JSON container common_peg_parser tool_arg_string_value(const common_peg_parser & p) { return tag(TOOL_ARG_STRING_VALUE, p); } - common_peg_parser tool_arg_json_value(const common_peg_parser & p) { return tag(TOOL_ARG_VALUE, p); } + common_peg_parser tool_arg_json_value(const common_peg_parser & p) { return atomic(tag(TOOL_ARG_VALUE, p)); } // Legacy-compatible helper for building standard JSON tool calls // Used by tests and manual parsers diff --git a/common/peg-parser.cpp b/common/peg-parser.cpp index 7a4c1cc398..f1b10b21a5 100644 --- a/common/peg-parser.cpp +++ b/common/peg-parser.cpp @@ -1307,7 +1307,11 @@ common_peg_parser common_peg_parser_builder::json_number() { auto int_part = choice({ literal("0"), sequence({ digit1_9, chars("[0-9]", 0, -1) }) }); auto frac = sequence({ literal("."), digits }); auto exp = sequence({ choice({ literal("e"), literal("E") }), optional(chars("[+-]", 1, 1)), digits }); - return sequence({ optional(literal("-")), int_part, optional(frac), optional(exp), space() 
}); + // Negative lookahead: only commit the number when the next character can't extend it. + // At EOF in partial mode, chars returns NEED_MORE → negate propagates NEED_MORE → number not committed. + // This prevents premature commits of partial numbers (e.g. "3" when "3.14" is incoming). + auto not_number_continuation = negate(chars("[0-9.eE+-]", 1, 1)); + return sequence({ optional(literal("-")), int_part, optional(frac), optional(exp), not_number_continuation, space() }); }); } diff --git a/tests/test-chat.cpp b/tests/test-chat.cpp index f55ab398bd..e64e362129 100644 --- a/tests/test-chat.cpp +++ b/tests/test-chat.cpp @@ -355,6 +355,21 @@ static common_chat_tool magic_int_tool{ })", }; +static common_chat_tool amount_tool{ + /* .name = */ "amount", + /* .description = */ "Amount converter", + /* .parameters = */ R"({ + "type": "object", + "properties": { + "orig": { + "type": "number" + } + }, + "required": ["orig"] + })", +}; + + static common_chat_tool string_param_tool{ /* .name = */ "string_param", /* .description = */ "Tool with string parameter for testing", @@ -2274,6 +2289,40 @@ static void test_template_output_peg_parsers(bool detailed_debug) { }) .run(); + tst.test( + "Test negative number\n" + "\n" + "\n" + "\n" + "\n-14\n\n" + "\n" + "") + .enable_thinking(true) + .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK) + .tools({ magic_int_tool }) + .expect_reasoning("Test negative number") + .expect_tool_calls({ + { "magic_int", R"({ "ref" : -14 })", {} } + }) + .run(); + + tst.test( + "Test decimal number\n" + "\n" + "\n" + "\n" + "\n3.14\n\n" + "\n" + "") + .enable_thinking(true) + .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK) + .tools({ amount_tool }) + .expect_reasoning("Test decimal number") + .expect_tool_calls({ + { "amount", R"({ "orig" : 3.14 })", {} } + }) + .run(); + } } From bd549b3b37c0d797f28913331f5706d214ea16cc Mon Sep 17 00:00:00 2001 From: Piotr Wilkin Date: Sat, 7 Feb 2026 23:24:29 +0100 Subject: [PATCH 21/39] Fix case with 
object inside object, refactor long methods. --- common/chat-auto-parser-generator.cpp | 438 ++++++++-------- common/chat-auto-parser.h | 16 + common/chat-diff-analyzer.h | 12 - common/chat-peg-parser.cpp | 727 ++++++++++++-------------- common/chat-peg-parser.h | 32 +- tests/test-chat.cpp | 43 +- 6 files changed, 656 insertions(+), 612 deletions(-) diff --git a/common/chat-auto-parser-generator.cpp b/common/chat-auto-parser-generator.cpp index 87d431add3..e9fe71c1d6 100644 --- a/common/chat-auto-parser-generator.cpp +++ b/common/chat-auto-parser-generator.cpp @@ -133,234 +133,252 @@ common_peg_parser universal_peg_generator::build_tool_parser( const templates_params & inputs, const common_peg_parser & reasoning) { + switch (analysis.tools) { + case tool_format::JSON_NATIVE: + return build_tool_parser_json_native(p, analysis, inputs, reasoning); + case tool_format::TAG_WITH_JSON: + return build_tool_parser_tag_json(p, analysis, inputs, reasoning); + case tool_format::TAG_WITH_TAGGED: + return build_tool_parser_tag_tagged(p, analysis, inputs, reasoning); + default: + GGML_ABORT("Unable to create tool parser"); + } +} + +common_peg_parser universal_peg_generator::build_tool_parser_json_native( + common_chat_peg_unified_builder & p, + const diff_analysis_result & analysis, + const templates_params & inputs, + const common_peg_parser & reasoning) { + const auto & m = analysis.markers; - // Build tool choice parser based on format + // Build effective field names with dot notation if function_field is set + std::string name_field = analysis.name_field; + std::string args_field = analysis.args_field; + + if (!analysis.function_field.empty() && + analysis.function_field != "function" && + name_field.find('.') == std::string::npos) { + name_field = analysis.function_field + "." + name_field; + args_field = analysis.function_field + "." 
+ args_field; + } + + auto tools_parser = p.standard_json_tools( + m.tool_section_start, + m.tool_section_end, + inputs.tools, + inputs.parallel_tool_calls, + inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED, + name_field, + args_field, + analysis.tools_array_wrapped, + analysis.fun_name_is_key, + analysis.id_field, + analysis.gen_id_field, + analysis.parameter_order + ); + + // Handle content wrappers if present + if (analysis.content == content_mode::ALWAYS_WRAPPED && + !m.content_start.empty() && !m.content_end.empty()) { + auto wrapped_content = p.optional(m.content_start + p.content(p.until(m.content_end)) + m.content_end); + return reasoning + wrapped_content + tools_parser + p.end(); + } + + auto content_before_tools = m.tool_section_start.empty() ? p.eps() : p.until(m.tool_section_start); + return reasoning + p.optional(p.content(content_before_tools)) + tools_parser + p.end(); +} + +common_peg_parser universal_peg_generator::build_tool_parser_tag_json( + common_chat_peg_unified_builder & p, + const diff_analysis_result & analysis, + const templates_params & inputs, + const common_peg_parser & reasoning) { + + const auto & m = analysis.markers; common_peg_parser tool_choice = p.choice(); - if (analysis.tools == tool_format::JSON_NATIVE) { - // Pure JSON format: use standard_json_tools helper - // Build effective field names with dot notation if function_field is set - std::string name_field = analysis.name_field; - std::string args_field = analysis.args_field; + foreach_function(inputs.tools, [&](const json & tool) { + const auto & function = tool.at("function"); + std::string name = function.at("name"); + const auto & schema = function.at("parameters"); - if (!analysis.function_field.empty() && - analysis.function_field != "function" && - name_field.find('.') == std::string::npos) { - name_field = analysis.function_field + "." + name_field; - args_field = analysis.function_field + "." 
+ args_field; + // Build call_id parser based on position (if supported) + common_peg_parser call_id_section = p.eps(); + if (analysis.call_id_pos == call_id_position::BETWEEN_FUNC_AND_ARGS && + !m.call_id_prefix.empty() && !m.call_id_suffix.empty()) { + call_id_section = p.optional(m.call_id_prefix + p.tool_id(p.until(m.call_id_suffix))) + m.call_id_suffix; } - auto tools_parser = p.standard_json_tools( - m.tool_section_start, - m.tool_section_end, - inputs.tools, - inputs.parallel_tool_calls, - inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED, - name_field, - args_field, - analysis.tools_array_wrapped, - analysis.fun_name_is_key, - analysis.id_field, - analysis.gen_id_field, - analysis.parameter_order - ); + auto func_parser = p.tool_open(m.func_name_prefix + p.tool_name(p.literal(name)) + m.func_name_suffix) + + call_id_section + + p.tool_args(p.schema(p.json(), "tool-" + name + "-schema", schema)); - // Handle content wrappers if present - if (analysis.content == content_mode::ALWAYS_WRAPPED && - !m.content_start.empty() && !m.content_end.empty()) { - auto wrapped_content = p.optional(m.content_start + p.content(p.until(m.content_end)) + m.content_end); - return reasoning + wrapped_content + tools_parser + p.end(); + if (!m.func_close.empty()) { + func_parser = func_parser + m.func_close; } - auto content_before_tools = m.tool_section_start.empty() ? 
p.eps() : p.until(m.tool_section_start); - return reasoning + p.optional(p.content(content_before_tools)) + tools_parser + p.end(); - } + tool_choice |= p.rule("tool-" + name, func_parser); + }); - if (analysis.tools == tool_format::TAG_WITH_JSON) { - // Tag-based with JSON args: {args} - // With optional call_id: [CALL_ID]id[ARGS]{args} - foreach_function(inputs.tools, [&](const json & tool) { - const auto & function = tool.at("function"); - std::string name = function.at("name"); - const auto & schema = function.at("parameters"); + auto require_calls = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED; - // Build call_id parser based on position (if supported) - common_peg_parser call_id_section = p.eps(); - if (analysis.call_id_pos == call_id_position::BETWEEN_FUNC_AND_ARGS && - !m.call_id_prefix.empty() && !m.call_id_suffix.empty()) { - // Optional call_id followed by required call_id_suffix (which is also args_start) - // Format: optional([CALL_ID] + call_id_value) + [ARGS] - call_id_section = p.optional(m.call_id_prefix + p.tool_id(p.until(m.call_id_suffix))) + m.call_id_suffix; - } + common_peg_parser tool_calls = p.eps(); - auto func_parser = p.tool_open(m.func_name_prefix + p.tool_name(p.literal(name)) + m.func_name_suffix) + - call_id_section + - p.tool_args(p.schema(p.json(), "tool-" + name + "-schema", schema)); - - if (!m.func_close.empty()) { - func_parser = func_parser + m.func_close; - } - - tool_choice |= p.rule("tool-" + name, func_parser); - }); - - auto require_calls = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED; - - common_peg_parser tool_calls = p.eps(); - - if (!m.per_call_start.empty()) { - // Per-call wrapping: each call individually wrapped - auto wrapped_call = m.per_call_start + tool_choice + m.per_call_end; - if (inputs.parallel_tool_calls) { - tool_calls = p.trigger_rule("tool-call", - wrapped_call + p.zero_or_more(p.space() + wrapped_call)); - } else { - tool_calls = p.trigger_rule("tool-call", wrapped_call); - } - if 
(!m.tool_section_start.empty()) { - tool_calls = p.trigger_rule("tool-calls", p.literal(m.tool_section_start) + p.space() + - tool_calls + p.space() + (m.tool_section_end.empty() ? p.end() : p.literal(m.tool_section_end))); - } + if (!m.per_call_start.empty()) { + auto wrapped_call = m.per_call_start + tool_choice + m.per_call_end; + if (inputs.parallel_tool_calls) { + tool_calls = p.trigger_rule("tool-call", + wrapped_call + p.zero_or_more(p.space() + wrapped_call)); } else { - std::string separator = m.call_separator; - if (separator.empty()) { - separator = ", "; // Default - } - - if (inputs.parallel_tool_calls) { - tool_calls = p.trigger_rule("tool-call", - m.tool_section_start + tool_choice + p.zero_or_more(separator + tool_choice) + m.tool_section_end); - } else { - tool_calls = p.trigger_rule("tool-call", - m.tool_section_start + tool_choice + m.tool_section_end); - } + tool_calls = p.trigger_rule("tool-call", wrapped_call); + } + if (!m.tool_section_start.empty()) { + tool_calls = p.trigger_rule("tool-calls", p.literal(m.tool_section_start) + p.space() + + tool_calls + p.space() + (m.tool_section_end.empty() ? p.end() : p.literal(m.tool_section_end))); + } + } else { + std::string separator = m.call_separator; + if (separator.empty()) { + separator = ", "; // Default } - if (!require_calls) { - tool_calls = p.optional(tool_calls); - } - - std::string trigger_marker = !m.tool_section_start.empty() ? m.tool_section_start : m.per_call_start; - auto content_before_tools = trigger_marker.empty() ? 
p.eps() : p.until(trigger_marker); - return reasoning + p.optional(p.content(content_before_tools)) + tool_calls + p.end(); - } - - if (analysis.tools == tool_format::TAG_WITH_TAGGED) { - // Tag-based with tagged args: value - foreach_function(inputs.tools, [&](const json & tool) { - const auto & function = tool.at("function"); - std::string name = function.at("name"); - const auto & params = function.at("parameters"); - - if (!params.contains("properties") || !params.at("properties").is_object()) { - return; - } - - const auto & properties = params.at("properties"); - std::set required; - if (params.contains("required") && params.at("required").is_array()) { - params.at("required").get_to(required); - } - - // Build parser for each argument - std::vector arg_parsers; - for (const auto & [param_name, param_schema] : properties.items()) { - bool is_required = required.find(param_name) != required.end(); - auto type = param_schema.value("type", "object"); - - auto arg = p.tool_arg( - p.tool_arg_open(m.arg_name_prefix + p.tool_arg_name(p.literal(param_name)) + m.arg_name_suffix) + m.arg_value_prefix + - (type == "string" ? 
- p.tool_arg_string_value(p.schema(p.until(m.arg_value_suffix), - "tool-" + name + "-arg-" + param_name + "-schema", param_schema, true)) : - p.tool_arg_json_value(p.schema(p.json(), - "tool-" + name + "-arg-" + param_name + "-schema", param_schema)) + p.space()) + - p.tool_arg_close(p.literal(m.arg_value_suffix)) - ); - - if (is_required) { - arg_parsers.push_back(p.rule("tool-" + name + "-arg-" + param_name, arg)); - } else { - arg_parsers.push_back(p.optional(p.rule("tool-" + name + "-arg-" + param_name, arg))); - } - } - - // Build arg sequence with space() between consecutive args - common_peg_parser args_seq = p.eps(); - for (size_t i = 0; i < arg_parsers.size(); i++) { - if (i > 0) { - args_seq = args_seq + p.space(); - } - args_seq = args_seq + arg_parsers[i]; - } - - // Build call_id parser based on position (if supported) - common_peg_parser call_id_section = p.eps(); - if (analysis.call_id_pos == call_id_position::BETWEEN_FUNC_AND_ARGS && - !m.call_id_prefix.empty() && !m.call_id_suffix.empty()) { - // Optional call_id followed by required call_id_suffix - call_id_section = p.optional(m.call_id_prefix + p.tool_id(p.until(m.call_id_suffix))) + m.call_id_suffix; - } - - auto func_parser = p.tool_open(m.func_name_prefix + p.tool_name(p.literal(name)) + m.func_name_suffix) + - call_id_section + - p.space() + args_seq; - - if (!m.func_close.empty()) { - func_parser = func_parser + p.space() + p.tool_close(p.literal(m.func_close)); - } else if (!m.per_call_end.empty()) { - // When there's no func_close but there is a per_call_end marker, use peek() to ensure - // we only emit tool_close when we can actually see the closing marker. This prevents - // premature closing during partial parsing when we've seen e.g. "" (end) or "" prefix that failed to match. 
- func_parser = func_parser + p.tool_close(p.peek(p.literal(m.per_call_end))); - } else { - func_parser = func_parser + p.tool_close(p.space()); // force this to process tool closing callbacks in mapper - } - - tool_choice |= p.rule("tool-" + name, func_parser); - }); - - auto require_tools = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED; - - common_peg_parser tool_calls = p.eps(); - - if (!m.per_call_start.empty()) { - // Per-call wrapping: each call individually wrapped (e.g., ...) - auto wrapped_call = m.per_call_start + p.space() + tool_choice + p.space() + m.per_call_end; - if (inputs.parallel_tool_calls) { - tool_calls = p.trigger_rule("tool-call", wrapped_call + p.zero_or_more(p.space() + wrapped_call)); - } else { - tool_calls = p.trigger_rule("tool-call", wrapped_call); - } - if (!m.tool_section_start.empty()) { - tool_calls = p.trigger_rule("tool-calls", p.literal(m.tool_section_start) + p.space() + - tool_calls + p.space() + (m.tool_section_end.empty() ? p.end() : p.literal(m.tool_section_end))); - } + if (inputs.parallel_tool_calls) { + tool_calls = p.trigger_rule("tool-call", + m.tool_section_start + tool_choice + p.zero_or_more(separator + tool_choice) + m.tool_section_end); } else { - std::string separator = m.call_separator; - if (separator.empty()) { - separator = ", "; // Default - } - - if (inputs.parallel_tool_calls) { - tool_calls = p.trigger_rule("tool-call", - m.tool_section_start + p.space() + tool_choice + p.zero_or_more(separator + tool_choice) + p.space() + m.tool_section_end); - } else { - tool_calls = p.trigger_rule("tool-call", - m.tool_section_start + p.space() + tool_choice + p.space() + m.tool_section_end); - } + tool_calls = p.trigger_rule("tool-call", + m.tool_section_start + tool_choice + m.tool_section_end); } - - if (!require_tools) { - tool_calls = p.optional(tool_calls); - } - - std::string trigger_marker = !m.tool_section_start.empty() ? 
m.tool_section_start : m.per_call_start; - auto content_before_tools = trigger_marker.empty() ? p.eps() : p.until(trigger_marker); - return reasoning + p.optional(p.content(content_before_tools)) + tool_calls + p.end(); } - GGML_ABORT("Unable to create tool parser"); + if (!require_calls) { + tool_calls = p.optional(tool_calls); + } + + std::string trigger_marker = !m.tool_section_start.empty() ? m.tool_section_start : m.per_call_start; + auto content_before_tools = trigger_marker.empty() ? p.eps() : p.until(trigger_marker); + return reasoning + p.optional(p.content(content_before_tools)) + tool_calls + p.end(); +} + +common_peg_parser universal_peg_generator::build_tool_parser_tag_tagged( + common_chat_peg_unified_builder & p, + const diff_analysis_result & analysis, + const templates_params & inputs, + const common_peg_parser & reasoning) { + + const auto & m = analysis.markers; + common_peg_parser tool_choice = p.choice(); + + foreach_function(inputs.tools, [&](const json & tool) { + const auto & function = tool.at("function"); + std::string name = function.at("name"); + const auto & params = function.at("parameters"); + + if (!params.contains("properties") || !params.at("properties").is_object()) { + return; + } + + const auto & properties = params.at("properties"); + std::set required; + if (params.contains("required") && params.at("required").is_array()) { + params.at("required").get_to(required); + } + + // Build parser for each argument + std::vector arg_parsers; + for (const auto & [param_name, param_schema] : properties.items()) { + bool is_required = required.find(param_name) != required.end(); + auto type = param_schema.value("type", "object"); + + auto arg = p.tool_arg( + p.tool_arg_open(m.arg_name_prefix + p.tool_arg_name(p.literal(param_name)) + m.arg_name_suffix) + m.arg_value_prefix + + (type == "string" ? 
+ p.tool_arg_string_value(p.schema(p.until(m.arg_value_suffix), + "tool-" + name + "-arg-" + param_name + "-schema", param_schema, true)) : + p.tool_arg_json_value(p.schema(p.json(), + "tool-" + name + "-arg-" + param_name + "-schema", param_schema)) + p.space()) + + p.tool_arg_close(p.literal(m.arg_value_suffix)) + ); + + if (is_required) { + arg_parsers.push_back(p.rule("tool-" + name + "-arg-" + param_name, arg)); + } else { + arg_parsers.push_back(p.optional(p.rule("tool-" + name + "-arg-" + param_name, arg))); + } + } + + // Build arg sequence with space() between consecutive args + common_peg_parser args_seq = p.eps(); + for (size_t i = 0; i < arg_parsers.size(); i++) { + if (i > 0) { + args_seq = args_seq + p.space(); + } + args_seq = args_seq + arg_parsers[i]; + } + + // Build call_id parser based on position (if supported) + common_peg_parser call_id_section = p.eps(); + if (analysis.call_id_pos == call_id_position::BETWEEN_FUNC_AND_ARGS && + !m.call_id_prefix.empty() && !m.call_id_suffix.empty()) { + call_id_section = p.optional(m.call_id_prefix + p.tool_id(p.until(m.call_id_suffix))) + m.call_id_suffix; + } + + auto func_parser = p.tool_open(m.func_name_prefix + p.tool_name(p.literal(name)) + m.func_name_suffix) + + call_id_section + + p.space() + args_seq; + + if (!m.func_close.empty()) { + func_parser = func_parser + p.space() + p.tool_close(p.literal(m.func_close)); + } else if (!m.per_call_end.empty()) { + // When there's no func_close but there is a per_call_end marker, use peek() to ensure + // we only emit tool_close when we can actually see the closing marker. This prevents + // premature closing during partial parsing when we've seen e.g. "" (end) or "" prefix that failed to match. 
+ func_parser = func_parser + p.tool_close(p.peek(p.literal(m.per_call_end))); + } else { + func_parser = func_parser + p.tool_close(p.space()); // force this to process tool closing callbacks in mapper + } + + tool_choice |= p.rule("tool-" + name, func_parser); + }); + + auto require_tools = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED; + + common_peg_parser tool_calls = p.eps(); + + if (!m.per_call_start.empty()) { + auto wrapped_call = m.per_call_start + p.space() + tool_choice + p.space() + m.per_call_end; + if (inputs.parallel_tool_calls) { + tool_calls = p.trigger_rule("tool-call", wrapped_call + p.zero_or_more(p.space() + wrapped_call)); + } else { + tool_calls = p.trigger_rule("tool-call", wrapped_call); + } + if (!m.tool_section_start.empty()) { + tool_calls = p.trigger_rule("tool-calls", p.literal(m.tool_section_start) + p.space() + + tool_calls + p.space() + (m.tool_section_end.empty() ? p.end() : p.literal(m.tool_section_end))); + } + } else { + std::string separator = m.call_separator; + if (separator.empty()) { + separator = ", "; // Default + } + + if (inputs.parallel_tool_calls) { + tool_calls = p.trigger_rule("tool-call", + m.tool_section_start + p.space() + tool_choice + p.zero_or_more(separator + tool_choice) + p.space() + m.tool_section_end); + } else { + tool_calls = p.trigger_rule("tool-call", + m.tool_section_start + p.space() + tool_choice + p.space() + m.tool_section_end); + } + } + + if (!require_tools) { + tool_calls = p.optional(tool_calls); + } + + std::string trigger_marker = !m.tool_section_start.empty() ? m.tool_section_start : m.per_call_start; + auto content_before_tools = trigger_marker.empty() ? 
p.eps() : p.until(trigger_marker); + return reasoning + p.optional(p.content(content_before_tools)) + tool_calls + p.end(); } diff --git a/common/chat-auto-parser.h b/common/chat-auto-parser.h index c6587667d1..40f1fbe1bb 100644 --- a/common/chat-auto-parser.h +++ b/common/chat-auto-parser.h @@ -51,4 +51,20 @@ class universal_peg_generator { const diff_analysis_result & analysis, const templates_params & inputs, const common_peg_parser & reasoning); + + // Per-format tool parser builders + static common_peg_parser build_tool_parser_json_native(common_chat_peg_unified_builder & p, + const diff_analysis_result & analysis, + const templates_params & inputs, + const common_peg_parser & reasoning); + + static common_peg_parser build_tool_parser_tag_json(common_chat_peg_unified_builder & p, + const diff_analysis_result & analysis, + const templates_params & inputs, + const common_peg_parser & reasoning); + + static common_peg_parser build_tool_parser_tag_tagged(common_chat_peg_unified_builder & p, + const diff_analysis_result & analysis, + const templates_params & inputs, + const common_peg_parser & reasoning); }; diff --git a/common/chat-diff-analyzer.h b/common/chat-diff-analyzer.h index 7933de5ce3..ce729df0e6 100644 --- a/common/chat-diff-analyzer.h +++ b/common/chat-diff-analyzer.h @@ -169,11 +169,7 @@ enum class tool_format { NONE, // No tool support detected JSON_NATIVE, // Pure JSON: {"name": "X", "arguments": {...}} TAG_WITH_JSON, // Tag-based with JSON args: {...} - BRACKET_TAG, // Bracket-tag: [TOOL_CALLS]name[CALL_ID]id[ARGS]{...} - PREFIXED_INDEXED, // Prefixed-indexed: functions.X:0{...} - RECIPIENT_BASED, // Recipient routing: >>>func_name\n{...} TAG_WITH_TAGGED, // Tag-based with tagged args: value - MARKDOWN_BLOCK, // Markdown code block: Action:\n```json\n[...]\n``` }; inline std::ostream & operator<<(std::ostream & os, const tool_format & format) { @@ -184,16 +180,8 @@ inline std::ostream & operator<<(std::ostream & os, const tool_format & format) 
return os << "JSON_NATIVE"; case tool_format::TAG_WITH_JSON: return os << "TAG_WITH_JSON"; - case tool_format::BRACKET_TAG: - return os << "BRACKET_TAG"; - case tool_format::PREFIXED_INDEXED: - return os << "PREFIXED_INDEXED"; - case tool_format::RECIPIENT_BASED: - return os << "RECIPIENT_BASED"; case tool_format::TAG_WITH_TAGGED: return os << "TAG_WITH_TAGGED"; - case tool_format::MARKDOWN_BLOCK: - return os << "MARKDOWN_BLOCK"; default: return os << "UNKNOWN"; } diff --git a/common/chat-peg-parser.cpp b/common/chat-peg-parser.cpp index 2922c8d582..cb38fb160f 100644 --- a/common/chat-peg-parser.cpp +++ b/common/chat-peg-parser.cpp @@ -35,6 +35,45 @@ static std::string_view trim(std::string_view sv) { return trim_trailing_space(trim_leading_space(sv, 1)); } +// Count the number of unclosed '{' braces in a JSON-like string, +// properly skipping braces inside quoted strings. +static int json_brace_depth(const std::string & s) { + int depth = 0; + bool in_string = false; + bool escaped = false; + for (char c : s) { + if (escaped) { + escaped = false; + continue; + } + if (c == '\\' && in_string) { + escaped = true; + continue; + } + if (c == '"') { + in_string = !in_string; + continue; + } + if (!in_string) { + if (c == '{') { + depth++; + } else if (c == '}') { + depth--; + } + } + } + return depth; +} + +// JSON-escape a string and return the inner content (without surrounding quotes). 
+static std::string escape_json_string_inner(const std::string & s) { + std::string escaped = json(s).dump(); + if (escaped.size() >= 2 && escaped.front() == '"' && escaped.back() == '"') { + return escaped.substr(1, escaped.size() - 2); + } + return escaped; +} + // Convert Python-style single-quoted strings to JSON double-quoted strings // Only converts outer string delimiters, properly handling escape sequences: // - {'key': 'value'} -> {"key": "value"} @@ -148,6 +187,10 @@ common_peg_parser common_chat_peg_builder::tag_with_safe_content(const std::stri return zero_or_more(choice({ p, content_chunk })); } +std::string & common_chat_peg_unified_mapper::args_target() { + return (current_tool && !current_tool->name.empty()) ? current_tool->arguments : args_buffer; +} + void common_chat_peg_unified_mapper::from_ast(const common_peg_ast_arena & arena, const common_peg_parse_result & parse_result_arg) { // Call base class to visit all nodes @@ -156,15 +199,12 @@ void common_chat_peg_unified_mapper::from_ast(const common_peg_ast_arena & ar // Flush any pending tool call that was started but never got a name // This happens during partial parsing when the tool call is incomplete if (pending_tool_call.has_value() && !pending_tool_call->name.empty()) { - // Transfer any buffered arguments if (!args_buffer.empty()) { pending_tool_call->arguments = args_buffer; } - // Close any open quotes in buffered args - if (buffer_needs_closing_quote && !pending_tool_call->arguments.empty()) { + if (closing_quote_pending && !pending_tool_call->arguments.empty()) { pending_tool_call->arguments += "\""; } - // Add the incomplete tool call to results result.tool_calls.push_back(pending_tool_call.value()); pending_tool_call.reset(); } @@ -187,15 +227,11 @@ void common_chat_peg_unified_mapper::map(const common_peg_ast_node & node) { bool is_arg_string_value = node.tag == common_chat_peg_unified_builder::TOOL_ARG_STRING_VALUE; if (is_tool_open) { - // Don't create tool call yet - wait for 
name to be known - // This prevents sending incomplete tool calls in streaming mode - pending_tool_call = common_chat_tool_call(); - current_tool = &pending_tool_call.value(); - arg_count = 0; - // Clear the arguments buffer for the new tool + pending_tool_call = common_chat_tool_call(); + current_tool = &pending_tool_call.value(); + arg_count = 0; args_buffer.clear(); - needs_closing_quote = false; - buffer_needs_closing_quote = false; + closing_quote_pending = false; } if (is_tool_id && current_tool) { @@ -208,15 +244,14 @@ void common_chat_peg_unified_mapper::map(const common_peg_ast_node & node) { if (is_tool_name && current_tool) { current_tool->name = std::string(trim_trailing_space(node.text)); - // Now that we have the name, we can populate the arguments from the buffer + // Now that we have the name, populate the arguments from the buffer if (!args_buffer.empty()) { current_tool->arguments = args_buffer; args_buffer.clear(); } else if (current_tool->arguments.empty()) { - // Initialize arguments if we're using tagged format and no buffered args current_tool->arguments = "{"; } - // Now that we have the name, add the tool call to the result + // Add the tool call to results so streaming can see it if (pending_tool_call.has_value()) { result.tool_calls.push_back(pending_tool_call.value()); pending_tool_call.reset(); @@ -225,28 +260,16 @@ void common_chat_peg_unified_mapper::map(const common_peg_ast_node & node) { } if (is_tool_args && current_tool) { - // For JSON format, the arguments come as a complete JSON object - // For tagged format, we build up arguments from individual arg_name/arg_value nodes - // Check if this looks like JSON (starts with {) vs tagged format (starts with <) + // For JSON format: arguments come as a complete JSON object + // For tagged format: built up from individual arg_name/arg_value nodes auto text = trim_trailing_space(node.text); if (!text.empty() && text.front() == '{') { - // If we have the tool name, populate directly; 
otherwise buffer - if (!current_tool->name.empty()) { - current_tool->arguments = std::string(text); - } else { - args_buffer = std::string(text); - } + args_target() = std::string(text); } - // If it's tagged format, we ignore this and let arg_name/arg_value build up the JSON } if (is_arg_open) { - // Reset for new argument - if (!current_tool->name.empty()) { - needs_closing_quote = false; - } else { - buffer_needs_closing_quote = false; - } + closing_quote_pending = false; } if (is_arg_name && current_tool) { @@ -257,15 +280,11 @@ void common_chat_peg_unified_mapper::map(const common_peg_ast_node & node) { arg_entry += json(trim(node.text)).dump() + ":"; ++arg_count; - // If we have the tool name, add directly; otherwise buffer - if (!current_tool->name.empty()) { - current_tool->arguments += arg_entry; - } else { - if (args_buffer.empty()) { - args_buffer = "{"; - } - args_buffer += arg_entry; + auto & target = args_target(); + if (target.empty()) { + target = "{"; } + target += arg_entry; } if ((is_arg_value || is_arg_string_value) && current_tool) { @@ -273,160 +292,83 @@ void common_chat_peg_unified_mapper::map(const common_peg_ast_node & node) { std::string value_to_add; if (value_content.empty() && is_arg_string_value) { - // Empty string value - start with opening quote - // arg_close will add the closing quote - if (!current_tool->name.empty()) { - value_to_add = "\""; - needs_closing_quote = true; - } else { - value_to_add = "\""; - buffer_needs_closing_quote = true; - } + // Empty string value - arg_close will add the closing quote + value_to_add = "\""; + closing_quote_pending = true; } else if (!value_content.empty() && is_arg_string_value) { // Schema declares this as string type - always treat as literal string value - // Never try to parse as JSON (this ensures consistent handling of quoted strings - // like "foo" which would otherwise be parsed as JSON string 'foo') - if (!current_tool->name.empty()) { - if (!needs_closing_quote) { - value_to_add 
= "\""; - needs_closing_quote = true; - } - } else { - if (!buffer_needs_closing_quote) { - value_to_add = "\""; - buffer_needs_closing_quote = true; - } + if (!closing_quote_pending) { + value_to_add = "\""; + closing_quote_pending = true; } - // Escape special characters in the string content - std::string escaped = json(value_content).dump(); - // Remove the surrounding quotes from the escaped string - if (escaped.size() >= 2 && escaped.front() == '"' && escaped.back() == '"') { - escaped = escaped.substr(1, escaped.size() - 2); - } - value_to_add += escaped; + value_to_add += escape_json_string_inner(value_content); } else if (!value_content.empty()) { - // For potential containers, normalize Python-style single quotes to JSON double quotes first - // This ensures consistent output during both partial and final parsing + // For potential containers, normalize Python-style single quotes to JSON double quotes bool is_potential_container = value_content[0] == '[' || value_content[0] == '{'; if (is_potential_container) { value_content = normalize_quotes_to_json(value_content); } // Try to parse as JSON value (number, bool, null, object, array) - // For strings, we need special handling to support incremental parsing try { json parsed = json::parse(value_content); if (parsed.is_string()) { - // For string values, don't add closing quote yet (added by arg_close) - // This ensures incremental parsing produces monotonic arguments + // Don't add closing quote yet (added by arg_close) for monotonic streaming std::string escaped = parsed.dump(); - // Remove the trailing quote if (!escaped.empty() && escaped.back() == '"') { escaped.pop_back(); } - value_to_add = escaped; - if (!current_tool->name.empty()) { - needs_closing_quote = true; - } else { - buffer_needs_closing_quote = true; - } + value_to_add = escaped; + closing_quote_pending = true; } else { - // For non-string values (number, bool, null, object, array), add raw value content - // Using raw content instead of 
dump() ensures monotonicity for streaming - // (prevents issues with spaces being removed by dump()) + // Non-string values: use raw content to preserve whitespace for monotonicity value_to_add = value_content; } } catch (...) { - // JSON parsing failed - content is either incomplete (partial) or not valid JSON - // Note: potential containers were already normalized above, so value_content - // already has double quotes if it started with [ or { - if (node.is_partial && is_potential_container) { - // During incremental parsing, if it looks like a JSON container, don't wrap in quotes yet - // and don't escape. Just pass through the (already normalized) content. + // Partial container: pass through the already-normalized content value_to_add = value_content; } else { - // Not valid JSON and NOT a potential partial container - treat as string value - // Add opening quote if not already in a string - if (!current_tool->name.empty()) { - if (!needs_closing_quote) { - value_to_add = "\""; - needs_closing_quote = true; - } - } else { - if (!buffer_needs_closing_quote) { - value_to_add = "\""; - buffer_needs_closing_quote = true; - } + // Not valid JSON - treat as string value + if (!closing_quote_pending) { + value_to_add = "\""; + closing_quote_pending = true; } - // Escape special characters in the string content - std::string escaped = json(value_content).dump(); - // Remove the surrounding quotes from the escaped string - if (escaped.size() >= 2 && escaped.front() == '"' && escaped.back() == '"') { - escaped = escaped.substr(1, escaped.size() - 2); - } - value_to_add += escaped; + value_to_add += escape_json_string_inner(value_content); } } } - // If we have the tool name, add directly; otherwise buffer - if (!current_tool->name.empty()) { - current_tool->arguments += value_to_add; - } else { - if (args_buffer.empty()) { - args_buffer = "{"; - } - args_buffer += value_to_add; - } + args_target() += value_to_add; } if (is_arg_close && current_tool) { - if 
(!current_tool->name.empty()) { - if (needs_closing_quote) { - current_tool->arguments += "\""; - needs_closing_quote = false; - } - } else { - if (buffer_needs_closing_quote) { - if (args_buffer.empty()) { - args_buffer = "{"; - } - args_buffer += "\""; - buffer_needs_closing_quote = false; - } + if (closing_quote_pending) { + args_target() += "\""; + closing_quote_pending = false; } } if (is_tool_close && current_tool) { - if (!current_tool->name.empty()) { - if (needs_closing_quote) { - current_tool->arguments += "\""; - needs_closing_quote = false; - } - if (!current_tool->arguments.empty() && current_tool->arguments.back() != '}') { - current_tool->arguments += "}"; - } - // If we have a pending tool call that wasn't added yet, add it now - if (pending_tool_call.has_value()) { + // Flush buffer to arguments if tool name was never seen + if (current_tool->name.empty() && !args_buffer.empty()) { + current_tool->arguments = args_buffer; + args_buffer.clear(); + } + // Close any pending string quote + if (closing_quote_pending) { + current_tool->arguments += "\""; + closing_quote_pending = false; + } + // Close any unclosed braces (accounts for nested objects) + for (int d = json_brace_depth(current_tool->arguments); d > 0; d--) { + current_tool->arguments += "}"; + } + // Add tool call to results if named; otherwise discard + if (pending_tool_call.has_value()) { + if (!current_tool->name.empty()) { result.tool_calls.push_back(pending_tool_call.value()); - pending_tool_call.reset(); } - } else { - // We're closing a tool without a name - flush the buffer - if (!args_buffer.empty()) { - current_tool->arguments = args_buffer; - args_buffer.clear(); - } - if (buffer_needs_closing_quote) { - current_tool->arguments += "\""; - buffer_needs_closing_quote = false; - } - // Close the arguments object if using tagged format - if (!current_tool->arguments.empty() && current_tool->arguments.back() != '}') { - current_tool->arguments += "}"; - } - // Don't add to result if no 
name - this prevents incomplete tool calls pending_tool_call.reset(); } } @@ -511,6 +453,241 @@ static std::pair parse_key_spec(const std::string & ke return {key.substr(0, dot_pos), key.substr(dot_pos + 1)}; } +// Mode 1: function_is_key — parse {"function_name": {...}} +common_peg_parser common_chat_peg_unified_builder::build_json_tools_function_is_key( + const nlohmann::json & tools, + const std::string & args_key, + const std::string & effective_args_key, + const std::string & call_id_key, + const std::string & gen_call_id_key) { + + auto tool_choices = choice(); + + for (const auto & tool_def : tools) { + if (!tool_def.contains("function")) { + continue; + } + const auto & function = tool_def.at("function"); + std::string name = function.at("name"); + nlohmann::json params = function.contains("parameters") ? function.at("parameters") : nlohmann::json::object(); + + // Build inner object fields + std::vector inner_fields; + + if (!call_id_key.empty()) { + auto id_parser = atomic( + literal("\"" + call_id_key + "\"") + space() + literal(":") + space() + + literal("\"") + tool_id(json_string_content()) + literal("\"") + ); + inner_fields.push_back(optional(id_parser + space() + optional(literal(",") + space()))); + } + + if (!gen_call_id_key.empty()) { + auto gen_id_parser = atomic( + literal("\"" + gen_call_id_key + "\"") + space() + literal(":") + space() + + choice({ + literal("\"") + tool_id(json_string_content()) + literal("\""), + tool_id(json_number()) + }) + ); + inner_fields.push_back(optional(gen_id_parser + space() + optional(literal(",") + space()))); + } + + // Arguments — either wrapped in args_key or parsed directly + common_peg_parser args_parser = eps(); + if (args_key.empty()) { + args_parser = tool_args(schema(json(), "tool-" + name + "-schema", params)); + } else { + args_parser = literal("\"" + effective_args_key + "\"") + space() + literal(":") + space() + + tool_args(schema(json(), "tool-" + name + "-schema", params)); + } + 
inner_fields.push_back(args_parser); + + // Build inner object parser + common_peg_parser inner_object = eps(); + if (args_key.empty() && inner_fields.size() == 1) { + inner_object = inner_fields[0]; + } else { + inner_object = literal("{") + space(); + for (size_t i = 0; i < inner_fields.size(); i++) { + inner_object = inner_object + inner_fields[i]; + if (i < inner_fields.size() - 1) { + inner_object = inner_object + space(); + } + } + inner_object = inner_object + space() + literal("}"); + } + + auto tool_parser = tool( + tool_open(literal("{")) + space() + + literal("\"") + tool_name(literal(name)) + literal("\"") + + space() + literal(":") + space() + + inner_object + + space() + tool_close(literal("}")) + ); + + tool_choices |= rule("tool-" + name, tool_parser); + } + + return tool_choices; +} + +// Mode 2: Nested keys (dot notation like "function.name") +common_peg_parser common_chat_peg_unified_builder::build_json_tools_nested_keys( + const nlohmann::json & tools, + const std::string & effective_name_key, + const std::string & effective_args_key, + const std::string & call_id_key, + const std::string & gen_call_id_key) { + + auto tool_choices = choice(); + + auto name_spec = parse_key_spec(effective_name_key); + auto args_spec = parse_key_spec(effective_args_key); + + std::string nested_prefix = !name_spec.first.empty() ? name_spec.first : args_spec.first; + std::string nested_name_field = !name_spec.first.empty() ? name_spec.second : effective_name_key; + std::string nested_args_field = !args_spec.first.empty() ? args_spec.second : effective_args_key; + + for (const auto & tool_def : tools) { + if (!tool_def.contains("function")) { + continue; + } + const auto & function = tool_def.at("function"); + std::string name = function.at("name"); + nlohmann::json params = function.contains("parameters") ? 
function.at("parameters") : nlohmann::json::object(); + + auto nested_name = literal("\"" + nested_name_field + "\"") + space() + literal(":") + space() + + literal("\"") + tool_name(literal(name)) + literal("\""); + auto nested_args = literal("\"" + nested_args_field + "\"") + space() + literal(":") + space() + + tool_args(schema(json(), "tool-" + name + "-schema", params)); + + auto nested_object = literal("{") + space() + + nested_name + space() + literal(",") + space() + + nested_args + + space() + literal("}"); + + // Format: { id?, "function": {...} } + auto tool_parser_body = tool_open(literal("{")) + space(); + + if (!call_id_key.empty()) { + auto id_spec = parse_key_spec(call_id_key); + if (id_spec.first.empty()) { + auto id_parser = atomic( + literal("\"" + call_id_key + "\"") + space() + literal(":") + space() + + literal("\"") + tool_id(json_string_content()) + literal("\"") + ); + tool_parser_body = tool_parser_body + optional(id_parser + space() + literal(",") + space()); + } + } + + if (!gen_call_id_key.empty()) { + auto gen_id_spec = parse_key_spec(gen_call_id_key); + if (gen_id_spec.first.empty()) { + auto gen_id_parser = atomic( + literal("\"" + gen_call_id_key + "\"") + space() + literal(":") + space() + + choice({ + literal("\"") + tool_id(json_string_content()) + literal("\""), + tool_id(json_number()) + }) + ); + tool_parser_body = tool_parser_body + optional(gen_id_parser + space() + literal(",") + space()); + } + } + + auto nested_field = literal("\"" + nested_prefix + "\"") + space() + literal(":") + space() + nested_object; + tool_parser_body = tool_parser_body + nested_field + space() + tool_close(literal("}")); + + tool_choices |= rule("tool-" + name, tool(tool_parser_body)); + } + + return tool_choices; +} + +// Mode 3: Flat keys with optional ID fields and parameter ordering +common_peg_parser common_chat_peg_unified_builder::build_json_tools_flat_keys( + const nlohmann::json & tools, + const std::string & effective_name_key, + const 
parser_pairs.end(),
+        [&parameters_order](const auto & a, const auto & b) {
+            auto pos_a = std::find(parameters_order.begin(), parameters_order.end(), a.second);
+            auto pos_b = std::find(parameters_order.begin(), parameters_order.end(), b.second);
+            size_t idx_a = (pos_a == parameters_order.end()) ? parameters_order.size() : std::distance(parameters_order.begin(), pos_a);
+            size_t idx_b = (pos_b == parameters_order.end()) ? parameters_order.size() : std::distance(parameters_order.begin(), pos_b);
+            return idx_a < idx_b;
+        });
+
+        auto ordered_body = tool_open(literal("{")) + space();
+        for (size_t i = 0; i < parser_pairs.size(); i++) {
+            ordered_body = ordered_body + parser_pairs[i].first;
+            if (i < parser_pairs.size() - 1) {
+                ordered_body = ordered_body + space() + literal(",") + space();
+            }
+        }
+        ordered_body = ordered_body + space() + tool_close(literal("}"));
+
+        tool_choices |= rule("tool-" + name, tool(ordered_body));
+    }
+
+    return tool_choices;
+}
+
 common_peg_parser common_chat_peg_unified_builder::standard_json_tools(
     const std::string & section_start,
     const std::string & section_end,
@@ -528,239 +705,20 @@ common_peg_parser common_chat_peg_unified_builder::standard_json_tools(
         return eps();
     }
 
-    // Build tool choices for JSON format
-    auto tool_choices = choice();
-    // auto other_member = json_string() + space() + literal(":") + space() + json();
-
-    // Determine effective field names
     std::string effective_name_key = name_key.empty() ? "name" : name_key;
     std::string effective_args_key = args_key.empty() ? 
"arguments" : args_key; - // Check if we have nested keys (dot notation) - auto name_spec = parse_key_spec(effective_name_key); - auto args_spec = parse_key_spec(effective_args_key); - bool has_nested_keys = !name_spec.first.empty() || !args_spec.first.empty(); - - // Mode 1: function_is_key - parse {"function_name": {...}} + // Dispatch to the appropriate builder based on the JSON layout mode + common_peg_parser tool_choices = eps(); if (function_is_key) { - for (const auto & tool_def : tools) { - if (!tool_def.contains("function")) { - continue; - } - const auto & function = tool_def.at("function"); - std::string name = function.at("name"); - nlohmann::json params = function.contains("parameters") ? function.at("parameters") : nlohmann::json::object(); - - // Build inner object fields - std::vector inner_fields; - - // Add optional string ID field - if (!call_id_key.empty()) { - auto id_parser = atomic( - literal("\"" + call_id_key + "\"") + space() + literal(":") + space() + - literal("\"") + tool_id(json_string_content()) + literal("\"") - ); - inner_fields.push_back(optional(id_parser + space() + optional(literal(",") + space()))); - } - - // Add optional generated integer ID field - if (!gen_call_id_key.empty()) { - auto gen_id_parser = atomic( - literal("\"" + gen_call_id_key + "\"") + space() + literal(":") + space() + - choice({ - literal("\"") + tool_id(json_string_content()) + literal("\""), - tool_id(json_number()) - }) - ); - inner_fields.push_back(optional(gen_id_parser + space() + optional(literal(",") + space()))); - } - - // Add arguments - either wrapped in args_key or parsed directly - common_peg_parser args_parser = eps(); - if (args_key.empty()) { - // Arguments are directly the inner object value: {"func_name": {"arg1": "val"}} - args_parser = tool_args(schema(json(), "tool-" + name + "-schema", params)); - } else { - // Arguments are wrapped in a key: {"func_name": {"arguments": {"arg1": "val"}}} - args_parser = literal("\"" + 
effective_args_key + "\"") + space() + literal(":") + space() + - tool_args(schema(json(), "tool-" + name + "-schema", params)); - } - inner_fields.push_back(args_parser); - - // Build inner object parser - no greedy other_member skipping to avoid consuming ID - common_peg_parser inner_object = eps(); - if (args_key.empty() && inner_fields.size() == 1) { - // Direct arguments: {"func_name": {"arg1": "val"}} - // The args_parser is already the full object schema - inner_object = inner_fields[0]; - } else { - // Wrapped arguments: {"func_name": {"arguments": {"arg1": "val"}}} - inner_object = literal("{") + space(); - for (size_t i = 0; i < inner_fields.size(); i++) { - inner_object = inner_object + inner_fields[i]; - if (i < inner_fields.size() - 1) { - inner_object = inner_object + space(); - } - } - inner_object = inner_object + space() + literal("}"); - } - - // Tool call format: { "function_name": { inner_object } } - auto tool_parser = tool( - tool_open(literal("{")) + space() + - literal("\"") + tool_name(literal(name)) + literal("\"") + - space() + literal(":") + space() + - inner_object + - space() + tool_close(literal("}")) - ); - - tool_choices |= rule("tool-" + name, tool_parser); - } - } - // Mode 2: Nested keys (dot notation like "function.name") - else if (has_nested_keys) { - // Group fields by prefix - std::string nested_prefix = !name_spec.first.empty() ? name_spec.first : args_spec.first; - std::string nested_name_field = !name_spec.first.empty() ? name_spec.second : effective_name_key; - std::string nested_args_field = !args_spec.first.empty() ? args_spec.second : effective_args_key; - - for (const auto & tool_def : tools) { - if (!tool_def.contains("function")) { - continue; - } - const auto & function = tool_def.at("function"); - std::string name = function.at("name"); - nlohmann::json params = function.contains("parameters") ? 
function.at("parameters") : nlohmann::json::object(); - - // Build nested object with name and arguments - auto nested_name = literal("\"" + nested_name_field + "\"") + space() + literal(":") + space() + - literal("\"") + tool_name(literal(name)) + literal("\""); - auto nested_args = literal("\"" + nested_args_field + "\"") + space() + literal(":") + space() + - tool_args(schema(json(), "tool-" + name + "-schema", params)); - - auto nested_object = literal("{") + space() + - nested_name + space() + literal(",") + space() + - nested_args + - space() + literal("}"); - - // Build top-level parser - simpler structure without greedy other_member skipping - // Format: { id?, "function": {...} } - auto tool_parser_body = tool_open(literal("{")) + space(); - - // Add optional string ID field at top level - if (!call_id_key.empty()) { - auto id_spec = parse_key_spec(call_id_key); - if (id_spec.first.empty()) { // Top-level ID field - auto id_parser = atomic( - literal("\"" + call_id_key + "\"") + space() + literal(":") + space() + - literal("\"") + tool_id(json_string_content()) + literal("\"") - ); - tool_parser_body = tool_parser_body + optional(id_parser + space() + literal(",") + space()); - } - } - - // Add optional generated integer ID field at top level - if (!gen_call_id_key.empty()) { - auto gen_id_spec = parse_key_spec(gen_call_id_key); - if (gen_id_spec.first.empty()) { // Top-level gen ID field - auto gen_id_parser = atomic( - literal("\"" + gen_call_id_key + "\"") + space() + literal(":") + space() + - choice({ - literal("\"") + tool_id(json_string_content()) + literal("\""), - tool_id(json_number()) - }) - ); - tool_parser_body = tool_parser_body + optional(gen_id_parser + space() + literal(",") + space()); - } - } - - // Add the nested object field - auto nested_field = literal("\"" + nested_prefix + "\"") + space() + literal(":") + space() + nested_object; - tool_parser_body = tool_parser_body + nested_field + space() + tool_close(literal("}")); - - 
tool_choices |= rule("tool-" + name, tool(tool_parser_body)); - } - } - // Mode 3: Flat keys (enhanced with ID fields and parameter ordering) - else { - auto name_key_parser = literal("\"" + effective_name_key + "\""); - auto args_key_parser = literal("\"" + effective_args_key + "\""); - - for (const auto & tool_def : tools) { - if (!tool_def.contains("function")) { - continue; - } - const auto & function = tool_def.at("function"); - std::string name = function.at("name"); - nlohmann::json params = function.contains("parameters") ? function.at("parameters") : nlohmann::json::object(); - - auto tool_name_ = name_key_parser + space() + literal(":") + space() + - literal("\"") + tool_name(literal(name)) + literal("\""); - auto tool_args_ = args_key_parser + space() + literal(":") + space() + - tool_args(schema(json(), "tool-" + name + "-schema", params)); - - // Build ID parsers if keys are provided - common_peg_parser id_parser = eps(); - if (!call_id_key.empty()) { - id_parser = atomic( - literal("\"" + call_id_key + "\"") + space() + literal(":") + space() + - choice({ - literal("\"") + tool_id(json_string_content()) + literal("\""), - tool_id(json_number()) - }) - ); - } - - common_peg_parser gen_id_parser = eps(); - if (!gen_call_id_key.empty()) { - gen_id_parser = atomic( - literal("\"" + gen_call_id_key + "\"") + space() + literal(":") + space() + - choice({ - literal("\"") + tool_id(json_string_content()) + literal("\""), - tool_id(json_number()) - }) - ); - } - - common_peg_parser tool_parser = eps(); - - // Use parameter ordering if provided - parse fields in specified order without greedy skipping - if (!parameters_order.empty()) { - } - // Build parser using parameter ordering (works with or without explicit parameters_order) - // Create list of (parser, key) pairs for all fields - std::vector> parser_pairs; - parser_pairs.emplace_back(tool_name_, effective_name_key); - parser_pairs.emplace_back(tool_args_, effective_args_key); - if (!call_id_key.empty()) 
{
-                parser_pairs.emplace_back(optional(id_parser), call_id_key);
-            }
-            if (!gen_call_id_key.empty()) {
-                parser_pairs.emplace_back(optional(gen_id_parser), gen_call_id_key);
-            }
-
-            // Sort by position in parameters_order (or at end if not present)
-            std::sort(parser_pairs.begin(), parser_pairs.end(),
-                [&parameters_order](const auto & a, const auto & b) {
-                    auto pos_a = std::find(parameters_order.begin(), parameters_order.end(), a.second);
-                    auto pos_b = std::find(parameters_order.begin(), parameters_order.end(), b.second);
-                    size_t idx_a = (pos_a == parameters_order.end()) ? parameters_order.size() : std::distance(parameters_order.begin(), pos_a);
-                    size_t idx_b = (pos_b == parameters_order.end()) ? parameters_order.size() : std::distance(parameters_order.begin(), pos_b);
-                    return idx_a < idx_b;
-                });
-
-            // Build ordered parser
-            auto ordered_body = tool_open(literal("{")) + space();
-            for (size_t i = 0; i < parser_pairs.size(); i++) {
-                ordered_body = ordered_body + parser_pairs[i].first;
-                if (i < parser_pairs.size() - 1) {
-                    ordered_body = ordered_body + space() + literal(",") + space();
-                }
-            }
-            ordered_body = ordered_body + space() + tool_close(literal("}"));
-            tool_parser = tool(ordered_body);
-
-            tool_choices |= rule("tool-" + name, tool_parser);
+        tool_choices = build_json_tools_function_is_key(tools, args_key, effective_args_key, call_id_key, gen_call_id_key);
+    } else {
+        auto name_spec = parse_key_spec(effective_name_key);
+        auto args_spec = parse_key_spec(effective_args_key);
+        if (!name_spec.first.empty() || !args_spec.first.empty()) {
+            tool_choices = build_json_tools_nested_keys(tools, effective_name_key, effective_args_key, call_id_key, gen_call_id_key);
+        } else {
+            tool_choices = build_json_tools_flat_keys(tools, effective_name_key, effective_args_key, call_id_key, gen_call_id_key, parameters_order);
+        }
     }
 
@@ -770,7 +728,6 @@ common_peg_parser common_chat_peg_unified_builder::standard_json_tools(
         tool_calls = tool_calls + zero_or_more(space() + 
literal(",") + space() + tool_choices); } - // Optionally wrap in array brackets if (array_wrapped) { tool_calls = literal("[") + space() + tool_calls + space() + literal("]"); } diff --git a/common/chat-peg-parser.h b/common/chat-peg-parser.h index f5d49a403a..c0392f0c5d 100644 --- a/common/chat-peg-parser.h +++ b/common/chat-peg-parser.h @@ -108,6 +108,27 @@ class common_chat_peg_unified_builder : public common_chat_peg_builder { const nlohmann::json & tools, bool parallel_tool_calls, bool force_tool_calls); + + private: + // Implementation helpers for standard_json_tools — one per JSON tool call layout mode + common_peg_parser build_json_tools_function_is_key(const nlohmann::json & tools, + const std::string & args_key, + const std::string & effective_args_key, + const std::string & call_id_key, + const std::string & gen_call_id_key); + + common_peg_parser build_json_tools_nested_keys(const nlohmann::json & tools, + const std::string & effective_name_key, + const std::string & effective_args_key, + const std::string & call_id_key, + const std::string & gen_call_id_key); + + common_peg_parser build_json_tools_flat_keys(const nlohmann::json & tools, + const std::string & effective_name_key, + const std::string & effective_args_key, + const std::string & call_id_key, + const std::string & gen_call_id_key, + const std::vector & parameters_order); }; inline common_peg_arena build_chat_peg_unified_parser( @@ -119,11 +140,14 @@ inline common_peg_arena build_chat_peg_unified_parser( class common_chat_peg_unified_mapper : public common_chat_peg_mapper { std::optional pending_tool_call; // Tool call waiting for name - common_chat_tool_call * current_tool = nullptr; - int arg_count = 0; - bool needs_closing_quote = false; + common_chat_tool_call * current_tool = nullptr; + int arg_count = 0; + bool closing_quote_pending = false; std::string args_buffer; // Buffer to delay arguments until tool name is known - bool buffer_needs_closing_quote = false; // Track quote state for 
buffered args + + // Returns a reference to the active argument destination string. + // Before tool_name is known, writes go to args_buffer; after, to current_tool->arguments. + std::string & args_target(); public: common_chat_peg_unified_mapper(common_chat_msg & msg) : common_chat_peg_mapper(msg) {} diff --git a/tests/test-chat.cpp b/tests/test-chat.cpp index e64e362129..d9f1eea2f2 100644 --- a/tests/test-chat.cpp +++ b/tests/test-chat.cpp @@ -369,6 +369,28 @@ static common_chat_tool amount_tool{ })", }; +static common_chat_tool imaginary_number_tool{ + /* .name = */ "imaginary_number", + /* .description = */ "Imaginary number converter", + /* .parameters = */ R"({ + "type": "object", + "properties": { + "number": { + "type": "object", + "properties": { + "real": { + "type": "number" + }, + "imaginary": { + "type": "number" + } + }, + "required": ["real", "imaginary"] + } + }, + "required": ["number"] + })", +}; static common_chat_tool string_param_tool{ /* .name = */ "string_param", @@ -394,7 +416,7 @@ static common_chat_tool quoted_unquoted_tool{ "quoted": { "type": "string", "description": "Quoted value" - }, + }, "unquoted": { "type": "string", "description": "Unquoted value" @@ -2323,6 +2345,25 @@ static void test_template_output_peg_parsers(bool detailed_debug) { }) .run(); + tst.test( + "Test imaginary number\n" + "\n" + "\n" + "\n" + "\n" + "{ \"real\": 3.14, \"imaginary\": 2.71 }\n" + "\n" + "\n" + "") + .enable_thinking(true) + .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK) + .tools({ imaginary_number_tool }) + .expect_reasoning("Test imaginary number") + .expect_tool_calls({ + { "imaginary_number", R"({ "number" : {"real":3.14,"imaginary":2.71 } })", {} } + }) + .run(); + } } From 92acde0890c53970f782fcad0cfabce8aa3bd456 Mon Sep 17 00:00:00 2001 From: Piotr Wilkin Date: Sat, 7 Feb 2026 23:39:50 +0100 Subject: [PATCH 22/39] Regenerate documentation --- docs/autoparser.md | 829 ++++++++++++++++++++------------------------- 1 file changed, 369 
insertions(+), 460 deletions(-) diff --git a/docs/autoparser.md b/docs/autoparser.md index 4b48cceb76..cdbcce23b8 100644 --- a/docs/autoparser.md +++ b/docs/autoparser.md @@ -10,45 +10,17 @@ The unified auto-parser uses a **pure differential, compositional approach** to - **Zero Hardcoded Patterns**: All markers extracted through template comparison (the **only heuristic** is JSON detection) - **Compositional Architecture**: Separate parsers for reasoning, content, and tools that compose cleanly -- **Variant Types**: Structural descriptions (strings) instead of forced enum classification -**Two-Phase Analysis**: +**Four-Phase Analysis**: -1. **Phase 1: Content & Reasoning Analysis** - Analyzes how the template handles basic content and reasoning, without considering tools -2. **Phase 2: Tool Call Analysis** - Analyzes tool calling patterns, layered on top of Phase 1 +1. **Phase 1: Reasoning Analysis** (R1-R3) - Detects reasoning markers and mode +2. **Phase 2: Content Analysis** (C1) - Detects content wrapping markers +3. **Phase 3: Tool Call Analysis** (T1-T7) - Extracts tool section, function, and call ID markers +4. **Phase 4: Argument Analysis** (A1-A2) - Extracts argument name/value markers (TAG_WITH_TAGGED only) ## Data Structures -### content_structure (Phase 1 Result) - -Describes how the template handles content and reasoning: - -```cpp -struct content_structure { - enum reasoning_mode_type { - REASONING_NONE, // No reasoning markers detected - REASONING_OPTIONAL, // ... may appear before content - REASONING_FORCED_OPEN, // Template ends with open reasoning tag OR starts implicitly (empty start, present end) - }; - - reasoning_mode_type reasoning_mode = REASONING_NONE; - std::string reasoning_start; // e.g., "", "<|START_THINKING|>" - std::string reasoning_end; // e.g., "", "<|END_THINKING|>" - - // Content wrapping mode - enum content_mode_type { - CONTENT_PLAIN, // No content markers - CONTENT_ALWAYS_WRAPPED, // ... 
always present - CONTENT_WRAPPED_WITH_REASONING, // Content wrapped only when reasoning present - }; - - content_mode_type content_mode = CONTENT_PLAIN; - std::string content_start; // e.g., "", "<|START_RESPONSE|>" - std::string content_end; // e.g., "", "<|END_RESPONSE|>" -}; -``` - -### diff_analysis_result (Analysis Result) +### diff_analysis_result The result of differential analysis contains all extracted markers and format classifications: @@ -58,77 +30,128 @@ struct diff_analysis_result { reasoning_mode reasoning = reasoning_mode::NONE; content_mode content = content_mode::PLAIN; tool_format tools = tool_format::NONE; - argument_format args = argument_format::JSON; // All extracted markers (see marker_registry below) marker_registry markers; - // JSON field names (for JSON-based formats) - std::string name_field = "name"; - std::string args_field = "arguments"; - std::string id_field; + // JSON field names (for JSON_NATIVE format) + bool fun_name_is_key = false; // Function name is the JSON key: {"func_name": {...}} + std::string function_field = "function"; // Outer object key (e.g., "function" in "function.name") + std::string name_field = "name"; + std::string args_field = "arguments"; + std::string id_field; // String call ID field (e.g., "id") + std::string gen_id_field; // Generated integer call ID field (e.g., "tool_call_id") + std::vector parameter_order; // Order of JSON fields for parsing + + // Call ID position (for non-JSON formats) + call_id_position call_id_pos = call_id_position::NONE; // Flags bool supports_tools = false; bool supports_parallel_calls = false; bool requires_nonnull_content = false; + bool tools_array_wrapped = false; // Tool calls wrapped in JSON array [...] - // Preserved tokens for tokenizer + // Preserved tokens for tokenizer (union of all non-empty markers) std::vector preserved_tokens; }; ``` -### marker_registry (Extracted Markers) +### Enums + +**`reasoning_mode`**: How the template handles reasoning/thinking blocks. 
+ +| Value | Description | +|----------------------|-------------------------------------------------------------------------------| +| `NONE` | No reasoning markers detected | +| `TAG_BASED` | Standard tag-based: `...` | +| `DELIMITER` | Delimiter-based: reasoning ends at delimiter (e.g., `[BEGIN FINAL RESPONSE]`) | +| `FORCED_OPEN` | Template ends with open reasoning tag (empty start, non-empty end) | +| `FORCED_CLOSED` | Both tags when disabled; only start tag when enabled | +| `TOOLS_ONLY` | Reasoning only appears when tool calls are present | + +**`content_mode`**: How the template wraps content. + +| Value | Description | +|----------------------------|------------------------------------------------------| +| `PLAIN` | No content markers | +| `ALWAYS_WRAPPED` | Content always wrapped: `...` | +| `WRAPPED_WITH_REASONING` | Content wrapped only when reasoning is present | + +**`tool_format`**: Classification of tool call structure. + +| Value | Description | +|--------------------|------------------------------------------------------------------| +| `NONE` | No tool support detected | +| `JSON_NATIVE` | Pure JSON: `{"name": "X", "arguments": {...}}` | +| `TAG_WITH_JSON` | Tag-based with JSON args: `{...}` | +| `TAG_WITH_TAGGED` | Tag-based with tagged args: `value` | + +**`call_id_position`**: Where call IDs appear relative to function name and arguments (for non-JSON formats). 
+ +| Value | Description | +|----------------------------|------------------------------------------| +| `NONE` | No call ID support detected | +| `PRE_FUNC_NAME` | Before function name | +| `BETWEEN_FUNC_AND_ARGS` | Between function name and arguments | +| `POST_ARGS` | After arguments | + +### marker_registry All markers are extracted via differential analysis without hardcoded patterns: ```cpp struct marker_registry { - // === Reasoning markers === - std::string reasoning_start; // e.g., "", "[THINK]", "<|START_THINKING|>" - std::string reasoning_end; // e.g., "", "[/THINK]", "<|END_THINKING|>" + // === Reasoning markers (from R1-R3) === + std::string reasoning_start; // e.g., "", "[THINK]", "<|START_THINKING|>", "" + std::string reasoning_end; // e.g., "", "[BEGIN FINAL RESPONSE]", "<|END_THINKING|>" - // === Content markers === - std::string content_start; // e.g., "", ">>>all\n" - std::string content_end; // e.g., "" + // === Content markers (from C1) === + std::string content_start; // e.g., "", "" + std::string content_end; // e.g., "", "" - // === Tool section markers === + // === Tool section markers (from T1-T2) === std::string tool_section_start; // e.g., "", "[TOOL_CALLS]" - std::string tool_section_end; // e.g., "", "]" - std::string per_call_start; // e.g., "\u2985" (for multi-call templates) - std::string per_call_end; // e.g., " \u2985" + std::string tool_section_end; // e.g., "", "" + std::string per_call_start; // e.g., "<|tool_call_begin|>" (for multi-call templates) + std::string per_call_end; // e.g., "<|tool_call_end|>" std::string call_separator; // e.g., ",", "\n" - // === Function markers === - std::string func_name_prefix; // e.g., "", "\"" + // === Function markers (from T3-T6) === + std::string func_name_prefix; // e.g., "", ":0" std::string func_close; // e.g., "" - std::string args_start; // e.g., "{", " \u300b" - std::string args_end; // e.g., "}", "" + std::string args_start; // e.g., "{" + std::string args_end; // e.g., "}" - // 
=== Argument markers (for tagged args format) === + // === Argument markers (from A1-A2, for TAG_WITH_TAGGED) === std::string arg_name_prefix; // e.g., "" std::string arg_name_suffix; // e.g., ">", "" std::string arg_value_prefix; // e.g., "", "" std::string arg_value_suffix; // e.g., "", "" - std::string arg_separator; + std::string arg_separator; // e.g., "", "\n" + + // === Call ID markers (from T7) === + std::string call_id_prefix; // e.g., "[CALL_ID]" + std::string call_id_suffix; // e.g., "[ARGS]" // === Special markers === - std::string code_block_marker; // e.g., "Action:" (markdown code block format) - std::string id_marker; // e.g., "[CALL_ID]" (bracket-tag format) - std::string function_namespace; // e.g., "functions." (prefixed-indexed format) + std::string code_block_marker; // e.g., "Action:" (for markdown code block format) + std::string code_block_language; // e.g., "json" + std::string function_namespace; // e.g., "functions." (for prefixed-indexed format) }; ``` ## Tool Calling Formats -The auto-parser recognizes three primary tool calling formats. Other formats may be deprecated in future versions. +The auto-parser recognizes three tool calling formats. ### JSON_NATIVE **Structure**: The entire tool call (function name, arguments, and values) is in JSON format. There may be enclosing tags around the tool calling section. **Characteristics**: + - Function name is a JSON field: `"name": "function_name"` - Arguments are a JSON object: `"arguments": {"key": "value"}` - May be wrapped in section markers like `...` or `[TOOL_CALLS]...]` @@ -136,6 +159,7 @@ The auto-parser recognizes three primary tool calling formats. 
Other formats may **Examples**: Standard OpenAI-style: + ```json {"name": "get_weather", "arguments": {"location": "Paris", "unit": "celsius"}} @@ -143,19 +167,13 @@ Standard OpenAI-style: ``` Mistral Nemo with array wrapper: + ```json [TOOL_CALLS] [{"name": "calculate", "arguments": {"expr": "2+2"}}] ``` -Hermes-style with tool_calls wrapper: -```json - -{"name": "search", "arguments": {"query": "llama.cpp"}} - -``` - -**Detection**: `args_start == "{"`, `args_end == "}"`, no function name prefix markers +**Detection**: Function name found inside a JSON structure (determined by JSON parse attempt). --- @@ -164,32 +182,22 @@ Hermes-style with tool_calls wrapper: **Structure**: The function name is outside the JSON structure, typically within quasi-XML markers. Arguments are still provided as a JSON object. **Characteristics**: -- Function name appears in tag attributes: `` or `` + +- Function name appears in tag attributes: `` or `function_name` - Arguments are a JSON object following the tag - Has closing tags: `` or `` - Arguments remain valid JSON **Examples**: -Nemotron-style: -```xml -get_weather{"location": "Paris"} -``` - Functionary v3.1: + ```xml {"location": "Paris", "unit": "celsius"} ``` -ByteDance Seed-OSS: -```xml - -get_weather -{"location": "Paris"} - -``` - MiniMax: + ```xml calculate @@ -197,7 +205,7 @@ MiniMax: ``` -**Detection**: `func_name_prefix` starts with `<`, `args_start == "{"`, arguments are JSON +**Detection**: Function name not in JSON, but arguments are JSON (args_start is `{`). --- @@ -206,6 +214,7 @@ MiniMax: **Structure**: Both the function name AND argument names are in XML-style tags. Argument values may be JSON or unquoted primitives depending on schema type. 
**Characteristics**: + - Function name in tag: `` or `` - Each argument has its own tag: `value` - String values are **unquoted** (raw text content of the tag) @@ -215,6 +224,7 @@ MiniMax: **Examples**: Qwen/Hermes XML format: + ```xml Paris @@ -225,6 +235,7 @@ Qwen/Hermes XML format: Note how string values (`Paris`, `celsius`) are unquoted inside the tags. Mixed types example: + ```xml 2+2 @@ -234,467 +245,365 @@ Mixed types example: ``` Here: + - `expr` and `precision` are strings (unquoted) - `options` is an object (JSON-formatted inside the tag) -**Detection**: `arg_name_prefix` is non-empty, arguments use tagged format rather than JSON object +**Detection**: `arg_name_prefix` is non-empty, arguments use tagged format rather than JSON object. --- -### Other Formats (To Be Deprecated) - -The following formats are currently supported but will likely be deprecated: - -| Format | Description | Example | -|--------|-------------|---------| -| `BRACKET_TAG` | Bracket-based markers | `[TOOL_CALLS]func[ARGS]{...}` | -| `PREFIXED_INDEXED` | Namespace prefix with index | `functions.name:0{...}` | -| `RECIPIENT_BASED` | Recipient routing | `>>>recipient\n{content}` | -| `MARKDOWN_BLOCK` | Markdown code blocks | `Action:\n\`\`\`json\n[...]` | - ## Analysis Flow -```console +```text Template | v -Phase 1: analyze_content_structure() - |-- detect_reasoning_markers() - compare outputs with reasoning_content vs without - |-- detect_content_markers() - render with content and detect wrapping - |-- detect_reasoning_mode() - check if prompt ends with open tag +differential_analyzer::analyze(tmpl) | - v -content_structure + |-- Phase 1: analyze_reasoning(tmpl, result) + | |-- R1: compare_reasoning_presence() — with/without reasoning_content field + | |-- R2: compare_thinking_enabled() — enable_thinking=false vs true + | '-- R3: compare_reasoning_scope() — reasoning with content vs with tools | - v -Phase 2: analyze_tool_structure() - |-- Check minja.supports_tool_calls - |-- 
Differential analysis for tool patterns - |-- Classify function format (JSON vs tagged) - |-- Classify argument format (JSON vs tagged) + |-- Phase 2: analyze_content(tmpl, result) + | '-- C1: compare_content_values() — content vs tools vs reasoning + | + |-- Phase 3: analyze_tools(tmpl, result) + | |-- T1: analyze_tool_calls() — no tools vs with tools + format classification + | |-- T2: check_per_call_markers() — per-section vs per-call markers + | |-- T3: extract_call_separator() — separator between multiple calls + | |-- T4: extract_function_markers() — func_alpha vs func_beta + | |-- T5: extract_argument_separator() — 1 arg vs 2 args + | |-- T6: extract_args_markers() — no args vs with args + | '-- T7: extract_call_id_markers() — call_id "call00001" vs "call99999" + | + |-- Phase 4: analyze_arguments(tmpl, result) [TAG_WITH_TAGGED only] + | |-- A1: extract_argument_name_markers() — "first" arg vs "second" arg + | '-- A2: extract_argument_value_markers() — value "XXXX" vs "YYYY" + | + '-- collect_preserved_tokens(result) | v diff_analysis_result | v -generate_parser(diff_analysis_result) - |-- build_reasoning_block(diff_analysis_result) - |-- build_content_block(diff_analysis_result) - |-- build_tool_section(diff_analysis_result, tools) - |-- Compose into final parser +universal_peg_generator::generate_parser(tmpl, inputs, analysis) + |-- build_parser(analysis, inputs, ...) 
— builds PEG parser arena + | |-- Reasoning parser (based on reasoning_mode) + | |-- Content parser (based on content_mode) + | '-- Tool parser (dispatches by tool_format): + | |-- build_tool_parser_json_native() + | |-- build_tool_parser_tag_json() + | '-- build_tool_parser_tag_tagged() + | + |-- Build GBNF grammar (if tools present) + '-- Set grammar triggers from tool markers | v -common_chat_params (parser, grammar, triggers, preserved_tokens) +common_chat_params (prompt, parser, grammar, triggers, preserved_tokens) ``` ## Entry Point -The mechanism starts in `common/chat.cpp`, in `common_chat_templates_apply_jinja`: +The auto-parser is invoked in `common/chat.cpp` in `common_chat_templates_apply_jinja`. A few specialized templates are handled first (Ministral/Magistral Large 3, GPT-OSS, Functionary v3.2), then the auto-parser handles everything else: ```cpp -// 1. Analyze the template (two-phase) -auto analysis = differential_analyzer::analyze(tmpl); - -// 2. Generate the parser and grammar -auto auto_params = universal_peg_generator::generate_parser(tmpl, params); - -// 3. 
Use if it provides more than basic content handling -if (auto_params.format != COMMON_CHAT_FORMAT_CONTENT_ONLY || - !auto_params.parser.empty()) { +try { + LOG_DBG("Using differential autoparser\n"); + auto auto_params = universal_peg_generator::generate_parser(tmpl, params); return auto_params; +} catch (const std::exception & e) { + LOG_WRN("Automatic parser generation failed: %s\n", e.what()); } ``` -## Builder Methods - -The unified builder (`common_chat_peg_unified_builder`) provides high-level methods: - -- `build_reasoning_block(analysis, reasoning_format, thinking_forced_open)` - Build reasoning parser -- `build_content_block(analysis, reasoning_format)` - Build content parser -- `build_tool_section(analysis, tools, parallel_tool_calls, force_tool_calls)` - Build tool section -- `build_function(analysis, name, schema)` - Build single function parser -- `build_arguments(analysis, schema)` - Build arguments parser - -## Key Templates Supported - -- **Granite** - `` + `` with tool calls -- **Nemotron** - JSON tools with `` wrapper -- **Qwen/Hermes** - XML-style `` format (TAG_WITH_TAGGED) -- **Command-R7B** - `<|START_THINKING|>`/`<|START_RESPONSE|>` + `<|START_ACTION|>` tools -- **DeepSeek R1** - Forced thinking + complex tools -- **Mistral Nemo** - `[TOOL_CALLS]` wrapper (JSON_NATIVE) -- **MiniMax** - `` wrapper with JSON args (TAG_WITH_JSON) -- **GLM-4.6** - `` + `name\n......` format -- **Kimi-K2** - `PREFIXED_INDEXED` format with namespace and indices -- **Mistral Small 3.2** - `BRACKET_TAG` format with `[TOOL_CALLS]` markers -- **Functionary v3.2** - `RECIPIENT_BASED` format with `>>>` routing - -## Files - -| File | Purpose | -|------|---------| -| `common/chat-auto-parser.h` | Data structures and API declarations | -| `common/chat-diff-analyzer.h/cpp` | Differential analysis implementation | -| `common/chat-auto-parser-generator.cpp` | PEG parser generator | -| `common/chat-auto-parser-helpers.h/cpp` | Shared helper functions | -| 
`common/chat-peg-parser.h/cpp` | Unified builder and mapper classes | -| `common/chat.cpp` | Main entry point and wire-up | - ## Algorithm Details -### Phase 1: Content & Reasoning Analysis +### Core Mechanism: Differential Comparison -#### Reasoning Detection (4 Methods) - -**Method 1: Differential Reasoning Content Analysis** - -- Render template with `reasoning_content` field present vs absent -- Compare outputs to find markers between reasoning and content -- If only closing tag found, derive opening tag using patterns: - - XML: `` → `` - - Special tokens: `<|END_X|>` → `<|START_X|>`, `<|/X|>` → `<|X|>` -- Handles various tag formats including XML and special token formats - -**Method 2: Enable-Thinking Toggle Analysis** - -- Toggle `enable_thinking` context variable between true/false -- Detects differences in generated prompts -- Handles two scenarios: - - **Normal case**: enable_thinking=true adds reasoning markers - - **Reverse case**: enable_thinking=false adds empty thinking block (GLM-4.6 style) -- Uses string difference analysis to extract markers -- Validates extracted tags against blacklist of role markers - -**Method 3: Prompt Ending Analysis** - -- Checks if prompt ends with unclosed reasoning tag -- Looks for trailing tags in prompt with `enable_thinking=true` -- Differentiates between open tags (``) and close tags (``) -- Handles blacklisted tags (role markers, system tokens) -- Validates reasoning-like patterns (contains "think", "reason", "thought") - -**Method 4: Adjacent Tag Pair Detection** - -- Looks for patterns like ``, `<|START_THINKING|><|END_THINKING|>`, `[think][/think]` -- Searches for predefined tag patterns in prompt -- Validates tags are adjacent with only whitespace between -- Supports both simple and complex token formats - -#### Content Detection Algorithm - -1. **Dual-Mode Rendering**: Render template with content marker in both thinking-enabled and thinking-disabled modes -2. 
**Pattern Matching**: Search for known content wrapper patterns: - - `<|START_RESPONSE|>` / `<|END_RESPONSE|>` - - `` / `` - - `` / `` - - `` / `` - - `<|CHATBOT_TOKEN|>` / `<|END_OF_TURN_TOKEN|>` -3. **Mode Classification**: - - `CONTENT_ALWAYS_WRAPPED`: Found in both thinking modes - - `CONTENT_WRAPPED_WITH_REASONING`: Found only with thinking enabled - - `CONTENT_PLAIN`: No wrapping detected - -#### Reasoning Mode Detection - -- **REASONING_FORCED_OPEN**: - - **Explicit**: Prompt ends with reasoning start marker (e.g., ``). - - **Implicit**: reasoning end marker is present but start marker is empty (e.g., `[BEGIN FINAL RESPONSE]`). -- **REASONING_OPTIONAL**: Markers present but not forced. -- **REASONING_NONE**: No markers detected. - -### Phase 2: Tool Call Structure Analysis - -#### Pure Differential Analysis Algorithm - -**Key Principle**: All patterns are extracted through template comparison. The **only heuristic** is detecting JSON vs marker-based structures (via JSON parse attempt). No hardcoded pattern lists. 
- -**Comparison Matrix**: - -| Comparison | Purpose | What's Extracted | -|------------|---------|------------------| -| **T1**: No tools vs tools | Tool section markers | `tool_section_start`, `tool_section_end` | -| **T2**: 1 call vs 2 calls | Call separators | `per_call_start`, `call_separator` | -| **T3**: func_alpha vs func_beta | Function boundaries | `func_name_prefix`, `func_name_suffix` | -| **T4**: 1 arg vs 2 args | Argument separator | `arg_separator` | -| **T5**: No args vs args | Args container | `args_start`, `args_end` | -| **A1**: key1 vs key2 | Arg name boundaries | `arg_name_prefix`, `arg_name_suffix` | -| **A2**: value A vs B | Arg value boundaries | `arg_value_prefix`, `arg_value_suffix` | -| **A3**: number vs string | Quoting behavior | Value type handling | - -**Structural Extraction Helpers**: +All analysis phases use the same factorized comparison function: ```cpp -// Extract last structural marker from string (finds last <, [, {, or ") -std::string extract_structural_suffix(const std::string & str); - -// Extract first structural marker from string (finds first >, ], }, or ") -std::string extract_structural_prefix(const std::string & str); - -// The only heuristic: detect if content is valid JSON -bool is_json_based(const std::string & content); +compare_variants(tmpl, params_A, params_modifier) ``` -**Pattern Extraction Process** (Example - T1: Tool Section Markers): - -1. Render template with/without tool calls -2. Compute diff: `calculate_diff_split(output_no_tools, output_with_tools)` -3. Use controlled function name (`func_alpha`) as anchor in `diff.right` -4. Extract structural prefix before function name → `tool_section_start` -5. Extract structural suffix after tool content → `tool_section_end` - -**No Pattern Lists**: Unlike the old approach, there are no hardcoded lists like `["", "[TOOL_CALLS]", ...]`. All markers are discovered through differential comparison. 
- -#### Variant Detection Logic - -Instead of forcing patterns into enum types, the analyzer detects **variant types** as strings that describe the structural characteristics: - -**Variant Types**: - -- `"json-native"`: Pure JSON tool calls (Llama, Mistral Nemo) -- `"tagged-json"`: Function name in markers, args in JSON (Functionary v3.1, Nemotron) -- `"tagged-args"`: Full XML-style with tagged arguments (Qwen, Hermes, MiniMax) -- `"bracket-tag"`: Bracket markers (Mistral Small 3.2: `[TOOL_CALLS]func[ARGS]{...}`) -- `"recipient-based"`: Recipient routing (Functionary v3.2: `>>>func_name`) -- `"markdown-block"`: Markdown code blocks (Cohere Command-R Plus) -- `"prefixed-indexed"`: Namespace prefix with indices (Kimi-K2: `functions.name:0`) - -**Detection Strategy** (from most to least distinctive): +This creates variant B by applying a modifier lambda to a copy of params_A, renders both through the template, and computes a `diff_split`: ```cpp -void detect_tool_variant(diff_analysis_result & result) { - // 1. Check for unique markers (most distinctive) - if (!result.markers.id_marker.empty()) - → "bracket-tag" - - if (markers contain ">>>") - → "recipient-based" - - if (code_block_marker present) - → "markdown-block" - - if (function_namespace or suffix contains ':') - → "prefixed-indexed" - - // 2. Check argument structure (JSON variants) - if (arg_name_prefix starts with '<') - → "tagged-args" - - if (func_name_prefix starts with '<') - → "tagged-json" - - // 3. 
Default - → "json-native" -} +struct diff_split { + std::string prefix; // Common prefix between A and B + std::string suffix; // Common suffix between A and B + std::string left; // Unique to variant A + std::string right; // Unique to variant B +}; ``` -#### Compositional Parser Building +The diff is computed via `calculate_diff_split()`, which uses longest-common-prefix/suffix with iterative tag boundary fixing — it moves incomplete `<...>` or `[...]` markers from prefix/suffix into the left/right parts until stable. -The analyzer builds separate, composable parsers for each component: +Text is segmentized into markers and non-marker fragments using `segmentize_markers()`, which splits on `<...>` and `[...]` boundaries. -**Reasoning Parser**: +### Phase 1: Reasoning Analysis -- Built from `reasoning_start` and `reasoning_end` markers -- Supports tag-based, delimiter, and forced-open modes +Three comparisons extract reasoning markers and classify the reasoning mode: -**Content Parser**: +**R1 — `compare_reasoning_presence()`**: Compares assistant message with vs without a `reasoning_content` field. -- Built from `content_start` and `content_end` markers -- Supports plain, always-wrapped, and conditionally-wrapped modes +- Segmentizes `diff.right` to find markers around the reasoning content +- 3+ segments → `TAG_BASED` (start marker, content, end marker) +- 2 segments → `DELIMITER` (content followed by delimiter) +- Special case: markers found in prefix/suffix → `FORCED_CLOSED` -**Tool Parser** (variant-specific): +**R2 — `compare_thinking_enabled()`**: Compares `enable_thinking=false` vs `true`. 
-- Built based on `variant_type` detection -- Each variant has its own builder that uses the extracted markers -- No enum forcing - structure preserved as discovered +- Detects `FORCED_OPEN`: template adds opening tag when thinking enabled +- Detects `FORCED_CLOSED`: disable mode has both markers, enable mode has only start +- Handles reverse patterns (e.g., GLM-4.6 where disabled adds empty block) -**Final Composition**: +**R3 — `compare_reasoning_scope()`**: Compares reasoning with content vs with tool calls. -```cpp -sequence({ - reasoning_parser, - space(), - content_parser, - space(), - tool_parser, - end() -}) +- Detects `TOOLS_ONLY`: reasoning appears only when tool calls are present +- Extracts reasoning markers from tool call output by segmentizing + +### Phase 2: Content Analysis + +**C1 — `compare_content_values()`**: Compares content-only output vs tools output vs reasoning output. + +- Creates two comparisons: content→tools and content→reasoning +- Finds content text position in diff to extract surrounding markers +- Classifies: + - `ALWAYS_WRAPPED`: content has start/end markers in both comparisons + - `WRAPPED_WITH_REASONING`: markers only when reasoning is present + - `PLAIN`: no wrapping markers detected + +### Phase 3: Tool Call Analysis + +**T1 — `analyze_tool_calls()`**: Compares no-tools vs with-tools output. 
+ +- Calls `analyze_tool_call_format()` to classify the format using the **only heuristic**: a JSON parse attempt + - `in_json_haystack()` checks whether the function name appears inside a JSON structure + - If function name is in JSON → `JSON_NATIVE` → `analyze_tool_call_format_json_native()`: + - Parses JSON structure, matches needle values to extract field names + - Detects `fun_name_is_key`, `function_field`, `name_field`, `args_field`, `id_field`, `gen_id_field` + - Detects `tools_array_wrapped` by checking for `[` before JSON + - Builds `parameter_order` by sorting fields by position + - Extracts `tool_section_start`/`tool_section_end` + - If function name is not in JSON → `analyze_tool_call_format_non_json()`: + - Segmentizes the haystack into markers and text + - Uses symmetry: counts opening markers, matches with closing markers + - Extracts `tool_section_start`, `tool_section_end`, `per_call_start`, `per_call_end` + +**T2 — `check_per_call_markers()`**: Compares 1 call vs 2 calls. + +- If the second call starts with `tool_section_start`, markers are per-call not per-section +- Moves tool_section markers to per_call markers, clears section markers + +**T3 — `extract_call_separator()`**: Compares 1 call vs 2 calls. + +- Finds separator between calls using `until_common_prefix(diff.right, ...)` with the two function names as anchors + +**T4 — `extract_function_markers()`**: Compares function name "foofoo" vs "barbar". + +- Finds function name in diff, segmentizes to extract prefix/suffix markers +- Extracts `func_name_prefix`, `func_name_suffix` +- Searches for closing marker after args to extract `func_close` + +**T5 — `extract_argument_separator()`**: Compares 1 argument vs 2 arguments. + +- Uses `until_common_prefix()` with argument names as anchors to find the separator + +**T6 — `extract_args_markers()`**: Compares 0 arguments vs 1 argument. 
+ +- Uses `until_common_prefix()` and `after_common_suffix()` to find container markers +- Extracts `args_start`, `args_end` + +**T7 — `extract_call_id_markers()`**: Compares call IDs "call00001" vs "call99999". + +- Determines position relative to function name and arguments +- Classifies as `PRE_FUNC_NAME`, `BETWEEN_FUNC_AND_ARGS`, or `POST_ARGS` +- Extracts `call_id_prefix`, `call_id_suffix` + +### Phase 4: Argument Analysis (TAG_WITH_TAGGED only) + +Only runs when Phase 3 detected TAG_WITH_TAGGED or TAG_WITH_JSON format with non-JSON argument structures. + +**A1 — `extract_argument_name_markers()`**: Compares argument name "first" vs "second". + +- Finds common prefix of diff.left/right to extract marker structure +- Extracts `arg_name_prefix`, `arg_name_suffix` + +**A2 — `extract_argument_value_markers()`**: Compares value "XXXX" vs "YYYY". + +- Segmentizes prefix/suffix around value to find markers +- Extracts `arg_value_prefix`, `arg_value_suffix` + +### Parser Building + +The parser generator (`universal_peg_generator`) takes the analysis result and builds a PEG parser arena. The entry point is `generate_parser(tmpl, inputs)`, which: + +1. Runs `differential_analyzer::analyze(tmpl)` to get the analysis result +2. Calls `build_parser(analysis, inputs, ...)` to construct the PEG parser +3. Builds a GBNF grammar if tools are present (for constrained decoding) +4. 
Sets grammar triggers from `tool_section_start` or `per_call_start` + +#### Reasoning Parser Construction + +Built inline in `build_parser()` based on `reasoning_mode`: + +| Mode | Parser | +|-----------------------------------|---------------------------------------------------------------------------------------------| +| `FORCED_OPEN` / `FORCED_CLOSED` | `reasoning(until(end)) + end` — expects reasoning immediately (opening tag was in template) | +| `TAG_BASED` / `TOOLS_ONLY` | `optional(start + reasoning(until(end)) + end)` | +| `DELIMITER` | `optional(reasoning(until(end)) + end)` — no start marker, reasoning ends at delimiter | + +#### Content Parser Construction + +| Condition | Parser | +|------------------------------------|---------------------------------------------------------------------------| +| `json_schema` present | `reasoning + space() + content(schema(json(), ...)) + end()` | +| Tools present | Dispatches to tool parser builder | +| `ALWAYS_WRAPPED` with reasoning | `reasoning + start + content(until(end)) + end + end()` | +| `ALWAYS_WRAPPED` without reasoning | `content(until(start)) + start + content(until(end)) + end + end()` | +| Default | `reasoning + content(rest()) + end()` | + +#### Tool Parser Construction + +`build_tool_parser()` dispatches by `tool_format`: + +**`build_tool_parser_json_native()`**: Uses the `standard_json_tools()` builder helper which has three internal modes: + +- `build_json_tools_function_is_key()` — function name is the JSON key: `{"get_weather": {"location": "Paris"}}` +- `build_json_tools_nested_keys()` — nested object: `{"function": {"name": "X", "arguments": {...}}}` +- `build_json_tools_flat_keys()` — flat object: `{"name": "X", "arguments": {...}}` + +Handles content wrappers, array wrapping, parallel calls, and section markers. 
+ +**`build_tool_parser_tag_json()`**: For each tool, builds: + +```text +tool_open(prefix + tool_name(literal(name)) + suffix) + + call_id_section + + tool_args(schema(json(), tool_schema)) ``` -### Generator Algorithms +Wraps in per-call or section markers. Handles parallel calls. -#### Unified Parser Building +**`build_tool_parser_tag_tagged()`**: For each tool, builds per-argument parsers: -**Composition Strategy**: +- String types: `tool_arg_string_value(schema(until(suffix), ...))` +- JSON types: `tool_arg_json_value(schema(json(), ...))` +- Required vs optional arguments +- Arguments joined with `space()` between them -```cpp -// Standard format -sequence({ reasoning, space(), content, space(), tools, space(), content, end() }) +Handles `func_close`, `peek()` for partial parsing safety, and call_id sections. -// With section markers -sequence({ reasoning, space(), content_until(section_start), space(), tools, space(), content, end() }) +All three return: `reasoning + optional(content(until(trigger))) + tool_calls + end()` -// Forced thinking handling -optional(reasoning) when thinking_forced_open && tools present -``` +### Mapper -**Trigger Word Detection**: +The `common_chat_peg_unified_mapper` maps PEG parse results (AST nodes) into `common_chat_msg` structures. 
Key design: -- Uses `tool_section_start` as primary trigger -- Falls back to `function_prefix` or `per_call_start` -- Raw JSON uses regex pattern trigger +- **Buffered arguments**: Before `tool_name` is known, argument text goes to `args_buffer`; once name is set, the buffer is flushed to `current_tool->arguments` +- **`args_target()`**: Returns a reference to whichever destination is active, eliminating branching +- **`closing_quote_pending`**: Tracks whether a closing `"` needs to be appended when a string argument value is finalized +- **Quote normalization**: Python-style quotes (`'key': 'value'`) are converted to JSON (`"key": "value"`) +- **Brace auto-closing**: At tool close, unclosed `{` braces are closed automatically (tracked via `json_brace_depth()`) -**Lazy Grammar Optimization**: +## Files -- Enabled by default for performance -- Disabled when thinking forced open -- Disabled when no clear trigger word exists +| File | Purpose | +|-------------------------------------------|-------------------------------------------------------------------| +| `common/chat-auto-parser.h` | `universal_peg_generator` class and `templates_params` struct | +| `common/chat-auto-parser-generator.cpp` | Parser generator implementation | +| `common/chat-diff-analyzer.h` | Analysis result types, enums, and `differential_analyzer` class | +| `common/chat-diff-analyzer.cpp` | Differential analysis implementation | +| `common/chat-auto-parser-helpers.h/cpp` | `calculate_diff_split()`, `segmentize_markers()`, string helpers | +| `common/chat-peg-parser.h/cpp` | PEG builder and mapper classes | +| `common/chat.cpp` | Entry point: `common_chat_templates_apply_jinja()` | +| `tools/parser/debug-template-parser.cpp` | Debug tool for template analysis | +| `tools/parser/template-analysis.cpp` | Template analysis tool | ## Testing & Debugging -### Comprehensive Test Coverage - -The test suite covers: - -**Reasoning Models**: - -- Qwen-QwQ-32B (forced-open thinking) -- DeepSeek R1 
variants (reasoning only) -- IBM Granite (reasoning + tools) -- ByteDance Seed-OSS (custom reasoning tags) -- Ministral-3-14B-Reasoning -- llama-cpp-deepseek-r1 - -**Tool Call Formats**: - -- JSON_NATIVE: Llama 3.x, Mistral Nemo, Hermes, MiMo-VL -- TAG_WITH_JSON: Nemotron, Qwen3-Coder, MiniMax -- TAG_WITH_TAGGED: Qwen, Hermes (XML), ByteDance Seed-OSS -- BRACKET_TAG: Mistral Small 3.2, Devstral -- PREFIXED_INDEXED: Kimi-K2 variants -- RECIPIENT_BASED: Functionary v3.2 -- MARKDOWN_BLOCK: Cohere Command-R Plus - -**Edge Cases**: - -- Streaming/partial parsing -- Empty content with tools -- Parallel tool calls -- Forced thinking mode -- Multi-byte Unicode markers -- Null content handling -- Multi-line code in tool arguments -- Custom reasoning tags (ByteDance Seed-OSS) - ### Debug Tools -**Template Debugger**: `tests/debug-template-parser.cpp` +**Template Debugger**: `tools/parser/debug-template-parser.cpp` -- Usage: `./bin/debug-template-parser path/to/template.jinja` +- Usage: `./bin/llama-debug-template-parser path/to/template.jinja` - Shows detected format, markers, generated parser, and GBNF grammar +**Template Analysis**: `tools/parser/template-analysis.cpp` + +- Usage: `./bin/llama-template-analysis path/to/template.jinja` + **Debug Logging**: Enable with `LLAMA_LOG_VERBOSITY=2` -- Shows detailed analysis steps -- Displays pattern extraction results -- Lists generated parser structure +- Shows detailed analysis steps, pattern extraction results, and generated parser structure -**PEG Test Builder**: Fluent API for creating test cases +**PEG Test Builder**: Fluent API for creating test cases in `tests/test-chat.cpp`: ```cpp -auto tst = peg_tester("template.jinja"); -tst.test("input") +auto tst = peg_tester("models/templates/Template.jinja"); +tst.test("input text") .reasoning_format(COMMON_REASONING_FORMAT_AUTO) - .tools({tool}) + .tools({tool_json}) + .parallel_tool_calls(true) + .enable_thinking(true) .expect(expected_message) .run(); ``` -## Adding Support for 
New Templates - -To support a new template format: - -1. **If it follows standard patterns** - The auto-parser should detect it automatically using the three main formats (JSON_NATIVE, TAG_WITH_JSON, TAG_WITH_TAGGED) -2. **If it has unique markers** - Add differential analysis patterns in: - - `compare_reasoning_presence()` for reasoning tags - - `compare_content_values()` for content wrappers - - `extract_tool_section()` for tool call patterns -3. **If it needs special handling** - Add a dedicated handler in `chat.cpp` before the auto-parser block - -## Edge Cases and Quirks - -1. **Forced Thinking**: If `enable_thinking` is true but the model has already started a thought block (e.g., ended the prompt with ``), the parser enters "forced thinking" mode where it immediately expects reasoning content. -2. **Ambiguous Content**: Templates that mix content and tool calls without clear delimiters can be tricky. The analyzer tries to find "common" start/end patterns across multiple examples to be robust. -3. 
**Double Wrapping**: Some templates (e.g., Functionary) use the same string for both the tool section start and the function prefix (e.g., `name\n......` format | -| Kimi-K2 / Kimi-K2-Instruct / Kimi-K2-Thinking | `PREFIXED_INDEXED` | `functions.name:0` with special markers | -| Apertus-8B-Instruct | `NAME_AS_KEY` | `{"function_name": {...}}` format | -| MiniMax-M2 | `TAG_WITH_JSON` | XML invoke with parameter tags | -| NVIDIA-Nemotron-Nano-v2 | `JSON_NATIVE` | `` wrapper (nested) | -| Mistral-Nemo-Instruct-2407 | `JSON_NATIVE` | `[TOOL_CALLS]` wrapper with id field | -| Functionary v3.1 | `TAG_WITH_JSON` | `` non-nested format | -| Functionary v3.2 | `RECIPIENT_BASED` | `>>>` recipient delimiter format | -| MiMo-VL / Hermes 3 / Qwen 2.5 | `JSON_NATIVE` | `` wrapper | -| Apriel 1.5 | `JSON_NATIVE` | `` wrapper with JSON array | -| Apriel 1.6 Thinker | Reasoning only | Implicit reasoning start | -| Cohere Command-R7B | `JSON_NATIVE` | START_RESPONSE/ACTION/THINKING markers | -| Mistral Small 3.2 | `BRACKET_TAG` | `[TOOL_CALLS]func[ARGS]{...}` with ID | -| Devstral | `BRACKET_TAG` | `[TOOL_CALLS]func[ARGS]{...}` without ID | -| Ministral-3-14B-Reasoning | Custom reasoning | `[THINK]...[/THINK]` tags | -| IBM Granite | `JSON_NATIVE` | `` + `` | -| ByteDance Seed-OSS | `TAG_WITH_TAGGED` | Custom `` and `` tags | -| Qwen3-Coder | `TAG_WITH_TAGGED` | XML-style tool format | -| Cohere Command-R Plus | `MARKDOWN_BLOCK` | `Action:\n`\`\`\`json\n[...]\n`\`\`` format | +| -------- | ------ | ----- | +| Ministral-3-14B-Reasoning | Reasoning | `[THINK]...[/THINK]` tags | +| NVIDIA-Nemotron-3-Nano-30B | TAG_WITH_TAGGED | Reasoning + tools | +| CohereForAI Command-R7B | JSON_NATIVE | `<\|START_THINKING\|>`/`<\|START_RESPONSE\|>` markers | +| Google Gemma 2 2B | Content only | No tool support | +| Qwen-QwQ-32B | Reasoning | Forced-open thinking | +| NousResearch Hermes 2 Pro | JSON_NATIVE | `` wrapper | +| IBM Granite 3.3 | JSON_NATIVE | `` + `` | +| ByteDance Seed-OSS | 
TAG_WITH_TAGGED | Custom `` and `` tags | +| Qwen3-Coder | TAG_WITH_TAGGED | XML-style tool format | +| DeepSeek V3.1 | JSON_NATIVE | Forced thinking mode | +| GLM-4.6 | TAG_WITH_TAGGED | `name\n......` format | +| GLM-4.7-Flash | TAG_WITH_TAGGED | Updated GLM format | +| Kimi-K2-Thinking | JSON_NATIVE | Reasoning + JSON tools | +| Apertus-8B-Instruct | JSON_NATIVE | Function name as JSON key | +| MiniMax-M2 | TAG_WITH_JSON | XML invoke with JSON args | +| NVIDIA-Nemotron-Nano-v2 | JSON_NATIVE | `` wrapper (nested) | +| CohereForAI Command-R Plus | JSON_NATIVE | Markdown code block format | +| Mistral-Nemo-Instruct-2407 | JSON_NATIVE | `[TOOL_CALLS]` wrapper with ID field | +| Functionary v3.1 | TAG_WITH_JSON | `` format | +| Functionary v3.2 | Specialized | `>>>` recipient delimiter (dedicated handler) | +| Fireworks Firefunction v2 | TAG_WITH_JSON | Fireworks tool format | +| DeepSeek R1 Distill (Llama/Qwen) | Reasoning | Forced-open thinking | +| llama-cpp-deepseek-r1 | Reasoning | Forced-open thinking | +| Kimi-K2 / Kimi-K2-Instruct | JSON_NATIVE | JSON tools with special markers | +| Llama 3.1/3.2/3.3 | JSON_NATIVE | Standard Llama tool format | +| OpenAI GPT-OSS | Specialized | Channel-based (dedicated handler) | +| Apriel 1.5 | JSON_NATIVE | `` wrapper with JSON array | +| Apriel 1.6 Thinker | Reasoning | Implicit reasoning start | +| Mistral Small 3.2 | JSON_NATIVE | `[TOOL_CALLS]func[ARGS]{...}` with call ID | +| Devstral | JSON_NATIVE | `[TOOL_CALLS]func[ARGS]{...}` without call ID | +| StepFun 3.5 Flash | TAG_WITH_TAGGED | `` format | -### Currently Unsupported Templates +## Adding Support for New Templates -| Template Family | Model / Variant | Issue Description | -|-----------------|-----------------|-------------------| -| **OpenAI** | `GPT-OSS` | Complex channel markers need new format | +To support a new template format: -### Templates Without Tool Support +1. 
**If it follows standard patterns** - The auto-parser should detect it automatically using the three formats (JSON_NATIVE, TAG_WITH_JSON, TAG_WITH_TAGGED) +2. **If differential analysis doesn't extract markers correctly** - Add a workaround in the workarounds array in `chat-diff-analyzer.cpp` +3. **If it needs fundamentally different handling** - Add a dedicated handler in `chat.cpp` before the auto-parser block (as done for GPT-OSS, Functionary v3.2, and Ministral) -Some templates genuinely don't support tool calls (this is not a detection bug): +## Edge Cases and Quirks -- **Phi 3.5 Mini** - The official template has no tool handling. Use Phi-4-mini-instruct for function calling, or community fine-tuned versions. -- **Google Gemma 2 2B** - Pure instruction-following model without tool capabilities. - -### TODO / Roadmap - -- [ ] **Fix OpenAI GPT-OSS**: Add handling for channel marker structure. -- [x] **~~Fix Cohere Command-R Plus~~**: Added `MARKDOWN_BLOCK` format for `Action:\n`\`\`\`json` structure. - -### Recent Additions (Dec 2025 - Jan 2026) - -- **RECIPIENT_BASED**: Support for Functionary v3.2's `>>>` recipient delimiter format -- **BRACKET_TAG**: Support for Mistral Small 3.2 and Devstral's `[TOOL_CALLS]...` format -- **Enhanced Content Detection**: Better handling of custom reasoning tags and content wrappers -- **Improved Streaming Support**: Better handling of partial parsing for all supported formats -- **Custom Tag Support**: Support for non-standard reasoning tags like `` (ByteDance) -- **Multi-line Tool Arguments**: Better parsing of complex tool arguments with code blocks -- **MARKDOWN_BLOCK**: Support for Cohere Command-R Plus markdown code block format -- **Implicit Reasoning Support**: Support for templates where reasoning starts implicitly without a start marker. 
-- **Pure Differential Refactoring (Jan 2026)**: Complete refactoring to eliminate hardcoded patterns: - - Removed all hardcoded pattern lists (previously had `["", "[TOOL_CALLS]", ...]`) - - Added structural extraction helpers (`extract_structural_suffix`, `extract_structural_prefix`) - - Replaced enum-based classification with string-based variant types - - Only remaining heuristic: JSON detection via parse attempt - - All markers now discovered through differential template comparison -- **Three Primary Tool Formats**: Consolidated tool calling formats to JSON_NATIVE, TAG_WITH_JSON, and TAG_WITH_TAGGED for clarity and maintainability - -The auto-parser now successfully handles 25+ different template formats across reasoning-only, tool-calling, and hybrid models, with comprehensive test coverage ensuring robust parsing across streaming and non-streaming scenarios. +1. **Forced Thinking**: If `enable_thinking` is true but the model has already started a thought block (e.g., ended the prompt with ``), the parser enters "forced thinking" mode where it immediately expects reasoning content. +2. **Per-Call vs Per-Section Markers**: Some templates wrap each tool call individually (`per_call_start`/`per_call_end`), others wrap the entire tool section (`tool_section_start`/`tool_section_end`). T2 disambiguates by checking if the second call in a two-call output starts with the section marker. +3. **Double Wrapping**: Some templates (e.g., Functionary) use the same string for both the tool section start and the function prefix (e.g., `` or `[marker]` tokens, ensuring clean extraction. +6. **Workarounds**: A workaround array in `chat-diff-analyzer.cpp` applies post-analysis patches for templates whose differential analysis produces incomplete or incorrect results (e.g., old Qwen thinking, Granite 3.3, Cohere Command-R+, Functionary, DeepSeek-R1-Distill-Qwen). 
From 29ce31b1a3afa65b3987c8ca79624e0e9aa25b19 Mon Sep 17 00:00:00 2001 From: Piotr Wilkin Date: Sat, 7 Feb 2026 23:55:53 +0100 Subject: [PATCH 23/39] Fix windows build --- tests/test-chat-template.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/test-chat-template.cpp b/tests/test-chat-template.cpp index 48aac31d75..8b331129b1 100644 --- a/tests/test-chat-template.cpp +++ b/tests/test-chat-template.cpp @@ -17,6 +17,10 @@ #include "jinja/lexer.h" #include "jinja/caps.h" +#ifdef WIN32 +#include +#endif + using json = nlohmann::ordered_json; static int main_automated_tests(void); From e590f31f674d0d6830fbe834888788bad2b3e589 Mon Sep 17 00:00:00 2001 From: Piotr Wilkin Date: Sun, 8 Feb 2026 16:05:21 +0100 Subject: [PATCH 24/39] Revert obsolete server-context change --- tools/server/server-context.cpp | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/tools/server/server-context.cpp b/tools/server/server-context.cpp index f82a6cce56..ceafcac179 100644 --- a/tools/server/server-context.cpp +++ b/tools/server/server-context.cpp @@ -15,7 +15,6 @@ #include #include #include -#include #include // fix problem with std::min and std::max @@ -2748,15 +2747,7 @@ private: slot.i_batch = -1; - try { - common_sampler_accept(slot.smpl.get(), id, true); - } catch (std::runtime_error & e) { - SLT_ERR(slot, "Error when accepting token for sampler: %s\n", e.what()); - send_error(slot, std::string("Error when accepting token for sampler: ") + e.what(), ERROR_TYPE_SERVER); - slot.release(); - slot.i_batch = -1; - continue; // continue loop of slots - } + common_sampler_accept(slot.smpl.get(), id, true); // here we have synchronized the llama_context (due to the sampling above), so we can do time measurement const int64_t t_current = ggml_time_us(); From d69ec41ee0bd9f1743006dd811a0bf811ba2ae30 Mon Sep 17 00:00:00 2001 From: Piotr Wilkin Date: Tue, 10 Feb 2026 18:05:09 +0100 Subject: [PATCH 25/39] Post-merge adapt --- common/jinja/caps.cpp | 2 +- 
tools/parser/template-analysis.cpp | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/common/jinja/caps.cpp b/common/jinja/caps.cpp index c6eef6b464..abd4cd2d9f 100644 --- a/common/jinja/caps.cpp +++ b/common/jinja/caps.cpp @@ -111,7 +111,7 @@ caps caps_get(jinja::program & prog) { // tools return json{nullptr}; }, - [&](bool success, value & messages, value &) { + [&](bool success, value & messages, value &, const std::string &) { auto & content = messages->at(0)->at("content"); caps_print_stats(content, "messages[0].content"); if (has_op(content, "selectattr") || has_op(content, "array_access")) { diff --git a/tools/parser/template-analysis.cpp b/tools/parser/template-analysis.cpp index 0fbcc09390..deb2bafa20 100644 --- a/tools/parser/template-analysis.cpp +++ b/tools/parser/template-analysis.cpp @@ -391,7 +391,8 @@ static void analyze_template(const std::string & template_path) { LOG_ERR("%ssupports_tool_calls:%s %s\n", ANSI_BLUE, ANSI_RESET, caps.supports_tool_calls ? "true" : "false"); LOG_ERR("%ssupports_system_role:%s %s\n", ANSI_BLUE, ANSI_RESET, caps.supports_system_role ? "true" : "false"); LOG_ERR("%ssupports_parallel_tool_calls:%s %s\n", ANSI_BLUE, ANSI_RESET, caps.supports_parallel_tool_calls ? "true" : "false"); - LOG_ERR("%srequires_typed_content:%s %s\n", ANSI_BLUE, ANSI_RESET, caps.requires_typed_content ? "true" : "false"); + LOG_ERR("%ssupports_typed_content:%s %s\n", ANSI_BLUE, ANSI_RESET, caps.supports_typed_content ? "true" : "false"); + LOG_ERR("%ssupports_string_content:%s %s\n", ANSI_BLUE, ANSI_RESET, caps.supports_string_content ? 
"true" : "false"); // ===== DIFFERENTIAL ANALYSIS ===== From efc52dadc8323325136660c822a961d724216704 Mon Sep 17 00:00:00 2001 From: Piotr Wilkin Date: Tue, 10 Feb 2026 18:17:11 +0100 Subject: [PATCH 26/39] Add compilation guard to fix Windows compilation errors --- tools/parser/CMakeLists.txt | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/tools/parser/CMakeLists.txt b/tools/parser/CMakeLists.txt index 73157b0a0e..55e0c63437 100644 --- a/tools/parser/CMakeLists.txt +++ b/tools/parser/CMakeLists.txt @@ -1,10 +1,13 @@ -set(TARGET llama-debug-template-parser) -add_executable(${TARGET} debug-template-parser.cpp) -target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT}) -target_compile_features(${TARGET} PRIVATE cxx_std_17) +if (NOT WIN32 OR NOT BUILD_SHARED_LIBS) + # this tool is disabled on Windows when building with shared libraries because it uses internal functions not exported with LLAMA_API + set(TARGET llama-debug-template-parser) + add_executable(${TARGET} debug-template-parser.cpp) + target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT}) + target_compile_features(${TARGET} PRIVATE cxx_std_17) -if(LLAMA_TOOLS_INSTALL) - install(TARGETS ${TARGET} RUNTIME) + if(LLAMA_TOOLS_INSTALL) + install(TARGETS ${TARGET} RUNTIME) + endif() endif() set(TARGET llama-template-analysis) From 56ca124850d7c462d31bdb9289481a063f0d7a3a Mon Sep 17 00:00:00 2001 From: Piotr Wilkin Date: Wed, 11 Feb 2026 22:41:58 +0100 Subject: [PATCH 27/39] Document helpers --- common/chat-auto-parser-helpers.h | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/common/chat-auto-parser-helpers.h b/common/chat-auto-parser-helpers.h index 53d3454566..47e7a2a3d8 100644 --- a/common/chat-auto-parser-helpers.h +++ b/common/chat-auto-parser-helpers.h @@ -10,16 +10,47 @@ std::string trim_trailing_newlines(const std::string & str); // calculate a diff split (longest common prefix, longest common 
suffix excluding prefix, // mismatched part on the left, mismatched part on the right) between two strings +// account for markers - align prefix and suffix endings so that they end on markers +// * eg.: +// calculate_diff_split("
", "

Something

") -> +// { "prefix": "" (not: "<"), "suffix": "", "left": "
", "right": "

Something

" } +// calculate_diff_split("Something", "") -> +// { "prefix": "", "suffix": "", "left": "Something", "right": "" } diff_split calculate_diff_split(const std::string & left, const std::string & right); // Returns the prefix of `full` up until the first occurrence of the common prefix of `left` and `right` +// Returns empty string if there's no common prefix +// * eg.: +// until_common_prefix("really want a FUNCTION call", "FUNCTION alpha", "FUNCTION beta") -> "really want a " +// until_common_prefix("", "", "") -> "" +// until_common_prefix("some text", "1234", "abcd") -> "" +// until_common_prefix("one arg two args three args four", "argument alpha", "argument beta") -> "one "" std::string until_common_prefix(const std::string & full, const std::string & left, const std::string & right); // Returns the suffix of `full` after the last occurrence of the common suffix of `left` and `right` +// Returns empty string if there's no common suffix +// Mirror function of `until_common_prefix` +// * eg.: +// after_common_suffix("really want a FUNCTION call", "first FUNCTION", "second FUNCTION") -> " call" +// after_common_suffix("one arg two-args three args four", "alpha-args", "beta-args") -> " three args four" std::string after_common_suffix(const std::string & full, const std::string & left, const std::string & right); // Segmentize text into markers and non-marker fragments +// * eg.: +// segmentize_markers("The site title
Here's some content
" -> +// [ (MARKER, ""), (MARKER, ""), (MARKER, ""), (TEXT, "The site title"), (MARKER, ""), +// (MARKER, ""), (MARKER, "
"), (TEXT, "Here's some "), (MARKER, ""), (TEXT, "content"), (MARKER, ""), +// (MARKER, "
"), (MARKER, ""), (MARKER, "") +// ] +// segmentize_markers("<|tool_call|>[args]{ are here }[/args]<|tool_call_end|>") -> +// [ (MARKER, "<|tool_call|>"), (MARKER, "[args]"), (TEXT, "{ are here }"), (MARKER, "[/args]"), (MARKER, "<|tool_call_end|>") ] std::vector segmentize_markers(const std::string & text); // Prune whitespace-only segments from a vector of segments +// * eg.: +// segmentize_markers("\n\n\n \n\n\n") -> +// X = [ (MARKER, ""), (TEXT, "\n"), (MARKER, ""), (TEXT, "\n"), (MARKER, ""), (TEXT, "\n \n"), +// (MARKER, ""), (TEXT, "\n"), (MARKER, ""), (TEXT, "\n"), (MARKER, "") ] +// prune_whitespace_segments(X) -> [ (MARKER, ""), (MARKER, ""), (MARKER, ""), (MARKER, ""), +// (MARKER, ""), (MARKER, "") ] std::vector prune_whitespace_segments(const std::vector & segments); From e40d4cd70658bb0736ec134559a2996cc48b2c95 Mon Sep 17 00:00:00 2001 From: Piotr Wilkin Date: Wed, 11 Feb 2026 22:53:02 +0100 Subject: [PATCH 28/39] Get rid of some crazy formatting --- common/chat-diff-analyzer.cpp | 196 +++++++++++++++------------------- 1 file changed, 85 insertions(+), 111 deletions(-) diff --git a/common/chat-diff-analyzer.cpp b/common/chat-diff-analyzer.cpp index 53906102d8..90ac15f2f0 100644 --- a/common/chat-diff-analyzer.cpp +++ b/common/chat-diff-analyzer.cpp @@ -20,77 +20,76 @@ static std::vector void { - if (tmpl.src.find("content.split('
')") != std::string::npos && - analysis.reasoning == reasoning_mode::NONE) { - analysis.reasoning = reasoning_mode::FORCED_OPEN; - analysis.markers.reasoning_start = ""; - analysis.markers.reasoning_end = ""; - analysis.preserved_tokens.push_back(""); - analysis.preserved_tokens.push_back(""); - LOG_DBG(ANSI_ORANGE "[Patch: old Qwen/Deepseek thinking template]\n" ANSI_RESET); + if (tmpl.src.find("content.split('
')") != std::string::npos && analysis.reasoning == reasoning_mode::NONE) { + analysis.reasoning = reasoning_mode::FORCED_OPEN; + analysis.markers.reasoning_start = ""; + analysis.markers.reasoning_end = ""; + analysis.preserved_tokens.push_back(""); + analysis.preserved_tokens.push_back(""); + LOG_DBG(ANSI_ORANGE "[Patch: old Qwen/Deepseek thinking template]\n" ANSI_RESET); } }, // Granite 3.3, with separate reasoning and content markers [](const common_chat_template & tmpl, diff_analysis_result & analysis) -> void { if (tmpl.src.find("Write your thoughts between and write your response between " "") != std::string::npos) { - analysis.reasoning = reasoning_mode::TAG_BASED; - analysis.markers.reasoning_start = ""; - analysis.markers.reasoning_end = ""; - analysis.preserved_tokens.push_back(""); - analysis.preserved_tokens.push_back(""); - analysis.content = content_mode::WRAPPED_WITH_REASONING; - analysis.markers.content_start = ""; - analysis.markers.content_end = ""; - analysis.preserved_tokens.push_back(""); - analysis.preserved_tokens.push_back(""); - LOG_DBG(ANSI_ORANGE "[Patch: Granite 3.3]\n" ANSI_RESET); + analysis.reasoning = reasoning_mode::TAG_BASED; + analysis.markers.reasoning_start = ""; + analysis.markers.reasoning_end = ""; + analysis.preserved_tokens.push_back(""); + analysis.preserved_tokens.push_back(""); + analysis.content = content_mode::WRAPPED_WITH_REASONING; + analysis.markers.content_start = ""; + analysis.markers.content_end = ""; + analysis.preserved_tokens.push_back(""); + analysis.preserved_tokens.push_back(""); + LOG_DBG(ANSI_ORANGE "[Patch: Granite 3.3]\n" ANSI_RESET); } }, // Cohere Command R+ - content wrapped in <|CHATBOT_TOKEN|>...<|END_OF_TURN_TOKEN|> [](const common_chat_template & tmpl, diff_analysis_result & analysis) -> void { if (tmpl.src.find("<|CHATBOT_TOKEN|>") != std::string::npos && - tmpl.src.find("<|END_OF_TURN_TOKEN|>") != std::string::npos && analysis.markers.content_start.empty()) { - analysis.content = 
content_mode::ALWAYS_WRAPPED; - analysis.markers.content_start = "<|CHATBOT_TOKEN|>"; - analysis.markers.content_end = "<|END_OF_TURN_TOKEN|>"; - analysis.preserved_tokens.push_back("<|CHATBOT_TOKEN|>"); - analysis.preserved_tokens.push_back("<|END_OF_TURN_TOKEN|>"); - LOG_DBG(ANSI_ORANGE "[Patch: Cohere Command R+]\n" ANSI_RESET); + tmpl.src.find("<|END_OF_TURN_TOKEN|>") != std::string::npos && + analysis.markers.content_start.empty()) { + analysis.content = content_mode::ALWAYS_WRAPPED; + analysis.markers.content_start = "<|CHATBOT_TOKEN|>"; + analysis.markers.content_end = "<|END_OF_TURN_TOKEN|>"; + analysis.preserved_tokens.push_back("<|CHATBOT_TOKEN|>"); + analysis.preserved_tokens.push_back("<|END_OF_TURN_TOKEN|>"); + LOG_DBG(ANSI_ORANGE "[Patch: Cohere Command R+]\n" ANSI_RESET); } }, // Functionary - no tool call section delimiter [](const common_chat_template & tmpl, diff_analysis_result & analysis) -> void { if (tmpl.src.find("set has_code_interpreter = tools | selectattr(\"type\", \"equalto\", " "\"code_interpreter\") | list | length > 0") != std::string::npos) { - analysis.content = content_mode::PLAIN; - analysis.markers.content_end = ""; - analysis.markers.func_name_prefix = ""; - analysis.markers.tool_section_start = ""; - analysis.markers.tool_section_end = ""; - analysis.markers.per_call_start = ""); - analysis.preserved_tokens.push_back("<|eom_id|>"); - analysis.preserved_tokens.push_back(""); - analysis.preserved_tokens.push_back(""); - LOG_DBG(ANSI_ORANGE "[Patch: Functionary 3.1]\n" ANSI_RESET); - } + analysis.content = content_mode::PLAIN; + analysis.markers.content_end = ""; + analysis.markers.func_name_prefix = ""; + analysis.markers.tool_section_start = ""; + analysis.markers.tool_section_end = ""; + analysis.markers.per_call_start = ""); + analysis.preserved_tokens.push_back("<|eom_id|>"); + analysis.preserved_tokens.push_back(""); + analysis.preserved_tokens.push_back(""); + LOG_DBG(ANSI_ORANGE "[Patch: Functionary 3.1]\n" ANSI_RESET); + 
} }, // DeepSeek-R1-Distill-Qwen [](const common_chat_template & tmpl, diff_analysis_result & analysis) -> void { - if (tmpl.src.find( - "{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>'") != + if (tmpl.src.find("{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>'") != std::string::npos) { - analysis.markers.tool_section_start = "<|tool▁calls▁begin|>"; - analysis.markers.tool_section_end = "<|tool▁calls▁end|>"; - analysis.markers.per_call_start = "<|tool▁call▁begin|>function"; - analysis.markers.func_name_prefix = "<|tool▁sep|>"; - analysis.markers.per_call_end = "<|tool▁call▁end|>"; - analysis.markers.func_close = "```"; + analysis.markers.tool_section_start = "<|tool▁calls▁begin|>"; + analysis.markers.tool_section_end = "<|tool▁calls▁end|>"; + analysis.markers.per_call_start = "<|tool▁call▁begin|>function"; + analysis.markers.func_name_prefix = "<|tool▁sep|>"; + analysis.markers.per_call_end = "<|tool▁call▁end|>"; + analysis.markers.func_close = "```"; } } }); @@ -126,40 +125,16 @@ static json build_tool_call(const std::string & name, const json & args, const s } static json first_tool_call_zero_args = build_tool_call("foofoo", json::object(), "call00001"); -static json first_tool_call_one_arg = build_tool_call("foofoo", - json{ - { "first", "XXXX" } -}, - "call00001"); -static json first_tool_call_one_arg_other_val = build_tool_call("foofoo", - json{ - { "first", "YYYY" } -}, - "call00001"); -static json first_tool_call_other_arg = build_tool_call("foofoo", - json{ - { "second", "YYYY" } -}, - "call00001"); -static json first_tool_call = build_tool_call("foofoo", - json{ - { "first", "XXXX" }, - { "second", "YYYY" } -}, - "call00001"); -static json second_tool_call = build_tool_call("barbar", - json{ - { "first", "XXXX" }, - { "second", "YYYY" } -}, - "call00002"); -// Tool call variants with different IDs for call_id detection -static json first_tool_call_alt_id = 
build_tool_call("foofoo", - json{ - { "first", "XXXX" }, - { "second", "YYYY" } -}, - "call99999"); +static json first_tool_call_one_arg = build_tool_call("foofoo", json{ "first", "XXXX" }, "call00001"); +static json first_tool_call_one_arg_other_val = build_tool_call("foofoo",json{{ "first", "YYYY" }}, "call00001"); +static json first_tool_call_other_arg = build_tool_call("foofoo",json{ { "second", "YYYY" }}, "call00001"); + +static json first_tool_call = + build_tool_call("foofoo", json{{ "first", "XXXX" }, { "second", "YYYY" }}, "call00001"); +static json second_tool_call = + build_tool_call("barbar", json{ { "first", "XXXX" }, { "second", "YYYY" }}, "call00002"); +static json first_tool_call_alt_id = + build_tool_call("foofoo", json{{ "first", "XXXX" }, { "second", "YYYY" }}, "call99999"); std::string differential_analyzer::apply_template(const common_chat_template & tmpl, const template_params & params) { templates_params tmpl_params; @@ -441,11 +416,11 @@ void differential_analyzer::compare_reasoning_scope(const common_chat_template & }; json assistant_reasoning_tools = json{ - { "role", "assistant" }, - { "content", nullptr }, - { "reasoning_content", "Let me think." }, + { "role", "assistant" }, + { "content", nullptr }, + { "reasoning_content", "Let me think." 
}, { "tool_calls", - json::array({ build_tool_call("foofoo", json{ { "first", "VVVV" }, { "second", "XXXX" } }) }) } + json::array({ build_tool_call("foofoo", json{ { "first", "VVVV" }, { "second", "XXXX" } }) }) } }; template_params params; @@ -532,8 +507,8 @@ void differential_analyzer::compare_content_values(const common_chat_template & }; json assistant_with_tools = json{ - { "role", "assistant" }, - { "content", "" }, + { "role", "assistant" }, + { "content", "" }, { "tool_calls", json::array({ build_tool_call("test_func", json{ { "arg1", "value1" } }) }) } }; @@ -685,8 +660,7 @@ void differential_analyzer::analyze_tool_call_format_json_native(const std::stri int json_end = clean_haystack.find_last_of('}'); std::string cut = clean_haystack.substr(json_start, json_end - json_start + 1); json call_struct = json::parse(cut); - auto register_field = [&](const std::string & prefix, - const nlohmann::detail::iteration_proxy_value & subel) { + auto register_field = [&](const std::string & prefix, const nlohmann::detail::iteration_proxy_value & subel) { if (subel.value().is_string() && std::string(subel.value()).find("call0000") != std::string::npos) { result.id_field = !prefix.empty() ? prefix + "." 
+ subel.key() : subel.key(); } else if (subel.value().is_string() && std::string(subel.value()) == fun_name_needle) { @@ -883,14 +857,14 @@ void differential_analyzer::analyze_tools(const common_chat_template & tmpl, dif void differential_analyzer::check_per_call_markers(const common_chat_template & tmpl, diff_analysis_result & result) { json assistant_one_tool = json{ - { "role", "assistant" }, - { "content", "" }, + { "role", "assistant" }, + { "content", "" }, { "tool_calls", json::array({ first_tool_call }) } }; json assistant_two_tools = json{ - { "role", "assistant" }, - { "content", "" }, + { "role", "assistant" }, + { "content", "" }, { "tool_calls", json::array({ first_tool_call, second_tool_call }) } }; @@ -1169,14 +1143,14 @@ void differential_analyzer::extract_function_markers(const common_chat_template void differential_analyzer::extract_argument_separator(const common_chat_template & tmpl, diff_analysis_result & result) { json assistant_one_arg = json{ - { "role", "assistant" }, - { "content", "" }, + { "role", "assistant" }, + { "content", "" }, { "tool_calls", json::array({ first_tool_call_one_arg }) } }; json assistant_two_args = json{ - { "role", "assistant" }, - { "content", "" }, + { "role", "assistant" }, + { "content", "" }, { "tool_calls", json::array({ first_tool_call }) } }; @@ -1207,14 +1181,14 @@ void differential_analyzer::extract_argument_separator(const common_chat_templat void differential_analyzer::extract_args_markers(const common_chat_template & tmpl, diff_analysis_result & result) { json assistant_no_args = json{ - { "role", "assistant" }, - { "content", "" }, + { "role", "assistant" }, + { "content", "" }, { "tool_calls", json::array({ first_tool_call_zero_args }) } }; json assistant_with_args = json{ - { "role", "assistant" }, - { "content", "" }, + { "role", "assistant" }, + { "content", "" }, { "tool_calls", json::array({ first_tool_call_one_arg }) } }; @@ -1266,14 +1240,14 @@ void 
differential_analyzer::extract_args_markers(const common_chat_template & tm void differential_analyzer::extract_call_id_markers(const common_chat_template & tmpl, diff_analysis_result & result) { json assistant_id1 = json{ - { "role", "assistant" }, + { "role", "assistant" }, { "content", "" }, { "tool_calls", json::array({ first_tool_call }) } }; json assistant_id2 = json{ - { "role", "assistant" }, - { "content", "" }, + { "role", "assistant" }, + { "content", "" }, { "tool_calls", json::array({ first_tool_call_alt_id }) } }; @@ -1458,14 +1432,14 @@ void differential_analyzer::analyze_arguments(const common_chat_template & tmpl, void differential_analyzer::extract_argument_name_markers(const common_chat_template & tmpl, diff_analysis_result & result) { json assistant_first_arg = json{ - { "role", "assistant" }, - { "content", "" }, + { "role", "assistant" }, + { "content", "" }, { "tool_calls", json::array({ first_tool_call_one_arg }) } }; json assistant_second_arg = json{ - { "role", "assistant" }, - { "content", "" }, + { "role", "assistant" }, + { "content", "" }, { "tool_calls", json::array({ first_tool_call_other_arg }) } }; From 3096ecaa951cf6ffa235ede88d1ad43dc5d37a47 Mon Sep 17 00:00:00 2001 From: Piotr Wilkin Date: Wed, 11 Feb 2026 23:44:52 +0100 Subject: [PATCH 29/39] One more crazy spacing out --- common/chat-diff-analyzer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/chat-diff-analyzer.cpp b/common/chat-diff-analyzer.cpp index 90ac15f2f0..b66ef400d1 100644 --- a/common/chat-diff-analyzer.cpp +++ b/common/chat-diff-analyzer.cpp @@ -1503,7 +1503,7 @@ void differential_analyzer::extract_argument_name_markers(const common_chat_temp auto left_seg = segmentize_markers(diff.left); if (left_seg.size() == 1) { // only the name + maybe extra whitespace / normal chars in differing part result.markers.arg_name_suffix = diff.left.substr(5); - auto suf_seg = segmentize_markers(diff.suffix); + auto suf_seg= 
segmentize_markers(diff.suffix); for (size_t i = 0; i < suf_seg.size(); i++) { result.markers.arg_name_suffix += suf_seg[i].value; if (suf_seg[i].type == segment_type::MARKER) { From 822fd2bee965f4cf674df4102a1df60187ba1a0e Mon Sep 17 00:00:00 2001 From: Piotr Wilkin Date: Thu, 12 Feb 2026 17:22:59 +0100 Subject: [PATCH 30/39] Whoops --- common/chat-diff-analyzer.cpp | 6 +++--- tools/parser/debug-template-parser.cpp | 14 +++++++++----- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/common/chat-diff-analyzer.cpp b/common/chat-diff-analyzer.cpp index b66ef400d1..080fecfce2 100644 --- a/common/chat-diff-analyzer.cpp +++ b/common/chat-diff-analyzer.cpp @@ -125,9 +125,9 @@ static json build_tool_call(const std::string & name, const json & args, const s } static json first_tool_call_zero_args = build_tool_call("foofoo", json::object(), "call00001"); -static json first_tool_call_one_arg = build_tool_call("foofoo", json{ "first", "XXXX" }, "call00001"); -static json first_tool_call_one_arg_other_val = build_tool_call("foofoo",json{{ "first", "YYYY" }}, "call00001"); -static json first_tool_call_other_arg = build_tool_call("foofoo",json{ { "second", "YYYY" }}, "call00001"); +static json first_tool_call_one_arg = build_tool_call("foofoo", {{ "first", "XXXX" }}, "call00001"); +static json first_tool_call_one_arg_other_val = build_tool_call("foofoo", {{ "first", "YYYY" }}, "call00001"); +static json first_tool_call_other_arg = build_tool_call("foofoo", {{ "second", "YYYY" }}, "call00001"); static json first_tool_call = build_tool_call("foofoo", json{{ "first", "XXXX" }, { "second", "YYYY" }}, "call00001"); diff --git a/tools/parser/debug-template-parser.cpp b/tools/parser/debug-template-parser.cpp index 06ceb0f02f..c0e29c548a 100644 --- a/tools/parser/debug-template-parser.cpp +++ b/tools/parser/debug-template-parser.cpp @@ -39,6 +39,7 @@ struct debug_options { bool with_tools = true; bool generation_prompt = true; bool enable_reasoning = true; + bool 
debug_jinja = false; output_mode mode = output_mode::BOTH; input_message_type input_message = input_message_type::NONE; }; @@ -88,6 +89,7 @@ static void print_usage(const char * program_name) { LOG_ERR(" --generation-prompt=0|1 Set add_generation_prompt (default: 1)\n"); LOG_ERR(" --enable-reasoning=0|1 Enable reasoning parsing (default: 1)\n"); LOG_ERR(" --output=MODE Output mode: analysis, template, both (default: both)\n"); + LOG_ERR(" --debug-jinja Enable Jinja fine-grained debug\n"); LOG_ERR(" --input-message=TYPE Message type to render:\n"); LOG_ERR(" content_only, reasoning_content, tool_call_only,\n"); LOG_ERR(" content_tool_call, reasoning_tool_call,\n"); @@ -112,7 +114,9 @@ static bool parse_options(int argc, char ** argv, debug_options & opts) { for (int i = 2; i < argc; ++i) { std::string arg = argv[i]; - if (arg == "--no-tools") { + if (arg == "--debug-jinja") { + opts.debug_jinja = true; + } else if (arg == "--no-tools") { opts.with_tools = false; } else if (arg.rfind("--generation-prompt=", 0) == 0) { opts.generation_prompt = parse_bool_option(arg.substr(20)); @@ -339,15 +343,15 @@ int main(int argc, char ** argv) { // Set log level to most verbose to capture all debug output common_log_set_verbosity_thold(99); - if (std::getenv("LLAMA_DEBUG_JINJA") != nullptr) { - jinja::enable_debug(true); - } - debug_options opts; if (!parse_options(argc, argv, opts)) { return 1; } + if (opts.debug_jinja || std::getenv("LLAMA_DEBUG_JINJA") != nullptr) { + jinja::enable_debug(true); + } + std::string template_source; try { // Check if the file is a GGUF file From 28fcef67c06fb0e2fb73a0d8f6cde0630d76b727 Mon Sep 17 00:00:00 2001 From: Piotr Wilkin Date: Fri, 13 Feb 2026 00:55:35 +0100 Subject: [PATCH 31/39] -> Refactor autoparser analyzer structure -> Fix content truncation -> Fix errors in capability detection due to non-empty assistant message -> Add missing debug prints for Jinja --- common/chat-auto-parser-generator.cpp | 159 ++- common/chat-diff-analyzer.cpp | 
1124 ++++++++--------- common/chat-diff-analyzer.h | 269 ++-- common/chat-peg-parser.cpp | 4 +- common/chat.cpp | 2 +- common/jinja/caps.cpp | 11 +- common/jinja/runtime.cpp | 4 +- .../templates/deepseek-ai-DeepSeek-V3.1.jinja | 6 +- tests/test-chat-auto-parser.cpp | 98 +- tests/test-chat-peg-parser.cpp | 2 +- tests/test-chat-template.cpp | 58 +- tools/parser/debug-template-parser.cpp | 62 +- 12 files changed, 903 insertions(+), 896 deletions(-) diff --git a/common/chat-auto-parser-generator.cpp b/common/chat-auto-parser-generator.cpp index e9fe71c1d6..13ec14fb64 100644 --- a/common/chat-auto-parser-generator.cpp +++ b/common/chat-auto-parser-generator.cpp @@ -30,8 +30,8 @@ common_chat_params universal_peg_generator::generate_parser(const common_chat_te const struct templates_params & inputs, const diff_analysis_result & analysis) { // Check for thinking forced open - bool thinking_forced_open = (analysis.reasoning == reasoning_mode::FORCED_OPEN); - bool thinking_forced_closed = (analysis.reasoning == reasoning_mode::FORCED_CLOSED); + bool thinking_forced_open = (analysis.reasoning.mode == reasoning_mode::FORCED_OPEN); + bool thinking_forced_closed = (analysis.reasoning.mode == reasoning_mode::FORCED_CLOSED); // Build the parser using the analysis results auto parser = build_parser(analysis, inputs, thinking_forced_open, thinking_forced_closed); @@ -44,7 +44,7 @@ common_chat_params universal_peg_generator::generate_parser(const common_chat_te data.parser = parser.save(); // Build grammar if tools are present - bool has_tools = inputs.tools.is_array() && !inputs.tools.empty(); + bool has_tools = analysis.tools.format.mode != tool_format::NONE && inputs.tools.is_array() && !inputs.tools.empty(); bool include_grammar = has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE; if (include_grammar) { @@ -60,9 +60,9 @@ common_chat_params universal_peg_generator::generate_parser(const common_chat_te }); // Set grammar triggers based on tool section markers (fall 
back to per-call markers) - std::string trigger_marker = !analysis.markers.tool_section_start.empty() - ? analysis.markers.tool_section_start - : analysis.markers.per_call_start; + std::string trigger_marker = !analysis.tools.format.section_start.empty() + ? analysis.tools.format.section_start + : analysis.tools.format.per_call_start; if (!trigger_marker.empty()) { data.grammar_triggers = { { COMMON_GRAMMAR_TRIGGER_TYPE_WORD, trigger_marker } @@ -79,26 +79,24 @@ common_peg_arena universal_peg_generator::build_parser(const diff_analysis_resul bool thinking_forced_closed) { return build_chat_peg_unified_parser([&](common_chat_peg_unified_builder & p) { p.set_allow_python_dict_format(true); - const auto & m = analysis.markers; - common_peg_parser reasoning = p.eps(); bool extract_reasoning = inputs.reasoning_format != COMMON_REASONING_FORMAT_NONE; bool enable_thinking = inputs.enable_thinking; - if (extract_reasoning && enable_thinking && analysis.reasoning != reasoning_mode::NONE) { + if (extract_reasoning && enable_thinking && analysis.reasoning.mode != reasoning_mode::NONE) { if (thinking_forced_open || thinking_forced_closed) { // Thinking is forced open OR forced closed with enable_thinking=true // In both cases, expect only the closing tag (opening was in template) - reasoning = p.reasoning(p.until(m.reasoning_end)) + m.reasoning_end; - } else if (analysis.reasoning == reasoning_mode::TAG_BASED || - analysis.reasoning == reasoning_mode::TOOLS_ONLY) { + reasoning = p.reasoning(p.until(analysis.reasoning.end)) + analysis.reasoning.end; + } else if (analysis.reasoning.mode == reasoning_mode::TAG_BASED || + analysis.reasoning.mode == reasoning_mode::TOOLS_ONLY) { // Standard tag-based reasoning OR tools-only mode (reasoning appears with tools) // Both use the same tag-based pattern if markers are available - if (!m.reasoning_start.empty() && !m.reasoning_end.empty()) { - reasoning = p.optional(m.reasoning_start + p.reasoning(p.until(m.reasoning_end)) + 
m.reasoning_end); + if (!analysis.reasoning.start.empty() && !analysis.reasoning.end.empty()) { + reasoning = p.optional(analysis.reasoning.start + p.reasoning(p.until(analysis.reasoning.end)) + analysis.reasoning.end); } - } else if (analysis.reasoning == reasoning_mode::DELIMITER) { - reasoning = p.optional(p.reasoning(p.until(m.reasoning_end)) + m.reasoning_end); + } else if (analysis.reasoning.mode == reasoning_mode::DELIMITER) { + reasoning = p.optional(p.reasoning(p.until(analysis.reasoning.end)) + analysis.reasoning.end); } } @@ -109,19 +107,19 @@ common_peg_arena universal_peg_generator::build_parser(const diff_analysis_resul return reasoning + p.space() + p.content(p.schema(p.json(), "response-format", inputs.json_schema)) + p.end(); } - if (has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE && analysis.supports_tools) { + if (has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE && analysis.jinja_caps.supports_tool_calls) { return build_tool_parser(p, analysis, inputs, reasoning); } - if (analysis.content == content_mode::ALWAYS_WRAPPED && - !m.content_start.empty() && !m.content_end.empty()) { + if (analysis.content.mode == content_mode::ALWAYS_WRAPPED && + !analysis.content.start.empty() && !analysis.content.end.empty()) { - bool extracting_reasoning = extract_reasoning && enable_thinking && analysis.reasoning != reasoning_mode::NONE; + bool extracting_reasoning = extract_reasoning && enable_thinking && analysis.reasoning.mode != reasoning_mode::NONE; if (extracting_reasoning) { - return reasoning + m.content_start + p.content(p.until(m.content_end)) + m.content_end + p.end(); + return reasoning + analysis.content.start + p.content(p.until(analysis.content.end)) + analysis.content.end + p.end(); } - return p.content(p.until(m.content_start)) + m.content_start + p.content(p.until(m.content_end)) + m.content_end + p.end(); + return p.content(p.until(analysis.content.start)) + analysis.content.start + 
p.content(p.until(analysis.content.end)) + analysis.content.end + p.end(); } return reasoning + p.content(p.rest()) + p.end(); }); @@ -133,7 +131,7 @@ common_peg_parser universal_peg_generator::build_tool_parser( const templates_params & inputs, const common_peg_parser & reasoning) { - switch (analysis.tools) { + switch (analysis.tools.format.mode) { case tool_format::JSON_NATIVE: return build_tool_parser_json_native(p, analysis, inputs, reasoning); case tool_format::TAG_WITH_JSON: @@ -151,42 +149,40 @@ common_peg_parser universal_peg_generator::build_tool_parser_json_native( const templates_params & inputs, const common_peg_parser & reasoning) { - const auto & m = analysis.markers; - // Build effective field names with dot notation if function_field is set - std::string name_field = analysis.name_field; - std::string args_field = analysis.args_field; + std::string name_field = analysis.tools.format.name_field; + std::string args_field = analysis.tools.format.args_field; - if (!analysis.function_field.empty() && - analysis.function_field != "function" && + if (!analysis.tools.format.function_field.empty() && + analysis.tools.format.function_field != "function" && name_field.find('.') == std::string::npos) { - name_field = analysis.function_field + "." + name_field; - args_field = analysis.function_field + "." + args_field; + name_field = analysis.tools.format.function_field + "." + name_field; + args_field = analysis.tools.format.function_field + "." 
+ args_field; } auto tools_parser = p.standard_json_tools( - m.tool_section_start, - m.tool_section_end, + analysis.tools.format.section_start, + analysis.tools.format.section_end, inputs.tools, inputs.parallel_tool_calls, inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED, name_field, args_field, - analysis.tools_array_wrapped, - analysis.fun_name_is_key, - analysis.id_field, - analysis.gen_id_field, - analysis.parameter_order + analysis.tools.format.tools_array_wrapped, + analysis.tools.format.fun_name_is_key, + analysis.tools.format.id_field, + analysis.tools.format.gen_id_field, + analysis.tools.format.parameter_order ); // Handle content wrappers if present - if (analysis.content == content_mode::ALWAYS_WRAPPED && - !m.content_start.empty() && !m.content_end.empty()) { - auto wrapped_content = p.optional(m.content_start + p.content(p.until(m.content_end)) + m.content_end); + if (analysis.content.mode == content_mode::ALWAYS_WRAPPED && + !analysis.content.start.empty() && !analysis.content.end.empty()) { + auto wrapped_content = p.optional(analysis.content.start + p.content(p.until(analysis.content.end)) + analysis.content.end); return reasoning + wrapped_content + tools_parser + p.end(); } - auto content_before_tools = m.tool_section_start.empty() ? p.eps() : p.until(m.tool_section_start); + auto content_before_tools = analysis.tools.format.section_start.empty() ? 
p.eps() : p.until(analysis.tools.format.section_start); return reasoning + p.optional(p.content(content_before_tools)) + tools_parser + p.end(); } @@ -196,7 +192,6 @@ common_peg_parser universal_peg_generator::build_tool_parser_tag_json( const templates_params & inputs, const common_peg_parser & reasoning) { - const auto & m = analysis.markers; common_peg_parser tool_choice = p.choice(); foreach_function(inputs.tools, [&](const json & tool) { @@ -206,17 +201,17 @@ common_peg_parser universal_peg_generator::build_tool_parser_tag_json( // Build call_id parser based on position (if supported) common_peg_parser call_id_section = p.eps(); - if (analysis.call_id_pos == call_id_position::BETWEEN_FUNC_AND_ARGS && - !m.call_id_prefix.empty() && !m.call_id_suffix.empty()) { - call_id_section = p.optional(m.call_id_prefix + p.tool_id(p.until(m.call_id_suffix))) + m.call_id_suffix; + if (analysis.tools.call_id.pos == call_id_position::BETWEEN_FUNC_AND_ARGS && + !analysis.tools.call_id.prefix.empty() && !analysis.tools.call_id.suffix.empty()) { + call_id_section = p.optional(analysis.tools.call_id.prefix + p.tool_id(p.until(analysis.tools.call_id.suffix))) + analysis.tools.call_id.suffix; } - auto func_parser = p.tool_open(m.func_name_prefix + p.tool_name(p.literal(name)) + m.func_name_suffix) + + auto func_parser = p.tool_open(analysis.tools.function.name_prefix + p.tool_name(p.literal(name)) + analysis.tools.function.name_suffix) + call_id_section + p.tool_args(p.schema(p.json(), "tool-" + name + "-schema", schema)); - if (!m.func_close.empty()) { - func_parser = func_parser + m.func_close; + if (!analysis.tools.function.close.empty()) { + func_parser = func_parser + analysis.tools.function.close; } tool_choice |= p.rule("tool-" + name, func_parser); @@ -226,30 +221,26 @@ common_peg_parser universal_peg_generator::build_tool_parser_tag_json( common_peg_parser tool_calls = p.eps(); - if (!m.per_call_start.empty()) { - auto wrapped_call = m.per_call_start + tool_choice + 
m.per_call_end; + if (!analysis.tools.format.per_call_start.empty()) { + auto wrapped_call = analysis.tools.format.per_call_start + tool_choice + analysis.tools.format.per_call_end; if (inputs.parallel_tool_calls) { tool_calls = p.trigger_rule("tool-call", wrapped_call + p.zero_or_more(p.space() + wrapped_call)); } else { tool_calls = p.trigger_rule("tool-call", wrapped_call); } - if (!m.tool_section_start.empty()) { - tool_calls = p.trigger_rule("tool-calls", p.literal(m.tool_section_start) + p.space() + - tool_calls + p.space() + (m.tool_section_end.empty() ? p.end() : p.literal(m.tool_section_end))); + if (!analysis.tools.format.section_start.empty()) { + tool_calls = p.trigger_rule("tool-calls", p.literal(analysis.tools.format.section_start) + p.space() + + tool_calls + p.space() + (analysis.tools.format.section_end.empty() ? p.end() : p.literal(analysis.tools.format.section_end))); } } else { - std::string separator = m.call_separator; - if (separator.empty()) { - separator = ", "; // Default - } - + std::string separator = ", "; // Default if (inputs.parallel_tool_calls) { tool_calls = p.trigger_rule("tool-call", - m.tool_section_start + tool_choice + p.zero_or_more(separator + tool_choice) + m.tool_section_end); + analysis.tools.format.section_start + tool_choice + p.zero_or_more(separator + tool_choice) + analysis.tools.format.section_end); } else { tool_calls = p.trigger_rule("tool-call", - m.tool_section_start + tool_choice + m.tool_section_end); + analysis.tools.format.section_start + tool_choice + analysis.tools.format.section_end); } } @@ -257,7 +248,7 @@ common_peg_parser universal_peg_generator::build_tool_parser_tag_json( tool_calls = p.optional(tool_calls); } - std::string trigger_marker = !m.tool_section_start.empty() ? m.tool_section_start : m.per_call_start; + std::string trigger_marker = !analysis.tools.format.section_start.empty() ? 
analysis.tools.format.section_start : analysis.tools.format.per_call_start; auto content_before_tools = trigger_marker.empty() ? p.eps() : p.until(trigger_marker); return reasoning + p.optional(p.content(content_before_tools)) + tool_calls + p.end(); } @@ -268,7 +259,6 @@ common_peg_parser universal_peg_generator::build_tool_parser_tag_tagged( const templates_params & inputs, const common_peg_parser & reasoning) { - const auto & m = analysis.markers; common_peg_parser tool_choice = p.choice(); foreach_function(inputs.tools, [&](const json & tool) { @@ -293,13 +283,13 @@ common_peg_parser universal_peg_generator::build_tool_parser_tag_tagged( auto type = param_schema.value("type", "object"); auto arg = p.tool_arg( - p.tool_arg_open(m.arg_name_prefix + p.tool_arg_name(p.literal(param_name)) + m.arg_name_suffix) + m.arg_value_prefix + + p.tool_arg_open(analysis.tools.arguments.name_prefix + p.tool_arg_name(p.literal(param_name)) + analysis.tools.arguments.name_suffix) + analysis.tools.arguments.value_prefix + (type == "string" ? 
- p.tool_arg_string_value(p.schema(p.until(m.arg_value_suffix), + p.tool_arg_string_value(p.schema(p.until(analysis.tools.arguments.value_suffix), "tool-" + name + "-arg-" + param_name + "-schema", param_schema, true)) : p.tool_arg_json_value(p.schema(p.json(), "tool-" + name + "-arg-" + param_name + "-schema", param_schema)) + p.space()) + - p.tool_arg_close(p.literal(m.arg_value_suffix)) + p.tool_arg_close(p.literal(analysis.tools.arguments.value_suffix)) ); if (is_required) { @@ -320,23 +310,23 @@ common_peg_parser universal_peg_generator::build_tool_parser_tag_tagged( // Build call_id parser based on position (if supported) common_peg_parser call_id_section = p.eps(); - if (analysis.call_id_pos == call_id_position::BETWEEN_FUNC_AND_ARGS && - !m.call_id_prefix.empty() && !m.call_id_suffix.empty()) { - call_id_section = p.optional(m.call_id_prefix + p.tool_id(p.until(m.call_id_suffix))) + m.call_id_suffix; + if (analysis.tools.call_id.pos == call_id_position::BETWEEN_FUNC_AND_ARGS && + !analysis.tools.call_id.prefix.empty() && !analysis.tools.call_id.suffix.empty()) { + call_id_section = p.optional(analysis.tools.call_id.prefix + p.tool_id(p.until(analysis.tools.call_id.suffix))) + analysis.tools.call_id.suffix; } - auto func_parser = p.tool_open(m.func_name_prefix + p.tool_name(p.literal(name)) + m.func_name_suffix) + + auto func_parser = p.tool_open(analysis.tools.function.name_prefix + p.tool_name(p.literal(name)) + analysis.tools.function.name_suffix) + call_id_section + p.space() + args_seq; - if (!m.func_close.empty()) { - func_parser = func_parser + p.space() + p.tool_close(p.literal(m.func_close)); - } else if (!m.per_call_end.empty()) { + if (!analysis.tools.function.close.empty()) { + func_parser = func_parser + p.space() + p.tool_close(p.literal(analysis.tools.function.close)); + } else if (!analysis.tools.format.per_call_end.empty()) { // When there's no func_close but there is a per_call_end marker, use peek() to ensure // we only emit tool_close 
when we can actually see the closing marker. This prevents // premature closing during partial parsing when we've seen e.g. "" (end) or "" prefix that failed to match. - func_parser = func_parser + p.tool_close(p.peek(p.literal(m.per_call_end))); + func_parser = func_parser + p.tool_close(p.peek(p.literal(analysis.tools.format.per_call_end))); } else { func_parser = func_parser + p.tool_close(p.space()); // force this to process tool closing callbacks in mapper } @@ -348,29 +338,26 @@ common_peg_parser universal_peg_generator::build_tool_parser_tag_tagged( common_peg_parser tool_calls = p.eps(); - if (!m.per_call_start.empty()) { - auto wrapped_call = m.per_call_start + p.space() + tool_choice + p.space() + m.per_call_end; + if (!analysis.tools.format.per_call_start.empty()) { + auto wrapped_call = analysis.tools.format.per_call_start + p.space() + tool_choice + p.space() + analysis.tools.format.per_call_end; if (inputs.parallel_tool_calls) { tool_calls = p.trigger_rule("tool-call", wrapped_call + p.zero_or_more(p.space() + wrapped_call)); } else { tool_calls = p.trigger_rule("tool-call", wrapped_call); } - if (!m.tool_section_start.empty()) { - tool_calls = p.trigger_rule("tool-calls", p.literal(m.tool_section_start) + p.space() + - tool_calls + p.space() + (m.tool_section_end.empty() ? p.end() : p.literal(m.tool_section_end))); + if (!analysis.tools.format.section_start.empty()) { + tool_calls = p.trigger_rule("tool-calls", p.literal(analysis.tools.format.section_start) + p.space() + + tool_calls + p.space() + (analysis.tools.format.section_end.empty() ? 
p.end() : p.literal(analysis.tools.format.section_end))); } } else { - std::string separator = m.call_separator; - if (separator.empty()) { - separator = ", "; // Default - } + std::string separator = ", "; // Default if (inputs.parallel_tool_calls) { tool_calls = p.trigger_rule("tool-call", - m.tool_section_start + p.space() + tool_choice + p.zero_or_more(separator + tool_choice) + p.space() + m.tool_section_end); + analysis.tools.format.section_start + p.space() + tool_choice + p.zero_or_more(separator + tool_choice) + p.space() + analysis.tools.format.section_end); } else { tool_calls = p.trigger_rule("tool-call", - m.tool_section_start + p.space() + tool_choice + p.space() + m.tool_section_end); + analysis.tools.format.section_start + p.space() + tool_choice + p.space() + analysis.tools.format.section_end); } } @@ -378,7 +365,7 @@ common_peg_parser universal_peg_generator::build_tool_parser_tag_tagged( tool_calls = p.optional(tool_calls); } - std::string trigger_marker = !m.tool_section_start.empty() ? m.tool_section_start : m.per_call_start; + std::string trigger_marker = !analysis.tools.format.section_start.empty() ? analysis.tools.format.section_start : analysis.tools.format.per_call_start; auto content_before_tools = trigger_marker.empty() ? p.eps() : p.until(trigger_marker); return reasoning + p.optional(p.content(content_before_tools)) + tool_calls + p.end(); } diff --git a/common/chat-diff-analyzer.cpp b/common/chat-diff-analyzer.cpp index 080fecfce2..03978f6e57 100644 --- a/common/chat-diff-analyzer.cpp +++ b/common/chat-diff-analyzer.cpp @@ -20,76 +20,77 @@ static std::vector void { - if (tmpl.src.find("content.split('
')") != std::string::npos && analysis.reasoning == reasoning_mode::NONE) { - analysis.reasoning = reasoning_mode::FORCED_OPEN; - analysis.markers.reasoning_start = ""; - analysis.markers.reasoning_end = ""; - analysis.preserved_tokens.push_back(""); - analysis.preserved_tokens.push_back(""); - LOG_DBG(ANSI_ORANGE "[Patch: old Qwen/Deepseek thinking template]\n" ANSI_RESET); + if (tmpl.src.find("content.split('')") != std::string::npos && + analysis.reasoning.mode == reasoning_mode::NONE) { + analysis.reasoning.mode = reasoning_mode::FORCED_OPEN; + analysis.reasoning.start = ""; + analysis.reasoning.end = ""; + analysis.preserved_tokens.push_back(""); + analysis.preserved_tokens.push_back(""); + LOG_DBG(ANSI_ORANGE "[Patch: old Qwen/Deepseek thinking template]\n" ANSI_RESET); } }, // Granite 3.3, with separate reasoning and content markers [](const common_chat_template & tmpl, diff_analysis_result & analysis) -> void { if (tmpl.src.find("Write your thoughts between and write your response between " "") != std::string::npos) { - analysis.reasoning = reasoning_mode::TAG_BASED; - analysis.markers.reasoning_start = ""; - analysis.markers.reasoning_end = ""; - analysis.preserved_tokens.push_back(""); - analysis.preserved_tokens.push_back(""); - analysis.content = content_mode::WRAPPED_WITH_REASONING; - analysis.markers.content_start = ""; - analysis.markers.content_end = ""; - analysis.preserved_tokens.push_back(""); - analysis.preserved_tokens.push_back(""); - LOG_DBG(ANSI_ORANGE "[Patch: Granite 3.3]\n" ANSI_RESET); + analysis.reasoning.mode = reasoning_mode::TAG_BASED; + analysis.reasoning.start = ""; + analysis.reasoning.end = ""; + analysis.preserved_tokens.push_back(""); + analysis.preserved_tokens.push_back(""); + analysis.content.mode = content_mode::WRAPPED_WITH_REASONING; + analysis.content.start = ""; + analysis.content.end = ""; + analysis.preserved_tokens.push_back(""); + analysis.preserved_tokens.push_back(""); + LOG_DBG(ANSI_ORANGE "[Patch: Granite 3.3]\n" 
ANSI_RESET); } }, // Cohere Command R+ - content wrapped in <|CHATBOT_TOKEN|>...<|END_OF_TURN_TOKEN|> [](const common_chat_template & tmpl, diff_analysis_result & analysis) -> void { if (tmpl.src.find("<|CHATBOT_TOKEN|>") != std::string::npos && - tmpl.src.find("<|END_OF_TURN_TOKEN|>") != std::string::npos && - analysis.markers.content_start.empty()) { - analysis.content = content_mode::ALWAYS_WRAPPED; - analysis.markers.content_start = "<|CHATBOT_TOKEN|>"; - analysis.markers.content_end = "<|END_OF_TURN_TOKEN|>"; - analysis.preserved_tokens.push_back("<|CHATBOT_TOKEN|>"); - analysis.preserved_tokens.push_back("<|END_OF_TURN_TOKEN|>"); - LOG_DBG(ANSI_ORANGE "[Patch: Cohere Command R+]\n" ANSI_RESET); + tmpl.src.find("<|END_OF_TURN_TOKEN|>") != std::string::npos && analysis.content.start.empty()) { + analysis.content.mode = content_mode::ALWAYS_WRAPPED; + analysis.content.start = "<|CHATBOT_TOKEN|>"; + analysis.content.end = "<|END_OF_TURN_TOKEN|>"; + analysis.preserved_tokens.push_back("<|CHATBOT_TOKEN|>"); + analysis.preserved_tokens.push_back("<|END_OF_TURN_TOKEN|>"); + LOG_DBG(ANSI_ORANGE "[Patch: Cohere Command R+]\n" ANSI_RESET); } }, // Functionary - no tool call section delimiter [](const common_chat_template & tmpl, diff_analysis_result & analysis) -> void { if (tmpl.src.find("set has_code_interpreter = tools | selectattr(\"type\", \"equalto\", " "\"code_interpreter\") | list | length > 0") != std::string::npos) { - analysis.content = content_mode::PLAIN; - analysis.markers.content_end = ""; - analysis.markers.func_name_prefix = ""; - analysis.markers.tool_section_start = ""; - analysis.markers.tool_section_end = ""; - analysis.markers.per_call_start = ""); - analysis.preserved_tokens.push_back("<|eom_id|>"); - analysis.preserved_tokens.push_back(""); - analysis.preserved_tokens.push_back(""); - LOG_DBG(ANSI_ORANGE "[Patch: Functionary 3.1]\n" ANSI_RESET); - } + analysis.content.mode = content_mode::PLAIN; + analysis.content.end = ""; + 
analysis.tools.function.name_prefix = ""; + analysis.tools.format.section_start = ""; + analysis.tools.format.section_end = ""; + analysis.tools.format.per_call_start = ""); + analysis.preserved_tokens.push_back("<|eom_id|>"); + analysis.preserved_tokens.push_back(""); + analysis.preserved_tokens.push_back(""); + LOG_DBG(ANSI_ORANGE "[Patch: Functionary 3.1]\n" ANSI_RESET); + } }, // DeepSeek-R1-Distill-Qwen [](const common_chat_template & tmpl, diff_analysis_result & analysis) -> void { - if (tmpl.src.find("{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>'") != + if (tmpl.src.find( + "{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>'") != std::string::npos) { - analysis.markers.tool_section_start = "<|tool▁calls▁begin|>"; - analysis.markers.tool_section_end = "<|tool▁calls▁end|>"; - analysis.markers.per_call_start = "<|tool▁call▁begin|>function"; - analysis.markers.func_name_prefix = "<|tool▁sep|>"; - analysis.markers.per_call_end = "<|tool▁call▁end|>"; - analysis.markers.func_close = "```"; + analysis.tools.format.section_start = "<|tool▁calls▁begin|>"; + analysis.tools.format.section_end = "<|tool▁calls▁end|>"; + analysis.tools.format.per_call_start = "<|tool▁call▁begin|>function"; + analysis.tools.function.name_prefix = "<|tool▁sep|>"; + analysis.tools.format.per_call_end = "<|tool▁call▁end|>"; + analysis.tools.function.close = "```"; } } }); @@ -191,14 +192,12 @@ diff_analysis_result differential_analyzer::analyze(const common_chat_template & LOG_DBG(ANSI_PURPLE "=== Starting differential analysis ===\n" ANSI_RESET); - auto caps = tmpl.original_caps(); - result.supports_tools = caps.supports_tools || caps.supports_tool_calls; - result.supports_parallel_calls = caps.supports_parallel_tool_calls; + result.jinja_caps = tmpl.original_caps(); - analyze_reasoning(tmpl, result); - analyze_content(tmpl, result); - if (result.supports_tools) { - analyze_tools(tmpl, result); + result.reasoning 
= analyze_reasoning(tmpl, result.jinja_caps.supports_tool_calls); + result.content = analyze_content(tmpl, result.reasoning); + if (result.jinja_caps.supports_tool_calls) { + result.tools = analyze_tools(tmpl, result.jinja_caps, result.reasoning); } collect_preserved_tokens(result); @@ -211,18 +210,21 @@ diff_analysis_result differential_analyzer::analyze(const common_chat_template & return result; } -void differential_analyzer::analyze_reasoning(const common_chat_template & tmpl, diff_analysis_result & result) { +reasoning_analysis differential_analyzer::analyze_reasoning(const common_chat_template & tmpl, bool supports_tools) { LOG_DBG(ANSI_ORANGE "Phase 1: Reasoning analysis\n" ANSI_RESET); + reasoning_analysis result; + compare_reasoning_presence(tmpl, result); compare_thinking_enabled(tmpl, result); - if (result.supports_tools) { + if (supports_tools) { compare_reasoning_scope(tmpl, result); } + + return result; } -void differential_analyzer::compare_reasoning_presence(const common_chat_template & tmpl, - diff_analysis_result & result) { +void differential_analyzer::compare_reasoning_presence(const common_chat_template & tmpl, reasoning_analysis & reasoning) { json user_msg = json{ { "role", "user" }, { "content", "Hello" } @@ -248,38 +250,34 @@ void differential_analyzer::compare_reasoning_presence(const common_chat_templat tmpl, params, [&](template_params & p) { p.messages = json::array({ user_msg, assistant_with_reasoning }); }); if (!comparison) { - LOG_DBG(ANSI_ORANGE "R1: Template application failed, skipping reasoning detection\n" ANSI_RESET); + LOG_DBG(ANSI_ORANGE "%s: Template application failed, skipping reasoning detection\n" ANSI_RESET, __func__); return; } const auto & diff = comparison->diff; - LOG_DBG(ANSI_ORANGE "R1 diff - suffix: " ANSI_RESET "'%s', " ANSI_ORANGE "left: " ANSI_RESET "'%s', " ANSI_ORANGE - "right: " ANSI_ORANGE "'%s'\n" ANSI_RESET, - diff.suffix.c_str(), diff.left.c_str(), diff.right.c_str()); - const std::string 
reasoning_content = "Let me think about this."; if (!diff.right.empty() && diff.right.find(reasoning_content) != std::string::npos) { auto seg = prune_whitespace_segments(segmentize_markers(diff.right)); if (seg.size() >= 3 && trim_whitespace(seg[1].value) == reasoning_content) { // easy one: opening marker - reasoning - closing marker (possibly with trailing whitespace) - result.reasoning = reasoning_mode::TAG_BASED; - result.markers.reasoning_start = trim_whitespace(seg[0].value); - result.markers.reasoning_end = trim_leading_whitespace(seg[2].value); + reasoning.mode = reasoning_mode::TAG_BASED; + reasoning.start = trim_whitespace(seg[0].value); + reasoning.end = trim_leading_whitespace(seg[2].value); for (size_t i = 3; i < seg.size(); i++) { - result.markers.reasoning_end += seg[i].value; + reasoning.end += seg[i].value; } // we always truncate because this doesn't really influence correctness but model might not always generate newline - result.markers.reasoning_end = trim_whitespace(result.markers.reasoning_end); + reasoning.end = trim_whitespace(reasoning.end); } else if (seg.size() >= 2 && trim_whitespace(seg[0].value) == reasoning_content) { // delimited - result.reasoning = reasoning_mode::DELIMITER; - result.markers.reasoning_end = trim_leading_whitespace(seg[1].value); + reasoning.mode = reasoning_mode::DELIMITER; + reasoning.end = trim_leading_whitespace(seg[1].value); for (size_t i = 2; i < seg.size(); i++) { - result.markers.reasoning_end += seg[i].value; + reasoning.end += seg[i].value; } - result.markers.reasoning_end = trim_whitespace(result.markers.reasoning_end); + reasoning.end = trim_whitespace(reasoning.end); } else if (seg.size() == 1 && trim_whitespace(seg[0].value) == reasoning_content) { // the marker might be in the prefix actually, let's check for case of // left: empty @@ -297,16 +295,16 @@ void differential_analyzer::compare_reasoning_presence(const common_chat_templat if (marker_seg.type == segment_type::TEXT) { marker_seg = 
pre_seg[pre_seg.size() - 2]; } - result.reasoning = reasoning_mode::FORCED_CLOSED; - result.markers.reasoning_start = trim_whitespace(marker_seg.value); - result.markers.reasoning_end = trim_whitespace(suf_seg[0].value); + reasoning.mode = reasoning_mode::FORCED_CLOSED; + reasoning.start = trim_whitespace(marker_seg.value); + reasoning.end = trim_whitespace(suf_seg[0].value); } } } } } -void differential_analyzer::compare_thinking_enabled(const common_chat_template & tmpl, diff_analysis_result & result) { +void differential_analyzer::compare_thinking_enabled(const common_chat_template & tmpl, reasoning_analysis & reasoning) { json user_msg = json{ { "role", "user" }, { "content", "Hello" } @@ -320,15 +318,12 @@ void differential_analyzer::compare_thinking_enabled(const common_chat_template auto comparison = compare_variants(tmpl, params, [&](template_params & p) { p.enable_thinking = true; }); if (!comparison) { - LOG_DBG("R2: Template application failed\n"); + LOG_DBG(ANSI_ORANGE "%s: Template application failed\n" ANSI_RESET , __func__); return; } const auto & diff = comparison->diff; - LOG_DBG("R2 diff - suffix: '%s', left: '%s', right: '%s'\n", diff.suffix.c_str(), diff.left.c_str(), - diff.right.c_str()); - std::string left_trimmed = diff.left; trim_whitespace(left_trimmed); @@ -337,18 +332,15 @@ void differential_analyzer::compare_thinking_enabled(const common_chat_template trim_whitespace(right_trimmed); if (!right_trimmed.empty() && string_ends_with(comparison->output_B, right_trimmed)) { - if (result.markers.reasoning_start.empty()) { - result.markers.reasoning_start = right_trimmed; - result.reasoning = reasoning_mode::FORCED_OPEN; - LOG_DBG("R2: Detected forced-open reasoning with start marker: '%s'\n", right_trimmed.c_str()); + if (reasoning.start.empty()) { + reasoning.start = right_trimmed; + reasoning.mode = reasoning_mode::FORCED_OPEN; } } } - if (result.markers.reasoning_start.empty() && !result.markers.reasoning_end.empty()) { - result.reasoning = 
reasoning_mode::DELIMITER; - LOG_DBG("R2: Delimiter-based reasoning detected (empty start, end: '%s')\n", - result.markers.reasoning_end.c_str()); + if (reasoning.start.empty() && !reasoning.end.empty()) { + reasoning.mode = reasoning_mode::DELIMITER; } // Check for FORCED_CLOSED: when enable_thinking=false produces both start and end markers, @@ -360,33 +352,30 @@ void differential_analyzer::compare_thinking_enabled(const common_chat_template // Both should end with the assistant role marker // Check if output_A has both reasoning_start and reasoning_end markers // while output_B has only reasoning_start - if (!result.markers.reasoning_start.empty()) { + if (!reasoning.start.empty()) { // Check if output_A contains both start and end markers - bool A_has_start = output_A.find(result.markers.reasoning_start) != std::string::npos; - bool A_has_end = !result.markers.reasoning_end.empty() && - output_A.find(result.markers.reasoning_end) != std::string::npos; + bool A_has_start = output_A.find(reasoning.start) != std::string::npos; + bool A_has_end = !reasoning.end.empty() && output_A.find(reasoning.end) != std::string::npos; // Check if output_B contains only the start marker (and not the end marker) - bool B_has_start = output_B.find(result.markers.reasoning_start) != std::string::npos; - bool B_has_end = !result.markers.reasoning_end.empty() && - output_B.find(result.markers.reasoning_end) != std::string::npos; + bool B_has_start = output_B.find(reasoning.start) != std::string::npos; + bool B_has_end = !reasoning.end.empty() && output_B.find(reasoning.end) != std::string::npos; // For FORCED_CLOSED: A should have both, B should have only start if (A_has_start && A_has_end && B_has_start && !B_has_end) { - result.reasoning = reasoning_mode::FORCED_CLOSED; - LOG_DBG("R2: Detected forced-closed reasoning\n"); + reasoning.mode = reasoning_mode::FORCED_CLOSED; } - } else if (!result.markers.reasoning_end.empty()) { + } else if (!reasoning.end.empty()) { // We might not 
have detected the reasoning open marker until now, // but this is another chance to do so auto diff = comparison->diff; auto diff_rt = trim_whitespace(diff.right); auto diff_lt = trim_whitespace(diff.left); - if (diff_rt.empty() && diff_lt == result.markers.reasoning_end) { + if (diff_rt.empty() && diff_lt == reasoning.end) { auto seg = segmentize_markers(trim_whitespace(diff.prefix)); if (!seg.empty() && seg[seg.size() - 1].type == MARKER) { // this is FORCED_CLOSED - result.markers.reasoning_start = seg[seg.size() - 1].value; - result.reasoning = reasoning_mode::FORCED_CLOSED; + reasoning.start = seg[seg.size() - 1].value; + reasoning.mode = reasoning_mode::FORCED_CLOSED; } } } @@ -394,21 +383,21 @@ void differential_analyzer::compare_thinking_enabled(const common_chat_template // Check for slash-in-tag pattern: vs // diff shows: suffix="think>", left="/", right="" (or vice versa) - if (result.markers.reasoning_start.empty() && result.markers.reasoning_end.empty()) { + if (reasoning.start.empty() && reasoning.end.empty()) { if (diff.right.empty() && trim_whitespace(diff.left) == "/") { auto seg_A = segmentize_markers(trim_trailing_whitespace(comparison->output_A)); auto seg_B = segmentize_markers(trim_trailing_whitespace(comparison->output_B)); if (!seg_A.empty() && !seg_B.empty() && seg_A[seg_A.size() - 1].type == segment_type::MARKER && seg_B[seg_B.size() - 1].type == segment_type::MARKER) { - result.reasoning = reasoning_mode::FORCED_CLOSED; - result.markers.reasoning_start = seg_B[seg_B.size() - 1].value; - result.markers.reasoning_end = seg_A[seg_A.size() - 1].value; + reasoning.mode = reasoning_mode::FORCED_CLOSED; + reasoning.start = seg_B[seg_B.size() - 1].value; + reasoning.end = seg_A[seg_A.size() - 1].value; } } } } -void differential_analyzer::compare_reasoning_scope(const common_chat_template & tmpl, diff_analysis_result & result) { +void differential_analyzer::compare_reasoning_scope(const common_chat_template & tmpl, reasoning_analysis & reasoning) 
{ json assistant_reasoning_content = json{ { "role", "assistant" }, { "content", "Here is my response." }, @@ -433,23 +422,18 @@ void differential_analyzer::compare_reasoning_scope(const common_chat_template & tmpl, params, [&](template_params & p) { p.messages = json::array({ user_msg, assistant_reasoning_tools }); }); if (!comparison) { - LOG_DBG("R3: Template application failed\n"); + LOG_DBG(ANSI_ORANGE "%s: Template application failed\n" ANSI_RESET, __func__); return; } - const auto & diff = comparison->diff; - std::string reasoning_content = "Let me think."; - LOG_DBG("R3 diff - prefix: '%s', suffix: '%s', left: '%s', right: '%s'\n", diff.prefix.c_str(), diff.suffix.c_str(), - diff.left.c_str(), diff.right.c_str()); - // Check if reasoning only appears in variant B (with tools) bool reasoning_in_A = comparison->output_A.find(reasoning_content) != std::string::npos; bool reasoning_in_B = comparison->output_B.find(reasoning_content) != std::string::npos; if (!reasoning_in_A && reasoning_in_B) { - result.reasoning = reasoning_mode::TOOLS_ONLY; + reasoning.mode = reasoning_mode::TOOLS_ONLY; LOG_DBG("R3: Detected TOOLS_ONLY reasoning mode\n"); // Extract reasoning markers from output_B @@ -464,8 +448,7 @@ void differential_analyzer::compare_reasoning_scope(const common_chat_template & for (auto & segment : segments_before) { if (segment.type == segment_type::MARKER) { - result.markers.reasoning_start = segment.value; - LOG_DBG("R3: Found reasoning_start: '%s'\n", result.markers.reasoning_start.c_str()); + reasoning.start = segment.value; break; } } @@ -477,30 +460,25 @@ void differential_analyzer::compare_reasoning_scope(const common_chat_template & if (!after_reasoning.empty()) { // Try to find matching end marker - if (!result.markers.reasoning_start.empty()) { + if (!reasoning.start.empty()) { auto segments = segmentize_markers(after_reasoning); for (auto & segment : segments) { if (segment.type == segment_type::MARKER) { - result.markers.reasoning_end = 
segment.value; + reasoning.end = segment.value; break; } } - if (!result.markers.reasoning_end.empty()) { - LOG_DBG("R3: Found reasoning_end (matched): '%s'\n", result.markers.reasoning_end.c_str()); - } } } } } } -void differential_analyzer::analyze_content(const common_chat_template & tmpl, diff_analysis_result & result) { +content_analysis differential_analyzer::analyze_content(const common_chat_template & tmpl, const reasoning_analysis & reasoning) { LOG_DBG(ANSI_ORANGE "Phase 2: Content analysis\n" ANSI_RESET); - compare_content_values(tmpl, result); -} - -void differential_analyzer::compare_content_values(const common_chat_template & tmpl, diff_analysis_result & result) { + content_analysis result; + json assistant_content_only = json{ { "role", "assistant" }, { "content", "Response text" } @@ -533,8 +511,7 @@ void differential_analyzer::compare_content_values(const common_chat_template & }); if (!comparison_with_tools || !comparison_with_reasoning) { - LOG_DBG("C1: Template application failed\n"); - return; + LOG_DBG(ANSI_ORANGE "%s: Template application failed\n" ANSI_RESET, __func__); } const auto & diff_tools = comparison_with_tools->diff; @@ -548,52 +525,115 @@ void differential_analyzer::compare_content_values(const common_chat_template & if (trim_whitespace(diff_reasoning.left) == response || (segments.size() == 2 && trim_whitespace(segments[0].value) == response)) { // We only have the content text in the diff (possibly with a stray EOG marker), so no markers - LOG_DBG("C1: No content markers\n"); - result.content = content_mode::PLAIN; + result.mode = content_mode::PLAIN; found_plain_content = true; - } else if (result.reasoning != reasoning_mode::NONE && !result.markers.reasoning_end.empty() && - diff_reasoning.left.find(result.markers.reasoning_end) != std::string::npos) { + } else if (reasoning.mode != reasoning_mode::NONE && !reasoning.end.empty() && + diff_reasoning.left.find(reasoning.end) != std::string::npos) { std::string 
post_closed_reasoning = diff_reasoning.left.substr( - diff_reasoning.left.find(result.markers.reasoning_end) + result.markers.reasoning_end.length()); + diff_reasoning.left.find(reasoning.end) + reasoning.end.length()); if (trim_whitespace(post_closed_reasoning) == "Response text") { LOG_DBG("C1: No content markers after stripping reasoning close marker\n"); - result.content = content_mode::PLAIN; + result.mode = content_mode::PLAIN; found_plain_content = true; } } } if (!found_plain_content) { std::string rdiff = diff_reasoning.left; - if (!result.markers.reasoning_end.empty() && rdiff.find(result.markers.reasoning_end) != std::string::npos) { - rdiff = rdiff.substr(rdiff.find(result.markers.reasoning_end) + result.markers.reasoning_end.length()); + if (!reasoning.end.empty() && rdiff.find(reasoning.end) != std::string::npos) { + rdiff = rdiff.substr(rdiff.find(reasoning.end) + reasoning.end.length()); } // Take the more promising diff std::string pure_content = rdiff.length() > diff_tools.left.length() ? 
rdiff : diff_tools.left; size_t pos = pure_content.find("Response text"); if (pos == std::string::npos) { - LOG_DBG("C1: Error: response text not found - improper template application?"); - return; + LOG_DBG(ANSI_ORANGE "%s: Error: response text not found - improper template application?\n" ANSI_RESET, __func__); + return result; } - result.markers.content_start = trim_leading_whitespace(pure_content.substr(0, pos)); - result.markers.content_end = - trim_leading_whitespace(pure_content.substr(pos + 13)); // 13 - len of "Response text" + result.start = trim_leading_whitespace(pure_content.substr(0, pos)); + result.end = trim_leading_whitespace(pure_content.substr(pos + 13)); // 13 - len of "Response text" // TODO: WRAPPED_WITH_REASONING } // Determine content mode - if (!result.markers.content_start.empty() || !result.markers.content_end.empty()) { - result.content = content_mode::ALWAYS_WRAPPED; - LOG_DBG("C1: Content is ALWAYS_WRAPPED\n"); + if (!result.start.empty() || !result.end.empty()) { + result.mode = content_mode::ALWAYS_WRAPPED; // TODO: END_DELIMITED content mode - delimited at end but not at start? 
} + + return result; } -void differential_analyzer::analyze_tool_call_format(const std::string & haystack, - const std::string & fun_name_needle, - const std::string & arg_name_needle, - diff_analysis_result & result) { +tool_analysis differential_analyzer::analyze_tools(const common_chat_template & tmpl, + const jinja::caps & caps, + const reasoning_analysis & reasoning) { + tool_analysis result; + LOG_DBG(ANSI_ORANGE "Phase 3: Tool call analysis\n" ANSI_RESET); + + result.format = analyze_tool_calls(tmpl, reasoning); + + if (result.format.mode != tool_format::NONE && result.format.mode != tool_format::JSON_NATIVE) { + if (caps.supports_parallel_tool_calls) { + check_per_call_markers(tmpl, result.format); + } + result.function = extract_function_markers(tmpl, result.format); + if (result.format.mode == tool_format::TAG_WITH_TAGGED) { + result.arguments = analyze_arguments(tmpl, result); + } + extract_argument_separator(tmpl, result.arguments); + extract_args_markers(tmpl, result, result.arguments); + result.call_id = extract_call_id_markers(tmpl, result.format); + } + + return result; +} + +tool_format_analysis differential_analyzer::analyze_tool_calls(const common_chat_template & tmpl, + const reasoning_analysis & reasoning) { + json assistant_no_tools = json{ + { "role", "assistant" }, + { "content", "Response." 
} + }; + + json assistant_with_tools = json{ + { "role", "assistant" }, + { "content", "" }, + { "tool_calls", json::array({ first_tool_call }) } + }; + + template_params params; + params.messages = json::array({ user_msg, assistant_no_tools }); + params.tools = tools; + params.add_generation_prompt = false; + params.enable_thinking = true; + + auto comparison = compare_variants( + tmpl, params, [&](template_params & p) { p.messages = json::array({ user_msg, assistant_with_tools }); }); + + if (!comparison) { + LOG_DBG(ANSI_ORANGE "%s: Template application failed\n" ANSI_RESET, __func__); + return tool_format_analysis(); + } + + const auto & diff = comparison->diff; + + std::string tool_section = diff.right; + + if (tool_section.empty()) { + return tool_format_analysis(); + } + + return analyze_tool_call_format(tool_section, "foofoo", "first", reasoning); +} + +tool_format_analysis differential_analyzer::analyze_tool_call_format(const std::string & haystack, + const std::string & fun_name_needle, + const std::string & arg_name_needle, + const reasoning_analysis & reasoning) { + tool_format_analysis result; + if (fun_name_needle.empty() || arg_name_needle.empty() || haystack.empty()) { - return; + return result; } auto in_json_haystack = [&haystack](const std::string & needle) -> bool { @@ -618,43 +658,44 @@ void differential_analyzer::analyze_tool_call_format(const std::string & hays if (in_json_haystack(fun_name_needle)) { // no need to check further, we're in JSON land - result.tools = tool_format::JSON_NATIVE; + result.mode = tool_format::JSON_NATIVE; } else if (in_json_haystack(arg_name_needle)) { - result.tools = tool_format::TAG_WITH_JSON; + result.mode = tool_format::TAG_WITH_JSON; } else { - result.tools = tool_format::TAG_WITH_TAGGED; + result.mode = tool_format::TAG_WITH_TAGGED; } // first, remove any reasoning markers std::string clean_haystack = haystack; - if (!result.markers.reasoning_start.empty()) { - auto pos = 
haystack.find(result.markers.reasoning_start); + if (!reasoning.start.empty()) { + auto pos = haystack.find(reasoning.start); if (pos != std::string::npos) { - clean_haystack = haystack.substr(0, pos) + haystack.substr(pos + result.markers.reasoning_start.length()); + clean_haystack = haystack.substr(0, pos) + haystack.substr(pos + reasoning.start.length()); } } - if (!result.markers.reasoning_end.empty()) { - auto pos = clean_haystack.find(result.markers.reasoning_end); + if (!reasoning.end.empty()) { + auto pos = clean_haystack.find(reasoning.end); if (pos != std::string::npos) { - clean_haystack = - clean_haystack.substr(0, pos) + clean_haystack.substr(pos + result.markers.reasoning_end.length()); + clean_haystack = clean_haystack.substr(0, pos) + clean_haystack.substr(pos + reasoning.end.length()); } } - if (result.tools == tool_format::JSON_NATIVE) { + if (result.mode == tool_format::JSON_NATIVE) { analyze_tool_call_format_json_native(clean_haystack, fun_name_needle, arg_name_needle, result); } else { analyze_tool_call_format_non_json(clean_haystack, fun_name_needle, result); } // always relax whitespace requirements on ending markers since they don't influence content - result.markers.tool_section_end = trim_whitespace(result.markers.tool_section_end); - result.markers.per_call_end = trim_whitespace(result.markers.per_call_end); + result.section_end = trim_whitespace(result.section_end); + result.per_call_end = trim_whitespace(result.per_call_end); + + return result; } void differential_analyzer::analyze_tool_call_format_json_native(const std::string & clean_haystack, const std::string & fun_name_needle, const std::string & arg_name_needle, - diff_analysis_result & result) { + tool_format_analysis & format) { // we might not have the typical OpenAI tool calling structure int json_start = clean_haystack.find_first_of('{'); int json_end = clean_haystack.find_last_of('}'); @@ -662,28 +703,28 @@ void 
differential_analyzer::analyze_tool_call_format_json_native(const std::stri json call_struct = json::parse(cut); auto register_field = [&](const std::string & prefix, const nlohmann::detail::iteration_proxy_value & subel) { if (subel.value().is_string() && std::string(subel.value()).find("call0000") != std::string::npos) { - result.id_field = !prefix.empty() ? prefix + "." + subel.key() : subel.key(); + format.id_field = !prefix.empty() ? prefix + "." + subel.key() : subel.key(); } else if (subel.value().is_string() && std::string(subel.value()) == fun_name_needle) { - result.name_field = !prefix.empty() ? prefix + "." + subel.key() : subel.key(); + format.name_field = !prefix.empty() ? prefix + "." + subel.key() : subel.key(); } else if (subel.value().dump().find(arg_name_needle) != std::string::npos) { // handle both string and JSON obj variants - result.args_field = !prefix.empty() ? prefix + "." + subel.key() : subel.key(); + format.args_field = !prefix.empty() ? prefix + "." + subel.key() : subel.key(); } else if (subel.key().find("id") != std::string::npos) { // heuristics for generated id field - result.gen_id_field = !prefix.empty() ? prefix + "." + subel.key() : subel.key(); + format.gen_id_field = !prefix.empty() ? prefix + "." 
+ subel.key() : subel.key(); } }; for (const auto & el : call_struct.items()) { if (el.key() == fun_name_needle) { - result.fun_name_is_key = true; + format.fun_name_is_key = true; // When function name is the key, there's no name field and args are direct - result.name_field.clear(); - result.args_field.clear(); + format.name_field.clear(); + format.args_field.clear(); // Don't register this element - the function name IS the key, not a field } else { if (el.value().is_object() && el.value().dump().find(arg_name_needle) == std::string::npos) { // not the args object - result.function_field = el.key(); + format.function_field = el.key(); for (const auto & subel : el.value().items()) { register_field(el.key(), subel); } @@ -700,11 +741,11 @@ void differential_analyzer::analyze_tool_call_format_json_native(const std::stri if (json_start > 0 && space_or_bracket(true, clean_haystack[json_start - 1])) { for (--json_start; space_or_bracket(true, clean_haystack[json_start]) && json_start >= 0; json_start--) { if (clean_haystack[json_start] == '[') { - result.tools_array_wrapped = true; + format.tools_array_wrapped = true; break; } } - if (!result.tools_array_wrapped) { + if (!format.tools_array_wrapped) { json_start++; // we ate into the last pre-json character } } @@ -716,35 +757,35 @@ void differential_analyzer::analyze_tool_call_format_json_native(const std::stri } std::vector> located_params; - if (!result.name_field.empty()) { - located_params.push_back({ clean_haystack.find(result.name_field), result.name_field }); + if (!format.name_field.empty()) { + located_params.push_back({ clean_haystack.find(format.name_field), format.name_field }); } - if (!result.args_field.empty()) { - located_params.push_back({ clean_haystack.find(result.args_field), result.args_field }); + if (!format.args_field.empty()) { + located_params.push_back({ clean_haystack.find(format.args_field), format.args_field }); } - if (!result.id_field.empty()) { - located_params.push_back({ 
clean_haystack.find(result.id_field), result.id_field }); + if (!format.id_field.empty()) { + located_params.push_back({ clean_haystack.find(format.id_field), format.id_field }); } - if (!result.gen_id_field.empty()) { - located_params.push_back({ clean_haystack.find(result.gen_id_field), result.gen_id_field }); + if (!format.gen_id_field.empty()) { + located_params.push_back({ clean_haystack.find(format.gen_id_field), format.gen_id_field }); } std::sort(located_params.begin(), located_params.end()); for (auto & pair : located_params) { - result.parameter_order.push_back(pair.second); + format.parameter_order.push_back(pair.second); } // we can immediately extract tool calling markers too - result.markers.tool_section_start = trim_leading_whitespace(clean_haystack.substr(0, json_start)); - result.markers.tool_section_end = trim_whitespace(clean_haystack.substr(json_end)); + format.section_start = trim_leading_whitespace(clean_haystack.substr(0, json_start)); + format.section_end = trim_whitespace(clean_haystack.substr(json_end)); // When tools_array_wrapped is true, the closing bracket is part of the array structure, // not a separate section end marker. Clear tool_section_end to avoid duplicate brackets. - if (result.tools_array_wrapped && result.markers.tool_section_end == "]") { - result.markers.tool_section_end.clear(); + if (format.tools_array_wrapped && format.section_end == "]") { + format.section_end.clear(); } } void differential_analyzer::analyze_tool_call_format_non_json(const std::string & clean_haystack, const std::string & fun_name_needle, - diff_analysis_result & result) { + tool_format_analysis & format) { // we need to split by markers... 
auto haystack_split = segmentize_markers(trim_leading_whitespace(clean_haystack)); int where_is_nemo = 0; @@ -776,17 +817,15 @@ void differential_analyzer::analyze_tool_call_format_non_json(const std::string if (haystack_split[seg].type == MARKER) { if (!had_marker) { had_marker = true; - result.markers.per_call_start = haystack_split[seg].value + result.markers.per_call_start; + format.per_call_start = haystack_split[seg].value + format.per_call_start; } else { - result.markers.tool_section_start = - haystack_split[seg].value + result.markers.tool_section_start; + format.section_start = haystack_split[seg].value + format.section_start; } } else { if (had_marker) { - result.markers.tool_section_start = - haystack_split[seg].value + result.markers.tool_section_start; + format.section_start = haystack_split[seg].value + format.section_start; } else { - result.markers.per_call_start = haystack_split[seg].value + result.markers.per_call_start; + format.per_call_start = haystack_split[seg].value + format.per_call_start; } } } @@ -797,15 +836,15 @@ void differential_analyzer::analyze_tool_call_format_non_json(const std::string backtracked_so_far++; if (!had_marker) { had_marker = true; - result.markers.tool_section_end = haystack_split[seg].value + result.markers.tool_section_end; + format.section_end = haystack_split[seg].value + format.section_end; } else { - result.markers.per_call_end = haystack_split[seg].value + result.markers.per_call_end; + format.per_call_end = haystack_split[seg].value + format.per_call_end; } } else { if (had_marker) { - result.markers.per_call_end = haystack_split[seg].value + result.markers.per_call_end; + format.per_call_end = haystack_split[seg].value + format.per_call_end; } else { - result.markers.tool_section_end = haystack_split[seg].value + result.markers.tool_section_end; + format.section_end = haystack_split[seg].value + format.section_end; } } if (backtracked_so_far >= how_many_markers) { @@ -814,19 +853,19 @@ void 
differential_analyzer::analyze_tool_call_format_non_json(const std::string } } else { for (int seg = 0; seg < where_is_nemo; seg++) { - result.markers.tool_section_start += haystack_split[seg].value; + format.section_start += haystack_split[seg].value; } for (size_t seg = haystack_split.size() - 1; seg > (size_t) where_is_nemo; seg--) { - result.markers.tool_section_end = haystack_split[seg].value + result.markers.tool_section_end; + format.section_end = haystack_split[seg].value + format.section_end; if (haystack_split[seg].type == segment_type::MARKER) { break; } } } } else { - result.markers.tool_section_start += haystack_split[0].value; + format.section_start += haystack_split[0].value; for (size_t seg = haystack_split.size() - 1; seg > (size_t) where_is_nemo; seg--) { - result.markers.tool_section_end = haystack_split[seg].value + result.markers.tool_section_end; + format.section_end = haystack_split[seg].value + format.section_end; if (haystack_split[seg].type == segment_type::MARKER) { break; } @@ -834,28 +873,7 @@ void differential_analyzer::analyze_tool_call_format_non_json(const std::string } } -void differential_analyzer::analyze_tools(const common_chat_template & tmpl, diff_analysis_result & result) { - LOG_DBG(ANSI_ORANGE "Phase 3: Tool call analysis\n" ANSI_RESET); - analyze_tool_calls(tmpl, result); - - if (result.tools == tool_format::NONE) { - LOG_DBG("T1: No tool support found\n"); - // Continue anyway - we may still have useful markers - } else if (result.tools != tool_format::JSON_NATIVE) { - if (result.supports_parallel_calls) { - check_per_call_markers(tmpl, result); - } - extract_function_markers(tmpl, result); - extract_argument_separator(tmpl, result); - extract_args_markers(tmpl, result); - extract_call_id_markers(tmpl, result); - if (result.tools == tool_format::TAG_WITH_TAGGED) { - analyze_arguments(tmpl, result); - } - } -} - -void differential_analyzer::check_per_call_markers(const common_chat_template & tmpl, diff_analysis_result & 
result) { +void differential_analyzer::check_per_call_markers(const common_chat_template & tmpl, tool_format_analysis & result) { json assistant_one_tool = json{ { "role", "assistant" }, { "content", "" }, @@ -878,112 +896,23 @@ void differential_analyzer::check_per_call_markers(const common_chat_template & tmpl, params, [&](template_params & p) { p.messages = json::array({ user_msg, assistant_two_tools }); }); if (!one_vs_two) { - LOG_DBG("T2: Generating double tool call comparison failed\n"); + LOG_DBG(ANSI_ORANGE "%s: Generating double tool call comparison failed\n" ANSI_RESET, __func__); return; } std::string second_tool_content = trim_leading_whitespace(one_vs_two->diff.right); - if (!result.markers.tool_section_start.empty() && - second_tool_content.find(result.markers.tool_section_start) == 0) { - result.markers.per_call_start = result.markers.tool_section_start; - result.markers.per_call_end = result.markers.tool_section_end; - result.markers.tool_section_start.clear(); - result.markers.tool_section_end.clear(); + if (!result.section_start.empty() && + second_tool_content.find(result.section_start) == 0) { + result.per_call_start = result.section_start; + result.per_call_end = result.section_end; + result.section_start.clear(); + result.section_end.clear(); } } -void differential_analyzer::analyze_tool_calls(const common_chat_template & tmpl, diff_analysis_result & result) { - json assistant_no_tools = json{ - { "role", "assistant" }, - { "content", "Response." 
} - }; +tool_function_analysis differential_analyzer::extract_function_markers(const common_chat_template & tmpl, const tool_format_analysis & analysis) { + tool_function_analysis result; - json assistant_with_tools = json{ - { "role", "assistant" }, - { "content", "" }, - { "tool_calls", json::array({ first_tool_call }) } - }; - - template_params params; - params.messages = json::array({ user_msg, assistant_no_tools }); - params.tools = tools; - params.add_generation_prompt = false; - params.enable_thinking = true; - - auto comparison = compare_variants( - tmpl, params, [&](template_params & p) { p.messages = json::array({ user_msg, assistant_with_tools }); }); - - if (!comparison) { - LOG_DBG("T1: Template application failed\n"); - return; - } - - const auto & diff = comparison->diff; - LOG_DBG("T1 diff - prefix: '%s', suffix: '%s'\n", diff.prefix.c_str(), diff.suffix.c_str()); - LOG_DBG("T1 diff - left: '%s', right: '%s'\n", diff.left.c_str(), diff.right.c_str()); - - std::string tool_section = diff.right; - - if (tool_section.empty()) { - return; - } - - analyze_tool_call_format(tool_section, "foofoo", "first", result); - - LOG_DBG("T1: tool_section_start='%s', tool_section_end='%s'\n", result.markers.tool_section_start.c_str(), - result.markers.tool_section_end.c_str()); -} - -void differential_analyzer::extract_call_separator(const common_chat_template & tmpl, - diff_analysis_result & result, - std::string & second_call_content) { - json assistant_one_call = json{ - { "role", "assistant" }, - { "content", "" }, - { "tool_calls", json::array({ first_tool_call }) } - }; - - json assistant_two_calls = json{ - { "role", "assistant" }, - { "content", "" }, - { "tool_calls", json::array({ first_tool_call, second_tool_call }) } - }; - - template_params params; - params.messages = json::array({ user_msg, assistant_one_call }); - params.tools = tools; - params.add_generation_prompt = false; - params.enable_thinking = true; - - auto comparison = compare_variants( - 
tmpl, params, [&](template_params & p) { p.messages = json::array({ user_msg, assistant_two_calls }); }); - - if (!comparison) { - LOG_DBG("T2: Template application failed\n"); - return; - } - - const auto & diff = comparison->diff; - LOG_DBG("T2 diff - prefix: '%s', suffix: '%s'\n", diff.prefix.c_str(), diff.suffix.c_str()); - LOG_DBG("T2 diff - left: '%s', right: '%s'\n", diff.left.c_str(), diff.right.c_str()); - - if (!diff.right.empty()) { - std::string first_func_name = "foofoo"; - std::string second_func_name = "barbar"; - - std::string separator = until_common_prefix(diff.right, first_func_name, second_func_name); - result.markers.call_separator = trim_whitespace(separator); - - LOG_DBG("T2: call_separator='%s'\n", result.markers.call_separator.c_str()); - - result.supports_parallel_calls = true; - second_call_content = diff.right; - - LOG_DBG("T2: second_call_content='%s', supports_parallel_calls=true\n", second_call_content.c_str()); - } -} - -void differential_analyzer::extract_function_markers(const common_chat_template & tmpl, diff_analysis_result & result) { json assistant_nocall = json{ { "role", "assistant" }, { "content", "BBBB" }, @@ -1011,8 +940,8 @@ void differential_analyzer::extract_function_markers(const common_chat_template tmpl, params, [&](template_params & p) { p.messages = json::array({ user_msg, assistant_barbar }); }); if (!comparison) { - LOG_DBG("T3: Template application failed\n"); - return; + LOG_DBG(ANSI_ORANGE "%s: Template application failed\n" ANSI_RESET, __func__); + return result; } const auto & diff = comparison->diff; @@ -1021,26 +950,26 @@ void differential_analyzer::extract_function_markers(const common_chat_template if (diff.left.find("foofoo") != std::string::npos && diff.right.find("barbar") != std::string::npos) { std::string prefix_marker; - if (!result.markers.per_call_start.empty()) { - prefix_marker = result.markers.per_call_start; + if (!analysis.per_call_start.empty()) { + prefix_marker = analysis.per_call_start; 
} else { - prefix_marker = result.markers.tool_section_start; + prefix_marker = analysis.section_start; } if (!prefix_marker.empty() && diff.prefix.rfind(prefix_marker) != std::string::npos) { - result.markers.func_name_prefix = + result.name_prefix = diff.prefix.substr(diff.prefix.rfind(prefix_marker) + prefix_marker.size()); } auto seg = segmentize_markers(diff.left); for (const auto & s : seg) { if (s.value.find("foofoo") == std::string::npos) { - result.markers.func_name_prefix += s.value; + result.name_prefix += s.value; } else { size_t pos = s.value.find("foofoo"); std::string pre = s.value.substr(0, pos); std::string post = s.value.substr(pos + 6); // 6 = len("foofoo") - result.markers.func_name_prefix += pre; - result.markers.func_name_suffix += post; + result.name_prefix += pre; + result.name_suffix += post; break; } } @@ -1050,7 +979,7 @@ void differential_analyzer::extract_function_markers(const common_chat_template size_t stop_internal_pos = 0; for (const auto & ss : seg_suf) { bool has_needle = false; - if (result.tools == tool_format::TAG_WITH_JSON) { + if (analysis.mode == tool_format::TAG_WITH_JSON) { has_needle = (ss.type == segment_type::TEXT && ss.value.find_first_of("{[") != std::string::npos); if (has_needle) { stop_internal_pos = ss.value.find_first_of("{["); @@ -1066,7 +995,7 @@ void differential_analyzer::extract_function_markers(const common_chat_template stop++; } if (stop < seg_suf.size() - 1) { - if (result.tools == tool_format::TAG_WITH_TAGGED) { + if (analysis.mode == tool_format::TAG_WITH_TAGGED) { size_t how_far = 0; if (stop > 0) { if (seg_suf[stop].type == segment_type::MARKER) { @@ -1075,24 +1004,24 @@ void differential_analyzer::extract_function_markers(const common_chat_template how_far = stop - 1; } for (size_t i = 0; i < how_far; i++) { - result.markers.func_name_suffix += seg_suf[i].value; + result.name_suffix += seg_suf[i].value; } } } else { for (size_t i = 0; i < stop; i++) { - result.markers.func_name_suffix += 
seg_suf[i].value; + result.name_suffix += seg_suf[i].value; } const std::string & stopper = seg_suf[stop].value; - result.markers.func_name_suffix += stopper.substr(0, stop_internal_pos); + result.name_suffix += stopper.substr(0, stop_internal_pos); } } // now just to find the closer std::string suffix_marker; - if (!result.markers.per_call_end.empty()) { - suffix_marker = result.markers.per_call_end; + if (!analysis.per_call_end.empty()) { + suffix_marker = analysis.per_call_end; } else { - suffix_marker = result.markers.tool_section_end; + suffix_marker = analysis.section_end; } std::string closer_suffix; if (suffix_marker.empty()) { @@ -1106,18 +1035,18 @@ void differential_analyzer::extract_function_markers(const common_chat_template } if (!closer_suffix.empty()) { auto closer_seg = segmentize_markers(closer_suffix); - bool need_to_eat_arg_marker = (result.tools == tool_format::TAG_WITH_TAGGED); + bool need_to_eat_arg_marker = (analysis.mode == tool_format::TAG_WITH_TAGGED); size_t last_arg_seg = closer_seg.size() - 1; for (int i = (int) closer_seg.size() - 1; i >= 0; i--) { if (closer_seg[i].value.find("YYYY") != std::string::npos) { last_arg_seg = i; } } - if (result.tools == tool_format::TAG_WITH_JSON) { + if (analysis.mode == tool_format::TAG_WITH_JSON) { const auto & entire_seg = closer_seg[last_arg_seg].value; size_t pos = entire_seg.find_last_of("}]"); if (pos != std::string::npos && pos < entire_seg.size() - 1) { - result.markers.func_close = trim_leading_whitespace(entire_seg.substr(pos + 1)); + result.close = trim_leading_whitespace(entire_seg.substr(pos + 1)); } } for (size_t i = last_arg_seg + 1; i < closer_seg.size(); i++) { @@ -1125,23 +1054,204 @@ void differential_analyzer::extract_function_markers(const common_chat_template if (need_to_eat_arg_marker) { need_to_eat_arg_marker = false; } else { - result.markers.func_close += closer_seg[i].value; + result.close += closer_seg[i].value; } } else if (!need_to_eat_arg_marker) { - 
result.markers.func_close += closer_seg[i].value; + result.close += closer_seg[i].value; } } } - result.markers.func_close = trim_leading_whitespace(result.markers.func_close); + result.close = trim_leading_whitespace(result.close); + } + return result; +} - LOG_DBG("T3: func_name_prefix='%s', func_name_suffix='%s', func_close='%s'\n", - result.markers.func_name_prefix.c_str(), result.markers.func_name_suffix.c_str(), - result.markers.func_close.c_str()); +tool_arguments_analysis differential_analyzer::analyze_arguments(const common_chat_template & tmpl, const tool_analysis & tool_analysis) { + LOG_DBG(ANSI_ORANGE "Phase 4: Argument analysis\n" ANSI_RESET); + + tool_arguments_analysis result; + + extract_argument_name_markers(tmpl, result); + extract_argument_value_markers(tmpl, tool_analysis, result); + + return result; +} + +void differential_analyzer::extract_argument_name_markers(const common_chat_template & tmpl, + tool_arguments_analysis & args_analysis) { + json assistant_first_arg = json{ + { "role", "assistant" }, + { "content", "" }, + { "tool_calls", json::array({ first_tool_call_one_arg }) } + }; + + json assistant_second_arg = json{ + { "role", "assistant" }, + { "content", "" }, + { "tool_calls", json::array({ first_tool_call_other_arg }) } + }; + + template_params params; + params.messages = json::array({ user_msg, assistant_first_arg }); + params.tools = tools; + params.add_generation_prompt = false; + params.enable_thinking = true; + + auto comparison = compare_variants( + tmpl, params, [&](template_params & p) { p.messages = json::array({ user_msg, assistant_second_arg }); }); + + if (!comparison) { + LOG_DBG(ANSI_ORANGE "%s: Template application failed\n" ANSI_RESET, __func__); + return; + } + + const auto & diff = comparison->diff; + + if (!diff.left.empty() && !diff.right.empty()) { + size_t common_len = 0; + size_t min_len = std::min(diff.left.length(), diff.right.length()); + while (common_len < min_len && diff.left[common_len] == 
diff.right[common_len]) { + common_len++; + } + + if (common_len > 0) { // we have a marker structure with the name *inside* the marker + std::string common_prefix = diff.left.substr(0, common_len); + std::string left_remainder = diff.left.substr(common_len); + std::string right_remainder = diff.right.substr(common_len); + size_t left_close = + left_remainder.find_first_of("\"X"); // because arg-val is XXXX, can be quoted or unquoted + size_t right_close = right_remainder.find_first_of("\"Y"); // here arg-val is YYYY + + if (left_close != std::string::npos && right_close != std::string::npos) { + std::string left_name = left_remainder.substr(0, 5); // 5 = len("first") + std::string right_name = right_remainder.substr(0, 6); // 6 = len("second") + + if (left_name == "first" && right_name == "second") { + args_analysis.name_prefix = trim_whitespace(common_prefix); + std::string suffix_left = left_remainder.substr(5, left_close - 5); + std::string suffix_right = right_remainder.substr(6, right_close - 6); + if (suffix_left == suffix_right) { + args_analysis.name_suffix = trim_leading_whitespace(suffix_left); + } + } + } + } else if (diff.left.substr(0, 5) == "first" && diff.right.substr(0, 6) == "second") { + // we most likely have actual markers for argument names + auto pre_seg = segmentize_markers(diff.prefix); + for (int i = pre_seg.size() - 1; i >= 0; i--) { + args_analysis.name_prefix = args_analysis.name_prefix + pre_seg[i].value; + if (pre_seg[i].type == segment_type::MARKER) { + break; + } + } + auto left_seg = segmentize_markers(diff.left); + if (left_seg.size() == 1) { // only the name + maybe extra whitespace / normal chars in differing part + args_analysis.name_suffix = diff.left.substr(5); + auto suf_seg= segmentize_markers(diff.suffix); + for (size_t i = 0; i < suf_seg.size(); i++) { + args_analysis.name_suffix += suf_seg[i].value; + if (suf_seg[i].type == segment_type::MARKER) { + if (i < suf_seg.size() - 2 && suf_seg[i + 1].type == segment_type::TEXT 
&& + trim_whitespace(suf_seg[i + 1].value).empty()) { + // we need to include post-marker whitespace/newlines as well + args_analysis.name_suffix += suf_seg[i + 1].value; + } + break; + } + } + } else { + for (size_t i = 0; i < left_seg.size(); i++) { + std::string to_add; + if (i == 0) { + to_add = left_seg[i].value.substr(5); + } else { + to_add = left_seg[i].value; + } + args_analysis.name_suffix += to_add; + if (left_seg[i].type == segment_type::MARKER) { + if (i < left_seg.size() - 2 && left_seg[i + 1].type == segment_type::TEXT && + trim_whitespace(left_seg[i + 1].value).empty()) { + // we need to include post-marker whitespace/newlines as well + args_analysis.name_suffix += left_seg[i + 1].value; + } + break; + } + } + } + } + } +} + +void differential_analyzer::extract_argument_value_markers(const common_chat_template & tmpl, + const tool_analysis & analysis, + tool_arguments_analysis & args_analysis) { + json assistant_val_X = json{ + { "role", "assistant" }, + { "content", "" }, + { "tool_calls", json::array({ first_tool_call_one_arg }) } + }; + + json assistant_val_Y = json{ + { "role", "assistant" }, + { "content", "" }, + { "tool_calls", json::array({ first_tool_call_one_arg_other_val }) } + }; + + template_params params; + params.messages = json::array({ user_msg, assistant_val_X }); + params.tools = tools; + params.add_generation_prompt = false; + params.enable_thinking = true; + + auto comparison = compare_variants( + tmpl, params, [&](template_params & p) { p.messages = json::array({ user_msg, assistant_val_Y }); }); + + if (!comparison) { + LOG_DBG(ANSI_ORANGE "%s: Template application failed\n" ANSI_RESET, __func__); + return; + } + + const auto & diff = comparison->diff; + + if (diff.left == "XXXX" && diff.right == "YYYY") { + std::string arg_name_ending = "first" + args_analysis.name_suffix; + std::string prefix = diff.prefix; + if (prefix.rfind(arg_name_ending) != std::string::npos) { + prefix = prefix.substr(prefix.rfind(arg_name_ending) + 
arg_name_ending.size()); + } + if (!prefix.empty()) { + auto seg_pre = segmentize_markers(prefix); + for (int i = seg_pre.size() - 1; i >= 0; i--) { + args_analysis.value_prefix = seg_pre[i].value + args_analysis.value_prefix; + if (seg_pre[i].type == segment_type::MARKER) { + break; + } + } + } + + std::string value_suffix = diff.suffix; + if (!analysis.function.close.empty()) { + size_t func_close_pos = value_suffix.find(analysis.function.close); + if (func_close_pos != std::string::npos) { + value_suffix = value_suffix.substr(0, func_close_pos); + } + } else if (!analysis.format.per_call_end.empty() || !analysis.format.section_end.empty()) { + std::string end_marker = + !analysis.format.per_call_end.empty() ? analysis.format.per_call_end : analysis.format.section_end; + size_t end_marker_pos = value_suffix.find(end_marker); + if (end_marker_pos != std::string::npos) { + value_suffix = value_suffix.substr(0, end_marker_pos); + } + } + value_suffix = trim_leading_whitespace(value_suffix); + if (!value_suffix.empty()) { + args_analysis.value_suffix = value_suffix; + } } } void differential_analyzer::extract_argument_separator(const common_chat_template & tmpl, - diff_analysis_result & result) { + tool_arguments_analysis & args_analysis) { json assistant_one_arg = json{ { "role", "assistant" }, { "content", "" }, @@ -1164,31 +1274,30 @@ void differential_analyzer::extract_argument_separator(const common_chat_templat tmpl, params, [&](template_params & p) { p.messages = json::array({ user_msg, assistant_two_args }); }); if (!comparison) { - LOG_DBG("T4: Template application failed\n"); + LOG_DBG(ANSI_ORANGE "%s: Template application failed\n" ANSI_RESET, __func__); return; } const auto & diff = comparison->diff; - LOG_DBG("T4 diff - suffix: '%s'\n", diff.suffix.c_str()); - LOG_DBG("T4 diff - left: '%s', right: '%s'\n", diff.left.c_str(), diff.right.c_str()); if (!diff.right.empty()) { std::string separator = until_common_prefix(diff.right, "first", "second"); - 
result.markers.arg_separator = separator; - LOG_DBG("T4: arg_separator='%s'\n", result.markers.arg_separator.c_str()); + args_analysis.separator = separator; } } -void differential_analyzer::extract_args_markers(const common_chat_template & tmpl, diff_analysis_result & result) { +void differential_analyzer::extract_args_markers(const common_chat_template & tmpl, + const tool_analysis & analysis, + tool_arguments_analysis & args_analysis) { json assistant_no_args = json{ - { "role", "assistant" }, - { "content", "" }, + { "role", "assistant"}, + { "content", "" }, { "tool_calls", json::array({ first_tool_call_zero_args }) } }; json assistant_with_args = json{ - { "role", "assistant" }, - { "content", "" }, + { "role", "assistant"}, + { "content", "" }, { "tool_calls", json::array({ first_tool_call_one_arg }) } }; @@ -1202,19 +1311,15 @@ void differential_analyzer::extract_args_markers(const common_chat_template & tm tmpl, params, [&](template_params & p) { p.messages = json::array({ user_msg, assistant_with_args }); }); if (!comparison) { - LOG_DBG("T5: Template application failed\n"); + LOG_DBG(ANSI_ORANGE "%s: Template application failed\n" ANSI_RESET, __func__); return; } const auto & diff = comparison->diff; - LOG_DBG("T5 diff - suffix: '%s'\n", diff.suffix.c_str()); - LOG_DBG("T5 diff - left: '%s', right: '%s'\n", diff.left.c_str(), diff.right.c_str()); - if (result.markers.args_start.empty() && result.tools != tool_format::JSON_NATIVE) { - std::string prefix_marker = !result.markers.tool_section_start.empty() ? result.markers.tool_section_start : - result.markers.per_call_start; - std::string suffix_marker = - !result.markers.tool_section_end.empty() ? result.markers.tool_section_end : result.markers.per_call_end; + if (analysis.format.mode != tool_format::JSON_NATIVE) { + std::string prefix_marker = !analysis.format.section_start.empty() ? 
analysis.format.section_start : analysis.format.per_call_start; + std::string suffix_marker = !analysis.format.section_end.empty() ? analysis.format.section_end : analysis.format.per_call_end; // these might happen earlier in the tools section as an example or somewhere else, so we need to find the closest ones size_t prefix_pos = prefix_marker.empty() ? 0 : diff.prefix.rfind(prefix_marker); size_t suffix_pos = suffix_marker.empty() ? diff.suffix.size() : diff.suffix.find(suffix_marker); @@ -1230,15 +1335,15 @@ void differential_analyzer::extract_args_markers(const common_chat_template & tm std::string args_end = after_common_suffix(suffix_cut, "{}", "\"XXXX\"}"); if (!args_start.empty() || !args_end.empty()) { - result.markers.args_start = args_start; - result.markers.args_end = args_end; - LOG_DBG("T5: Custom argument container detected: start='%s', end='%s'\n", args_start.c_str(), - args_end.c_str()); + args_analysis.start = args_start; + args_analysis.end = args_end; } } } -void differential_analyzer::extract_call_id_markers(const common_chat_template & tmpl, diff_analysis_result & result) { +tool_id_analysis differential_analyzer::extract_call_id_markers(const common_chat_template & tmpl, tool_format_analysis & analysis) { + tool_id_analysis result; + json assistant_id1 = json{ { "role", "assistant" }, { "content", "" }, @@ -1261,8 +1366,8 @@ void differential_analyzer::extract_call_id_markers(const common_chat_template & tmpl, params, [&](template_params & p) { p.messages = json::array({ user_msg, assistant_id2 }); }); if (!comparison) { - LOG_DBG("T6: Template application failed for call_id detection\n"); - return; + LOG_DBG(ANSI_ORANGE "%s: Template application failed for call_id detection\n" ANSI_RESET, __func__); + return result; } const auto & diff = comparison->diff; @@ -1270,8 +1375,7 @@ void differential_analyzer::extract_call_id_markers(const common_chat_template & LOG_DBG("T6 diff (call_id) - left: '%s', right: '%s'\n", diff.left.c_str(), 
diff.right.c_str()); if (diff.left.empty() && diff.right.empty()) { - LOG_DBG("T6: No call_id difference detected\n"); - return; + return result; } std::string id_value_1 = "call00001"; @@ -1302,8 +1406,7 @@ void differential_analyzer::extract_call_id_markers(const common_chat_template & if (args_in_suffix != std::string::npos && (args_in_prefix == std::string::npos || args_in_prefix > diff.prefix.length())) { // Args are in suffix, so call_id is BETWEEN_FUNC_AND_ARGS - result.call_id_pos = call_id_position::BETWEEN_FUNC_AND_ARGS; - LOG_DBG("T6: Detected BETWEEN_FUNC_AND_ARGS position\n"); + result.pos = call_id_position::BETWEEN_FUNC_AND_ARGS; // The prefix ends with: ... // Segmentize to find the call_id_prefix marker @@ -1328,14 +1431,12 @@ void differential_analyzer::extract_call_id_markers(const common_chat_template & } if (!marker_before_id.empty()) { - result.markers.call_id_prefix = marker_before_id; - LOG_DBG("T6: call_id_prefix='%s'\n", result.markers.call_id_prefix.c_str()); + result.prefix = marker_before_id; } else { // Fallback: look for the last marker in after_func for (int i = (int) segments.size() - 1; i >= 0; i--) { if (segments[i].type == segment_type::MARKER) { - result.markers.call_id_prefix = segments[i].value; - LOG_DBG("T6: call_id_prefix (fallback)='%s'\n", result.markers.call_id_prefix.c_str()); + result.prefix = segments[i].value; break; } } @@ -1345,8 +1446,8 @@ void differential_analyzer::extract_call_id_markers(const common_chat_template & auto suffix_segments = segmentize_markers(diff.suffix); for (size_t i = 0; i < suffix_segments.size(); i++) { if (suffix_segments[i].type == segment_type::MARKER) { - result.markers.call_id_suffix = suffix_segments[i].value; - LOG_DBG("T6: call_id_suffix='%s'\n", result.markers.call_id_suffix.c_str()); + result.suffix = suffix_segments[i].value; + LOG_DBG("T6: call_id_suffix='%s'\n", result.suffix.c_str()); break; } // Stop if we hit the args @@ -1356,8 +1457,7 @@ void 
differential_analyzer::extract_call_id_markers(const common_chat_template & } } else if (args_in_prefix != std::string::npos) { // Args are in prefix, so call_id is POST_ARGS - result.call_id_pos = call_id_position::POST_ARGS; - LOG_DBG("T6: POST_ARGS call_id position detected\n"); + result.pos = call_id_position::POST_ARGS; // Extract markers from between args and the ID std::string after_args = diff.prefix.substr(args_in_prefix); @@ -1367,8 +1467,8 @@ void differential_analyzer::extract_call_id_markers(const common_chat_template & auto segments = segmentize_markers(between_args_and_id); for (int i = (int) segments.size() - 1; i >= 0; i--) { if (segments[i].type == segment_type::MARKER) { - result.markers.call_id_prefix = segments[i].value; - LOG_DBG("T6: call_id_prefix='%s'\n", result.markers.call_id_prefix.c_str()); + result.prefix = segments[i].value; + LOG_DBG("T6: call_id_prefix='%s'\n", result.prefix.c_str()); break; } } @@ -1378,23 +1478,20 @@ void differential_analyzer::extract_call_id_markers(const common_chat_template & auto suffix_segments = segmentize_markers(diff.suffix); for (const auto & seg : suffix_segments) { if (seg.type == segment_type::MARKER) { - result.markers.call_id_suffix = seg.value; - LOG_DBG("T6: call_id_suffix='%s'\n", result.markers.call_id_suffix.c_str()); + result.suffix = seg.value; break; } } } } else if (func_name_in_suffix != std::string::npos && func_name_in_prefix == std::string::npos) { // Function name is only in suffix - call_id is PRE_FUNC_NAME - result.call_id_pos = call_id_position::PRE_FUNC_NAME; - LOG_DBG("T6: PRE_FUNC_NAME call_id position detected\n"); + result.pos = call_id_position::PRE_FUNC_NAME; // Extract call_id_prefix from prefix (last marker before the common_id_part) auto prefix_segments = segmentize_markers(diff.prefix); for (int i = (int) prefix_segments.size() - 1; i >= 0; i--) { if (prefix_segments[i].type == segment_type::MARKER) { - result.markers.call_id_prefix = prefix_segments[i].value; - 
LOG_DBG("T6: call_id_prefix='%s'\n", result.markers.call_id_prefix.c_str()); + result.prefix = prefix_segments[i].value; break; } } @@ -1404,210 +1501,20 @@ void differential_analyzer::extract_call_id_markers(const common_chat_template & auto suffix_segments = segmentize_markers(before_func); for (const auto & seg : suffix_segments) { if (seg.type == segment_type::MARKER) { - result.markers.call_id_suffix = seg.value; - LOG_DBG("T6: call_id_suffix='%s'\n", result.markers.call_id_suffix.c_str()); + result.suffix = seg.value; break; } } - } else { - LOG_DBG("T6: Unable to determine call_id position\n"); } // When call_id is detected, per_call_end may have been incorrectly set to include // the call_id_suffix and sample args. Clear it if it starts with call_id_suffix. - if (result.call_id_pos != call_id_position::NONE && !result.markers.call_id_suffix.empty() && - result.markers.per_call_end.find(result.markers.call_id_suffix) == 0) { - result.markers.per_call_end.clear(); - LOG_DBG("T6: Cleared per_call_end (was incorrectly including call_id_suffix)\n"); - } -} - -void differential_analyzer::analyze_arguments(const common_chat_template & tmpl, diff_analysis_result & result) { - LOG_DBG(ANSI_ORANGE "Phase 4: Argument analysis\n" ANSI_RESET); - - extract_argument_name_markers(tmpl, result); - extract_argument_value_markers(tmpl, result); -} - -void differential_analyzer::extract_argument_name_markers(const common_chat_template & tmpl, - diff_analysis_result & result) { - json assistant_first_arg = json{ - { "role", "assistant" }, - { "content", "" }, - { "tool_calls", json::array({ first_tool_call_one_arg }) } - }; - - json assistant_second_arg = json{ - { "role", "assistant" }, - { "content", "" }, - { "tool_calls", json::array({ first_tool_call_other_arg }) } - }; - - template_params params; - params.messages = json::array({ user_msg, assistant_first_arg }); - params.tools = tools; - params.add_generation_prompt = false; - params.enable_thinking = true; - - auto 
comparison = compare_variants( - tmpl, params, [&](template_params & p) { p.messages = json::array({ user_msg, assistant_second_arg }); }); - - if (!comparison) { - LOG_DBG("A1: Template application failed\n"); - return; + if (result.pos != call_id_position::NONE && !result.suffix.empty() && + analysis.per_call_end.find(result.suffix) == 0) { + analysis.per_call_end.clear(); } - const auto & diff = comparison->diff; - LOG_DBG("A1 diff - suffix: '%s', left: '%s', right: '%s'\n", diff.suffix.c_str(), diff.left.c_str(), - diff.right.c_str()); - - if (!diff.left.empty() && !diff.right.empty()) { - size_t common_len = 0; - size_t min_len = std::min(diff.left.length(), diff.right.length()); - while (common_len < min_len && diff.left[common_len] == diff.right[common_len]) { - common_len++; - } - - if (common_len > 0) { // we have a marker structure with the name *inside* the marker - std::string common_prefix = diff.left.substr(0, common_len); - std::string left_remainder = diff.left.substr(common_len); - std::string right_remainder = diff.right.substr(common_len); - size_t left_close = - left_remainder.find_first_of("\"X"); // because arg-val is XXXX, can be quoted or unquoted - size_t right_close = right_remainder.find_first_of("\"Y"); // here arg-val is YYYY - - if (left_close != std::string::npos && right_close != std::string::npos) { - std::string left_name = left_remainder.substr(0, 5); // 5 = len("first") - std::string right_name = right_remainder.substr(0, 6); // 6 = len("second") - - if (left_name == "first" && right_name == "second") { - result.markers.arg_name_prefix = trim_whitespace(common_prefix); - std::string suffix_left = left_remainder.substr(5, left_close - 5); - std::string suffix_right = right_remainder.substr(6, right_close - 6); - if (suffix_left == suffix_right) { - result.markers.arg_name_suffix = trim_leading_whitespace(suffix_left); - } - LOG_DBG("A1: arg_name_prefix='%s', arg_name_suffix='%s'\n", result.markers.arg_name_prefix.c_str(), - 
result.markers.arg_name_suffix.c_str()); - } - } - } else if (diff.left.substr(0, 5) == "first" && diff.right.substr(0, 6) == "second") { - // we most likely have actual markers for argument names - auto pre_seg = segmentize_markers(diff.prefix); - for (int i = pre_seg.size() - 1; i >= 0; i--) { - result.markers.arg_name_prefix = result.markers.arg_name_prefix + pre_seg[i].value; - if (pre_seg[i].type == segment_type::MARKER) { - break; - } - } - auto left_seg = segmentize_markers(diff.left); - if (left_seg.size() == 1) { // only the name + maybe extra whitespace / normal chars in differing part - result.markers.arg_name_suffix = diff.left.substr(5); - auto suf_seg= segmentize_markers(diff.suffix); - for (size_t i = 0; i < suf_seg.size(); i++) { - result.markers.arg_name_suffix += suf_seg[i].value; - if (suf_seg[i].type == segment_type::MARKER) { - if (i < suf_seg.size() - 2 && suf_seg[i + 1].type == segment_type::TEXT && - trim_whitespace(suf_seg[i + 1].value).empty()) { - // we need to include post-marker whitespace/newlines as well - result.markers.arg_name_suffix += suf_seg[i + 1].value; - } - break; - } - } - } else { - for (size_t i = 0; i < left_seg.size(); i++) { - std::string to_add; - if (i == 0) { - to_add = left_seg[i].value.substr(5); - } else { - to_add = left_seg[i].value; - } - result.markers.arg_name_suffix += to_add; - if (left_seg[i].type == segment_type::MARKER) { - if (i < left_seg.size() - 2 && left_seg[i + 1].type == segment_type::TEXT && - trim_whitespace(left_seg[i + 1].value).empty()) { - // we need to include post-marker whitespace/newlines as well - result.markers.arg_name_suffix += left_seg[i + 1].value; - } - break; - } - } - } - } - } -} - -void differential_analyzer::extract_argument_value_markers(const common_chat_template & tmpl, - diff_analysis_result & result) { - json assistant_val_X = json{ - { "role", "assistant" }, - { "content", "" }, - { "tool_calls", json::array({ first_tool_call_one_arg }) } - }; - - json assistant_val_Y 
= json{ - { "role", "assistant" }, - { "content", "" }, - { "tool_calls", json::array({ first_tool_call_one_arg_other_val }) } - }; - - template_params params; - params.messages = json::array({ user_msg, assistant_val_X }); - params.tools = tools; - params.add_generation_prompt = false; - params.enable_thinking = true; - - auto comparison = compare_variants( - tmpl, params, [&](template_params & p) { p.messages = json::array({ user_msg, assistant_val_Y }); }); - - if (!comparison) { - LOG_DBG("A2: Template application failed\n"); - return; - } - - const auto & diff = comparison->diff; - LOG_DBG("A2 diff - suffix: '%s'\n", diff.suffix.c_str()); - LOG_DBG("A2 diff - left: '%s', right: '%s'\n", diff.left.c_str(), diff.right.c_str()); - - if (diff.left == "XXXX" && diff.right == "YYYY") { - std::string arg_name_ending = "first" + result.markers.arg_name_suffix; - std::string prefix = diff.prefix; - if (prefix.rfind(arg_name_ending) != std::string::npos) { - prefix = prefix.substr(prefix.rfind(arg_name_ending) + arg_name_ending.size()); - } - if (!prefix.empty()) { - auto seg_pre = segmentize_markers(prefix); - for (int i = seg_pre.size() - 1; i >= 0; i--) { - result.markers.arg_value_prefix = seg_pre[i].value + result.markers.arg_value_prefix; - if (seg_pre[i].type == segment_type::MARKER) { - break; - } - } - } - - std::string value_suffix = diff.suffix; - if (!result.markers.func_close.empty()) { - size_t func_close_pos = value_suffix.find(result.markers.func_close); - if (func_close_pos != std::string::npos) { - value_suffix = value_suffix.substr(0, func_close_pos); - } - } else if (!result.markers.per_call_end.empty() || !result.markers.tool_section_end.empty()) { - std::string end_marker = - !result.markers.per_call_end.empty() ? 
result.markers.per_call_end : result.markers.tool_section_end; - size_t end_marker_pos = value_suffix.find(end_marker); - if (end_marker_pos != std::string::npos) { - value_suffix = value_suffix.substr(0, end_marker_pos); - } - } - value_suffix = trim_leading_whitespace(value_suffix); - if (!value_suffix.empty()) { - result.markers.arg_value_suffix = value_suffix; - } - - LOG_DBG("A2: arg_value_prefix='%s', arg_value_suffix='%s'\n", result.markers.arg_value_prefix.c_str(), - result.markers.arg_value_suffix.c_str()); - } + return result; } void differential_analyzer::collect_preserved_tokens(diff_analysis_result & result) { @@ -1623,23 +1530,24 @@ void differential_analyzer::collect_preserved_tokens(diff_analysis_result & resu } }; - add_token(result.markers.reasoning_start); - add_token(result.markers.reasoning_end); - add_token(result.markers.content_start); - add_token(result.markers.content_end); - add_token(result.markers.tool_section_start); - add_token(result.markers.tool_section_end); - add_token(result.markers.per_call_start); - add_token(result.markers.per_call_end); - add_token(result.markers.func_name_prefix); - add_token(result.markers.func_name_suffix); - add_token(result.markers.func_close); - add_token(result.markers.arg_name_prefix); - add_token(result.markers.arg_name_suffix); - add_token(result.markers.arg_separator); - add_token(result.markers.arg_value_prefix); - add_token(result.markers.arg_value_suffix); - add_token(result.markers.call_id_prefix); - add_token(result.markers.call_id_suffix); - add_token(result.markers.code_block_marker); + add_token(result.reasoning.start); + add_token(result.reasoning.end); + add_token(result.content.start); + add_token(result.content.end); + add_token(result.tools.format.section_start); + add_token(result.tools.format.section_end); + add_token(result.tools.format.per_call_start); + add_token(result.tools.format.per_call_end); + add_token(result.tools.function.name_prefix); + 
add_token(result.tools.function.name_suffix); + add_token(result.tools.function.close); + add_token(result.tools.arguments.start); + add_token(result.tools.arguments.end); + add_token(result.tools.arguments.name_prefix); + add_token(result.tools.arguments.name_suffix); + add_token(result.tools.arguments.separator); + add_token(result.tools.arguments.value_prefix); + add_token(result.tools.arguments.value_suffix); + add_token(result.tools.call_id.prefix); + add_token(result.tools.call_id.suffix); } diff --git a/common/chat-diff-analyzer.h b/common/chat-diff-analyzer.h index ce729df0e6..c035203923 100644 --- a/common/chat-diff-analyzer.h +++ b/common/chat-diff-analyzer.h @@ -1,6 +1,7 @@ #pragma once #include "chat.h" +#include "jinja/caps.h" #include "nlohmann/json.hpp" #include @@ -15,11 +16,11 @@ using json = nlohmann::ordered_json; // Parameters for template application // ============================================================================ struct template_params { - json messages; - json tools; - bool add_generation_prompt = false; - bool enable_thinking = true; - std::optional extra_context = std::nullopt; + json messages; + json tools; + bool add_generation_prompt = false; + bool enable_thinking = true; + std::optional extra_context = std::nullopt; }; struct diff_split { @@ -35,9 +36,9 @@ struct diff_split { // Result of compare_variants containing diff and original outputs struct compare_variants_result { - diff_split diff; - std::string output_A; - std::string output_B; + diff_split diff; + std::string output_A; + std::string output_B; }; // ============================================================================ @@ -77,29 +78,23 @@ struct marker_registry { std::string arg_separator; // e.g., "", "\n", "," // === Call ID markers (for non-JSON formats with tool call IDs) === - std::string call_id_prefix; // e.g., "[CALL_ID]" (marker before call ID value) - std::string call_id_suffix; // e.g., "" (marker after call ID value, before next section) - - 
// === Special markers === - std::string code_block_marker; // e.g., "Action:" (for markdown code block format) - std::string code_block_language; // e.g., "json" - std::string function_namespace; // e.g., "functions." (for prefixed-indexed format) + std::string call_id_prefix; // e.g., "[CALL_ID]" (marker before call ID value) + std::string call_id_suffix; // e.g., "" (marker after call ID value, before next section) }; - // ============================================================================ // Analysis Result Enums // ============================================================================ // Reasoning handling mode (derived from R1-R3 comparisons) enum class reasoning_mode { - NONE, // No reasoning markers detected - TAG_BASED, // Standard tag-based: ... - DELIMITER, // Delimiter-based: [BEGIN FINAL RESPONSE] (reasoning ends at delimiter) - FORCED_OPEN, // Template ends with open reasoning tag (empty start, non-empty end) - FORCED_CLOSED,// Template ends with open reasoning tag on enabled thinking but - // with both opened and closed tag for disabled thinking - TOOLS_ONLY // Only reason on tool calls, not on normal content + NONE, // No reasoning markers detected + TAG_BASED, // Standard tag-based: ... 
+ DELIMITER, // Delimiter-based: [BEGIN FINAL RESPONSE] (reasoning ends at delimiter) + FORCED_OPEN, // Template ends with open reasoning tag (empty start, non-empty end) + FORCED_CLOSED, // Template ends with open reasoning tag on enabled thinking but + // with both opened and closed tag for disabled thinking + TOOLS_ONLY // Only reason on tool calls, not on normal content }; inline std::ostream & operator<<(std::ostream & os, const reasoning_mode & mode) { @@ -143,10 +138,10 @@ inline std::ostream & operator<<(std::ostream & os, const content_mode & mode) { // Call ID position in tool calls (for non-JSON formats) enum class call_id_position { - NONE, // No call ID support detected - PRE_FUNC_NAME, // Call ID before function name: [CALL_ID]id[FUNC]name{args} - BETWEEN_FUNC_AND_ARGS, // Call ID between function and args: [FUNC]name[CALL_ID]id{args} - POST_ARGS, // Call ID after arguments: [FUNC]name{args}[CALL_ID]id + NONE, // No call ID support detected + PRE_FUNC_NAME, // Call ID before function name: [CALL_ID]id[FUNC]name{args} + BETWEEN_FUNC_AND_ARGS, // Call ID between function and args: [FUNC]name[CALL_ID]id{args} + POST_ARGS, // Call ID after arguments: [FUNC]name{args}[CALL_ID]id }; inline std::ostream & operator<<(std::ostream & os, const call_id_position & pos) { @@ -166,10 +161,10 @@ inline std::ostream & operator<<(std::ostream & os, const call_id_position & pos // Tool call format classification (derived from T1-T5, A1-A3 comparisons) enum class tool_format { - NONE, // No tool support detected - JSON_NATIVE, // Pure JSON: {"name": "X", "arguments": {...}} - TAG_WITH_JSON, // Tag-based with JSON args: {...} - TAG_WITH_TAGGED, // Tag-based with tagged args: value + NONE, // No tool support detected + JSON_NATIVE, // Pure JSON: {"name": "X", "arguments": {...}} + TAG_WITH_JSON, // Tag-based with JSON args: {...} + TAG_WITH_TAGGED, // Tag-based with tagged args: value }; inline std::ostream & operator<<(std::ostream & os, const tool_format & format) { @@ 
-187,33 +182,77 @@ inline std::ostream & operator<<(std::ostream & os, const tool_format & format) } } +struct reasoning_analysis { + reasoning_mode mode = reasoning_mode::NONE; + + std::string start; // e.g., "", "[THINK]", "<|START_THINKING|>", "" + std::string end; // e.g., "", "[BEGIN FINAL RESPONSE]", "<|END_THINKING|>" +}; + +struct content_analysis { + content_mode mode = content_mode::PLAIN; + + std::string start; // e.g., "", ">>>all\n", "" + std::string end; // e.g., "", "" + + bool requires_nonnull_content = false; +}; + +struct tool_format_analysis { + tool_format mode = tool_format::NONE; + + std::string section_start; // e.g., "", "[TOOL_CALLS]", "" + std::string section_end; // e.g., "", "" + std::string per_call_start; // e.g., "<|tool_call_begin|>", "" (for multi-call templates) + std::string per_call_end; // e.g., "<|tool_call_end|>", "" + + bool fun_name_is_key = false; // In JSON format function name is JSON key, i.e. { "": { ... arguments ... } } + bool tools_array_wrapped = false; // Tool calls wrapped in JSON array [...] + + std::string function_field = "function"; + std::string name_field = "name"; + std::string args_field = "arguments"; + std::string id_field; + std::string gen_id_field; + std::vector parameter_order; +}; + +struct tool_function_analysis { + std::string name_prefix; // e.g., "", "\"", ":0" + std::string close; // e.g., "", "" (for tag-based) +}; + +struct tool_arguments_analysis { + std::string start; // e.g., "<|tool_call_argument_begin|>", "" + std::string end; // e.g., "<|tool_call_argument_end|>", "" + std::string name_prefix; // e.g., "", "\"" + std::string name_suffix; // e.g., ">", "
", "\":" + std::string value_prefix; // e.g., "", "", "" + std::string value_suffix; // e.g., "", "", "" + std::string separator; // e.g., "", "\n", "," +}; + +struct tool_id_analysis { + call_id_position pos = call_id_position::NONE; + + std::string prefix; // e.g., "[CALL_ID]" (marker before call ID value) + std::string suffix; // e.g., "" (marker after call ID value, before next section) +}; + +struct tool_analysis { + tool_format_analysis format; + tool_function_analysis function; + tool_arguments_analysis arguments; + tool_id_analysis call_id; +}; + // Complete result of differential analysis struct diff_analysis_result { - // Classification results - reasoning_mode reasoning = reasoning_mode::NONE; - content_mode content = content_mode::PLAIN; - tool_format tools = tool_format::NONE; - - // All extracted markers - marker_registry markers; - - // JSON field names (for JSON-based formats) - bool fun_name_is_key = false; - std::string function_field = "function"; - std::string name_field = "name"; - std::string args_field = "arguments"; - std::string id_field; - std::string gen_id_field; - std::vector parameter_order; - - // Call ID position (for non-JSON formats) - call_id_position call_id_pos = call_id_position::NONE; - - // Flags - bool supports_tools = false; - bool supports_parallel_calls = false; - bool requires_nonnull_content = false; - bool tools_array_wrapped = false; // Tool calls wrapped in JSON array [...] 
+ jinja::caps jinja_caps; + reasoning_analysis reasoning; + content_analysis content; + tool_analysis tools; // Preserved tokens for tokenizer (union of all non-empty markers) std::vector preserved_tokens; @@ -227,94 +266,102 @@ class differential_analyzer { static diff_analysis_result analyze(const common_chat_template & tmpl); // Phase-specific analysis (can be called individually for testing) - static void analyze_reasoning(const common_chat_template & tmpl, diff_analysis_result & result); - static void analyze_content(const common_chat_template & tmpl, diff_analysis_result & result); - static void analyze_tools(const common_chat_template & tmpl, diff_analysis_result & result); - static void analyze_arguments(const common_chat_template & tmpl, diff_analysis_result & result); + static reasoning_analysis analyze_reasoning(const common_chat_template & tmpl, bool supports_tools); + static content_analysis analyze_content(const common_chat_template & tmpl, const reasoning_analysis & reasoning); + static tool_analysis analyze_tools(const common_chat_template & tmpl, + const jinja::caps & caps, + const reasoning_analysis & reasoning); // Factorized differential comparison function (public for testing) // Takes base params and a single modifier lambda to create variant B // Returns compare_variants_result containing diff and both outputs, or std::nullopt on failure static std::optional compare_variants( - const common_chat_template & tmpl, - const template_params & params_A, - const std::function & params_modifier); + const common_chat_template & tmpl, + const template_params & params_A, + const std::function & params_modifier); private: // Comparison helpers (implement the comparison matrix from the plan) - // R1: Extract reasoning markers by comparing with/without reasoning_content - static void compare_reasoning_presence(const common_chat_template & tmpl, diff_analysis_result & result); + // 1. 
Reasoning analysis: + // Look for reasoning markers in rendered content + static void compare_reasoning_presence(const common_chat_template & tmpl, reasoning_analysis & reasoning); - // R2: Detect forced-open reasoning by comparing enable_thinking=false vs true - static void compare_thinking_enabled(const common_chat_template & tmpl, diff_analysis_result & result); + // Compare generation prompt with enable_thinking=true vs false + static void compare_thinking_enabled(const common_chat_template & tmpl, reasoning_analysis & reasoning); - // R3: Detect reasoning scope (content-only vs with tools) - static void compare_reasoning_scope(const common_chat_template & tmpl, diff_analysis_result & result); + // Check if reasoning is always possible or only in tool calls + static void compare_reasoning_scope(const common_chat_template & tmpl, reasoning_analysis & reasoning); - // C1: Extract content markers by comparing different content values - static void compare_content_values(const common_chat_template & tmpl, diff_analysis_result & result); + // 2. Content (fully inside analyze_content mentioned above) - // T1: Analyze the tool calls - static void analyze_tool_calls(const common_chat_template & tmpl, diff_analysis_result & result); + // 3. Tool calls + // a. 
format + // Extract tool calling 'haystack' for further analysis and delegate further analysis based on format + static tool_format_analysis analyze_tool_calls(const common_chat_template & tmpl, + const reasoning_analysis & reasoning); - // Analyzes a tool call section to determine the format used (pure JSON, function name markers, or full markers) - static void analyze_tool_call_format(const std::string & haystack, - const std::string & fun_name_needle, - const std::string & arg_name_needle, - diff_analysis_result & result); + // Analyze format based on position of function and argument name in needle + static tool_format_analysis analyze_tool_call_format(const std::string & haystack, + const std::string & fun_name_needle, + const std::string & arg_name_needle, + const reasoning_analysis & reasoning); - // Helper functions to handle the two branches of analyze_tool_call_format + // Analyze specifics of JSON native format (entire tool call is a JSON object) static void analyze_tool_call_format_json_native(const std::string & clean_haystack, const std::string & fun_name_needle, const std::string & arg_name_needle, - diff_analysis_result & result); + tool_format_analysis & format); + // Analyze specifics of non-JSON native format (tags for function name or for function name and arguments) static void analyze_tool_call_format_non_json(const std::string & clean_haystack, const std::string & fun_name_needle, - diff_analysis_result & result); + tool_format_analysis & format); - // T2: Check if markers are per call or per section - static void check_per_call_markers(const common_chat_template & tmpl, diff_analysis_result & result); + // Check for and extract specific per-call markers for non-native-JSON templates with parallel call support + static void check_per_call_markers(const common_chat_template & tmpl, tool_format_analysis & result); - // T3: Extract call separator; also outputs second_call_content for per-call detection - static void extract_call_separator(const 
common_chat_template & tmpl, diff_analysis_result & result, - std::string & second_call_content); + // Logic below is only for non-JSON-native tool calling formats + // 3. b. function name + // Extract function name markers + static tool_function_analysis extract_function_markers(const common_chat_template & tmpl, + const tool_format_analysis & analysis); - // T4: Analyze function name format and extract markers - static void extract_function_markers(const common_chat_template & tmpl, - diff_analysis_result & result); + // 4. c. function arguments + // Delegates to separate functions for: separator analysis, argument name analysis, argument value analysis + static tool_arguments_analysis analyze_arguments(const common_chat_template & tmpl, + const tool_analysis & analysis); - // T5: Extract argument separator - static void extract_argument_separator(const common_chat_template & tmpl, diff_analysis_result & result); + // Extract argument name markers + static void extract_argument_name_markers(const common_chat_template & tmpl, + tool_arguments_analysis & args_analysis); - // T6: Extract args container markers - static void extract_args_markers(const common_chat_template & tmpl, diff_analysis_result & result); + // Extract argument value markers + static void extract_argument_value_markers(const common_chat_template & tmpl, + const tool_analysis & analysis, + tool_arguments_analysis & args_analysis); - // A1: Extract argument name markers - static void extract_argument_name_markers(const common_chat_template & tmpl, diff_analysis_result & result); + // Extract argument separator, if specified (eg. ......) + static void extract_argument_separator(const common_chat_template & tmpl, + tool_arguments_analysis & args_analysis); - // A2: Extract argument value markers - static void extract_argument_value_markers(const common_chat_template & tmpl, diff_analysis_result & result); + // Extract argument wrapper markers, if present (eg. 
'......') + static void extract_args_markers(const common_chat_template & tmpl, + const tool_analysis & analysis, + tool_arguments_analysis & args_analysis); - // T7: Extract call ID markers (for non-JSON formats) - static void extract_call_id_markers(const common_chat_template & tmpl, diff_analysis_result & result); + // 4. d. function call id + // Extract call ID markers, if present + static tool_id_analysis extract_call_id_markers(const common_chat_template & tmpl, + tool_format_analysis & analysis); - // Classify tool format based on extracted markers - static void classify_tool_format(diff_analysis_result & result); - - // Classification helpers + // Collect tokens from entire analysis to preserve static void collect_preserved_tokens(diff_analysis_result & result); - // Utility: Apply template with given parameters - static std::string apply_template(const common_chat_template & tmpl, - const template_params & params); + static std::string apply_template(const common_chat_template & tmpl, const template_params & params); }; -enum segment_type { - TEXT, - MARKER -}; +enum segment_type { TEXT, MARKER }; inline std::ostream & operator<<(std::ostream & os, const segment_type & type) { switch (type) { @@ -329,7 +376,7 @@ inline std::ostream & operator<<(std::ostream & os, const segment_type & type) { struct segment { segment_type type; - std::string value; + std::string value; segment(segment_type type, std::string value) : type(type), value(std::move(value)) {} }; diff --git a/common/chat-peg-parser.cpp b/common/chat-peg-parser.cpp index cb38fb160f..6e58dc6761 100644 --- a/common/chat-peg-parser.cpp +++ b/common/chat-peg-parser.cpp @@ -167,13 +167,13 @@ void common_chat_peg_mapper::map(const common_peg_ast_node & node) { bool is_content = node.tag == common_chat_peg_builder::CONTENT; if (is_reasoning) { // GPT OSS can have more than 1 reasoning block, so concatenate here - result.reasoning_content += std::string(trim_trailing_space(node.text)); + 
result.reasoning_content += std::string(node.text); } if (is_content) { // Concatenate content from multiple content nodes (e.g., when reasoning markers // are preserved before content markers in reasoning_format=NONE mode) - result.content += std::string(trim_trailing_space(node.text)); + result.content += std::string(node.text); } } diff --git a/common/chat.cpp b/common/chat.cpp index 3fb17b4e9d..be4e19aebc 100644 --- a/common/chat.cpp +++ b/common/chat.cpp @@ -240,7 +240,7 @@ bool common_chat_templates_support_enable_thinking(const common_chat_templates * ? *chat_templates->template_tool_use : *chat_templates->template_default; diff_analysis_result result = differential_analyzer::analyze(tmpl); - detect |= result.reasoning != reasoning_mode::NONE; + detect |= result.reasoning.mode != reasoning_mode::NONE; return detect; } diff --git a/common/jinja/caps.cpp b/common/jinja/caps.cpp index abd4cd2d9f..aecad6efa6 100644 --- a/common/jinja/caps.cpp +++ b/common/jinja/caps.cpp @@ -42,7 +42,7 @@ static void caps_try_execute(jinja::program & prog, jinja::runtime runtime(ctx); auto results = runtime.execute(prog); auto parts = jinja::runtime::gather_string_parts(results); - std::string result = parts->as_string().str(); + result = parts->as_string().str(); success = true; } catch (const std::exception & e) { JJ_DEBUG("Exception during execution: %s", e.what()); @@ -95,6 +95,8 @@ caps caps_get(jinja::program & prog) { return v->stats.ops.find(op_name) != v->stats.ops.end(); }; + JJ_DEBUG("%s\n", ">>> Running capability check: typed content"); + // case: typed content support caps_try_execute( prog, @@ -125,6 +127,7 @@ caps caps_get(jinja::program & prog) { } ); + JJ_DEBUG("%s\n", ">>> Running capability check: system prompt"); // case: system prompt support caps_try_execute( @@ -155,6 +158,8 @@ caps caps_get(jinja::program & prog) { } ); + JJ_DEBUG("%s\n", ">>> Running capability check: tool support"); + // case: tools support caps_try_execute( prog, @@ -167,7 +172,7 @@ 
caps caps_get(jinja::program & prog) { }, { {"role", "assistant"}, - {"content", "Assistant message"}, + {"content", ""}, // Some templates expect content to be empty with tool calls {"tool_calls", json::array({ { {"id", "call00001"}, @@ -260,6 +265,8 @@ caps caps_get(jinja::program & prog) { } ); + JJ_DEBUG("%s\n", ">>> Running capability check: preserve reasoning"); + // case: preserve reasoning content in chat history caps_try_execute( prog, diff --git a/common/jinja/runtime.cpp b/common/jinja/runtime.cpp index cc012c892f..b7e71115ed 100644 --- a/common/jinja/runtime.cpp +++ b/common/jinja/runtime.cpp @@ -114,8 +114,10 @@ value binary_expression::execute_impl(context & ctx) { // Logical operators if (op.value == "and") { + JJ_DEBUG("Executing logical test: %s AND %s", left->type().c_str(), right->type().c_str()); return left_val->as_bool() ? right->execute(ctx) : std::move(left_val); } else if (op.value == "or") { + JJ_DEBUG("Executing logical test: %s OR %s", left->type().c_str(), right->type().c_str()); return left_val->as_bool() ? 
std::move(left_val) : right->execute(ctx); } @@ -835,7 +837,7 @@ value call_expression::execute_impl(context & ctx) { for (auto & arg_stmt : this->args) { auto arg_val = arg_stmt->execute(ctx); JJ_DEBUG(" Argument type: %s", arg_val->type().c_str()); - args.push_back(std::move(arg_val)); + args.push_back(arg_val); } // execute callee value callee_val = callee->execute(ctx); diff --git a/models/templates/deepseek-ai-DeepSeek-V3.1.jinja b/models/templates/deepseek-ai-DeepSeek-V3.1.jinja index 6ef7fb123c..2fd1c415b8 100644 --- a/models/templates/deepseek-ai-DeepSeek-V3.1.jinja +++ b/models/templates/deepseek-ai-DeepSeek-V3.1.jinja @@ -27,7 +27,7 @@ {%- set ns.is_first = false -%} {%- set ns.is_last_user = true -%}{{'<|User|>' + message['content']}} {%- endif -%} - {%- if message['role'] == 'assistant' and message['tool_calls'] is defined and message['tool_calls'] is not none -%} + {%- if message['role'] == 'assistant' and message['tool_calls'] -%} {%- if ns.is_last_user -%}{{'<|Assistant|>'}} {%- endif -%} {%- set ns.is_last_user = false -%} @@ -35,7 +35,7 @@ {%- set ns.is_tool = false -%} {%- for tool in message['tool_calls'] -%} {%- if not ns.is_first -%} - {%- if message['content'] is none -%}{{'<|tool▁calls▁begin|><|tool▁call▁begin|>'+ tool['function']['name'] + '<|tool▁sep|>' + tool['function']['arguments'] + '<|tool▁call▁end|>'}} + {%- if not message['content'] -%}{{'<|tool▁calls▁begin|><|tool▁call▁begin|>'+ tool['function']['name'] + '<|tool▁sep|>' + tool['function']['arguments'] + '<|tool▁call▁end|>'}} {%- else -%}{{message['content'] + '<|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['function']['name'] + '<|tool▁sep|>' + tool['function']['arguments'] + '<|tool▁call▁end|>'}} {%- endif -%} {%- set ns.is_first = true -%} @@ -43,7 +43,7 @@ {%- endif -%} {%- endfor -%}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}} {%- endif -%} - {%- if message['role'] == 'assistant' and (message['tool_calls'] is not defined or message['tool_calls'] is none) -%} + {%- if 
message['role'] == 'assistant' and not message['tool_calls'] -%} {%- if ns.is_last_user -%}{{'<|Assistant|>'}} {%- if message['prefix'] is defined and message['prefix'] and thinking -%}{{''}} {%- else -%}{{''}} diff --git a/tests/test-chat-auto-parser.cpp b/tests/test-chat-auto-parser.cpp index 90edaba32d..b2bdab15e9 100644 --- a/tests/test-chat-auto-parser.cpp +++ b/tests/test-chat-auto-parser.cpp @@ -57,7 +57,6 @@ static void test_nemotron_tool_format(testing & t); // CohereForAI template analysis tests static void test_cohere_reasoning_detection(testing & t); -static void test_cohere_tool_format(testing & t); static void test_cohere_analysis(testing & t); // Marker separation @@ -1283,18 +1282,18 @@ static void test_nemotron_reasoning_detection(testing & t) { auto analysis = differential_analyzer::analyze(tmpl); // Check reasoning markers - t.assert_equal("reasoning_start should be ''", "", analysis.markers.reasoning_start); - t.assert_equal("reasoning_end should be ''", "", analysis.markers.reasoning_end); + t.assert_equal("reasoning_start should be ''", "", analysis.reasoning.start); + t.assert_equal("reasoning_end should be ''", "", analysis.reasoning.end); // Check reasoning mode detection // Nemotron uses forced closed reasoning with add_generation_prompt - t.assert_equal("reasoning should be FORCED_CLOSED", reasoning_mode::FORCED_CLOSED, analysis.reasoning); + t.assert_equal("reasoning should be FORCED_CLOSED", reasoning_mode::FORCED_CLOSED, analysis.reasoning.mode); // Make sure reasoning markers don't spill over to content markers - t.assert_equal("content start should be empty", "", analysis.markers.content_start); - t.assert_equal("content end should be empty", "", analysis.markers.content_end); + t.assert_equal("content start should be empty", "", analysis.content.start); + t.assert_equal("content end should be empty", "", analysis.content.end); - t.assert_equal("content should be PLAIN", content_mode::PLAIN, analysis.content); + 
t.assert_equal("content should be PLAIN", content_mode::PLAIN, analysis.content.mode); } static void test_nemotron_tool_format(testing & t) { @@ -1304,27 +1303,27 @@ static void test_nemotron_tool_format(testing & t) { auto analysis = differential_analyzer::analyze(tmpl); // Check tool markers - Nemotron uses per-call wrapping (each call individually wrapped) - t.assert_equal("tool_section_start should be empty (per-call format)", "", analysis.markers.tool_section_start); - t.assert_equal("tool_section_end should be empty (per-call format)", "", analysis.markers.tool_section_end); - t.assert_equal("per_call_start should be '\\n'", "\n", analysis.markers.per_call_start); - t.assert_equal("per_call_end should be ''", "", analysis.markers.per_call_end); - t.assert_true("should support parallel calls", analysis.supports_parallel_calls); + t.assert_equal("tool_section_start should be empty (per-call format)", "", analysis.tools.format.section_start); + t.assert_equal("tool_section_end should be empty (per-call format)", "", analysis.tools.format.section_end); + t.assert_equal("per_call_start should be '\\n'", "\n", analysis.tools.format.per_call_start); + t.assert_equal("per_call_end should be ''", "", analysis.tools.format.per_call_end); + t.assert_true("should support parallel calls", analysis.jinja_caps.supports_parallel_tool_calls); // Check function markers - t.assert_equal("func_name_prefix should be '\\n'", ">\n", analysis.markers.func_name_suffix); - t.assert_equal("func_close should be '\\n'", "\n", analysis.markers.func_close); + t.assert_equal("func_name_prefix should be '\\n'", ">\n", analysis.tools.function.name_suffix); + t.assert_equal("func_close should be '\\n'", "\n", analysis.tools.function.close); // Check argument markers (note: markers retain trailing newlines for proper parsing) - t.assert_equal("arg_name_prefix should be '\\n'", ">\n", analysis.markers.arg_name_suffix); - t.assert_equal("arg_value_suffix should be '\\n'", "\n", 
analysis.markers.arg_value_suffix); + t.assert_equal("arg_name_prefix should be '\\n'", ">\n", analysis.tools.arguments.name_suffix); + t.assert_equal("arg_value_suffix should be '\\n'", "\n", analysis.tools.arguments.value_suffix); // Check format classification - t.assert_true("tool format should be TAG_WITH_TAGGED", analysis.tools == tool_format::TAG_WITH_TAGGED); + t.assert_true("tool format should be TAG_WITH_TAGGED", analysis.tools.format.mode == tool_format::TAG_WITH_TAGGED); // Verify tool support - t.assert_true("should support tools", analysis.supports_tools); + t.assert_true("should support tools", analysis.jinja_caps.supports_tools); } static common_chat_template load_cohere_template(testing & t) { @@ -1333,7 +1332,6 @@ static common_chat_template load_cohere_template(testing & t) { static void test_cohere_analysis(testing & t) { t.test("Cohere reasoning detection", test_cohere_reasoning_detection); - t.test("Cohere tool format", test_cohere_tool_format); } static void test_cohere_reasoning_detection(testing & t) { @@ -1343,64 +1341,64 @@ static void test_cohere_reasoning_detection(testing & t) { auto analysis = differential_analyzer::analyze(tmpl); // Check reasoning markers - Cohere uses special token format - t.assert_equal("reasoning_start should be '<|START_THINKING|>'", "<|START_THINKING|>", analysis.markers.reasoning_start); - t.assert_equal("reasoning_end should be '<|END_THINKING|>'", "<|END_THINKING|>", analysis.markers.reasoning_end); + t.assert_equal("reasoning_start should be '<|START_THINKING|>'", "<|START_THINKING|>", analysis.reasoning.start); + t.assert_equal("reasoning_end should be '<|END_THINKING|>'", "<|END_THINKING|>", analysis.reasoning.end); // Check reasoning mode - Cohere only shows reasoning with tool calls (TOOLS_ONLY) - t.assert_equal("reasoning should be TOOLS_ONLY", reasoning_mode::TOOLS_ONLY, analysis.reasoning); + t.assert_equal("reasoning should be TOOLS_ONLY", reasoning_mode::TOOLS_ONLY, analysis.reasoning.mode); // 
Check content markers - Cohere wraps all content with START/END_RESPONSE - t.assert_equal("content_start should be '<|START_RESPONSE|>'", "<|START_RESPONSE|>", analysis.markers.content_start); - t.assert_equal("content_end should be '<|END_RESPONSE|>'", "<|END_RESPONSE|>", analysis.markers.content_end); + t.assert_equal("content_start should be '<|START_RESPONSE|>'", "<|START_RESPONSE|>", analysis.content.start); + t.assert_equal("content_end should be '<|END_RESPONSE|>'", "<|END_RESPONSE|>", analysis.content.end); // Content is always wrapped (both with and without tools) - t.assert_equal("content should be ALWAYS_WRAPPED", content_mode::ALWAYS_WRAPPED, analysis.content); + t.assert_equal("content should be ALWAYS_WRAPPED", content_mode::ALWAYS_WRAPPED, analysis.content.mode); } -static void test_cohere_tool_format(testing & t) { +static void test_tool_format_cohere(testing & t) { common_chat_template tmpl = load_cohere_template(t); // Run differential analysis auto analysis = differential_analyzer::analyze(tmpl); // Check tool section markers - Cohere uses ACTION markers - t.assert_equal("tool_section_start should be '<|START_ACTION|>'", "<|START_ACTION|>", analysis.markers.tool_section_start); - t.assert_equal("tool_section_end should be '<|END_ACTION|>'", "<|END_ACTION|>", analysis.markers.tool_section_end); + t.assert_equal("tool_section_start should be '<|START_ACTION|>'", "<|START_ACTION|>", analysis.tools.format.section_start); + t.assert_equal("tool_section_end should be '<|END_ACTION|>'", "<|END_ACTION|>", analysis.tools.format.section_end); // JSON_NATIVE format has no per-call markers - t.assert_equal("per_call_start should be empty", "", analysis.markers.per_call_start); - t.assert_equal("per_call_end should be empty", "", analysis.markers.per_call_end); + t.assert_equal("per_call_start should be empty", "", analysis.tools.format.per_call_start); + t.assert_equal("per_call_end should be empty", "", analysis.tools.format.per_call_end); // JSON_NATIVE 
format has empty function markers (no XML-style markers) - t.assert_equal("func_name_prefix should be empty", "", analysis.markers.func_name_prefix); - t.assert_equal("func_name_suffix should be empty", "", analysis.markers.func_name_suffix); - t.assert_equal("func_close should be empty", "", analysis.markers.func_close); + t.assert_equal("func_name_prefix should be empty", "", analysis.tools.function.name_prefix); + t.assert_equal("func_name_suffix should be empty", "", analysis.tools.function.name_suffix); + t.assert_equal("func_close should be empty", "", analysis.tools.function.close); // JSON_NATIVE format has empty args markers - t.assert_equal("args_start should be empty", "", analysis.markers.args_start); - t.assert_equal("args_end should be empty", "", analysis.markers.args_end); + t.assert_equal("args_start should be empty", "", analysis.tools.arguments.start); + t.assert_equal("args_end should be empty", "", analysis.tools.arguments.end); // JSON_NATIVE format has empty argument markers - t.assert_equal("arg_name_prefix should be empty", "", analysis.markers.arg_name_prefix); - t.assert_equal("arg_name_suffix should be empty", "", analysis.markers.arg_name_suffix); - t.assert_equal("arg_value_prefix should be empty", "", analysis.markers.arg_value_prefix); - t.assert_equal("arg_value_suffix should be empty", "", analysis.markers.arg_value_suffix); - t.assert_equal("arg_separator should be empty", "", analysis.markers.arg_separator); + t.assert_equal("arg_name_prefix should be empty", "", analysis.tools.arguments.name_prefix); + t.assert_equal("arg_name_suffix should be empty", "", analysis.tools.arguments.name_suffix); + t.assert_equal("arg_value_prefix should be empty", "", analysis.tools.arguments.value_prefix); + t.assert_equal("arg_value_suffix should be empty", "", analysis.tools.arguments.value_suffix); + t.assert_equal("arg_separator should be empty", "", analysis.tools.arguments.separator); // Check JSON field names - Cohere uses non-standard 
names - t.assert_equal("name_field should be 'tool_name'", "tool_name", analysis.name_field); - t.assert_equal("args_field should be 'parameters'", "parameters", analysis.args_field); + t.assert_equal("name_field should be 'tool_name'", "tool_name", analysis.tools.format.name_field); + t.assert_equal("args_field should be 'parameters'", "parameters", analysis.tools.format.args_field); // This isn't a real tool call id field, i.e. with the OpenAI tool call ID format - t.assert_equal("id_field should be 'tool_call_id'", "", analysis.id_field); + t.assert_equal("id_field should be 'tool_call_id'", "", analysis.tools.format.id_field); // Check format classification - t.assert_equal("tool format should be JSON_NATIVE", tool_format::JSON_NATIVE, analysis.tools); + t.assert_equal("tool format should be JSON_NATIVE", tool_format::JSON_NATIVE, analysis.tools.format.mode); // Check flags - t.assert_true("should support tools", analysis.supports_tools); - t.assert_true("should support parallel calls", analysis.supports_parallel_calls); - t.assert_true("should not require nonnull content", !analysis.requires_nonnull_content); - t.assert_true("tools_array_wrapped should be true", analysis.tools_array_wrapped); + t.assert_true("should support tools", analysis.jinja_caps.supports_tools); + t.assert_true("should support parallel calls", analysis.jinja_caps.supports_parallel_tool_calls); + t.assert_true("should not require nonnull content", !analysis.content.requires_nonnull_content); + t.assert_true("tools_array_wrapped should be true", analysis.tools.format.tools_array_wrapped); } // ============================================================================ diff --git a/tests/test-chat-peg-parser.cpp b/tests/test-chat-peg-parser.cpp index ae82966699..d59880e3dc 100644 --- a/tests/test-chat-peg-parser.cpp +++ b/tests/test-chat-peg-parser.cpp @@ -530,7 +530,7 @@ static void test_example_qwen3_non_coder(testing & t) { auto mapper = common_chat_peg_unified_mapper(msg); 
mapper.from_ast(ctx.ast, result); - t.assert_equal("content", "I need to get the weather.", msg.content); + t.assert_equal("content", "I need to get the weather.\n", msg.content); t.assert_equal("reasoning", "", msg.reasoning_content); t.assert_equal("tool calls count", 1u, msg.tool_calls.size()); if (!msg.tool_calls.empty()) { diff --git a/tests/test-chat-template.cpp b/tests/test-chat-template.cpp index 8b331129b1..91ff83a729 100644 --- a/tests/test-chat-template.cpp +++ b/tests/test-chat-template.cpp @@ -32,6 +32,7 @@ static std::string HELP = R"( Usage: test-chat-template [OPTIONS] PATH_TO_TEMPLATE Options: -h, --help Show this help message and exit. + --with-tools Add a tool and a tool call to the default JSON input --json Path to the JSON input file. --stop-on-first-fail Stop testing on the first failure (default: false). --no-common Use direct Jinja engine instead of common chat templates (default: use common). @@ -57,12 +58,65 @@ static std::string DEFAULT_JSON = R"({ "add_generation_prompt": true })"; +static std::string DEFAULT_JSON_WITH_TOOLS = R"({ + "messages": [ + { + "role": "user", + "content": "Hello, how are you?" + }, + { + "role": "assistant", + "content": "I am fine, thank you!" + }, + { + "role": "user", + "content": "Call a tool!" 
+ }, + { + "role": "assistant", + "tool_calls": [ + { + "id": "call00001", + "type": "function", + "function": { + "name": "test", + "arguments": { "arg": "hello" } + } + } + ] + } + ], + "tools": [ + { + "type": "function", + "function": { + "name": "test", + "description": "Test", + "parameters": { + "type": "object", + "properties": { + "arg": { + "type": "string" + } + } + }, + "required": ["arg"] + } + } + ], + "bos_token": "", + "eos_token": "", + "add_generation_prompt": true +})"; + + int main(int argc, char ** argv) { std::vector args(argv, argv + argc); std::string tmpl_path; std::string json_path; std::string output_path; + std::string & json_to_use = DEFAULT_JSON; bool stop_on_first_fail = false; bool use_common = true; @@ -74,6 +128,8 @@ int main(int argc, char ** argv) { if (args[i] == "--json" && i + 1 < args.size()) { json_path = args[i + 1]; i++; + } else if (args[i] == "--with-tools") { + json_to_use = DEFAULT_JSON_WITH_TOOLS; } else if (args[i] == "--stop-on-first-fail") { stop_on_first_fail = true; } else if (args[i] == "--output" && i + 1 < args.size()) { @@ -106,7 +162,7 @@ int main(int argc, char ** argv) { std::istreambuf_iterator()); input_json = json::parse(content); } else { - input_json = json::parse(DEFAULT_JSON); + input_json = json::parse(json_to_use); } std::filesystem::path p(tmpl_path); diff --git a/tools/parser/debug-template-parser.cpp b/tools/parser/debug-template-parser.cpp index c0e29c548a..c87f3c8e35 100644 --- a/tools/parser/debug-template-parser.cpp +++ b/tools/parser/debug-template-parser.cpp @@ -419,33 +419,33 @@ int main(int argc, char ** argv) { LOG_ERR("\n=== Differential Analysis Results ===\n"); LOG_ERR("\n--- Reasoning & Content Structure ---\n"); - LOG_ERR("reasoning_mode: %s\n", mode_to_str(analysis.reasoning).c_str()); - LOG_ERR("reasoning_start: '%s'\n", analysis.markers.reasoning_start.c_str()); - LOG_ERR("reasoning_end: '%s'\n", analysis.markers.reasoning_end.c_str()); - LOG_ERR("content_mode: %s\n", 
mode_to_str(analysis.content).c_str()); - LOG_ERR("content_start: '%s'\n", analysis.markers.content_start.c_str()); - LOG_ERR("content_end: '%s'\n", analysis.markers.content_end.c_str()); + LOG_ERR("reasoning_mode: %s\n", mode_to_str(analysis.reasoning.mode).c_str()); + LOG_ERR("reasoning_start: '%s'\n", analysis.reasoning.start.c_str()); + LOG_ERR("reasoning_end: '%s'\n", analysis.reasoning.end.c_str()); + LOG_ERR("content_mode: %s\n", mode_to_str(analysis.content.mode).c_str()); + LOG_ERR("content_start: '%s'\n", analysis.content.start.c_str()); + LOG_ERR("content_end: '%s'\n", analysis.content.end.c_str()); LOG_ERR("\n--- Tool Call Structure ---\n"); - LOG_ERR("tool_mode: %s\n", mode_to_str(analysis.tools).c_str()); - LOG_ERR("supports_tools: %s\n", analysis.supports_tools ? "true" : "false"); - LOG_ERR("supports_parallel_calls: %s\n", analysis.supports_parallel_calls ? "true" : "false"); - LOG_ERR("tool_section_start: '%s'\n", analysis.markers.tool_section_start.c_str()); - LOG_ERR("tool_section_end: '%s'\n", analysis.markers.tool_section_end.c_str()); - LOG_ERR("per_call_start: '%s'\n", analysis.markers.per_call_start.c_str()); - LOG_ERR("per_call_end: '%s'\n", analysis.markers.per_call_end.c_str()); - LOG_ERR("func_name_prefix: '%s'\n", analysis.markers.func_name_prefix.c_str()); - LOG_ERR("func_name_suffix: '%s'\n", analysis.markers.func_name_suffix.c_str()); - LOG_ERR("func_close: '%s'\n", analysis.markers.func_close.c_str()); - LOG_ERR("arg_name_prefix: '%s'\n", analysis.markers.arg_name_prefix.c_str()); - LOG_ERR("arg_name_suffix: '%s'\n", analysis.markers.arg_name_suffix.c_str()); - LOG_ERR("arg_value_prefix: '%s'\n", analysis.markers.arg_value_prefix.c_str()); - LOG_ERR("arg_value_suffix: '%s'\n", analysis.markers.arg_value_suffix.c_str()); - LOG_ERR("name_field: '%s'\n", analysis.name_field.c_str()); - LOG_ERR("args_field: '%s'\n", analysis.args_field.c_str()); - LOG_ERR("id_field: '%s'\n", analysis.id_field.c_str()); - LOG_ERR("gen_id_field: '%s'\n", 
analysis.gen_id_field.c_str()); - LOG_ERR("parameter_order: '%s'\n", std::accumulate(analysis.parameter_order.begin(), analysis.parameter_order.end(), + LOG_ERR("tool_mode: %s\n", mode_to_str(analysis.tools.format.mode).c_str()); + LOG_ERR("supports_tools: %s\n", analysis.jinja_caps.supports_tools ? "true" : "false"); + LOG_ERR("supports_parallel_calls: %s\n", analysis.jinja_caps.supports_parallel_tool_calls ? "true" : "false"); + LOG_ERR("tool_section_start: '%s'\n", analysis.tools.format.section_start.c_str()); + LOG_ERR("tool_section_end: '%s'\n", analysis.tools.format.section_end.c_str()); + LOG_ERR("per_call_start: '%s'\n", analysis.tools.format.per_call_start.c_str()); + LOG_ERR("per_call_end: '%s'\n", analysis.tools.format.per_call_end.c_str()); + LOG_ERR("func_name_prefix: '%s'\n", analysis.tools.function.name_prefix.c_str()); + LOG_ERR("func_name_suffix: '%s'\n", analysis.tools.function.name_suffix.c_str()); + LOG_ERR("func_close: '%s'\n", analysis.tools.function.close.c_str()); + LOG_ERR("arg_name_prefix: '%s'\n", analysis.tools.arguments.name_prefix.c_str()); + LOG_ERR("arg_name_suffix: '%s'\n", analysis.tools.arguments.name_suffix.c_str()); + LOG_ERR("arg_value_prefix: '%s'\n", analysis.tools.arguments.value_prefix.c_str()); + LOG_ERR("arg_value_suffix: '%s'\n", analysis.tools.arguments.value_suffix.c_str()); + LOG_ERR("name_field: '%s'\n", analysis.tools.format.name_field.c_str()); + LOG_ERR("args_field: '%s'\n", analysis.tools.format.args_field.c_str()); + LOG_ERR("id_field: '%s'\n", analysis.tools.format.id_field.c_str()); + LOG_ERR("gen_id_field: '%s'\n", analysis.tools.format.gen_id_field.c_str()); + LOG_ERR("parameter_order: '%s'\n", std::accumulate(analysis.tools.format.parameter_order.begin(), analysis.tools.format.parameter_order.end(), std::string(""), [] (const std::string & a, const std::string & b) { return a.empty() ? 
b : a + ", " + b; } ).c_str()); @@ -470,11 +470,13 @@ int main(int argc, char ** argv) { LOG_ERR(" '%s'\n", token.c_str()); } - LOG_ERR("\n=== Verifying created grammar ===\n"); - auto * grammar = llama_grammar_init_impl(nullptr, parser_data.grammar.c_str(), "root", - parser_data.grammar_lazy, nullptr, 0, nullptr, 0); - if (grammar != nullptr) { - LOG_ERR("\n=== Grammar successfully created ===\n"); + if (!parser_data.grammar.empty()) { + LOG_ERR("\n=== Verifying created grammar ===\n"); + auto * grammar = llama_grammar_init_impl(nullptr, parser_data.grammar.c_str(), "root", + parser_data.grammar_lazy, nullptr, 0, nullptr, 0); + if (grammar != nullptr) { + LOG_ERR("\n=== Grammar successfully created ===\n"); + } } } } catch (const std::exception & e) { From e772822011973f117c99add11a42d2ac0dce8de5 Mon Sep 17 00:00:00 2001 From: Piotr Wilkin Date: Fri, 13 Feb 2026 00:55:56 +0100 Subject: [PATCH 32/39] Whitespace --- common/chat-diff-analyzer.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/common/chat-diff-analyzer.cpp b/common/chat-diff-analyzer.cpp index 03978f6e57..1587faaf9e 100644 --- a/common/chat-diff-analyzer.cpp +++ b/common/chat-diff-analyzer.cpp @@ -478,7 +478,7 @@ content_analysis differential_analyzer::analyze_content(const common_chat_templa LOG_DBG(ANSI_ORANGE "Phase 2: Content analysis\n" ANSI_RESET); content_analysis result; - + json assistant_content_only = json{ { "role", "assistant" }, { "content", "Response text" } @@ -560,7 +560,7 @@ content_analysis differential_analyzer::analyze_content(const common_chat_templa result.mode = content_mode::ALWAYS_WRAPPED; // TODO: END_DELIMITED content mode - delimited at end but not at start? 
} - + return result; } From 24cc1bcd6dee01d3a4957122796429a9c229ce8a Mon Sep 17 00:00:00 2001 From: Piotr Wilkin Date: Fri, 13 Feb 2026 03:17:20 +0100 Subject: [PATCH 33/39] Clean algorithm for calculate_diff_split; fix buggy expectations --- common/chat-auto-parser-helpers.cpp | 308 ++++++++++------------------ common/chat-diff-analyzer.cpp | 30 ++- common/chat-diff-analyzer.h | 8 + tests/test-chat-auto-parser.cpp | 10 +- 4 files changed, 133 insertions(+), 223 deletions(-) diff --git a/common/chat-auto-parser-helpers.cpp b/common/chat-auto-parser-helpers.cpp index d2aec2d9bb..d03845d861 100644 --- a/common/chat-auto-parser-helpers.cpp +++ b/common/chat-auto-parser-helpers.cpp @@ -4,6 +4,7 @@ #include "nlohmann/json.hpp" #include +#include using json = nlohmann::ordered_json; @@ -61,227 +62,128 @@ std::string trim_trailing_newlines(const std::string & str) { return str.substr(0, end); } -// Helper to find unmatched bracket/tag in a string -// Finds an unmatched bracket in a string. -// search_backwards=true: finds unclosed opening bracket at end (returns bracket position) -// search_backwards=false: finds unopened closing bracket at start (returns position after bracket) -static size_t find_unmatched_bracket(const std::string & str, bool search_backwards) { - if (str.empty()) { - return std::string::npos; - } - - // Compute iteration bounds and bracket types based on direction - const char * primary_brackets = search_backwards ? "<[" : ">]"; - - for (size_t i = 0; i < str.length(); ++i) { - // Map iteration index to actual position based on direction - size_t pos = search_backwards ? (str.length() - 1 - i) : i; - char c = str[pos]; - - // Check if this is a primary bracket we're looking for - if (c == primary_brackets[0] || c == primary_brackets[1]) { - // Get the matching bracket: < matches >, [ matches ], and vice versa - char match_bracket = (c == '<' || c == '>') ? (c == '<' ? '>' : '<') : (c == '[' ? 
']' : '['); - - // Search for matching bracket in the appropriate range - size_t inner_start = search_backwards ? (pos + 1) : 0; - size_t inner_end = search_backwards ? str.length() : pos; - bool found_match = false; - - for (size_t j = inner_start; j < inner_end; ++j) { - if (str[j] == match_bracket) { - found_match = true; - break; - } - } - - if (!found_match) { - return search_backwards ? pos : (pos + 1); - } - } - } - - return std::string::npos; -} - -static size_t find_unclosed_bracket_at_end(const std::string & str) { - return find_unmatched_bracket(str, true); -} - -static size_t find_unopened_bracket_at_start(const std::string & str) { - return find_unmatched_bracket(str, false); -} - -// Returns true if `s` contains an unmatched bracket. -// search_backwards=true: looks for opening bracket without matching closing after it -// search_backwards=false: looks for closing bracket without matching opening before it -static bool contains_unmatched_bracket(const std::string & s, char opening, char closing, bool search_backwards) { - if (s.empty()) { - return false; - } - - char primary = search_backwards ? opening : closing; - - for (size_t i = 0; i < s.length(); ++i) { - // Map iteration index to actual position based on direction - size_t pos = search_backwards ? (s.length() - 1 - i) : i; - - if (s[pos] == primary) { - // Search for matching bracket in the appropriate range - size_t inner_start = search_backwards ? (pos + 1) : 0; - size_t inner_end = search_backwards ? s.length() : pos; - char match_bracket = search_backwards ? 
closing : opening; - bool found_match = false; - - for (size_t j = inner_start; j < inner_end; ++j) { - if (s[j] == match_bracket) { - found_match = true; - break; - } - } - - if (!found_match) { - return true; - } - } - } - return false; -} - -static bool contains_unopened_closing(const std::string & s, char opening, char closing) { - return contains_unmatched_bracket(s, opening, closing, false); -} - -static bool contains_unclosed_opening(const std::string & s, char opening, char closing) { - return contains_unmatched_bracket(s, opening, closing, true); -} - -// Moves incomplete tags from prefix/suffix into left/right parts -// Only moves tags when we detect the split pattern in BOTH left and right -static diff_split fix_tag_boundaries(diff_split result) { - // Check if prefix ends with an unclosed bracket/tag - // No fixed window: search the entire neighboring strings for matching brackets - size_t unclosed_pos = find_unclosed_bracket_at_end(result.prefix); - if (unclosed_pos != std::string::npos) { - char opening_bracket = result.prefix[unclosed_pos]; - char closing_bracket = (opening_bracket == '<') ? '>' : ']'; - - // Look for the specific closing bracket that matches our opening bracket - bool left_has_pattern = contains_unopened_closing(result.left, opening_bracket, closing_bracket); - bool right_has_pattern = contains_unopened_closing(result.right, opening_bracket, closing_bracket); - bool suffix_has_pattern = contains_unopened_closing(result.suffix, opening_bracket, closing_bracket); - - // Move the tag if both sides satisfy: has pattern OR is empty (and other has pattern) - // This handles cases like: left="" right="_begin|>..." 
or left="stuff>" right="stuff>" - bool left_satisfies = left_has_pattern || (result.left.empty() && suffix_has_pattern); - bool right_satisfies = right_has_pattern || (result.right.empty() && suffix_has_pattern); - - if (left_satisfies && right_satisfies) { - // Move the unclosed tag from prefix to left/right - std::string tag_part = result.prefix.substr(unclosed_pos); - result.prefix = result.prefix.substr(0, unclosed_pos); - result.left = tag_part + result.left; - result.right = tag_part + result.right; - } - } - - // Check if suffix starts with an unopened bracket/tag - size_t unopened_end = find_unopened_bracket_at_start(result.suffix); - if (unopened_end != std::string::npos) { - char closing_bracket = - result.suffix[unopened_end - 1]; // -1 because unopened_end is position after the bracket - char opening_bracket = (closing_bracket == '>') ? '<' : '['; - - // Check if BOTH left and right have the pattern of unclosed opening bracket at the end - bool left_has_pattern = contains_unclosed_opening(result.left, opening_bracket, closing_bracket); - bool right_has_pattern = contains_unclosed_opening(result.right, opening_bracket, closing_bracket); - bool prefix_has_pattern = contains_unclosed_opening(result.prefix, opening_bracket, closing_bracket); - - // Move the tag if both sides satisfy: has pattern OR is empty (and other has pattern) - bool left_satisfies = left_has_pattern || (result.left.empty() && prefix_has_pattern); - bool right_satisfies = right_has_pattern || (result.right.empty() && prefix_has_pattern); - - if (left_satisfies && right_satisfies) { - // Move the unopened tag from suffix to left/right - std::string tag_part = result.suffix.substr(0, unopened_end); - result.suffix = result.suffix.substr(unopened_end); - result.left = result.left + tag_part; - result.right = result.right + tag_part; - } - } - - return result; -} - -diff_split calculate_diff_split(const std::string & left, const std::string & right) { - diff_split result; - - // Find 
longest common prefix +static size_t common_prefix_len(const std::string & left, const std::string & right) { size_t prefix_len = 0; size_t min_len = std::min(left.length(), right.length()); while (prefix_len < min_len && left[prefix_len] == right[prefix_len]) { prefix_len++; } - result.prefix = left.substr(0, prefix_len); + return prefix_len; +} - // Find longest common suffix, ending no later than the end of the longest common prefix +static size_t common_suffix_len(const std::string & left, const std::string & right) { size_t suffix_len = 0; - while (suffix_len < min_len - prefix_len) { - size_t left_pos = left.length() - 1 - suffix_len; - size_t right_pos = right.length() - 1 - suffix_len; + size_t min_len = std::min(left.length(), right.length()); + while (suffix_len < min_len && left[left.length() - 1 - suffix_len] == right[right.length() - 1 - suffix_len]) { + suffix_len++; + } + return suffix_len; +} - // Ensure we're not going into the prefix region - if (left_pos < prefix_len || right_pos < prefix_len) { - break; +diff_split calculate_diff_split(const std::string & left, const std::string & right) { + diff_split result; + + auto left_seg = segmentize_markers(left); + auto right_seg = segmentize_markers(right); + + if (left_seg.empty()) { + result.right = right; + return result; + } + if (right_seg.empty()) { + result.left = left; + return result; + } + + auto left_start = left_seg.begin(); + auto left_end = --left_seg.end(); + auto right_start = right_seg.begin(); + auto right_end = --right_seg.end(); + + auto test = [&] () { + return left_start != left_end && right_start != right_end; + }; + + bool left_fully_consumed = false; + bool right_fully_consumed = false; + + while (test()) { + bool advanced = false; + if (*left_start == *right_start) { + result.prefix.append(left_start->value); + left_start++; + right_start++; + advanced = true; } - - if (left[left_pos] == right[right_pos]) { - suffix_len++; - } else { + if (*left_end == *right_end) { + 
result.suffix = left_end->value + result.suffix; + if (left_start != left_end) { + left_end--; + } else { + left_fully_consumed = true; + } + if (right_start != right_end) { + right_end--; + } else { + right_fully_consumed = true; + } + advanced = true; + } + if (!advanced) { break; } } - result.suffix = left.substr(left.length() - suffix_len); - // Extract the remainders (the parts between prefix and suffix) - result.left = left.substr(prefix_len, left.length() - prefix_len - suffix_len); - result.right = right.substr(prefix_len, right.length() - prefix_len - suffix_len); - - // Fix tag boundaries by moving incomplete tags to left/right - // We iterate because: - // 1. fix_tag_boundaries may move content from prefix/suffix to left/right - // 2. After that, we find common suffix in left/right to extract - // 3. The extracted suffix might contain tag parts that need fixing - // We apply fix AFTER suffix extraction to ensure incomplete tags aren't left in suffix - diff_split prev_result; - do { - prev_result = result; - - // First, find and extract any common suffix from left/right - size_t suffix_len = 0; - size_t min_len = std::min(result.left.length(), result.right.length()); - while (suffix_len < min_len) { - size_t left_pos = result.left.length() - 1 - suffix_len; - size_t right_pos = result.right.length() - 1 - suffix_len; - if (result.left[left_pos] == result.right[right_pos]) { - suffix_len++; - } else { - break; - } + if (left_start == left_end && right_start != right_end) { + if (*left_start == *right_end) { + result.suffix = right_end->value + result.suffix; + right_end--; + left_fully_consumed = true; + } else if (*left_start == *right_start) { + result.prefix.append(right_start->value); + right_start++; + left_fully_consumed = true; } - - if (suffix_len > 0) { - std::string common_suffix = result.left.substr(result.left.length() - suffix_len); - result.suffix = common_suffix + result.suffix; - result.left = result.left.substr(0, result.left.length() - 
suffix_len); - result.right = result.right.substr(0, result.right.length() - suffix_len); + } else if (right_start == right_end && left_start != left_end) { + if (*left_end == *right_start) { + result.suffix = left_end->value + result.suffix; + left_end--; + right_fully_consumed = true; + } else if (*left_start == *right_start) { + result.prefix.append(left_start->value); + left_start++; + right_fully_consumed = true; } + } else if (left_start == left_end && right_start == right_end && *left_start == *right_start && left_start->type == segment_type::MARKER) { + result.prefix.append(right_start->value); + left_fully_consumed = true; + right_fully_consumed = true; + } - // Then apply fix_tag_boundaries to move incomplete tags from prefix/suffix to left/right - result = fix_tag_boundaries(result); + auto eat_segment = [](std::string & str, segment & seg) -> std::string { return str.append(seg.value); }; - } while (!(result == prev_result) && result.left != left && result.right != right); + bool can_have_text_suffix = left_end->type == segment_type::TEXT && right_end->type == segment_type::TEXT; + bool can_have_text_prefix = right_start->type == segment_type::TEXT && left_start->type == segment_type::TEXT; + std::string remainder_left = std::accumulate(left_start, left_fully_consumed ? left_end : ++left_end, std::string(), eat_segment); + std::string remainder_right = std::accumulate(right_start, right_fully_consumed ? right_end : ++right_end, std::string(), eat_segment); + + size_t suffix_len = can_have_text_suffix ? common_suffix_len(remainder_left, remainder_right) : 0; + // avoid overlaps between prefix and suffix + size_t prefix_len = can_have_text_prefix ? 
common_prefix_len(remainder_left.substr(0, remainder_left.size() - suffix_len), + remainder_right.substr(0, remainder_right.size() - suffix_len)) : 0; + + result.prefix.append(remainder_left.substr(0, prefix_len)); + result.suffix = remainder_left.substr(remainder_left.length() - suffix_len, suffix_len) + result.suffix; + result.left = remainder_left.substr(prefix_len, remainder_left.length() - prefix_len - suffix_len); + result.right = remainder_right.substr(prefix_len, remainder_right.length() - prefix_len - suffix_len); + + if (result.left == "" && result.right == "") { + // degenerate case, no diff + result.prefix = left; + result.suffix = ""; + // pick prefix = all as representation + } return result; } diff --git a/common/chat-diff-analyzer.cpp b/common/chat-diff-analyzer.cpp index 1587faaf9e..a7550a3b6b 100644 --- a/common/chat-diff-analyzer.cpp +++ b/common/chat-diff-analyzer.cpp @@ -3,6 +3,7 @@ #include "chat-auto-parser-helpers.h" #include "chat-auto-parser.h" #include "chat.h" +#include "llama.h" #include "log.h" #include "nlohmann/json.hpp" @@ -381,17 +382,14 @@ void differential_analyzer::compare_thinking_enabled(const common_chat_template } } - // Check for slash-in-tag pattern: vs - // diff shows: suffix="think>", left="/", right="" (or vice versa) if (reasoning.start.empty() && reasoning.end.empty()) { - if (diff.right.empty() && trim_whitespace(diff.left) == "/") { - auto seg_A = segmentize_markers(trim_trailing_whitespace(comparison->output_A)); - auto seg_B = segmentize_markers(trim_trailing_whitespace(comparison->output_B)); - if (!seg_A.empty() && !seg_B.empty() && seg_A[seg_A.size() - 1].type == segment_type::MARKER && - seg_B[seg_B.size() - 1].type == segment_type::MARKER) { - reasoning.mode = reasoning_mode::FORCED_CLOSED; - reasoning.start = seg_B[seg_B.size() - 1].value; - reasoning.end = seg_A[seg_A.size() - 1].value; + if (!diff.left.empty() && !diff.right.empty()) { + auto seg_A = segmentize_markers(trim_trailing_whitespace(diff.left)); 
+ auto seg_B = segmentize_markers(trim_trailing_whitespace(diff.right)); + if (seg_A.size() == 1 && seg_B.size() == 1) { + reasoning.mode = reasoning_mode::FORCED_CLOSED; + reasoning.start = seg_B[0].value; + reasoning.end = seg_A[0].value; } } } @@ -739,7 +737,7 @@ void differential_analyzer::analyze_tool_call_format_json_native(const std::stri }; // now let's check if we're in an array construction, mark it if so and get out of it if (json_start > 0 && space_or_bracket(true, clean_haystack[json_start - 1])) { - for (--json_start; space_or_bracket(true, clean_haystack[json_start]) && json_start >= 0; json_start--) { + for (--json_start; space_or_bracket(true, clean_haystack[json_start]) && json_start > 0; json_start--) { if (clean_haystack[json_start] == '[') { format.tools_array_wrapped = true; break; @@ -900,7 +898,9 @@ void differential_analyzer::check_per_call_markers(const common_chat_template & return; } - std::string second_tool_content = trim_leading_whitespace(one_vs_two->diff.right); + diff_split filter_common_call_part = calculate_diff_split(one_vs_two->diff.suffix, one_vs_two->diff.right); + + std::string second_tool_content = trim_leading_whitespace(filter_common_call_part.right); if (!result.section_start.empty() && second_tool_content.find(result.section_start) == 0) { result.per_call_start = result.section_start; @@ -945,8 +945,6 @@ tool_function_analysis differential_analyzer::extract_function_markers(const com } const auto & diff = comparison->diff; - LOG_DBG("T3 diff - suffix: '%s'\n", diff.suffix.c_str()); - LOG_DBG("T3 diff - left: '%s', right: '%s'\n", diff.left.c_str(), diff.right.c_str()); if (diff.left.find("foofoo") != std::string::npos && diff.right.find("barbar") != std::string::npos) { std::string prefix_marker; @@ -1371,8 +1369,6 @@ tool_id_analysis differential_analyzer::extract_call_id_markers(const common_cha } const auto & diff = comparison->diff; - LOG_DBG("T6 diff (call_id) - prefix: '%s', suffix: '%s'\n", diff.prefix.c_str(), 
diff.suffix.c_str()); - LOG_DBG("T6 diff (call_id) - left: '%s', right: '%s'\n", diff.left.c_str(), diff.right.c_str()); if (diff.left.empty() && diff.right.empty()) { return result; @@ -1447,7 +1443,6 @@ tool_id_analysis differential_analyzer::extract_call_id_markers(const common_cha for (size_t i = 0; i < suffix_segments.size(); i++) { if (suffix_segments[i].type == segment_type::MARKER) { result.suffix = suffix_segments[i].value; - LOG_DBG("T6: call_id_suffix='%s'\n", result.suffix.c_str()); break; } // Stop if we hit the args @@ -1468,7 +1463,6 @@ tool_id_analysis differential_analyzer::extract_call_id_markers(const common_cha for (int i = (int) segments.size() - 1; i >= 0; i--) { if (segments[i].type == segment_type::MARKER) { result.prefix = segments[i].value; - LOG_DBG("T6: call_id_prefix='%s'\n", result.prefix.c_str()); break; } } diff --git a/common/chat-diff-analyzer.h b/common/chat-diff-analyzer.h index c035203923..b1bfc83283 100644 --- a/common/chat-diff-analyzer.h +++ b/common/chat-diff-analyzer.h @@ -379,4 +379,12 @@ struct segment { std::string value; segment(segment_type type, std::string value) : type(type), value(std::move(value)) {} + + bool operator==(const segment & other) const { + return type == other.type && value == other.value; + } + + bool operator!=(const segment & other) const { + return !(*this == other); + } }; diff --git a/tests/test-chat-auto-parser.cpp b/tests/test-chat-auto-parser.cpp index b2bdab15e9..4f3f7f5ec2 100644 --- a/tests/test-chat-auto-parser.cpp +++ b/tests/test-chat-auto-parser.cpp @@ -217,6 +217,12 @@ static void test_calculate_diff_split_identical(testing & t) { t.assert_equal("left should be empty", "", result.left); t.assert_equal("right should be empty", "", result.right); t.assert_equal("suffix should be empty", "", result.suffix); + + result = calculate_diff_split("", ""); + t.assert_equal("prefix should be ''", "", result.prefix); + t.assert_equal("left should be empty", "", result.left); + 
t.assert_equal("right should be empty", "", result.right); + t.assert_equal("suffix should be empty", "", result.suffix); } static void test_calculate_diff_split_common_prefix(testing & t) { @@ -894,8 +900,8 @@ static void test_seed_oss_call_count(testing & t) { t.assert_true("T2 right should contain value4", diff.right.find("value4") != std::string::npos); t.assert_true("T2 right should contain second tool_call end", diff.right.find("") != std::string::npos); - // Suffix should be the eos token - t.assert_equal("T2 suffix should be ''", "", diff.suffix); + // Suffix should end with the eos token + t.assert_equal("T2 suffix should end with ''", "", diff.suffix.substr(diff.suffix.length() - 10, 10)); } // T3: Compare different function names From 6415d0f03f45f2d9438bdb6702668c89e9bbadd9 Mon Sep 17 00:00:00 2001 From: Piotr Wilkin Date: Fri, 13 Feb 2026 14:42:26 +0100 Subject: [PATCH 34/39] Add TODO --- common/chat-auto-parser-helpers.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/common/chat-auto-parser-helpers.cpp b/common/chat-auto-parser-helpers.cpp index d03845d861..0f40d9e813 100644 --- a/common/chat-auto-parser-helpers.cpp +++ b/common/chat-auto-parser-helpers.cpp @@ -244,6 +244,9 @@ std::string after_common_suffix(const std::string & full, const std::string & le return full.substr(pos + common_suffix_len); } +// TODO: segmentize will treat a JSON array inside tags as a tag: [{ "fun": { ... 
} }] will be three markers +// not too worried about that because it hasn't turned out as a problem anywhere, but noting here in case it will +// Might have to put some restrictions on tag contents as well (like "no { }") std::vector segmentize_markers(const std::string & text) { std::vector retval; bool in_marker = false; From 3605e78569d1e101fda7538eaf1104debd26532e Mon Sep 17 00:00:00 2001 From: Piotr Wilkin Date: Sat, 14 Feb 2026 00:17:43 +0100 Subject: [PATCH 35/39] Refactor into class-based approach --- common/chat-auto-parser-generator.cpp | 285 +++++++------ common/chat-auto-parser-helpers.cpp | 57 +++ common/chat-auto-parser-helpers.h | 17 + common/chat-auto-parser.h | 35 +- common/chat-diff-analyzer.cpp | 552 +++++++++++-------------- common/chat-diff-analyzer.h | 282 +++++++------ common/chat.cpp | 30 +- common/chat.h | 6 +- tests/test-chat-auto-parser.cpp | 42 +- tools/parser/debug-template-parser.cpp | 8 +- tools/parser/template-analysis.cpp | 36 +- 11 files changed, 706 insertions(+), 644 deletions(-) diff --git a/common/chat-auto-parser-generator.cpp b/common/chat-auto-parser-generator.cpp index 13ec14fb64..bc2b1f7bbe 100644 --- a/common/chat-auto-parser-generator.cpp +++ b/common/chat-auto-parser-generator.cpp @@ -19,22 +19,23 @@ static void foreach_function(const json & tools, const std::functionis_always_wrapped()) { + auto wrapped_content = ctx.content->build_optional_wrapped(ctx); + return ctx.reasoning_parser + wrapped_content + tools_parser + p.end(); } - auto content_before_tools = analysis.tools.format.section_start.empty() ? p.eps() : p.until(analysis.tools.format.section_start); - return reasoning + p.optional(p.content(content_before_tools)) + tools_parser + p.end(); + auto content_before_tools = format.section_start.empty() ? 
p.eps() : p.until(format.section_start); + return ctx.reasoning_parser + p.optional(p.content(content_before_tools)) + tools_parser + p.end(); } -common_peg_parser universal_peg_generator::build_tool_parser_tag_json( - common_chat_peg_unified_builder & p, - const diff_analysis_result & analysis, - const templates_params & inputs, - const common_peg_parser & reasoning) { +common_peg_parser analyze_tools::build_tool_parser_tag_json(parser_build_context & ctx) const { + auto & p = ctx.p; + const auto & inputs = ctx.inputs; common_peg_parser tool_choice = p.choice(); foreach_function(inputs.tools, [&](const json & tool) { - const auto & function = tool.at("function"); - std::string name = function.at("name"); - const auto & schema = function.at("parameters"); + const auto & func = tool.at("function"); + std::string name = func.at("name"); + const auto & schema = func.at("parameters"); // Build call_id parser based on position (if supported) common_peg_parser call_id_section = p.eps(); - if (analysis.tools.call_id.pos == call_id_position::BETWEEN_FUNC_AND_ARGS && - !analysis.tools.call_id.prefix.empty() && !analysis.tools.call_id.suffix.empty()) { - call_id_section = p.optional(analysis.tools.call_id.prefix + p.tool_id(p.until(analysis.tools.call_id.suffix))) + analysis.tools.call_id.suffix; + if (call_id.pos == call_id_position::BETWEEN_FUNC_AND_ARGS && + !call_id.prefix.empty() && !call_id.suffix.empty()) { + call_id_section = p.optional(call_id.prefix + p.tool_id(p.until(call_id.suffix))) + call_id.suffix; } - auto func_parser = p.tool_open(analysis.tools.function.name_prefix + p.tool_name(p.literal(name)) + analysis.tools.function.name_suffix) + + auto func_parser = p.tool_open(function.name_prefix + p.tool_name(p.literal(name)) + function.name_suffix) + call_id_section + p.tool_args(p.schema(p.json(), "tool-" + name + "-schema", schema)); - if (!analysis.tools.function.close.empty()) { - func_parser = func_parser + analysis.tools.function.close; + if 
(!function.close.empty()) { + func_parser = func_parser + function.close; } tool_choice |= p.rule("tool-" + name, func_parser); @@ -221,26 +254,26 @@ common_peg_parser universal_peg_generator::build_tool_parser_tag_json( common_peg_parser tool_calls = p.eps(); - if (!analysis.tools.format.per_call_start.empty()) { - auto wrapped_call = analysis.tools.format.per_call_start + tool_choice + analysis.tools.format.per_call_end; + if (!format.per_call_start.empty()) { + auto wrapped_call = format.per_call_start + tool_choice + format.per_call_end; if (inputs.parallel_tool_calls) { tool_calls = p.trigger_rule("tool-call", wrapped_call + p.zero_or_more(p.space() + wrapped_call)); } else { tool_calls = p.trigger_rule("tool-call", wrapped_call); } - if (!analysis.tools.format.section_start.empty()) { - tool_calls = p.trigger_rule("tool-calls", p.literal(analysis.tools.format.section_start) + p.space() + - tool_calls + p.space() + (analysis.tools.format.section_end.empty() ? p.end() : p.literal(analysis.tools.format.section_end))); + if (!format.section_start.empty()) { + tool_calls = p.trigger_rule("tool-calls", p.literal(format.section_start) + p.space() + + tool_calls + p.space() + (format.section_end.empty() ? 
p.end() : p.literal(format.section_end))); } } else { std::string separator = ", "; // Default if (inputs.parallel_tool_calls) { tool_calls = p.trigger_rule("tool-call", - analysis.tools.format.section_start + tool_choice + p.zero_or_more(separator + tool_choice) + analysis.tools.format.section_end); + format.section_start + tool_choice + p.zero_or_more(separator + tool_choice) + format.section_end); } else { tool_calls = p.trigger_rule("tool-call", - analysis.tools.format.section_start + tool_choice + analysis.tools.format.section_end); + format.section_start + tool_choice + format.section_end); } } @@ -248,23 +281,21 @@ common_peg_parser universal_peg_generator::build_tool_parser_tag_json( tool_calls = p.optional(tool_calls); } - std::string trigger_marker = !analysis.tools.format.section_start.empty() ? analysis.tools.format.section_start : analysis.tools.format.per_call_start; + std::string trigger_marker = !format.section_start.empty() ? format.section_start : format.per_call_start; auto content_before_tools = trigger_marker.empty() ? 
p.eps() : p.until(trigger_marker); - return reasoning + p.optional(p.content(content_before_tools)) + tool_calls + p.end(); + return ctx.reasoning_parser + p.optional(p.content(content_before_tools)) + tool_calls + p.end(); } -common_peg_parser universal_peg_generator::build_tool_parser_tag_tagged( - common_chat_peg_unified_builder & p, - const diff_analysis_result & analysis, - const templates_params & inputs, - const common_peg_parser & reasoning) { +common_peg_parser analyze_tools::build_tool_parser_tag_tagged(parser_build_context & ctx) const { + auto & p = ctx.p; + const auto & inputs = ctx.inputs; common_peg_parser tool_choice = p.choice(); foreach_function(inputs.tools, [&](const json & tool) { - const auto & function = tool.at("function"); - std::string name = function.at("name"); - const auto & params = function.at("parameters"); + const auto & func = tool.at("function"); + std::string name = func.at("name"); + const auto & params = func.at("parameters"); if (!params.contains("properties") || !params.at("properties").is_object()) { return; @@ -283,13 +314,13 @@ common_peg_parser universal_peg_generator::build_tool_parser_tag_tagged( auto type = param_schema.value("type", "object"); auto arg = p.tool_arg( - p.tool_arg_open(analysis.tools.arguments.name_prefix + p.tool_arg_name(p.literal(param_name)) + analysis.tools.arguments.name_suffix) + analysis.tools.arguments.value_prefix + + p.tool_arg_open(arguments.name_prefix + p.tool_arg_name(p.literal(param_name)) + arguments.name_suffix) + arguments.value_prefix + (type == "string" ? 
- p.tool_arg_string_value(p.schema(p.until(analysis.tools.arguments.value_suffix), + p.tool_arg_string_value(p.schema(p.until(arguments.value_suffix), "tool-" + name + "-arg-" + param_name + "-schema", param_schema, true)) : p.tool_arg_json_value(p.schema(p.json(), "tool-" + name + "-arg-" + param_name + "-schema", param_schema)) + p.space()) + - p.tool_arg_close(p.literal(analysis.tools.arguments.value_suffix)) + p.tool_arg_close(p.literal(arguments.value_suffix)) ); if (is_required) { @@ -310,23 +341,23 @@ common_peg_parser universal_peg_generator::build_tool_parser_tag_tagged( // Build call_id parser based on position (if supported) common_peg_parser call_id_section = p.eps(); - if (analysis.tools.call_id.pos == call_id_position::BETWEEN_FUNC_AND_ARGS && - !analysis.tools.call_id.prefix.empty() && !analysis.tools.call_id.suffix.empty()) { - call_id_section = p.optional(analysis.tools.call_id.prefix + p.tool_id(p.until(analysis.tools.call_id.suffix))) + analysis.tools.call_id.suffix; + if (call_id.pos == call_id_position::BETWEEN_FUNC_AND_ARGS && + !call_id.prefix.empty() && !call_id.suffix.empty()) { + call_id_section = p.optional(call_id.prefix + p.tool_id(p.until(call_id.suffix))) + call_id.suffix; } - auto func_parser = p.tool_open(analysis.tools.function.name_prefix + p.tool_name(p.literal(name)) + analysis.tools.function.name_suffix) + + auto func_parser = p.tool_open(function.name_prefix + p.tool_name(p.literal(name)) + function.name_suffix) + call_id_section + p.space() + args_seq; - if (!analysis.tools.function.close.empty()) { - func_parser = func_parser + p.space() + p.tool_close(p.literal(analysis.tools.function.close)); - } else if (!analysis.tools.format.per_call_end.empty()) { + if (!function.close.empty()) { + func_parser = func_parser + p.space() + p.tool_close(p.literal(function.close)); + } else if (!format.per_call_end.empty()) { // When there's no func_close but there is a per_call_end marker, use peek() to ensure // we only emit tool_close 
when we can actually see the closing marker. This prevents // premature closing during partial parsing when we've seen e.g. "" (end) or "" prefix that failed to match. - func_parser = func_parser + p.tool_close(p.peek(p.literal(analysis.tools.format.per_call_end))); + func_parser = func_parser + p.tool_close(p.peek(p.literal(format.per_call_end))); } else { func_parser = func_parser + p.tool_close(p.space()); // force this to process tool closing callbacks in mapper } @@ -338,26 +369,26 @@ common_peg_parser universal_peg_generator::build_tool_parser_tag_tagged( common_peg_parser tool_calls = p.eps(); - if (!analysis.tools.format.per_call_start.empty()) { - auto wrapped_call = analysis.tools.format.per_call_start + p.space() + tool_choice + p.space() + analysis.tools.format.per_call_end; + if (!format.per_call_start.empty()) { + auto wrapped_call = format.per_call_start + p.space() + tool_choice + p.space() + format.per_call_end; if (inputs.parallel_tool_calls) { tool_calls = p.trigger_rule("tool-call", wrapped_call + p.zero_or_more(p.space() + wrapped_call)); } else { tool_calls = p.trigger_rule("tool-call", wrapped_call); } - if (!analysis.tools.format.section_start.empty()) { - tool_calls = p.trigger_rule("tool-calls", p.literal(analysis.tools.format.section_start) + p.space() + - tool_calls + p.space() + (analysis.tools.format.section_end.empty() ? p.end() : p.literal(analysis.tools.format.section_end))); + if (!format.section_start.empty()) { + tool_calls = p.trigger_rule("tool-calls", p.literal(format.section_start) + p.space() + + tool_calls + p.space() + (format.section_end.empty() ? 
p.end() : p.literal(format.section_end))); } } else { std::string separator = ", "; // Default if (inputs.parallel_tool_calls) { tool_calls = p.trigger_rule("tool-call", - analysis.tools.format.section_start + p.space() + tool_choice + p.zero_or_more(separator + tool_choice) + p.space() + analysis.tools.format.section_end); + format.section_start + p.space() + tool_choice + p.zero_or_more(separator + tool_choice) + p.space() + format.section_end); } else { tool_calls = p.trigger_rule("tool-call", - analysis.tools.format.section_start + p.space() + tool_choice + p.space() + analysis.tools.format.section_end); + format.section_start + p.space() + tool_choice + p.space() + format.section_end); } } @@ -365,7 +396,9 @@ common_peg_parser universal_peg_generator::build_tool_parser_tag_tagged( tool_calls = p.optional(tool_calls); } - std::string trigger_marker = !analysis.tools.format.section_start.empty() ? analysis.tools.format.section_start : analysis.tools.format.per_call_start; + std::string trigger_marker = !format.section_start.empty() ? format.section_start : format.per_call_start; auto content_before_tools = trigger_marker.empty() ? 
p.eps() : p.until(trigger_marker); - return reasoning + p.optional(p.content(content_before_tools)) + tool_calls + p.end(); + return ctx.reasoning_parser + p.optional(p.content(content_before_tools)) + tool_calls + p.end(); } + +} // namespace autoparser diff --git a/common/chat-auto-parser-helpers.cpp b/common/chat-auto-parser-helpers.cpp index 0f40d9e813..3be1cbf1b2 100644 --- a/common/chat-auto-parser-helpers.cpp +++ b/common/chat-auto-parser-helpers.cpp @@ -1,6 +1,9 @@ #include "chat-auto-parser-helpers.h" +#include "chat-auto-parser.h" #include "chat-diff-analyzer.h" +#include "chat.h" +#include "log.h" #include "nlohmann/json.hpp" #include @@ -289,3 +292,57 @@ std::vector prune_whitespace_segments(const std::vector & segm return result; } +namespace autoparser { + +std::string apply_template(const common_chat_template & tmpl, const template_params & params) { + templates_params tmpl_params; + tmpl_params.messages = params.messages; + tmpl_params.tools = params.tools; + tmpl_params.add_generation_prompt = params.add_generation_prompt; + tmpl_params.enable_thinking = params.enable_thinking; + + if (params.extra_context) { + tmpl_params.extra_context = *params.extra_context; + } + tmpl_params.extra_context["enable_thinking"] = params.enable_thinking; + + try { + return common_chat_template_direct_apply(tmpl, tmpl_params); + } catch (const std::exception & e) { + LOG_DBG("Template application failed: %s\n", e.what()); + return ""; + } +} + +std::optional compare_variants( + const common_chat_template & tmpl, + const template_params & params_A, + const std::function & params_modifier) { + // Create variant B by copying A + template_params params_B = params_A; + + // Apply modifier to create variant B + if (params_modifier) { + params_modifier(params_B); + } + + // Apply template to both variants + std::string output_A = apply_template(tmpl, params_A); + std::string output_B = apply_template(tmpl, params_B); + + // Check for template application failures + if 
(output_A.empty() || output_B.empty()) { + return std::nullopt; + } + + // Calculate diff and return result with both outputs + compare_variants_result result; + result.diff = calculate_diff_split(output_A, output_B); + result.output_A = output_A; + result.output_B = output_B; + + return result; +} + +} // namespace autoparser + diff --git a/common/chat-auto-parser-helpers.h b/common/chat-auto-parser-helpers.h index 47e7a2a3d8..c235d63850 100644 --- a/common/chat-auto-parser-helpers.h +++ b/common/chat-auto-parser-helpers.h @@ -1,6 +1,8 @@ #pragma once #include "chat-diff-analyzer.h" +#include +#include #include std::string trim_whitespace(const std::string & str); @@ -54,3 +56,18 @@ std::vector segmentize_markers(const std::string & text); // prune_whitespace_segments(X) -> [ (MARKER, ""), (MARKER, ""), (MARKER, ""), (MARKER, ""), // (MARKER, ""), (MARKER, "") ] std::vector prune_whitespace_segments(const std::vector & segments); + +namespace autoparser { + +// Apply a template with the given parameters, returning the rendered string (empty on failure) +std::string apply_template(const common_chat_template & tmpl, const template_params & params); + +// Factorized differential comparison function +// Takes base params and a single modifier lambda to create variant B +// Returns compare_variants_result containing diff and both outputs, or std::nullopt on failure +std::optional compare_variants( + const common_chat_template & tmpl, + const template_params & params_A, + const std::function & params_modifier); + +} // namespace autoparser diff --git a/common/chat-auto-parser.h b/common/chat-auto-parser.h index 40f1fbe1bb..31ee56dd03 100644 --- a/common/chat-auto-parser.h +++ b/common/chat-auto-parser.h @@ -10,6 +10,8 @@ using json = nlohmann::ordered_json; +namespace autoparser { + struct templates_params { json messages; json tools; @@ -37,34 +39,7 @@ class universal_peg_generator { static common_chat_params generate_parser(const common_chat_template & tmpl, const 
struct templates_params & inputs, - const diff_analysis_result & analysis); - - private: - // Build unified parser (single code path for all formats) - static common_peg_arena build_parser(const diff_analysis_result & analysis, - const struct templates_params & inputs, - bool thinking_forced_open, - bool thinking_forced_closed = false); - - // Build tool calling parser based on detected format - static common_peg_parser build_tool_parser(common_chat_peg_unified_builder & p, - const diff_analysis_result & analysis, - const templates_params & inputs, - const common_peg_parser & reasoning); - - // Per-format tool parser builders - static common_peg_parser build_tool_parser_json_native(common_chat_peg_unified_builder & p, - const diff_analysis_result & analysis, - const templates_params & inputs, - const common_peg_parser & reasoning); - - static common_peg_parser build_tool_parser_tag_json(common_chat_peg_unified_builder & p, - const diff_analysis_result & analysis, - const templates_params & inputs, - const common_peg_parser & reasoning); - - static common_peg_parser build_tool_parser_tag_tagged(common_chat_peg_unified_builder & p, - const diff_analysis_result & analysis, - const templates_params & inputs, - const common_peg_parser & reasoning); + const analyze_template & analysis); }; + +} // namespace autoparser diff --git a/common/chat-diff-analyzer.cpp b/common/chat-diff-analyzer.cpp index a7550a3b6b..2256e48976 100644 --- a/common/chat-diff-analyzer.cpp +++ b/common/chat-diff-analyzer.cpp @@ -1,9 +1,7 @@ #include "chat-diff-analyzer.h" #include "chat-auto-parser-helpers.h" -#include "chat-auto-parser.h" #include "chat.h" -#include "llama.h" #include "log.h" #include "nlohmann/json.hpp" @@ -17,10 +15,12 @@ using json = nlohmann::ordered_json; -static std::vector> workarounds( +namespace autoparser { + +static std::vector> workarounds( { // Old reasoning Qwen templates - they don't really display reasoning content, but we still want to // support reasoning on them 
- [](const common_chat_template & tmpl, diff_analysis_result & analysis) -> void { + [](const common_chat_template & tmpl, analyze_template & analysis) -> void { if (tmpl.src.find("content.split('')") != std::string::npos && analysis.reasoning.mode == reasoning_mode::NONE) { analysis.reasoning.mode = reasoning_mode::FORCED_OPEN; @@ -32,7 +32,7 @@ static std::vector void { + [](const common_chat_template & tmpl, analyze_template & analysis) -> void { if (tmpl.src.find("Write your thoughts between and write your response between " "") != std::string::npos) { analysis.reasoning.mode = reasoning_mode::TAG_BASED; @@ -49,7 +49,7 @@ static std::vector...<|END_OF_TURN_TOKEN|> - [](const common_chat_template & tmpl, diff_analysis_result & analysis) -> void { + [](const common_chat_template & tmpl, analyze_template & analysis) -> void { if (tmpl.src.find("<|CHATBOT_TOKEN|>") != std::string::npos && tmpl.src.find("<|END_OF_TURN_TOKEN|>") != std::string::npos && analysis.content.start.empty()) { analysis.content.mode = content_mode::ALWAYS_WRAPPED; @@ -61,7 +61,7 @@ static std::vector void { + [](const common_chat_template & tmpl, analyze_template & analysis) -> void { if (tmpl.src.find("set has_code_interpreter = tools | selectattr(\"type\", \"equalto\", " "\"code_interpreter\") | list | length > 0") != std::string::npos) { analysis.content.mode = content_mode::PLAIN; @@ -82,7 +82,7 @@ static std::vector void { + [](const common_chat_template & tmpl, analyze_template & analysis) -> void { if (tmpl.src.find( "{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>'") != std::string::npos) { @@ -138,94 +138,76 @@ static json second_tool_call = static json first_tool_call_alt_id = build_tool_call("foofoo", json{{ "first", "XXXX" }, { "second", "YYYY" }}, "call99999"); -std::string differential_analyzer::apply_template(const common_chat_template & tmpl, const template_params & params) { - templates_params tmpl_params; - tmpl_params.messages = 
params.messages; - tmpl_params.tools = params.tools; - tmpl_params.add_generation_prompt = params.add_generation_prompt; - tmpl_params.enable_thinking = params.enable_thinking; - - if (params.extra_context) { - tmpl_params.extra_context = *params.extra_context; - } - tmpl_params.extra_context["enable_thinking"] = params.enable_thinking; - - try { - return common_chat_template_direct_apply(tmpl, tmpl_params); - } catch (const std::exception & e) { - LOG_DBG("Template application failed: %s\n", e.what()); - return ""; - } -} - -std::optional differential_analyzer::compare_variants( - const common_chat_template & tmpl, - const template_params & params_A, - const std::function & params_modifier) { - // Create variant B by copying A - template_params params_B = params_A; - - // Apply modifier to create variant B - if (params_modifier) { - params_modifier(params_B); - } - - // Apply template to both variants - std::string output_A = apply_template(tmpl, params_A); - std::string output_B = apply_template(tmpl, params_B); - - // Check for template application failures - if (output_A.empty() || output_B.empty()) { - return std::nullopt; - } - - // Calculate diff and return result with both outputs - compare_variants_result result; - result.diff = calculate_diff_split(output_A, output_B); - result.output_A = output_A; - result.output_B = output_B; - - return result; -} - -diff_analysis_result differential_analyzer::analyze(const common_chat_template & tmpl) { - diff_analysis_result result; +// ============================================================================ +// analyze_template +// ============================================================================ +analyze_template::analyze_template(const common_chat_template & tmpl) + : jinja_caps(tmpl.original_caps()) + , reasoning(tmpl, jinja_caps.supports_tool_calls) + , content(tmpl, reasoning) + , tools(jinja_caps.supports_tool_calls ? 
analyze_tools(tmpl, jinja_caps, reasoning) : analyze_tools()) +{ LOG_DBG(ANSI_PURPLE "=== Starting differential analysis ===\n" ANSI_RESET); - result.jinja_caps = tmpl.original_caps(); - - result.reasoning = analyze_reasoning(tmpl, result.jinja_caps.supports_tool_calls); - result.content = analyze_content(tmpl, result.reasoning); - if (result.jinja_caps.supports_tool_calls) { - result.tools = analyze_tools(tmpl, result.jinja_caps, result.reasoning); - } - collect_preserved_tokens(result); + collect_preserved_tokens(); for (auto & workaround : workarounds) { - workaround(tmpl, result); + workaround(tmpl, *this); } LOG_DBG(ANSI_PURPLE "=== Differential analysis complete ===\n" ANSI_RESET); - - return result; } -reasoning_analysis differential_analyzer::analyze_reasoning(const common_chat_template & tmpl, bool supports_tools) { +void analyze_template::collect_preserved_tokens() { + auto add_token = [this](const std::string & org_token) { + std::string token = trim_whitespace(org_token); + if (!token.empty()) { + // Avoid duplicates + if (std::find(preserved_tokens.begin(), preserved_tokens.end(), token) == preserved_tokens.end()) { + preserved_tokens.push_back(token); + } + } + }; + + add_token(reasoning.start); + add_token(reasoning.end); + add_token(content.start); + add_token(content.end); + add_token(tools.format.section_start); + add_token(tools.format.section_end); + add_token(tools.format.per_call_start); + add_token(tools.format.per_call_end); + add_token(tools.function.name_prefix); + add_token(tools.function.name_suffix); + add_token(tools.function.close); + add_token(tools.arguments.start); + add_token(tools.arguments.end); + add_token(tools.arguments.name_prefix); + add_token(tools.arguments.name_suffix); + add_token(tools.arguments.separator); + add_token(tools.arguments.value_prefix); + add_token(tools.arguments.value_suffix); + add_token(tools.call_id.prefix); + add_token(tools.call_id.suffix); +} + +// 
============================================================================ +// analyze_reasoning +// ============================================================================ + +analyze_reasoning::analyze_reasoning(const common_chat_template & tmpl, bool supports_tools) + : analyze_base(tmpl) { LOG_DBG(ANSI_ORANGE "Phase 1: Reasoning analysis\n" ANSI_RESET); - reasoning_analysis result; - - compare_reasoning_presence(tmpl, result); - compare_thinking_enabled(tmpl, result); + compare_reasoning_presence(); + compare_thinking_enabled(); if (supports_tools) { - compare_reasoning_scope(tmpl, result); + compare_reasoning_scope(); } - - return result; } -void differential_analyzer::compare_reasoning_presence(const common_chat_template & tmpl, reasoning_analysis & reasoning) { +void analyze_reasoning::compare_reasoning_presence() { json user_msg = json{ { "role", "user" }, { "content", "Hello" } @@ -248,7 +230,7 @@ void differential_analyzer::compare_reasoning_presence(const common_chat_templat params.enable_thinking = true; auto comparison = compare_variants( - tmpl, params, [&](template_params & p) { p.messages = json::array({ user_msg, assistant_with_reasoning }); }); + *tmpl, params, [&](template_params & p) { p.messages = json::array({ user_msg, assistant_with_reasoning }); }); if (!comparison) { LOG_DBG(ANSI_ORANGE "%s: Template application failed, skipping reasoning detection\n" ANSI_RESET, __func__); @@ -263,22 +245,22 @@ void differential_analyzer::compare_reasoning_presence(const common_chat_templat auto seg = prune_whitespace_segments(segmentize_markers(diff.right)); if (seg.size() >= 3 && trim_whitespace(seg[1].value) == reasoning_content) { // easy one: opening marker - reasoning - closing marker (possibly with trailing whitespace) - reasoning.mode = reasoning_mode::TAG_BASED; - reasoning.start = trim_whitespace(seg[0].value); - reasoning.end = trim_leading_whitespace(seg[2].value); + mode = reasoning_mode::TAG_BASED; + start = 
trim_whitespace(seg[0].value); + end = trim_leading_whitespace(seg[2].value); for (size_t i = 3; i < seg.size(); i++) { - reasoning.end += seg[i].value; + end += seg[i].value; } // we always truncate because this doesn't really influence correctness but model might not always generate newline - reasoning.end = trim_whitespace(reasoning.end); + end = trim_whitespace(end); } else if (seg.size() >= 2 && trim_whitespace(seg[0].value) == reasoning_content) { // delimited - reasoning.mode = reasoning_mode::DELIMITER; - reasoning.end = trim_leading_whitespace(seg[1].value); + mode = reasoning_mode::DELIMITER; + end = trim_leading_whitespace(seg[1].value); for (size_t i = 2; i < seg.size(); i++) { - reasoning.end += seg[i].value; + end += seg[i].value; } - reasoning.end = trim_whitespace(reasoning.end); + end = trim_whitespace(end); } else if (seg.size() == 1 && trim_whitespace(seg[0].value) == reasoning_content) { // the marker might be in the prefix actually, let's check for case of // left: empty @@ -296,16 +278,16 @@ void differential_analyzer::compare_reasoning_presence(const common_chat_templat if (marker_seg.type == segment_type::TEXT) { marker_seg = pre_seg[pre_seg.size() - 2]; } - reasoning.mode = reasoning_mode::FORCED_CLOSED; - reasoning.start = trim_whitespace(marker_seg.value); - reasoning.end = trim_whitespace(suf_seg[0].value); + mode = reasoning_mode::FORCED_CLOSED; + start = trim_whitespace(marker_seg.value); + end = trim_whitespace(suf_seg[0].value); } } } } } -void differential_analyzer::compare_thinking_enabled(const common_chat_template & tmpl, reasoning_analysis & reasoning) { +void analyze_reasoning::compare_thinking_enabled() { json user_msg = json{ { "role", "user" }, { "content", "Hello" } @@ -316,7 +298,7 @@ void differential_analyzer::compare_thinking_enabled(const common_chat_template params.add_generation_prompt = true; params.enable_thinking = false; - auto comparison = compare_variants(tmpl, params, [&](template_params & p) { 
p.enable_thinking = true; }); + auto comparison = compare_variants(*tmpl, params, [&](template_params & p) { p.enable_thinking = true; }); if (!comparison) { LOG_DBG(ANSI_ORANGE "%s: Template application failed\n" ANSI_RESET , __func__); @@ -333,15 +315,15 @@ void differential_analyzer::compare_thinking_enabled(const common_chat_template trim_whitespace(right_trimmed); if (!right_trimmed.empty() && string_ends_with(comparison->output_B, right_trimmed)) { - if (reasoning.start.empty()) { - reasoning.start = right_trimmed; - reasoning.mode = reasoning_mode::FORCED_OPEN; + if (start.empty()) { + start = right_trimmed; + mode = reasoning_mode::FORCED_OPEN; } } } - if (reasoning.start.empty() && !reasoning.end.empty()) { - reasoning.mode = reasoning_mode::DELIMITER; + if (start.empty() && !end.empty()) { + mode = reasoning_mode::DELIMITER; } // Check for FORCED_CLOSED: when enable_thinking=false produces both start and end markers, @@ -353,49 +335,49 @@ void differential_analyzer::compare_thinking_enabled(const common_chat_template // Both should end with the assistant role marker // Check if output_A has both reasoning_start and reasoning_end markers // while output_B has only reasoning_start - if (!reasoning.start.empty()) { + if (!start.empty()) { // Check if output_A contains both start and end markers - bool A_has_start = output_A.find(reasoning.start) != std::string::npos; - bool A_has_end = !reasoning.end.empty() && output_A.find(reasoning.end) != std::string::npos; + bool A_has_start = output_A.find(start) != std::string::npos; + bool A_has_end = !end.empty() && output_A.find(end) != std::string::npos; // Check if output_B contains only the start marker (and not the end marker) - bool B_has_start = output_B.find(reasoning.start) != std::string::npos; - bool B_has_end = !reasoning.end.empty() && output_B.find(reasoning.end) != std::string::npos; + bool B_has_start = output_B.find(start) != std::string::npos; + bool B_has_end = !end.empty() && output_B.find(end) 
!= std::string::npos; // For FORCED_CLOSED: A should have both, B should have only start if (A_has_start && A_has_end && B_has_start && !B_has_end) { - reasoning.mode = reasoning_mode::FORCED_CLOSED; + mode = reasoning_mode::FORCED_CLOSED; } - } else if (!reasoning.end.empty()) { + } else if (!end.empty()) { // We might not have detected the reasoning open marker until now, // but this is another chance to do so auto diff = comparison->diff; auto diff_rt = trim_whitespace(diff.right); auto diff_lt = trim_whitespace(diff.left); - if (diff_rt.empty() && diff_lt == reasoning.end) { + if (diff_rt.empty() && diff_lt == end) { auto seg = segmentize_markers(trim_whitespace(diff.prefix)); if (!seg.empty() && seg[seg.size() - 1].type == MARKER) { // this is FORCED_CLOSED - reasoning.start = seg[seg.size() - 1].value; - reasoning.mode = reasoning_mode::FORCED_CLOSED; + start = seg[seg.size() - 1].value; + mode = reasoning_mode::FORCED_CLOSED; } } } } - if (reasoning.start.empty() && reasoning.end.empty()) { + if (start.empty() && end.empty()) { if (!diff.left.empty() && !diff.right.empty()) { auto seg_A = segmentize_markers(trim_trailing_whitespace(diff.left)); auto seg_B = segmentize_markers(trim_trailing_whitespace(diff.right)); if (seg_A.size() == 1 && seg_B.size() == 1) { - reasoning.mode = reasoning_mode::FORCED_CLOSED; - reasoning.start = seg_B[0].value; - reasoning.end = seg_A[0].value; + mode = reasoning_mode::FORCED_CLOSED; + start = seg_B[0].value; + end = seg_A[0].value; } } } } -void differential_analyzer::compare_reasoning_scope(const common_chat_template & tmpl, reasoning_analysis & reasoning) { +void analyze_reasoning::compare_reasoning_scope() { json assistant_reasoning_content = json{ { "role", "assistant" }, { "content", "Here is my response." 
}, @@ -417,7 +399,7 @@ void differential_analyzer::compare_reasoning_scope(const common_chat_template & params.enable_thinking = true; auto comparison = compare_variants( - tmpl, params, [&](template_params & p) { p.messages = json::array({ user_msg, assistant_reasoning_tools }); }); + *tmpl, params, [&](template_params & p) { p.messages = json::array({ user_msg, assistant_reasoning_tools }); }); if (!comparison) { LOG_DBG(ANSI_ORANGE "%s: Template application failed\n" ANSI_RESET, __func__); @@ -431,7 +413,7 @@ void differential_analyzer::compare_reasoning_scope(const common_chat_template & bool reasoning_in_B = comparison->output_B.find(reasoning_content) != std::string::npos; if (!reasoning_in_A && reasoning_in_B) { - reasoning.mode = reasoning_mode::TOOLS_ONLY; + mode = reasoning_mode::TOOLS_ONLY; LOG_DBG("R3: Detected TOOLS_ONLY reasoning mode\n"); // Extract reasoning markers from output_B @@ -446,7 +428,7 @@ void differential_analyzer::compare_reasoning_scope(const common_chat_template & for (auto & segment : segments_before) { if (segment.type == segment_type::MARKER) { - reasoning.start = segment.value; + start = segment.value; break; } } @@ -458,11 +440,11 @@ void differential_analyzer::compare_reasoning_scope(const common_chat_template & if (!after_reasoning.empty()) { // Try to find matching end marker - if (!reasoning.start.empty()) { + if (!start.empty()) { auto segments = segmentize_markers(after_reasoning); for (auto & segment : segments) { if (segment.type == segment_type::MARKER) { - reasoning.end = segment.value; + end = segment.value; break; } } @@ -472,10 +454,13 @@ void differential_analyzer::compare_reasoning_scope(const common_chat_template & } } -content_analysis differential_analyzer::analyze_content(const common_chat_template & tmpl, const reasoning_analysis & reasoning) { - LOG_DBG(ANSI_ORANGE "Phase 2: Content analysis\n" ANSI_RESET); +// ============================================================================ +// analyze_content 
+// ============================================================================ - content_analysis result; +analyze_content::analyze_content(const common_chat_template & tmpl, const analyze_reasoning & reasoning) + : analyze_base(tmpl) { + LOG_DBG(ANSI_ORANGE "Phase 2: Content analysis\n" ANSI_RESET); json assistant_content_only = json{ { "role", "assistant" }, @@ -523,7 +508,7 @@ content_analysis differential_analyzer::analyze_content(const common_chat_templa if (trim_whitespace(diff_reasoning.left) == response || (segments.size() == 2 && trim_whitespace(segments[0].value) == response)) { // We only have the content text in the diff (possibly with a stray EOG marker), so no markers - result.mode = content_mode::PLAIN; + mode = content_mode::PLAIN; found_plain_content = true; } else if (reasoning.mode != reasoning_mode::NONE && !reasoning.end.empty() && diff_reasoning.left.find(reasoning.end) != std::string::npos) { @@ -531,7 +516,7 @@ content_analysis differential_analyzer::analyze_content(const common_chat_templa diff_reasoning.left.find(reasoning.end) + reasoning.end.length()); if (trim_whitespace(post_closed_reasoning) == "Response text") { LOG_DBG("C1: No content markers after stripping reasoning close marker\n"); - result.mode = content_mode::PLAIN; + mode = content_mode::PLAIN; found_plain_content = true; } } @@ -546,48 +531,51 @@ content_analysis differential_analyzer::analyze_content(const common_chat_templa size_t pos = pure_content.find("Response text"); if (pos == std::string::npos) { LOG_DBG(ANSI_ORANGE "%s: Error: response text not found - improper template application?\n" ANSI_RESET, __func__); - return result; + return; } - result.start = trim_leading_whitespace(pure_content.substr(0, pos)); - result.end = trim_leading_whitespace(pure_content.substr(pos + 13)); // 13 - len of "Response text" + start = trim_leading_whitespace(pure_content.substr(0, pos)); + end = trim_leading_whitespace(pure_content.substr(pos + 13)); // 13 - len of "Response text" 
// TODO: WRAPPED_WITH_REASONING } // Determine content mode - if (!result.start.empty() || !result.end.empty()) { - result.mode = content_mode::ALWAYS_WRAPPED; + if (!start.empty() || !end.empty()) { + mode = content_mode::ALWAYS_WRAPPED; // TODO: END_DELIMITED content mode - delimited at end but not at start? } - - return result; } -tool_analysis differential_analyzer::analyze_tools(const common_chat_template & tmpl, - const jinja::caps & caps, - const reasoning_analysis & reasoning) { - tool_analysis result; +bool analyze_content::is_always_wrapped() const { + return mode == content_mode::ALWAYS_WRAPPED && !start.empty() && !end.empty(); +} + +// ============================================================================ +// analyze_tools +// ============================================================================ + +analyze_tools::analyze_tools(const common_chat_template & tmpl, + const jinja::caps & caps, + const analyze_reasoning & reasoning) + : analyze_base(tmpl) { LOG_DBG(ANSI_ORANGE "Phase 3: Tool call analysis\n" ANSI_RESET); - result.format = analyze_tool_calls(tmpl, reasoning); + analyze_tool_calls(reasoning); - if (result.format.mode != tool_format::NONE && result.format.mode != tool_format::JSON_NATIVE) { + if (format.mode != tool_format::NONE && format.mode != tool_format::JSON_NATIVE) { if (caps.supports_parallel_tool_calls) { - check_per_call_markers(tmpl, result.format); + check_per_call_markers(); } - result.function = extract_function_markers(tmpl, result.format); - if (result.format.mode == tool_format::TAG_WITH_TAGGED) { - result.arguments = analyze_arguments(tmpl, result); + extract_function_markers(); + if (format.mode == tool_format::TAG_WITH_TAGGED) { + analyze_arguments(); } - extract_argument_separator(tmpl, result.arguments); - extract_args_markers(tmpl, result, result.arguments); - result.call_id = extract_call_id_markers(tmpl, result.format); + extract_argument_separator(); + extract_args_markers(); + extract_call_id_markers(); } 
- - return result; } -tool_format_analysis differential_analyzer::analyze_tool_calls(const common_chat_template & tmpl, - const reasoning_analysis & reasoning) { +void analyze_tools::analyze_tool_calls(const analyze_reasoning & reasoning) { json assistant_no_tools = json{ { "role", "assistant" }, { "content", "Response." } @@ -606,11 +594,11 @@ tool_format_analysis differential_analyzer::analyze_tool_calls(const common_chat params.enable_thinking = true; auto comparison = compare_variants( - tmpl, params, [&](template_params & p) { p.messages = json::array({ user_msg, assistant_with_tools }); }); + *tmpl, params, [&](template_params & p) { p.messages = json::array({ user_msg, assistant_with_tools }); }); if (!comparison) { LOG_DBG(ANSI_ORANGE "%s: Template application failed\n" ANSI_RESET, __func__); - return tool_format_analysis(); + return; } const auto & diff = comparison->diff; @@ -618,20 +606,18 @@ tool_format_analysis differential_analyzer::analyze_tool_calls(const common_chat std::string tool_section = diff.right; if (tool_section.empty()) { - return tool_format_analysis(); + return; } - return analyze_tool_call_format(tool_section, "foofoo", "first", reasoning); + analyze_tool_call_format(tool_section, "foofoo", "first", reasoning); } -tool_format_analysis differential_analyzer::analyze_tool_call_format(const std::string & haystack, - const std::string & fun_name_needle, - const std::string & arg_name_needle, - const reasoning_analysis & reasoning) { - tool_format_analysis result; - +void analyze_tools::analyze_tool_call_format(const std::string & haystack, + const std::string & fun_name_needle, + const std::string & arg_name_needle, + const analyze_reasoning & reasoning) { if (fun_name_needle.empty() || arg_name_needle.empty() || haystack.empty()) { - return result; + return; } auto in_json_haystack = [&haystack](const std::string & needle) -> bool { @@ -656,11 +642,11 @@ tool_format_analysis differential_analyzer::analyze_tool_call_format(const std:: if 
(in_json_haystack(fun_name_needle)) { // no need to check further, we're in JSON land - result.mode = tool_format::JSON_NATIVE; + format.mode = tool_format::JSON_NATIVE; } else if (in_json_haystack(arg_name_needle)) { - result.mode = tool_format::TAG_WITH_JSON; + format.mode = tool_format::TAG_WITH_JSON; } else { - result.mode = tool_format::TAG_WITH_TAGGED; + format.mode = tool_format::TAG_WITH_TAGGED; } // first, remove any reasoning markers @@ -678,22 +664,19 @@ tool_format_analysis differential_analyzer::analyze_tool_call_format(const std:: } } - if (result.mode == tool_format::JSON_NATIVE) { - analyze_tool_call_format_json_native(clean_haystack, fun_name_needle, arg_name_needle, result); + if (format.mode == tool_format::JSON_NATIVE) { + analyze_tool_call_format_json_native(clean_haystack, fun_name_needle, arg_name_needle); } else { - analyze_tool_call_format_non_json(clean_haystack, fun_name_needle, result); + analyze_tool_call_format_non_json(clean_haystack, fun_name_needle); } // always relax whitespace requirements on ending markers since they don't influence content - result.section_end = trim_whitespace(result.section_end); - result.per_call_end = trim_whitespace(result.per_call_end); - - return result; + format.section_end = trim_whitespace(format.section_end); + format.per_call_end = trim_whitespace(format.per_call_end); } -void differential_analyzer::analyze_tool_call_format_json_native(const std::string & clean_haystack, - const std::string & fun_name_needle, - const std::string & arg_name_needle, - tool_format_analysis & format) { +void analyze_tools::analyze_tool_call_format_json_native(const std::string & clean_haystack, + const std::string & fun_name_needle, + const std::string & arg_name_needle) { // we might not have the typical OpenAI tool calling structure int json_start = clean_haystack.find_first_of('{'); int json_end = clean_haystack.find_last_of('}'); @@ -781,9 +764,8 @@ void 
differential_analyzer::analyze_tool_call_format_json_native(const std::stri } } -void differential_analyzer::analyze_tool_call_format_non_json(const std::string & clean_haystack, - const std::string & fun_name_needle, - tool_format_analysis & format) { +void analyze_tools::analyze_tool_call_format_non_json(const std::string & clean_haystack, + const std::string & fun_name_needle) { // we need to split by markers... auto haystack_split = segmentize_markers(trim_leading_whitespace(clean_haystack)); int where_is_nemo = 0; @@ -871,7 +853,7 @@ void differential_analyzer::analyze_tool_call_format_non_json(const std::string } } -void differential_analyzer::check_per_call_markers(const common_chat_template & tmpl, tool_format_analysis & result) { +void analyze_tools::check_per_call_markers() { json assistant_one_tool = json{ { "role", "assistant" }, { "content", "" }, @@ -891,7 +873,7 @@ void differential_analyzer::check_per_call_markers(const common_chat_template & params.enable_thinking = true; auto one_vs_two = compare_variants( - tmpl, params, [&](template_params & p) { p.messages = json::array({ user_msg, assistant_two_tools }); }); + *tmpl, params, [&](template_params & p) { p.messages = json::array({ user_msg, assistant_two_tools }); }); if (!one_vs_two) { LOG_DBG(ANSI_ORANGE "%s: Generating double tool call comparison failed\n" ANSI_RESET, __func__); @@ -901,18 +883,16 @@ void differential_analyzer::check_per_call_markers(const common_chat_template & diff_split filter_common_call_part = calculate_diff_split(one_vs_two->diff.suffix, one_vs_two->diff.right); std::string second_tool_content = trim_leading_whitespace(filter_common_call_part.right); - if (!result.section_start.empty() && - second_tool_content.find(result.section_start) == 0) { - result.per_call_start = result.section_start; - result.per_call_end = result.section_end; - result.section_start.clear(); - result.section_end.clear(); + if (!format.section_start.empty() && + 
second_tool_content.find(format.section_start) == 0) { + format.per_call_start = format.section_start; + format.per_call_end = format.section_end; + format.section_start.clear(); + format.section_end.clear(); } } -tool_function_analysis differential_analyzer::extract_function_markers(const common_chat_template & tmpl, const tool_format_analysis & analysis) { - tool_function_analysis result; - +void analyze_tools::extract_function_markers() { json assistant_nocall = json{ { "role", "assistant" }, { "content", "BBBB" }, @@ -937,37 +917,37 @@ tool_function_analysis differential_analyzer::extract_function_markers(const com params.enable_thinking = true; auto comparison = compare_variants( - tmpl, params, [&](template_params & p) { p.messages = json::array({ user_msg, assistant_barbar }); }); + *tmpl, params, [&](template_params & p) { p.messages = json::array({ user_msg, assistant_barbar }); }); if (!comparison) { LOG_DBG(ANSI_ORANGE "%s: Template application failed\n" ANSI_RESET, __func__); - return result; + return; } const auto & diff = comparison->diff; if (diff.left.find("foofoo") != std::string::npos && diff.right.find("barbar") != std::string::npos) { std::string prefix_marker; - if (!analysis.per_call_start.empty()) { - prefix_marker = analysis.per_call_start; + if (!format.per_call_start.empty()) { + prefix_marker = format.per_call_start; } else { - prefix_marker = analysis.section_start; + prefix_marker = format.section_start; } if (!prefix_marker.empty() && diff.prefix.rfind(prefix_marker) != std::string::npos) { - result.name_prefix = + function.name_prefix = diff.prefix.substr(diff.prefix.rfind(prefix_marker) + prefix_marker.size()); } auto seg = segmentize_markers(diff.left); for (const auto & s : seg) { if (s.value.find("foofoo") == std::string::npos) { - result.name_prefix += s.value; + function.name_prefix += s.value; } else { size_t pos = s.value.find("foofoo"); std::string pre = s.value.substr(0, pos); std::string post = s.value.substr(pos + 6); // 6 
= len("foofoo") - result.name_prefix += pre; - result.name_suffix += post; + function.name_prefix += pre; + function.name_suffix += post; break; } } @@ -977,7 +957,7 @@ tool_function_analysis differential_analyzer::extract_function_markers(const com size_t stop_internal_pos = 0; for (const auto & ss : seg_suf) { bool has_needle = false; - if (analysis.mode == tool_format::TAG_WITH_JSON) { + if (format.mode == tool_format::TAG_WITH_JSON) { has_needle = (ss.type == segment_type::TEXT && ss.value.find_first_of("{[") != std::string::npos); if (has_needle) { stop_internal_pos = ss.value.find_first_of("{["); @@ -993,7 +973,7 @@ tool_function_analysis differential_analyzer::extract_function_markers(const com stop++; } if (stop < seg_suf.size() - 1) { - if (analysis.mode == tool_format::TAG_WITH_TAGGED) { + if (format.mode == tool_format::TAG_WITH_TAGGED) { size_t how_far = 0; if (stop > 0) { if (seg_suf[stop].type == segment_type::MARKER) { @@ -1002,30 +982,30 @@ tool_function_analysis differential_analyzer::extract_function_markers(const com how_far = stop - 1; } for (size_t i = 0; i < how_far; i++) { - result.name_suffix += seg_suf[i].value; + function.name_suffix += seg_suf[i].value; } } } else { for (size_t i = 0; i < stop; i++) { - result.name_suffix += seg_suf[i].value; + function.name_suffix += seg_suf[i].value; } const std::string & stopper = seg_suf[stop].value; - result.name_suffix += stopper.substr(0, stop_internal_pos); + function.name_suffix += stopper.substr(0, stop_internal_pos); } } // now just to find the closer std::string suffix_marker; - if (!analysis.per_call_end.empty()) { - suffix_marker = analysis.per_call_end; + if (!format.per_call_end.empty()) { + suffix_marker = format.per_call_end; } else { - suffix_marker = analysis.section_end; + suffix_marker = format.section_end; } std::string closer_suffix; if (suffix_marker.empty()) { // we'll have to rely on an extra diff with no-calls version auto notool_comp = compare_variants( - tmpl, params, 
[&](template_params & p) { p.messages = json::array({ user_msg, assistant_nocall }); }); + *tmpl, params, [&](template_params & p) { p.messages = json::array({ user_msg, assistant_nocall }); }); auto nt_diff = notool_comp->diff; closer_suffix = nt_diff.left.substr(nt_diff.left.find("YYYY") + 4); } else { @@ -1033,18 +1013,18 @@ tool_function_analysis differential_analyzer::extract_function_markers(const com } if (!closer_suffix.empty()) { auto closer_seg = segmentize_markers(closer_suffix); - bool need_to_eat_arg_marker = (analysis.mode == tool_format::TAG_WITH_TAGGED); + bool need_to_eat_arg_marker = (format.mode == tool_format::TAG_WITH_TAGGED); size_t last_arg_seg = closer_seg.size() - 1; for (int i = (int) closer_seg.size() - 1; i >= 0; i--) { if (closer_seg[i].value.find("YYYY") != std::string::npos) { last_arg_seg = i; } } - if (analysis.mode == tool_format::TAG_WITH_JSON) { + if (format.mode == tool_format::TAG_WITH_JSON) { const auto & entire_seg = closer_seg[last_arg_seg].value; size_t pos = entire_seg.find_last_of("}]"); if (pos != std::string::npos && pos < entire_seg.size() - 1) { - result.close = trim_leading_whitespace(entire_seg.substr(pos + 1)); + function.close = trim_leading_whitespace(entire_seg.substr(pos + 1)); } } for (size_t i = last_arg_seg + 1; i < closer_seg.size(); i++) { @@ -1052,31 +1032,25 @@ tool_function_analysis differential_analyzer::extract_function_markers(const com if (need_to_eat_arg_marker) { need_to_eat_arg_marker = false; } else { - result.close += closer_seg[i].value; + function.close += closer_seg[i].value; } } else if (!need_to_eat_arg_marker) { - result.close += closer_seg[i].value; + function.close += closer_seg[i].value; } } } - result.close = trim_leading_whitespace(result.close); + function.close = trim_leading_whitespace(function.close); } - return result; } -tool_arguments_analysis differential_analyzer::analyze_arguments(const common_chat_template & tmpl, const tool_analysis & tool_analysis) { +void 
analyze_tools::analyze_arguments() { LOG_DBG(ANSI_ORANGE "Phase 4: Argument analysis\n" ANSI_RESET); - tool_arguments_analysis result; - - extract_argument_name_markers(tmpl, result); - extract_argument_value_markers(tmpl, tool_analysis, result); - - return result; + extract_argument_name_markers(); + extract_argument_value_markers(); } -void differential_analyzer::extract_argument_name_markers(const common_chat_template & tmpl, - tool_arguments_analysis & args_analysis) { +void analyze_tools::extract_argument_name_markers() { json assistant_first_arg = json{ { "role", "assistant" }, { "content", "" }, @@ -1096,7 +1070,7 @@ void differential_analyzer::extract_argument_name_markers(const common_chat_temp params.enable_thinking = true; auto comparison = compare_variants( - tmpl, params, [&](template_params & p) { p.messages = json::array({ user_msg, assistant_second_arg }); }); + *tmpl, params, [&](template_params & p) { p.messages = json::array({ user_msg, assistant_second_arg }); }); if (!comparison) { LOG_DBG(ANSI_ORANGE "%s: Template application failed\n" ANSI_RESET, __func__); @@ -1125,11 +1099,11 @@ void differential_analyzer::extract_argument_name_markers(const common_chat_temp std::string right_name = right_remainder.substr(0, 6); // 6 = len("second") if (left_name == "first" && right_name == "second") { - args_analysis.name_prefix = trim_whitespace(common_prefix); + arguments.name_prefix = trim_whitespace(common_prefix); std::string suffix_left = left_remainder.substr(5, left_close - 5); std::string suffix_right = right_remainder.substr(6, right_close - 6); if (suffix_left == suffix_right) { - args_analysis.name_suffix = trim_leading_whitespace(suffix_left); + arguments.name_suffix = trim_leading_whitespace(suffix_left); } } } @@ -1137,22 +1111,22 @@ void differential_analyzer::extract_argument_name_markers(const common_chat_temp // we most likely have actual markers for argument names auto pre_seg = segmentize_markers(diff.prefix); for (int i = 
pre_seg.size() - 1; i >= 0; i--) { - args_analysis.name_prefix = args_analysis.name_prefix + pre_seg[i].value; + arguments.name_prefix = arguments.name_prefix + pre_seg[i].value; if (pre_seg[i].type == segment_type::MARKER) { break; } } auto left_seg = segmentize_markers(diff.left); if (left_seg.size() == 1) { // only the name + maybe extra whitespace / normal chars in differing part - args_analysis.name_suffix = diff.left.substr(5); + arguments.name_suffix = diff.left.substr(5); auto suf_seg= segmentize_markers(diff.suffix); for (size_t i = 0; i < suf_seg.size(); i++) { - args_analysis.name_suffix += suf_seg[i].value; + arguments.name_suffix += suf_seg[i].value; if (suf_seg[i].type == segment_type::MARKER) { if (i < suf_seg.size() - 2 && suf_seg[i + 1].type == segment_type::TEXT && trim_whitespace(suf_seg[i + 1].value).empty()) { // we need to include post-marker whitespace/newlines as well - args_analysis.name_suffix += suf_seg[i + 1].value; + arguments.name_suffix += suf_seg[i + 1].value; } break; } @@ -1165,12 +1139,12 @@ void differential_analyzer::extract_argument_name_markers(const common_chat_temp } else { to_add = left_seg[i].value; } - args_analysis.name_suffix += to_add; + arguments.name_suffix += to_add; if (left_seg[i].type == segment_type::MARKER) { if (i < left_seg.size() - 2 && left_seg[i + 1].type == segment_type::TEXT && trim_whitespace(left_seg[i + 1].value).empty()) { // we need to include post-marker whitespace/newlines as well - args_analysis.name_suffix += left_seg[i + 1].value; + arguments.name_suffix += left_seg[i + 1].value; } break; } @@ -1180,9 +1154,7 @@ void differential_analyzer::extract_argument_name_markers(const common_chat_temp } } -void differential_analyzer::extract_argument_value_markers(const common_chat_template & tmpl, - const tool_analysis & analysis, - tool_arguments_analysis & args_analysis) { +void analyze_tools::extract_argument_value_markers() { json assistant_val_X = json{ { "role", "assistant" }, { "content", "" }, 
@@ -1202,7 +1174,7 @@ void differential_analyzer::extract_argument_value_markers(const common_chat_tem params.enable_thinking = true; auto comparison = compare_variants( - tmpl, params, [&](template_params & p) { p.messages = json::array({ user_msg, assistant_val_Y }); }); + *tmpl, params, [&](template_params & p) { p.messages = json::array({ user_msg, assistant_val_Y }); }); if (!comparison) { LOG_DBG(ANSI_ORANGE "%s: Template application failed\n" ANSI_RESET, __func__); @@ -1212,7 +1184,7 @@ void differential_analyzer::extract_argument_value_markers(const common_chat_tem const auto & diff = comparison->diff; if (diff.left == "XXXX" && diff.right == "YYYY") { - std::string arg_name_ending = "first" + args_analysis.name_suffix; + std::string arg_name_ending = "first" + arguments.name_suffix; std::string prefix = diff.prefix; if (prefix.rfind(arg_name_ending) != std::string::npos) { prefix = prefix.substr(prefix.rfind(arg_name_ending) + arg_name_ending.size()); @@ -1220,7 +1192,7 @@ void differential_analyzer::extract_argument_value_markers(const common_chat_tem if (!prefix.empty()) { auto seg_pre = segmentize_markers(prefix); for (int i = seg_pre.size() - 1; i >= 0; i--) { - args_analysis.value_prefix = seg_pre[i].value + args_analysis.value_prefix; + arguments.value_prefix = seg_pre[i].value + arguments.value_prefix; if (seg_pre[i].type == segment_type::MARKER) { break; } @@ -1228,14 +1200,14 @@ void differential_analyzer::extract_argument_value_markers(const common_chat_tem } std::string value_suffix = diff.suffix; - if (!analysis.function.close.empty()) { - size_t func_close_pos = value_suffix.find(analysis.function.close); + if (!function.close.empty()) { + size_t func_close_pos = value_suffix.find(function.close); if (func_close_pos != std::string::npos) { value_suffix = value_suffix.substr(0, func_close_pos); } - } else if (!analysis.format.per_call_end.empty() || !analysis.format.section_end.empty()) { + } else if (!format.per_call_end.empty() || 
!format.section_end.empty()) { std::string end_marker = - !analysis.format.per_call_end.empty() ? analysis.format.per_call_end : analysis.format.section_end; + !format.per_call_end.empty() ? format.per_call_end : format.section_end; size_t end_marker_pos = value_suffix.find(end_marker); if (end_marker_pos != std::string::npos) { value_suffix = value_suffix.substr(0, end_marker_pos); @@ -1243,13 +1215,12 @@ void differential_analyzer::extract_argument_value_markers(const common_chat_tem } value_suffix = trim_leading_whitespace(value_suffix); if (!value_suffix.empty()) { - args_analysis.value_suffix = value_suffix; + arguments.value_suffix = value_suffix; } } } -void differential_analyzer::extract_argument_separator(const common_chat_template & tmpl, - tool_arguments_analysis & args_analysis) { +void analyze_tools::extract_argument_separator() { json assistant_one_arg = json{ { "role", "assistant" }, { "content", "" }, @@ -1269,7 +1240,7 @@ void differential_analyzer::extract_argument_separator(const common_chat_templat params.enable_thinking = true; auto comparison = compare_variants( - tmpl, params, [&](template_params & p) { p.messages = json::array({ user_msg, assistant_two_args }); }); + *tmpl, params, [&](template_params & p) { p.messages = json::array({ user_msg, assistant_two_args }); }); if (!comparison) { LOG_DBG(ANSI_ORANGE "%s: Template application failed\n" ANSI_RESET, __func__); @@ -1280,13 +1251,11 @@ void differential_analyzer::extract_argument_separator(const common_chat_templat if (!diff.right.empty()) { std::string separator = until_common_prefix(diff.right, "first", "second"); - args_analysis.separator = separator; + arguments.separator = separator; } } -void differential_analyzer::extract_args_markers(const common_chat_template & tmpl, - const tool_analysis & analysis, - tool_arguments_analysis & args_analysis) { +void analyze_tools::extract_args_markers() { json assistant_no_args = json{ { "role", "assistant"}, { "content", "" }, @@ -1306,7 
+1275,7 @@ void differential_analyzer::extract_args_markers(const common_chat_template & tm params.enable_thinking = true; auto comparison = compare_variants( - tmpl, params, [&](template_params & p) { p.messages = json::array({ user_msg, assistant_with_args }); }); + *tmpl, params, [&](template_params & p) { p.messages = json::array({ user_msg, assistant_with_args }); }); if (!comparison) { LOG_DBG(ANSI_ORANGE "%s: Template application failed\n" ANSI_RESET, __func__); @@ -1315,9 +1284,9 @@ void differential_analyzer::extract_args_markers(const common_chat_template & tm const auto & diff = comparison->diff; - if (analysis.format.mode != tool_format::JSON_NATIVE) { - std::string prefix_marker = !analysis.format.section_start.empty() ? analysis.format.section_start : analysis.format.per_call_start; - std::string suffix_marker = !analysis.format.section_end.empty() ? analysis.format.section_end : analysis.format.per_call_end; + if (format.mode != tool_format::JSON_NATIVE) { + std::string prefix_marker = !format.section_start.empty() ? format.section_start : format.per_call_start; + std::string suffix_marker = !format.section_end.empty() ? format.section_end : format.per_call_end; // these might happen earlier in the tools section as an example or somewhere else, so we need to find the closest ones size_t prefix_pos = prefix_marker.empty() ? 0 : diff.prefix.rfind(prefix_marker); size_t suffix_pos = suffix_marker.empty() ? 
diff.suffix.size() : diff.suffix.find(suffix_marker); @@ -1333,15 +1302,13 @@ void differential_analyzer::extract_args_markers(const common_chat_template & tm std::string args_end = after_common_suffix(suffix_cut, "{}", "\"XXXX\"}"); if (!args_start.empty() || !args_end.empty()) { - args_analysis.start = args_start; - args_analysis.end = args_end; + arguments.start = args_start; + arguments.end = args_end; } } } -tool_id_analysis differential_analyzer::extract_call_id_markers(const common_chat_template & tmpl, tool_format_analysis & analysis) { - tool_id_analysis result; - +void analyze_tools::extract_call_id_markers() { json assistant_id1 = json{ { "role", "assistant" }, { "content", "" }, @@ -1361,17 +1328,17 @@ tool_id_analysis differential_analyzer::extract_call_id_markers(const common_cha params.enable_thinking = true; auto comparison = compare_variants( - tmpl, params, [&](template_params & p) { p.messages = json::array({ user_msg, assistant_id2 }); }); + *tmpl, params, [&](template_params & p) { p.messages = json::array({ user_msg, assistant_id2 }); }); if (!comparison) { LOG_DBG(ANSI_ORANGE "%s: Template application failed for call_id detection\n" ANSI_RESET, __func__); - return result; + return; } const auto & diff = comparison->diff; if (diff.left.empty() && diff.right.empty()) { - return result; + return; } std::string id_value_1 = "call00001"; @@ -1402,7 +1369,7 @@ tool_id_analysis differential_analyzer::extract_call_id_markers(const common_cha if (args_in_suffix != std::string::npos && (args_in_prefix == std::string::npos || args_in_prefix > diff.prefix.length())) { // Args are in suffix, so call_id is BETWEEN_FUNC_AND_ARGS - result.pos = call_id_position::BETWEEN_FUNC_AND_ARGS; + call_id.pos = call_id_position::BETWEEN_FUNC_AND_ARGS; // The prefix ends with: ... 
// Segmentize to find the call_id_prefix marker @@ -1427,12 +1394,12 @@ tool_id_analysis differential_analyzer::extract_call_id_markers(const common_cha } if (!marker_before_id.empty()) { - result.prefix = marker_before_id; + call_id.prefix = marker_before_id; } else { // Fallback: look for the last marker in after_func for (int i = (int) segments.size() - 1; i >= 0; i--) { if (segments[i].type == segment_type::MARKER) { - result.prefix = segments[i].value; + call_id.prefix = segments[i].value; break; } } @@ -1442,7 +1409,7 @@ tool_id_analysis differential_analyzer::extract_call_id_markers(const common_cha auto suffix_segments = segmentize_markers(diff.suffix); for (size_t i = 0; i < suffix_segments.size(); i++) { if (suffix_segments[i].type == segment_type::MARKER) { - result.suffix = suffix_segments[i].value; + call_id.suffix = suffix_segments[i].value; break; } // Stop if we hit the args @@ -1452,7 +1419,7 @@ tool_id_analysis differential_analyzer::extract_call_id_markers(const common_cha } } else if (args_in_prefix != std::string::npos) { // Args are in prefix, so call_id is POST_ARGS - result.pos = call_id_position::POST_ARGS; + call_id.pos = call_id_position::POST_ARGS; // Extract markers from between args and the ID std::string after_args = diff.prefix.substr(args_in_prefix); @@ -1462,7 +1429,7 @@ tool_id_analysis differential_analyzer::extract_call_id_markers(const common_cha auto segments = segmentize_markers(between_args_and_id); for (int i = (int) segments.size() - 1; i >= 0; i--) { if (segments[i].type == segment_type::MARKER) { - result.prefix = segments[i].value; + call_id.prefix = segments[i].value; break; } } @@ -1472,20 +1439,20 @@ tool_id_analysis differential_analyzer::extract_call_id_markers(const common_cha auto suffix_segments = segmentize_markers(diff.suffix); for (const auto & seg : suffix_segments) { if (seg.type == segment_type::MARKER) { - result.suffix = seg.value; + call_id.suffix = seg.value; break; } } } } else if (func_name_in_suffix 
!= std::string::npos && func_name_in_prefix == std::string::npos) { // Function name is only in suffix - call_id is PRE_FUNC_NAME - result.pos = call_id_position::PRE_FUNC_NAME; + call_id.pos = call_id_position::PRE_FUNC_NAME; // Extract call_id_prefix from prefix (last marker before the common_id_part) auto prefix_segments = segmentize_markers(diff.prefix); for (int i = (int) prefix_segments.size() - 1; i >= 0; i--) { if (prefix_segments[i].type == segment_type::MARKER) { - result.prefix = prefix_segments[i].value; + call_id.prefix = prefix_segments[i].value; break; } } @@ -1495,7 +1462,7 @@ tool_id_analysis differential_analyzer::extract_call_id_markers(const common_cha auto suffix_segments = segmentize_markers(before_func); for (const auto & seg : suffix_segments) { if (seg.type == segment_type::MARKER) { - result.suffix = seg.value; + call_id.suffix = seg.value; break; } } @@ -1503,45 +1470,10 @@ tool_id_analysis differential_analyzer::extract_call_id_markers(const common_cha // When call_id is detected, per_call_end may have been incorrectly set to include // the call_id_suffix and sample args. Clear it if it starts with call_id_suffix. 
- if (result.pos != call_id_position::NONE && !result.suffix.empty() && - analysis.per_call_end.find(result.suffix) == 0) { - analysis.per_call_end.clear(); + if (call_id.pos != call_id_position::NONE && !call_id.suffix.empty() && + format.per_call_end.find(call_id.suffix) == 0) { + format.per_call_end.clear(); } - - return result; } -void differential_analyzer::collect_preserved_tokens(diff_analysis_result & result) { - auto & tokens = result.preserved_tokens; - - auto add_token = [&tokens](const std::string & org_token) { - std::string token = trim_whitespace(org_token); - if (!token.empty()) { - // Avoid duplicates - if (std::find(tokens.begin(), tokens.end(), token) == tokens.end()) { - tokens.push_back(token); - } - } - }; - - add_token(result.reasoning.start); - add_token(result.reasoning.end); - add_token(result.content.start); - add_token(result.content.end); - add_token(result.tools.format.section_start); - add_token(result.tools.format.section_end); - add_token(result.tools.format.per_call_start); - add_token(result.tools.format.per_call_end); - add_token(result.tools.function.name_prefix); - add_token(result.tools.function.name_suffix); - add_token(result.tools.function.close); - add_token(result.tools.arguments.start); - add_token(result.tools.arguments.end); - add_token(result.tools.arguments.name_prefix); - add_token(result.tools.arguments.name_suffix); - add_token(result.tools.arguments.separator); - add_token(result.tools.arguments.value_prefix); - add_token(result.tools.arguments.value_suffix); - add_token(result.tools.call_id.prefix); - add_token(result.tools.call_id.suffix); -} +} // namespace autoparser diff --git a/common/chat-diff-analyzer.h b/common/chat-diff-analyzer.h index b1bfc83283..a94c40459c 100644 --- a/common/chat-diff-analyzer.h +++ b/common/chat-diff-analyzer.h @@ -2,6 +2,7 @@ #include "chat.h" #include "jinja/caps.h" +#include "peg-parser.h" #include "nlohmann/json.hpp" #include @@ -12,6 +13,8 @@ using json = 
nlohmann::ordered_json; +class common_chat_peg_unified_builder; + // ============================================================================ // Parameters for template application // ============================================================================ @@ -41,6 +44,10 @@ struct compare_variants_result { std::string output_B; }; +namespace autoparser { + +struct templates_params; + // ============================================================================ // Marker Registry: All markers extracted via differential analysis // ============================================================================ @@ -182,21 +189,9 @@ inline std::ostream & operator<<(std::ostream & os, const tool_format & format) } } -struct reasoning_analysis { - reasoning_mode mode = reasoning_mode::NONE; - - std::string start; // e.g., "", "[THINK]", "<|START_THINKING|>", "" - std::string end; // e.g., "", "[BEGIN FINAL RESPONSE]", "<|END_THINKING|>" -}; - -struct content_analysis { - content_mode mode = content_mode::PLAIN; - - std::string start; // e.g., "", ">>>all\n", "" - std::string end; // e.g., "", "" - - bool requires_nonnull_content = false; -}; +// ============================================================================ +// Sub-structs for tool analysis +// ============================================================================ struct tool_format_analysis { tool_format mode = tool_format::NONE; @@ -240,127 +235,176 @@ struct tool_id_analysis { std::string suffix; // e.g., "" (marker after call ID value, before next section) }; -struct tool_analysis { +// ============================================================================ +// Parser build context (shared interface for build_parser methods) +// ============================================================================ + +struct analyze_content; + +struct parser_build_context { + common_chat_peg_unified_builder & p; + const templates_params & inputs; + common_peg_parser reasoning_parser; + bool 
extracting_reasoning = false; + const analyze_content * content = nullptr; + + parser_build_context(common_chat_peg_unified_builder & p, const templates_params & inputs); +}; + +// ============================================================================ +// Base class for analyzers with parser building +// ============================================================================ + +struct analyze_base { + virtual ~analyze_base() = default; + virtual common_peg_parser build_parser(parser_build_context & ctx) const = 0; + + protected: + const common_chat_template * tmpl = nullptr; + + analyze_base() = default; + explicit analyze_base(const common_chat_template & tmpl) : tmpl(&tmpl) {} +}; + +// ============================================================================ +// Reasoning analyzer +// ============================================================================ + +struct analyze_reasoning : analyze_base { + reasoning_mode mode = reasoning_mode::NONE; + + std::string start; // e.g., "", "[THINK]", "<|START_THINKING|>", "" + std::string end; // e.g., "", "[BEGIN FINAL RESPONSE]", "<|END_THINKING|>" + + analyze_reasoning() = default; + analyze_reasoning(const common_chat_template & tmpl, bool supports_tools); + + common_peg_parser build_parser(parser_build_context & ctx) const override; + + private: + // Look for reasoning markers in rendered content + void compare_reasoning_presence(); + + // Compare generation prompt with enable_thinking=true vs false + void compare_thinking_enabled(); + + // Check if reasoning is always possible or only in tool calls + void compare_reasoning_scope(); +}; + +// ============================================================================ +// Content analyzer +// ============================================================================ + +struct analyze_content : analyze_base { + content_mode mode = content_mode::PLAIN; + + std::string start; // e.g., "", ">>>all\n", "" + std::string end; // e.g., "", "" + + bool 
requires_nonnull_content = false; + + analyze_content() = default; + analyze_content(const common_chat_template & tmpl, const analyze_reasoning & reasoning); + + common_peg_parser build_parser(parser_build_context & ctx) const override; + + bool is_always_wrapped() const; + common_peg_parser build_optional_wrapped(parser_build_context & ctx) const; +}; + +// ============================================================================ +// Tool analyzer +// ============================================================================ + +struct analyze_tools : analyze_base { tool_format_analysis format; tool_function_analysis function; tool_arguments_analysis arguments; tool_id_analysis call_id; + + analyze_tools() = default; + analyze_tools(const common_chat_template & tmpl, + const jinja::caps & caps, + const analyze_reasoning & reasoning); + + common_peg_parser build_parser(parser_build_context & ctx) const override; + + private: + // Extract tool calling 'haystack' for further analysis and delegate further analysis based on format + void analyze_tool_calls(const analyze_reasoning & reasoning); + + // Analyze format based on position of function and argument name in needle + void analyze_tool_call_format(const std::string & haystack, + const std::string & fun_name_needle, + const std::string & arg_name_needle, + const analyze_reasoning & reasoning); + + // Analyze specifics of JSON native format (entire tool call is a JSON object) + void analyze_tool_call_format_json_native(const std::string & clean_haystack, + const std::string & fun_name_needle, + const std::string & arg_name_needle); + + // Analyze specifics of non-JSON native format (tags for function name or for function name and arguments) + void analyze_tool_call_format_non_json(const std::string & clean_haystack, + const std::string & fun_name_needle); + + // Check for and extract specific per-call markers for non-native-JSON templates with parallel call support + void check_per_call_markers(); + + // 
Extract function name markers + void extract_function_markers(); + + // Delegates to separate functions for: separator analysis, argument name analysis, argument value analysis + void analyze_arguments(); + + // Extract argument name markers + void extract_argument_name_markers(); + + // Extract argument value markers + void extract_argument_value_markers(); + + // Extract argument separator, if specified (eg. ......) + void extract_argument_separator(); + + // Extract argument wrapper markers, if present (eg. '......') + void extract_args_markers(); + + // Extract call ID markers, if present + void extract_call_id_markers(); + + // Per-format tool parser builders + common_peg_parser build_tool_parser_json_native(parser_build_context & ctx) const; + common_peg_parser build_tool_parser_tag_json(parser_build_context & ctx) const; + common_peg_parser build_tool_parser_tag_tagged(parser_build_context & ctx) const; }; -// Complete result of differential analysis -struct diff_analysis_result { +// ============================================================================ +// Top-level template analyzer (merges differential_analyzer + diff_analysis_result) +// ============================================================================ + +struct analyze_template { jinja::caps jinja_caps; - reasoning_analysis reasoning; - content_analysis content; - tool_analysis tools; + analyze_reasoning reasoning; + analyze_content content; + analyze_tools tools; // Preserved tokens for tokenizer (union of all non-empty markers) std::vector preserved_tokens; -}; -// Performs systematic differential analysis on chat templates -// Uses comparison matrix to extract markers without heuristics -class differential_analyzer { - public: - // Main entry point: Run full differential analysis on a template - static diff_analysis_result analyze(const common_chat_template & tmpl); + // Constructor: runs full differential analysis on a template + explicit analyze_template(const common_chat_template 
& tmpl); - // Phase-specific analysis (can be called individually for testing) - static reasoning_analysis analyze_reasoning(const common_chat_template & tmpl, bool supports_tools); - static content_analysis analyze_content(const common_chat_template & tmpl, const reasoning_analysis & reasoning); - static tool_analysis analyze_tools(const common_chat_template & tmpl, - const jinja::caps & caps, - const reasoning_analysis & reasoning); - - // Factorized differential comparison function (public for testing) - // Takes base params and a single modifier lambda to create variant B - // Returns compare_variants_result containing diff and both outputs, or std::nullopt on failure - static std::optional compare_variants( - const common_chat_template & tmpl, - const template_params & params_A, - const std::function & params_modifier); + // Build the unified PEG parser for this template + common_peg_arena build_parser(const templates_params & inputs) const; private: - // Comparison helpers (implement the comparison matrix from the plan) - - // 1. Reasoning analysis: - // Look for reasoning markers in rendered content - static void compare_reasoning_presence(const common_chat_template & tmpl, reasoning_analysis & reasoning); - - // Compare generation prompt with enable_thinking=true vs false - static void compare_thinking_enabled(const common_chat_template & tmpl, reasoning_analysis & reasoning); - - // Check if reasoning is always possible or only in tool calls - static void compare_reasoning_scope(const common_chat_template & tmpl, reasoning_analysis & reasoning); - - // 2. Content (fully inside analyze_content mentioned above) - - // 3. Tool calls - // a. 
format - // Extract tool calling 'haystack' for further analysis and delegate further analysis based on format - static tool_format_analysis analyze_tool_calls(const common_chat_template & tmpl, - const reasoning_analysis & reasoning); - - // Analyze format based on position of function and argument name in needle - static tool_format_analysis analyze_tool_call_format(const std::string & haystack, - const std::string & fun_name_needle, - const std::string & arg_name_needle, - const reasoning_analysis & reasoning); - - // Analyze specifics of JSON native format (entire tool call is a JSON object) - static void analyze_tool_call_format_json_native(const std::string & clean_haystack, - const std::string & fun_name_needle, - const std::string & arg_name_needle, - tool_format_analysis & format); - - // Analyze specifics of non-JSON native format (tags for function name or for function name and arguments) - static void analyze_tool_call_format_non_json(const std::string & clean_haystack, - const std::string & fun_name_needle, - tool_format_analysis & format); - - // Check for and extract specific per-call markers for non-native-JSON templates with parallel call support - static void check_per_call_markers(const common_chat_template & tmpl, tool_format_analysis & result); - - // Logic below is only for non-JSON-native tool calling formats - // 3. b. function name - // Extract function name markers - static tool_function_analysis extract_function_markers(const common_chat_template & tmpl, - const tool_format_analysis & analysis); - - // 4. c. 
function arguments - // Delegates to separate functions for: separator analysis, argument name analysis, argument value analysis - static tool_arguments_analysis analyze_arguments(const common_chat_template & tmpl, - const tool_analysis & analysis); - - // Extract argument name markers - static void extract_argument_name_markers(const common_chat_template & tmpl, - tool_arguments_analysis & args_analysis); - - // Extract argument value markers - static void extract_argument_value_markers(const common_chat_template & tmpl, - const tool_analysis & analysis, - tool_arguments_analysis & args_analysis); - - // Extract argument separator, if specified (eg. ......) - static void extract_argument_separator(const common_chat_template & tmpl, - tool_arguments_analysis & args_analysis); - - // Extract argument wrapper markers, if present (eg. '......') - static void extract_args_markers(const common_chat_template & tmpl, - const tool_analysis & analysis, - tool_arguments_analysis & args_analysis); - - // 4. d. function call id - // Extract call ID markers, if present - static tool_id_analysis extract_call_id_markers(const common_chat_template & tmpl, - tool_format_analysis & analysis); - // Collect tokens from entire analysis to preserve - static void collect_preserved_tokens(diff_analysis_result & result); - - static std::string apply_template(const common_chat_template & tmpl, const template_params & params); + void collect_preserved_tokens(); }; +} // namespace autoparser + enum segment_type { TEXT, MARKER }; inline std::ostream & operator<<(std::ostream & os, const segment_type & type) { diff --git a/common/chat.cpp b/common/chat.cpp index be4e19aebc..abc93392aa 100644 --- a/common/chat.cpp +++ b/common/chat.cpp @@ -239,8 +239,8 @@ bool common_chat_templates_support_enable_thinking(const common_chat_templates * const auto & tmpl = chat_templates->template_tool_use ? 
*chat_templates->template_tool_use : *chat_templates->template_default; - diff_analysis_result result = differential_analyzer::analyze(tmpl); - detect |= result.reasoning.mode != reasoning_mode::NONE; + autoparser::analyze_template result(tmpl); + detect |= result.reasoning.mode != autoparser::reasoning_mode::NONE; return detect; } @@ -752,7 +752,7 @@ static void foreach_parameter(const json & std::string common_chat_template_direct_apply( const common_chat_template & tmpl, - const struct templates_params & inputs, + const autoparser::templates_params & inputs, const std::optional & messages_override, const std::optional & tools_override, const std::optional & additional_context) { @@ -803,7 +803,7 @@ std::string common_chat_template_direct_apply( } static common_chat_params common_chat_params_init_ministral_3(const common_chat_template & tmpl, - const struct templates_params & inputs) { + const autoparser::templates_params & inputs) { common_chat_params data; // Build up messages to follow the format: https://huggingface.co/mistralai/Ministral-3-14B-Reasoning-2512/blob/main/chat_template.jinja @@ -917,7 +917,7 @@ static common_chat_params common_chat_params_init_ministral_3(const common_chat_ } static common_chat_params common_chat_params_init_gpt_oss(const common_chat_template & tmpl, - const struct templates_params & inputs) { + const autoparser::templates_params & inputs) { common_chat_params data; // Copy reasoning to the "thinking" field as expected by the gpt-oss template @@ -1063,7 +1063,7 @@ static common_chat_params common_chat_params_init_gpt_oss(const common_chat_temp // Functionary v3.2 - uses recipient-based format: >>>recipient\n{content} static common_chat_params common_chat_params_init_functionary_v3_2(const common_chat_template & tmpl, - const struct templates_params & inputs) { + const autoparser::templates_params & inputs) { common_chat_params data; data.prompt = common_chat_template_direct_apply(tmpl, inputs); @@ -1116,16 +1116,14 @@ static 
common_chat_params common_chat_params_init_functionary_v3_2(const common_ if (inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED) { if (inputs.parallel_tool_calls) { return p.choice({ content_and_tools, tools_only }) + p.end(); - } else { - return p.choice({ content_until_tool + tool_choice, tools_only }) + p.end(); } - } else { - if (inputs.parallel_tool_calls) { - return p.choice({ content_and_tools, content_only, tools_only }) + p.end(); - } - auto content_and_tool = content_until_tool + tool_choice; - return p.choice({ content_and_tool, content_only, tool_choice }) + p.end(); + return p.choice({ content_until_tool + tool_choice, tools_only }) + p.end(); } + if (inputs.parallel_tool_calls) { + return p.choice({ content_and_tools, content_only, tools_only }) + p.end(); + } + auto content_and_tool = content_until_tool + tool_choice; + return p.choice({ content_and_tool, content_only, tool_choice }) + p.end(); }); data.parser = parser.save(); @@ -1204,7 +1202,7 @@ static void func_args_not_string(json & messages) { static common_chat_params common_chat_templates_apply_jinja(const struct common_chat_templates * tmpls, const struct common_chat_templates_inputs & inputs) { - templates_params params; + autoparser::templates_params params; params.tools = common_chat_tools_to_json_oaicompat(inputs.tools); const auto & tmpl = params.tools.is_array() && tmpls->template_tool_use ? 
*tmpls->template_tool_use @@ -1282,7 +1280,7 @@ static common_chat_params common_chat_templates_apply_jinja(const struct common_ try { LOG_DBG("Using differential autoparser\n"); - auto auto_params = universal_peg_generator::generate_parser(tmpl, params); + auto auto_params = autoparser::universal_peg_generator::generate_parser(tmpl, params); return auto_params; } catch (const std::exception & e) { LOG_WRN("Automatic parser generation failed: %s\n", e.what()); diff --git a/common/chat.h b/common/chat.h index 00f8eb62b6..0492b82c44 100644 --- a/common/chat.h +++ b/common/chat.h @@ -23,6 +23,10 @@ using json = nlohmann::ordered_json; struct common_chat_templates; +namespace autoparser { +struct templates_params; +} // namespace autoparser + struct common_chat_tool_call { std::string name; std::string arguments; @@ -294,7 +298,7 @@ std::map common_chat_templates_get_caps(const common_chat_tem std::string common_chat_template_direct_apply( const common_chat_template & tmpl, - const struct templates_params & inputs, + const autoparser::templates_params & inputs, const std::optional & messages_override = std::nullopt, const std::optional & tools_override = std::nullopt, const std::optional & additional_context = std::nullopt); diff --git a/tests/test-chat-auto-parser.cpp b/tests/test-chat-auto-parser.cpp index 4f3f7f5ec2..c78428491f 100644 --- a/tests/test-chat-auto-parser.cpp +++ b/tests/test-chat-auto-parser.cpp @@ -10,6 +10,8 @@ #include #include +using namespace autoparser; + static void test_calculate_diff_split_basic(testing & t); static void test_calculate_diff_split_identical(testing & t); static void test_calculate_diff_split_common_prefix(testing & t); @@ -591,7 +593,7 @@ static void test_compare_variants_basic(testing & t) { p.messages[0]["content"] = "World"; }; - auto result = differential_analyzer::compare_variants(tmpl, params, modifier); + auto result = autoparser::compare_variants(tmpl, params, modifier); if (!t.assert_true("result should have value", 
result.has_value())) { return; @@ -614,7 +616,7 @@ static void test_compare_variants_messages_modifier(testing & t) { p.messages[0]["content"] = "B"; }; - std::optional result = differential_analyzer::compare_variants(tmpl, params, modifier); + std::optional result = autoparser::compare_variants(tmpl, params, modifier); if (!t.assert_true("result should have value", result.has_value())) { return; @@ -637,7 +639,7 @@ static void test_compare_variants_tools_modifier(testing & t) { p.tools[0]["name"] = "bar"; }; - auto result = differential_analyzer::compare_variants(tmpl, params, modifier); + auto result = autoparser::compare_variants(tmpl, params, modifier); if (!t.assert_true("result should have value", result.has_value())) { return; @@ -661,7 +663,7 @@ static void test_compare_variants_both_modifiers(testing & t) { p.messages[0]["role"] = "newuser"; }; - auto result = differential_analyzer::compare_variants(tmpl, params, modifier); + auto result = autoparser::compare_variants(tmpl, params, modifier); if (!t.assert_true("result should have value", result.has_value())) { return; @@ -684,7 +686,7 @@ static void test_compare_variants_template_failure(testing & t) { p.messages[0]["content"] = "World"; }; - auto result = differential_analyzer::compare_variants(tmpl, params, modifier); + auto result = autoparser::compare_variants(tmpl, params, modifier); t.assert_true("result should be nullopt on template failure", !result.has_value()); } @@ -699,7 +701,7 @@ static void test_compare_variants_identity(testing & t) { }); // No modifier - should use identity - auto result = differential_analyzer::compare_variants(tmpl, params, nullptr); + auto result = autoparser::compare_variants(tmpl, params, nullptr); if (!t.assert_true("result should have value", result.has_value())) { return; @@ -810,7 +812,7 @@ static void test_seed_oss_tool_presence(testing & t) { params_with_tools.add_generation_prompt = false; params_with_tools.enable_thinking = true; - auto result = 
differential_analyzer::compare_variants(tmpl, params_no_tools, + auto result = autoparser::compare_variants(tmpl, params_no_tools, [&](template_params & p) { p.messages = params_with_tools.messages; }); @@ -872,7 +874,7 @@ static void test_seed_oss_call_count(testing & t) { params_one.add_generation_prompt = false; params_one.enable_thinking = true; - auto result = differential_analyzer::compare_variants(tmpl, params_one, + auto result = autoparser::compare_variants(tmpl, params_one, [&](template_params & p) { p.messages = json::array({user_msg, assistant_two_calls}); }); @@ -964,7 +966,7 @@ static void test_seed_oss_function_names(testing & t) { params_alpha.add_generation_prompt = false; params_alpha.enable_thinking = true; - auto result = differential_analyzer::compare_variants(tmpl, params_alpha, + auto result = autoparser::compare_variants(tmpl, params_alpha, [&](template_params & p) { p.messages = json::array({user_msg, assistant_func_beta}); }); @@ -1068,7 +1070,7 @@ static void test_seed_oss_argument_count(testing & t) { params_zero.add_generation_prompt = false; params_zero.enable_thinking = true; - auto result_zero_one = differential_analyzer::compare_variants(tmpl, params_zero, + auto result_zero_one = autoparser::compare_variants(tmpl, params_zero, [&](template_params & p) { p.messages = json::array({user_msg, assistant_one_arg}); }); @@ -1086,7 +1088,7 @@ static void test_seed_oss_argument_count(testing & t) { params_one.add_generation_prompt = false; params_one.enable_thinking = true; - auto result_one_two = differential_analyzer::compare_variants(tmpl, params_one, + auto result_one_two = autoparser::compare_variants(tmpl, params_one, [&](template_params & p) { p.messages = json::array({user_msg, assistant_two_args}); }); @@ -1144,7 +1146,7 @@ static void test_seed_oss_args_presence(testing & t) { params_same.enable_thinking = true; // Test same arg vs other arg - auto result_same_other = differential_analyzer::compare_variants(tmpl, params_same, + 
auto result_same_other = autoparser::compare_variants(tmpl, params_same, [&](template_params & p) { p.messages = json::array({user_msg, assistant_other_arg}); }); @@ -1163,7 +1165,7 @@ static void test_seed_oss_args_presence(testing & t) { diff5a.right.find("value2") != std::string::npos || diff5a.prefix.find("value2") != std::string::npos || diff5a.suffix.find("value2") != std::string::npos); // Test same arg vs both args - auto result_same_both = differential_analyzer::compare_variants(tmpl, params_same, + auto result_same_both = autoparser::compare_variants(tmpl, params_same, [&](template_params & p) { p.messages = json::array({user_msg, assistant_both_args}); }); @@ -1212,7 +1214,7 @@ static void test_seed_oss_tool_with_reasoning(testing & t) { params_tool_only.add_generation_prompt = false; params_tool_only.enable_thinking = true; - auto result = differential_analyzer::compare_variants(tmpl, params_tool_only, + auto result = autoparser::compare_variants(tmpl, params_tool_only, [&](template_params & p) { p.messages = json::array({user_msg, assistant_tool_with_reasoning}); }); @@ -1285,7 +1287,7 @@ static void test_nemotron_reasoning_detection(testing & t) { params.enable_thinking = true; // Run differential analysis - auto analysis = differential_analyzer::analyze(tmpl); + auto analysis = autoparser::analyze_template(tmpl); // Check reasoning markers t.assert_equal("reasoning_start should be ''", "", analysis.reasoning.start); @@ -1306,7 +1308,7 @@ static void test_nemotron_tool_format(testing & t) { common_chat_template tmpl = load_nemotron_template(t); // Run differential analysis - auto analysis = differential_analyzer::analyze(tmpl); + auto analysis = autoparser::analyze_template(tmpl); // Check tool markers - Nemotron uses per-call wrapping (each call individually wrapped) t.assert_equal("tool_section_start should be empty (per-call format)", "", analysis.tools.format.section_start); @@ -1344,7 +1346,7 @@ static void test_cohere_reasoning_detection(testing 
& t) { common_chat_template tmpl = load_cohere_template(t); // Run differential analysis - auto analysis = differential_analyzer::analyze(tmpl); + auto analysis = autoparser::analyze_template(tmpl); // Check reasoning markers - Cohere uses special token format t.assert_equal("reasoning_start should be '<|START_THINKING|>'", "<|START_THINKING|>", analysis.reasoning.start); @@ -1365,7 +1367,7 @@ static void test_tool_format_cohere(testing & t) { common_chat_template tmpl = load_cohere_template(t); // Run differential analysis - auto analysis = differential_analyzer::analyze(tmpl); + auto analysis = autoparser::analyze_template(tmpl); // Check tool section markers - Cohere uses ACTION markers t.assert_equal("tool_section_start should be '<|START_ACTION|>'", "<|START_ACTION|>", analysis.tools.format.section_start); @@ -1772,12 +1774,12 @@ static void test_tagged_args_with_embedded_quotes(testing & t) { auto tool_choice = p.choice(); for (const auto & tool_def : tools) { - if (!tool_def.contains("function")) continue; + if (!tool_def.contains("function")) { continue; } const auto & function = tool_def.at("function"); std::string name = function.at("name"); const auto & params = function.at("parameters"); - if (!params.contains("properties") || !params.at("properties").is_object()) continue; + if (!params.contains("properties") || !params.at("properties").is_object()) { continue; } const auto & properties = params.at("properties"); diff --git a/tools/parser/debug-template-parser.cpp b/tools/parser/debug-template-parser.cpp index c87f3c8e35..17cbdfea18 100644 --- a/tools/parser/debug-template-parser.cpp +++ b/tools/parser/debug-template-parser.cpp @@ -279,7 +279,7 @@ static void render_scenario(const common_chat_template & tmpl, LOG_ERR("Messages:\n%s\n", final_messages.dump(2).c_str()); try { - templates_params inputs; + autoparser::templates_params inputs; inputs.messages = final_messages; inputs.add_generation_prompt = add_generation_prompt; 
inputs.extra_context["enable_thinking"] = enable_thinking; @@ -395,10 +395,10 @@ int main(int argc, char ** argv) { LOG_ERR(" TEMPLATE ANALYSIS\n"); LOG_ERR("================================================================================\n"); - diff_analysis_result analysis = differential_analyzer::analyze(chat_template); + autoparser::analyze_template analysis(chat_template); // Generate Parser - templates_params params; + autoparser::templates_params params; params.messages = json::array(); params.reasoning_format = opts.enable_reasoning ? COMMON_REASONING_FORMAT_DEEPSEEK : COMMON_REASONING_FORMAT_NONE; @@ -414,7 +414,7 @@ int main(int argc, char ** argv) { } params.parallel_tool_calls = false; - auto parser_data = universal_peg_generator::generate_parser(chat_template, params, analysis); + auto parser_data = autoparser::universal_peg_generator::generate_parser(chat_template, params, analysis); LOG_ERR("\n=== Differential Analysis Results ===\n"); diff --git a/tools/parser/template-analysis.cpp b/tools/parser/template-analysis.cpp index deb2bafa20..a92e104ac0 100644 --- a/tools/parser/template-analysis.cpp +++ b/tools/parser/template-analysis.cpp @@ -400,12 +400,12 @@ static void analyze_template(const std::string & template_path) { { json user_msg = make_user_msg(); - templates_params params_no_tools; + autoparser::templates_params params_no_tools; params_no_tools.messages = json::array({ user_msg }); params_no_tools.add_generation_prompt = false; params_no_tools.tools = json::array(); - templates_params params_with_tools = params_no_tools; + autoparser::templates_params params_with_tools = params_no_tools; params_with_tools.tools = tools; std::string output_no_tools = common_chat_template_direct_apply(chat_template, params_no_tools); @@ -419,12 +419,12 @@ static void analyze_template(const std::string & template_path) { { json user_msg = make_user_msg(); - templates_params params_no_prompt; + autoparser::templates_params params_no_prompt; 
params_no_prompt.messages = json::array({ user_msg }); params_no_prompt.add_generation_prompt = false; params_no_prompt.tools = json::array(); - templates_params params_with_prompt = params_no_prompt; + autoparser::templates_params params_with_prompt = params_no_prompt; params_with_prompt.add_generation_prompt = true; std::string output_no_prompt = common_chat_template_direct_apply(chat_template, params_no_prompt); @@ -438,12 +438,12 @@ static void analyze_template(const std::string & template_path) { { json user_msg = make_user_msg(); - templates_params params_no_reasoning; + autoparser::templates_params params_no_reasoning; params_no_reasoning.messages = json::array({ user_msg, make_assistant_no_reasoning() }); params_no_reasoning.add_generation_prompt = false; params_no_reasoning.enable_thinking = true; - templates_params params_with_reasoning = params_no_reasoning; + autoparser::templates_params params_with_reasoning = params_no_reasoning; params_with_reasoning.messages = json::array({ user_msg, make_assistant_with_reasoning() }); std::string output_no_reasoning = common_chat_template_direct_apply(chat_template, params_no_reasoning); @@ -458,12 +458,12 @@ static void analyze_template(const std::string & template_path) { json user_msg = make_user_msg(); json user_msg2 = make_user_msg2(); - templates_params params_no_reasoning; + autoparser::templates_params params_no_reasoning; params_no_reasoning.messages = json::array({ user_msg, make_assistant_no_reasoning(), user_msg2 }); params_no_reasoning.add_generation_prompt = false; params_no_reasoning.enable_thinking = true; - templates_params params_with_reasoning = params_no_reasoning; + autoparser::templates_params params_with_reasoning = params_no_reasoning; params_with_reasoning.messages = json::array({ user_msg, make_assistant_with_reasoning(), user_msg2 }); std::string output_no_reasoning = common_chat_template_direct_apply(chat_template, params_no_reasoning); @@ -477,12 +477,12 @@ static void 
analyze_template(const std::string & template_path) { { json user_msg = make_user_msg(); - templates_params params_no_tool; + autoparser::templates_params params_no_tool; params_no_tool.messages = json::array({ user_msg, make_assistant_no_tool() }); params_no_tool.add_generation_prompt = false; params_no_tool.tools = tools; - templates_params params_with_tool = params_no_tool; + autoparser::templates_params params_with_tool = params_no_tool; params_with_tool.messages = json::array({ user_msg, make_assistant_one_tool() }); std::string output_no_tool = common_chat_template_direct_apply(chat_template, params_no_tool); @@ -497,12 +497,12 @@ static void analyze_template(const std::string & template_path) { json user_msg = make_user_msg(); json user_msg2 = make_user_msg2_continue(); - templates_params params_no_tool; + autoparser::templates_params params_no_tool; params_no_tool.messages = json::array({ user_msg, make_assistant_no_tool(), user_msg2 }); params_no_tool.add_generation_prompt = false; params_no_tool.tools = tools; - templates_params params_with_tool = params_no_tool; + autoparser::templates_params params_with_tool = params_no_tool; params_with_tool.messages = json::array({ user_msg, make_assistant_one_tool(), user_msg2 }); std::string output_no_tool = common_chat_template_direct_apply(chat_template, params_no_tool); @@ -516,12 +516,12 @@ static void analyze_template(const std::string & template_path) { { json user_msg = make_user_msg(); - templates_params params_one_tool; + autoparser::templates_params params_one_tool; params_one_tool.messages = json::array({ user_msg, make_assistant_one_tool() }); params_one_tool.add_generation_prompt = false; params_one_tool.tools = tools; - templates_params params_two_tools = params_one_tool; + autoparser::templates_params params_two_tools = params_one_tool; params_two_tools.messages = json::array({ user_msg, make_assistant_two_tools() }); std::string output_one_tool = common_chat_template_direct_apply(chat_template, 
params_one_tool); @@ -536,12 +536,12 @@ static void analyze_template(const std::string & template_path) { json user_msg = make_user_msg(); json user_msg2 = make_user_msg2_continue(); - templates_params params_one_tool; + autoparser::templates_params params_one_tool; params_one_tool.messages = json::array({ user_msg, make_assistant_one_tool(), user_msg2 }); params_one_tool.add_generation_prompt = false; params_one_tool.tools = tools; - templates_params params_two_tools = params_one_tool; + autoparser::templates_params params_two_tools = params_one_tool; params_two_tools.messages = json::array({ user_msg, make_assistant_two_tools(), user_msg2 }); std::string output_one_tool = common_chat_template_direct_apply(chat_template, params_one_tool); @@ -555,13 +555,13 @@ static void analyze_template(const std::string & template_path) { { json user_msg = make_user_msg(); - templates_params params_no_reasoning; + autoparser::templates_params params_no_reasoning; params_no_reasoning.messages = json::array({ user_msg, make_assistant_one_tool() }); params_no_reasoning.add_generation_prompt = false; params_no_reasoning.tools = tools; params_no_reasoning.enable_thinking = true; - templates_params params_with_reasoning = params_no_reasoning; + autoparser::templates_params params_with_reasoning = params_no_reasoning; params_with_reasoning.messages = json::array({ user_msg, make_assistant_one_tool_with_reasoning() }); std::string output_no_reasoning = common_chat_template_direct_apply(chat_template, params_no_reasoning); From 61e18cad3f74f184e0d77b12896417b4cd9cda1e Mon Sep 17 00:00:00 2001 From: Piotr Wilkin Date: Sat, 14 Feb 2026 00:26:17 +0100 Subject: [PATCH 36/39] Create basic content parser if no parser definition found --- common/chat.cpp | 11 ++++++++--- common/chat.h | 2 +- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/common/chat.cpp b/common/chat.cpp index abc93392aa..4c03d030ff 100644 --- a/common/chat.cpp +++ b/common/chat.cpp @@ -11,6 +11,7 @@ #include 
"jinja/value.h" #include "jinja/runtime.h" #include "jinja/caps.h" +#include "peg-parser.h" #include #include @@ -1368,12 +1369,16 @@ common_chat_msg common_chat_parse(const std::string & input, return common_chat_peg_parse(params.parser, input, is_partial, params); } -common_chat_msg common_chat_peg_parse(const common_peg_arena & parser, +common_chat_msg common_chat_peg_parse(const common_peg_arena & src_parser, const std::string & input, bool is_partial, const common_chat_parser_params & params) { - if (parser.empty()) { - throw std::runtime_error("Failed to parse due to missing parser definition."); + const common_peg_arena & parser = src_parser.empty() ? + build_chat_peg_unified_parser([](common_chat_peg_unified_builder & p) { return p.content(p.rest()) + p.end(); }) : + src_parser; + + if (src_parser.empty()) { + LOG_WRN("No parser definition detected, assuming pure content parser."); } LOG_DBG("Parsing PEG input with format %s: %s\n", common_chat_format_name(params.format), input.c_str()); diff --git a/common/chat.h b/common/chat.h index 0492b82c44..8ccdba03e1 100644 --- a/common/chat.h +++ b/common/chat.h @@ -272,7 +272,7 @@ std::string common_chat_format_example(const struct common_chat_templates * const char * common_chat_format_name(common_chat_format format); common_chat_msg common_chat_parse(const std::string & input, bool is_partial, const common_chat_parser_params & params); -common_chat_msg common_chat_peg_parse(const common_peg_arena & parser, const std::string & input, bool is_partial, const common_chat_parser_params & params); +common_chat_msg common_chat_peg_parse(const common_peg_arena & src_parser, const std::string & input, bool is_partial, const common_chat_parser_params & params); // used by arg and server const char * common_reasoning_format_name(common_reasoning_format format); From 0884aad1c56d2938dad7449232cfe25bc833669f Mon Sep 17 00:00:00 2001 From: Piotr Wilkin Date: Sat, 14 Feb 2026 00:37:33 +0100 Subject: [PATCH 37/39] Remove stupid 
LLM-generated method comment headers (yeah, we can see what the method name is, thank you very much) --- common/chat-auto-parser-generator.cpp | 16 ---------------- common/chat-diff-analyzer.cpp | 16 ---------------- 2 files changed, 32 deletions(-) diff --git a/common/chat-auto-parser-generator.cpp b/common/chat-auto-parser-generator.cpp index bc2b1f7bbe..072bdb795f 100644 --- a/common/chat-auto-parser-generator.cpp +++ b/common/chat-auto-parser-generator.cpp @@ -74,10 +74,6 @@ common_chat_params universal_peg_generator::generate_parser(const common_chat_te return data; } -// ============================================================================ -// analyze_template::build_parser - orchestrates parser building -// ============================================================================ - common_peg_arena analyze_template::build_parser(const templates_params & inputs) const { return build_chat_peg_unified_parser([&](common_chat_peg_unified_builder & p) { p.set_allow_python_dict_format(true); @@ -107,10 +103,6 @@ common_peg_arena analyze_template::build_parser(const templates_params & inputs) }); } -// ============================================================================ -// analyze_reasoning::build_parser -// ============================================================================ - common_peg_parser analyze_reasoning::build_parser(parser_build_context & ctx) const { auto & p = ctx.p; @@ -139,10 +131,6 @@ common_peg_parser analyze_reasoning::build_parser(parser_build_context & ctx) co return p.eps(); } -// ============================================================================ -// analyze_content::build_parser -// ============================================================================ - common_peg_parser analyze_content::build_parser(parser_build_context & ctx) const { auto & p = ctx.p; @@ -164,10 +152,6 @@ common_peg_parser analyze_content::build_optional_wrapped(parser_build_context & return p.eps(); } -// 
============================================================================ -// analyze_tools::build_parser - dispatches to format-specific builders -// ============================================================================ - common_peg_parser analyze_tools::build_parser(parser_build_context & ctx) const { switch (format.mode) { case tool_format::JSON_NATIVE: diff --git a/common/chat-diff-analyzer.cpp b/common/chat-diff-analyzer.cpp index 2256e48976..b49ce22ef4 100644 --- a/common/chat-diff-analyzer.cpp +++ b/common/chat-diff-analyzer.cpp @@ -138,10 +138,6 @@ static json second_tool_call = static json first_tool_call_alt_id = build_tool_call("foofoo", json{{ "first", "XXXX" }, { "second", "YYYY" }}, "call99999"); -// ============================================================================ -// analyze_template -// ============================================================================ - analyze_template::analyze_template(const common_chat_template & tmpl) : jinja_caps(tmpl.original_caps()) , reasoning(tmpl, jinja_caps.supports_tool_calls) @@ -192,10 +188,6 @@ void analyze_template::collect_preserved_tokens() { add_token(tools.call_id.suffix); } -// ============================================================================ -// analyze_reasoning -// ============================================================================ - analyze_reasoning::analyze_reasoning(const common_chat_template & tmpl, bool supports_tools) : analyze_base(tmpl) { LOG_DBG(ANSI_ORANGE "Phase 1: Reasoning analysis\n" ANSI_RESET); @@ -454,10 +446,6 @@ void analyze_reasoning::compare_reasoning_scope() { } } -// ============================================================================ -// analyze_content -// ============================================================================ - analyze_content::analyze_content(const common_chat_template & tmpl, const analyze_reasoning & reasoning) : analyze_base(tmpl) { LOG_DBG(ANSI_ORANGE "Phase 2: Content analysis\n" ANSI_RESET); @@ 
-549,10 +537,6 @@ bool analyze_content::is_always_wrapped() const { return mode == content_mode::ALWAYS_WRAPPED && !start.empty() && !end.empty(); } -// ============================================================================ -// analyze_tools -// ============================================================================ - analyze_tools::analyze_tools(const common_chat_template & tmpl, const jinja::caps & caps, const analyze_reasoning & reasoning) From e501e1dec917a9d41253551659b1b490cc396f1c Mon Sep 17 00:00:00 2001 From: Piotr Wilkin Date: Sat, 14 Feb 2026 00:56:22 +0100 Subject: [PATCH 38/39] Basic universal PEG parser wrapper with tag-to-dict based extractor --- common/chat-peg-parser.cpp | 25 ++++++++++++++ common/chat-peg-parser.h | 21 ++++++++++++ tests/test-chat-peg-parser.cpp | 59 ++++++++++++++++++++++++++++++++++ 3 files changed, 105 insertions(+) diff --git a/common/chat-peg-parser.cpp b/common/chat-peg-parser.cpp index 6e58dc6761..039e52177c 100644 --- a/common/chat-peg-parser.cpp +++ b/common/chat-peg-parser.cpp @@ -177,6 +177,31 @@ void common_chat_peg_mapper::map(const common_peg_ast_node & node) { } } +void tag_based_peg_mapper::from_ast(const common_peg_ast_arena & arena, const common_peg_parse_result & result) { + arena.visit(result, [this](const common_peg_ast_node & node) { + if (!node.tag.empty()) { + tags[node.tag] = std::string(node.text); + } + }); +} + +tagged_parse_result tagged_peg_parser::parse_and_extract(const std::string & input, bool is_partial) const { + common_peg_parse_context ctx(input, is_partial); + auto parse_result = arena.parse(ctx); + + tag_based_peg_mapper mapper; + mapper.from_ast(ctx.ast, parse_result); + + return { std::move(parse_result), std::move(mapper.tags) }; +} + +tagged_peg_parser build_tagged_peg_parser( + const std::function & fn) { + common_peg_parser_builder builder; + builder.set_root(fn(builder)); + return { builder.build() }; +} + common_peg_parser 
common_chat_peg_builder::tag_with_safe_content(const std::string & tag_name, const std::string & marker, const common_peg_parser & p) { diff --git a/common/chat-peg-parser.h b/common/chat-peg-parser.h index c0392f0c5d..6219c819d6 100644 --- a/common/chat-peg-parser.h +++ b/common/chat-peg-parser.h @@ -138,6 +138,27 @@ inline common_peg_arena build_chat_peg_unified_parser( return builder.build(); } +class tag_based_peg_mapper { + public: + std::map tags; + + void from_ast(const common_peg_ast_arena & arena, const common_peg_parse_result & result); +}; + +struct tagged_parse_result { + common_peg_parse_result result; + std::map tags; +}; + +struct tagged_peg_parser { + common_peg_arena arena; + + tagged_parse_result parse_and_extract(const std::string & input, bool is_partial = false) const; +}; + +tagged_peg_parser build_tagged_peg_parser( + const std::function & fn); + class common_chat_peg_unified_mapper : public common_chat_peg_mapper { std::optional pending_tool_call; // Tool call waiting for name common_chat_tool_call * current_tool = nullptr; diff --git a/tests/test-chat-peg-parser.cpp b/tests/test-chat-peg-parser.cpp index d59880e3dc..95a989e6f8 100644 --- a/tests/test-chat-peg-parser.cpp +++ b/tests/test-chat-peg-parser.cpp @@ -20,6 +20,7 @@ static void test_example_qwen3_coder(testing & t); static void test_example_qwen3_non_coder(testing & t); static void test_command7_parser_compare(testing & t); static void test_prefix_tool_names(testing & t); +static void test_tagged_peg_parser(testing & t); int main(int argc, char * argv[]) { testing t(std::cout); @@ -37,6 +38,7 @@ int main(int argc, char * argv[]) { t.test("qwen3 non-coder", test_example_qwen3_non_coder); t.test("comparison", test_command7_parser_compare); t.test("prefix tool names", test_prefix_tool_names); + t.test("tagged peg parser", test_tagged_peg_parser); return t.summary(); } @@ -878,3 +880,60 @@ static void test_prefix_tool_names(testing & t) { } }); } + +static void 
test_tagged_peg_parser(testing & t) { + t.test("basic tag extraction", [&](testing & t) { + auto parser = build_tagged_peg_parser([](common_peg_parser_builder & p) { + return p.tag("greeting", p.until(" ")) + " " + p.tag("name", p.rest()) + p.end(); + }); + + auto result = parser.parse_and_extract("Hello World"); + t.assert_true("success", result.result.success()); + t.assert_equal("greeting tag", "Hello", result.tags.at("greeting")); + t.assert_equal("name tag", "World", result.tags.at("name")); + }); + + t.test("duplicate tags overwrite", [&](testing & t) { + auto parser = build_tagged_peg_parser([](common_peg_parser_builder & p) { + return p.tag("item", p.until(",")) + "," + p.tag("item", p.rest()) + p.end(); + }); + + auto result = parser.parse_and_extract("first,second"); + t.assert_true("success", result.result.success()); + t.assert_equal("item tag", "second", result.tags.at("item")); + }); + + t.test("no tags extracted", [&](testing & t) { + auto parser = build_tagged_peg_parser([](common_peg_parser_builder & p) { + return p.rest() + p.end(); + }); + + auto result = parser.parse_and_extract("Hello"); + t.assert_true("success", result.result.success()); + t.assert_equal("empty tags", 0u, result.tags.size()); + }); + + t.test("structured extraction", [&](testing & t) { + auto parser = build_tagged_peg_parser([](common_peg_parser_builder & p) { + auto header = p.tag("header", p.until("\n")); + auto body = p.tag("body", p.rest()); + return header + "\n" + body + p.end(); + }); + + auto result = parser.parse_and_extract("Title\nBody content here"); + t.assert_true("success", result.result.success()); + t.assert_equal("header", "Title", result.tags.at("header")); + t.assert_equal("body", "Body content here", result.tags.at("body")); + }); + + t.test("partial parse", [&](testing & t) { + auto parser = build_tagged_peg_parser([](common_peg_parser_builder & p) { + return p.tag("prefix", p.until(":")) + ":" + p.tag("value", p.rest()) + p.end(); + }); + + auto result 
= parser.parse_and_extract("key:val", true); + t.assert_true("not fail", !result.result.fail()); + t.assert_equal("prefix tag", "key", result.tags.at("prefix")); + t.assert_equal("value tag", "val", result.tags.at("value")); + }); +} From e384c6fefe4ca8cfd72550c58b71ece374aa35dc Mon Sep 17 00:00:00 2001 From: Piotr Wilkin Date: Sat, 14 Feb 2026 01:28:53 +0100 Subject: [PATCH 39/39] Add "marker" PEG parser + sample in analyzer --- common/chat-diff-analyzer.cpp | 21 +++++++++++++++------ common/peg-parser.cpp | 6 ++++++ common/peg-parser.h | 3 +++ tests/peg-parser/test-basic.cpp | 17 +++++++++++++++++ 4 files changed, 41 insertions(+), 6 deletions(-) diff --git a/common/chat-diff-analyzer.cpp b/common/chat-diff-analyzer.cpp index b49ce22ef4..81c8255a11 100644 --- a/common/chat-diff-analyzer.cpp +++ b/common/chat-diff-analyzer.cpp @@ -1,9 +1,11 @@ #include "chat-diff-analyzer.h" #include "chat-auto-parser-helpers.h" +#include "chat-peg-parser.h" #include "chat.h" #include "log.h" #include "nlohmann/json.hpp" +#include "peg-parser.h" #include #include @@ -261,7 +263,7 @@ void analyze_reasoning::compare_reasoning_presence() { // prefix: ... 
auto suf_seg = prune_whitespace_segments(segmentize_markers(diff.suffix)); if (trim_whitespace(diff.left).empty() && suf_seg.size() >= 2 && suf_seg[0].type == segment_type::MARKER && - trim_whitespace(suf_seg[1].value).substr(0, 11) == "I can help.") { + trim_whitespace(suf_seg[1].value).find("I can help.") == 0) { auto pre_seg = prune_whitespace_segments(segmentize_markers(diff.prefix)); if (pre_seg[pre_seg.size() - 1].type == segment_type::MARKER || (pre_seg.size() > 1 && trim_whitespace(pre_seg[pre_seg.size() - 1].value).empty() && @@ -492,19 +494,26 @@ analyze_content::analyze_content(const common_chat_template & tmpl, const analyz bool found_plain_content = false; if (trim_whitespace(diff_tools.left) == response) { - auto segments = segmentize_markers(diff_reasoning.left); + auto parser = build_tagged_peg_parser([&](common_peg_parser_builder & p) { + return p.space() + diff_reasoning.left + p.space() + p.optional(p.marker()) + p.space() + p.end(); + }); + if (parser.parse_and_extract(diff_reasoning.left).result.success()) { + // We only have the content text in the diff (possibly with a stray EOG marker), so no markers + mode = content_mode::PLAIN; + found_plain_content = true; + } + /* auto segments = segmentize_markers(diff_reasoning.left); if (trim_whitespace(diff_reasoning.left) == response || (segments.size() == 2 && trim_whitespace(segments[0].value) == response)) { // We only have the content text in the diff (possibly with a stray EOG marker), so no markers - mode = content_mode::PLAIN; + mode = content_mode::PLAIN; found_plain_content = true; - } else if (reasoning.mode != reasoning_mode::NONE && !reasoning.end.empty() && + }*/ else if (reasoning.mode != reasoning_mode::NONE && !reasoning.end.empty() && diff_reasoning.left.find(reasoning.end) != std::string::npos) { std::string post_closed_reasoning = diff_reasoning.left.substr( diff_reasoning.left.find(reasoning.end) + reasoning.end.length()); if (trim_whitespace(post_closed_reasoning) == "Response 
text") { - LOG_DBG("C1: No content markers after stripping reasoning close marker\n"); - mode = content_mode::PLAIN; + mode = content_mode::PLAIN; found_plain_content = true; } } diff --git a/common/peg-parser.cpp b/common/peg-parser.cpp index f1b10b21a5..1545a24210 100644 --- a/common/peg-parser.cpp +++ b/common/peg-parser.cpp @@ -1424,6 +1424,12 @@ common_peg_parser common_peg_parser_builder::python_dict() { }); } +common_peg_parser common_peg_parser_builder::marker() { + auto sharp_bracket_parser = literal("<") + until(">") + literal(">"); + auto square_bracket_parser = literal("[") + until("]") + literal("]"); + return choice({ sharp_bracket_parser, square_bracket_parser }); +} + common_peg_parser common_peg_parser_builder::json_member(const std::string & key, const common_peg_parser & p) { auto ws = space(); return sequence({ diff --git a/common/peg-parser.h b/common/peg-parser.h index 947c775f10..b5e7ae13cf 100644 --- a/common/peg-parser.h +++ b/common/peg-parser.h @@ -456,6 +456,9 @@ class common_peg_parser_builder { common_peg_parser python_dict_bool(); common_peg_parser python_dict_null(); + // A marker, i.e. text delimited by a pair of <> or [] + common_peg_parser marker(); + // Wraps a parser with JSON schema metadata for grammar generation. // Used internally to convert JSON schemas to GBNF grammar rules. 
common_peg_parser schema(const common_peg_parser & p, const std::string & name, const nlohmann::ordered_json & schema, bool raw = false); diff --git a/tests/peg-parser/test-basic.cpp b/tests/peg-parser/test-basic.cpp index 1bda6f2e69..872f16a78d 100644 --- a/tests/peg-parser/test-basic.cpp +++ b/tests/peg-parser/test-basic.cpp @@ -1,3 +1,4 @@ +#include "peg-parser.h" #include "tests.h" void test_basic(testing & t) { @@ -450,5 +451,21 @@ void test_basic(testing & t) { t.assert_equal("result_is_fail", true, result.fail()); }); + + // Test markers + t.test("marker", [](testing &t) { + auto bracket_parser = build_peg_parser([](common_peg_parser_builder & p) { + return p.marker(); + }); + + common_peg_parse_context ctx_square("[marker]", false); + common_peg_parse_context ctx_sharp("<marker>", false); + + auto result_square = bracket_parser.parse(ctx_square); + auto result_sharp = bracket_parser.parse(ctx_sharp); + + t.assert_true("result_square_is_success", result_square.success()); + t.assert_true("result_sharp_is_success", result_sharp.success()); + }); }); }