ANOTHER GIANT POST-FIXUP SQUISH

This commit is contained in:
Piotr Wilkin 2026-01-21 14:28:18 +01:00 committed by Piotr Wilkin
parent 31274f9bd1
commit 0d4179c8aa
32 changed files with 15068 additions and 5029 deletions

12
1 Normal file
View File

@ -0,0 +1,12 @@
Unknown option: 2
Usage: llama-template-analysis [options]
Options:
--template <name> Analyze specific template from test suite (e.g., 'deepseek' or 'DeepSeek-V3.1')
--template-file <path> Analyze custom template file
--all Analyze all templates from test suite
Examples:
llama-template-analysis --all
llama-template-analysis --template deepseek
llama-template-analysis --template-file my-template.jinja

View File

@ -1,95 +1,332 @@
{
"version": 4,
"configurePresets": [
{
"name": "base",
"hidden": true,
"generator": "Ninja",
"binaryDir": "${sourceDir}/build-${presetName}",
"cacheVariables": {
"CMAKE_EXPORT_COMPILE_COMMANDS": "ON",
"CMAKE_INSTALL_RPATH": "$ORIGIN;$ORIGIN/.."
"version": 4,
"configurePresets": [
{
"name": "base",
"hidden": true,
"generator": "Ninja",
"binaryDir": "${sourceDir}/build-${presetName}",
"cacheVariables": {
"CMAKE_EXPORT_COMPILE_COMMANDS": "ON",
"CMAKE_INSTALL_RPATH": "$ORIGIN;$ORIGIN/.."
}
},
{
"name": "sycl-base",
"hidden": true,
"generator": "Ninja",
"binaryDir": "${sourceDir}/build-${presetName}",
"cacheVariables": {
"CMAKE_EXPORT_COMPILE_COMMANDS": "ON",
"CMAKE_CXX_COMPILER": "icx",
"CMAKE_C_COMPILER": "cl",
"GGML_SYCL": "ON",
"CMAKE_INSTALL_RPATH": "$ORIGIN;$ORIGIN/.."
}
},
{
"name": "debug",
"hidden": true,
"cacheVariables": {
"CMAKE_BUILD_TYPE": "Debug"
}
},
{
"name": "release",
"hidden": true,
"cacheVariables": {
"CMAKE_BUILD_TYPE": "Release"
}
},
{
"name": "reldbg",
"hidden": true,
"cacheVariables": {
"CMAKE_BUILD_TYPE": "RelWithDebInfo"
}
},
{
"name": "static",
"hidden": true,
"cacheVariables": {
"GGML_STATIC": "ON"
}
},
{
"name": "sycl_f16",
"hidden": true,
"cacheVariables": {
"GGML_SYCL_F16": "ON"
}
},
{
"name": "vulkan",
"hidden": true,
"cacheVariables": {
"GGML_VULKAN": "ON"
}
},
{
"name": "x64-windows-llvm",
"hidden": true,
"cacheVariables": {
"CMAKE_TOOLCHAIN_FILE": "${sourceDir}/cmake/x64-windows-llvm.cmake"
}
},
{
"name": "arm64-windows-llvm",
"hidden": true,
"architecture": {
"value": "arm64",
"strategy": "external"
},
"toolset": {
"value": "host=x64",
"strategy": "external"
},
"cacheVariables": {
"CMAKE_TOOLCHAIN_FILE": "${sourceDir}/cmake/arm64-windows-llvm.cmake"
}
},
{
"name": "arm64-apple-clang",
"hidden": true,
"architecture": {
"value": "arm64",
"strategy": "external"
},
"toolset": {
"value": "host=x64",
"strategy": "external"
},
"cacheVariables": {
"CMAKE_TOOLCHAIN_FILE": "${sourceDir}/cmake/arm64-apple-clang.cmake"
}
},
{
"name": "x64-linux-gcc",
"hidden": true,
"cacheVariables": {
"CMAKE_C_COMPILER": "gcc",
"CMAKE_CXX_COMPILER": "g++"
}
},
{
"name": "x64-linux-gcc-debug",
"inherits": [
"base",
"x64-linux-gcc",
"debug"
]
},
{
"name": "x64-linux-gcc-release",
"inherits": [
"base",
"x64-linux-gcc",
"release"
]
},
{
"name": "x64-linux-gcc-reldbg",
"inherits": [
"base",
"x64-linux-gcc",
"reldbg"
]
},
{
"name": "x64-linux-gcc+static-release",
"inherits": [
"base",
"x64-linux-gcc",
"release",
"static"
]
},
{
"name": "arm64-windows-llvm-debug",
"inherits": [
"base",
"arm64-windows-llvm",
"debug"
]
},
{
"name": "arm64-windows-llvm-release",
"inherits": [
"base",
"arm64-windows-llvm",
"reldbg"
]
},
{
"name": "arm64-windows-llvm+static-release",
"inherits": [
"base",
"arm64-windows-llvm",
"reldbg",
"static"
]
},
{
"name": "arm64-apple-clang-debug",
"inherits": [
"base",
"arm64-apple-clang",
"debug"
]
},
{
"name": "arm64-apple-clang-release",
"inherits": [
"base",
"arm64-apple-clang",
"reldbg"
]
},
{
"name": "arm64-apple-clang+static-release",
"inherits": [
"base",
"arm64-apple-clang",
"reldbg",
"static"
]
},
{
"name": "x64-windows-llvm-debug",
"inherits": [
"base",
"x64-windows-llvm",
"debug"
]
},
{
"name": "x64-windows-llvm-release",
"inherits": [
"base",
"x64-windows-llvm",
"release"
]
},
{
"name": "x64-windows-llvm-reldbg",
"inherits": [
"base",
"x64-windows-llvm",
"reldbg"
]
},
{
"name": "x64-windows-llvm+static-release",
"inherits": [
"base",
"x64-windows-llvm",
"reldbg",
"static"
]
},
{
"name": "x64-windows-msvc-debug",
"inherits": [
"base",
"debug"
]
},
{
"name": "x64-windows-msvc-release",
"inherits": [
"base",
"reldbg"
]
},
{
"name": "x64-windows-msvc+static-release",
"inherits": [
"base",
"reldbg",
"static"
]
},
{
"name": "x64-windows-sycl-debug",
"inherits": [
"sycl-base",
"debug"
]
},
{
"name": "x64-windows-sycl-debug-f16",
"inherits": [
"sycl-base",
"debug",
"sycl_f16"
]
},
{
"name": "x64-windows-sycl-release",
"inherits": [
"sycl-base",
"release"
]
},
{
"name": "x64-windows-sycl-release-f16",
"inherits": [
"sycl-base",
"release",
"sycl_f16"
]
},
{
"name": "x64-windows-vulkan-debug",
"inherits": [
"base",
"vulkan",
"debug"
]
},
{
"name": "x64-windows-vulkan-release",
"inherits": [
"base",
"vulkan",
"release"
]
},
{
"name": "ilintar-release",
"hidden": false,
"description": "Release build",
"displayName": "Release build",
"binaryDir": "${sourceDir}/build",
"cacheVariables": {
"GGML_CUDA": "ON",
"GGML_CUDA_FORCE_CUBLAS": "OFF",
"GGML_CUDA_FORCE_MMQ": "OFF",
"GGML_CUDA_FA_ALL_QUANTS": "1",
"CMAKE_CUDA_ARCHITECTURES": "86;120",
"GGML_BLAS": "ON",
"GGML_BLAS_VENDOR": "OpenBLAS",
"GGML_CPU_ALL_VARIANTS": "ON",
"GGML_BACKEND_DL": "ON",
"CMAKE_CUDA_COMPILER": "nvcc"
},
"inherits": [
"base",
"release",
"x64-linux-gcc-release"
]
}
},
{
"name": "sycl-base",
"hidden": true,
"generator": "Ninja",
"binaryDir": "${sourceDir}/build-${presetName}",
"cacheVariables": {
"CMAKE_EXPORT_COMPILE_COMMANDS": "ON",
"CMAKE_CXX_COMPILER": "icx",
"CMAKE_C_COMPILER": "cl",
"GGML_SYCL": "ON",
"CMAKE_INSTALL_RPATH": "$ORIGIN;$ORIGIN/.."
],
"buildPresets": [
{
"name": "parallel",
"description": "Parallel build",
"displayName": "Parallel build",
"configurePreset": "ilintar-release",
"jobs": 8
}
},
{ "name": "debug", "hidden": true, "cacheVariables": { "CMAKE_BUILD_TYPE": "Debug" } },
{ "name": "release", "hidden": true, "cacheVariables": { "CMAKE_BUILD_TYPE": "Release" } },
{ "name": "reldbg", "hidden": true, "cacheVariables": { "CMAKE_BUILD_TYPE": "RelWithDebInfo" } },
{ "name": "static", "hidden": true, "cacheVariables": { "GGML_STATIC": "ON" } },
{ "name": "sycl_f16", "hidden": true, "cacheVariables": { "GGML_SYCL_F16": "ON" } },
{ "name": "vulkan", "hidden": true, "cacheVariables": { "GGML_VULKAN": "ON" } },
{
"name": "x64-windows-llvm", "hidden": true,
"cacheVariables": {
"CMAKE_TOOLCHAIN_FILE": "${sourceDir}/cmake/x64-windows-llvm.cmake"
}
},
{
"name": "arm64-windows-llvm", "hidden": true,
"architecture": { "value": "arm64", "strategy": "external" },
"toolset": { "value": "host=x64", "strategy": "external" },
"cacheVariables": {
"CMAKE_TOOLCHAIN_FILE": "${sourceDir}/cmake/arm64-windows-llvm.cmake"
}
},
{
"name": "arm64-apple-clang", "hidden": true,
"architecture": { "value": "arm64", "strategy": "external" },
"toolset": { "value": "host=x64", "strategy": "external" },
"cacheVariables": {
"CMAKE_TOOLCHAIN_FILE": "${sourceDir}/cmake/arm64-apple-clang.cmake"
}
},
{
"name": "x64-linux-gcc", "hidden": true,
"cacheVariables": {
"CMAKE_C_COMPILER": "gcc",
"CMAKE_CXX_COMPILER": "g++"
}
},
{ "name": "x64-linux-gcc-debug", "inherits": [ "base", "x64-linux-gcc", "debug" ] },
{ "name": "x64-linux-gcc-release", "inherits": [ "base", "x64-linux-gcc", "release" ] },
{ "name": "x64-linux-gcc-reldbg", "inherits": [ "base", "x64-linux-gcc", "reldbg" ] },
{ "name": "x64-linux-gcc+static-release", "inherits": [ "base", "x64-linux-gcc", "release", "static" ] },
{ "name": "arm64-windows-llvm-debug", "inherits": [ "base", "arm64-windows-llvm", "debug" ] },
{ "name": "arm64-windows-llvm-release", "inherits": [ "base", "arm64-windows-llvm", "reldbg" ] },
{ "name": "arm64-windows-llvm+static-release", "inherits": [ "base", "arm64-windows-llvm", "reldbg", "static" ] },
{ "name": "arm64-apple-clang-debug", "inherits": [ "base", "arm64-apple-clang", "debug" ] },
{ "name": "arm64-apple-clang-release", "inherits": [ "base", "arm64-apple-clang", "reldbg" ] },
{ "name": "arm64-apple-clang+static-release", "inherits": [ "base", "arm64-apple-clang", "reldbg", "static" ] },
{ "name": "x64-windows-llvm-debug", "inherits": [ "base", "x64-windows-llvm", "debug" ] },
{ "name": "x64-windows-llvm-release", "inherits": [ "base", "x64-windows-llvm", "release" ] },
{ "name": "x64-windows-llvm-reldbg", "inherits": [ "base", "x64-windows-llvm", "reldbg" ] },
{ "name": "x64-windows-llvm+static-release", "inherits": [ "base", "x64-windows-llvm", "reldbg", "static" ] },
{ "name": "x64-windows-msvc-debug", "inherits": [ "base", "debug" ] },
{ "name": "x64-windows-msvc-release", "inherits": [ "base", "reldbg" ] },
{ "name": "x64-windows-msvc+static-release", "inherits": [ "base", "reldbg", "static" ] },
{ "name": "x64-windows-sycl-debug", "inherits": [ "sycl-base", "debug" ] },
{ "name": "x64-windows-sycl-debug-f16", "inherits": [ "sycl-base", "debug", "sycl_f16" ] },
{ "name": "x64-windows-sycl-release", "inherits": [ "sycl-base", "release" ] },
{ "name": "x64-windows-sycl-release-f16", "inherits": [ "sycl-base", "release", "sycl_f16" ] },
{ "name": "x64-windows-vulkan-debug", "inherits": [ "base", "vulkan", "debug" ] },
{ "name": "x64-windows-vulkan-release", "inherits": [ "base", "vulkan", "release" ] }
]
}
]
}

View File

@ -48,10 +48,11 @@ add_library(${TARGET} STATIC
arg.cpp
arg.h
base64.hpp
chat-auto-parser-analyzer.cpp
chat-auto-parser-generator.cpp
chat-auto-parser-helpers.cpp
chat-auto-parser.h
chat-diff-analyzer.cpp
chat-diff-analyzer.h
chat-peg-parser.cpp
chat-peg-parser.h
chat.cpp

File diff suppressed because it is too large Load Diff

View File

@ -1,250 +1,361 @@
#include "chat-auto-parser-helpers.h"
#include "chat-auto-parser.h"
#include "chat-diff-analyzer.h"
#include "chat-peg-parser.h"
#include "chat.h"
#include "json-schema-to-grammar.h"
#include "log.h"
#include "nlohmann/json.hpp"
#include <string>
#include <optional>
using json = nlohmann::ordered_json;
common_chat_params universal_peg_generator::generate_parser(const template_analysis_result & analysis,
const common_chat_template & tmpl,
const struct templates_params & inputs) {
// Invokes `fn` once for every entry of `tools` that describes a function tool,
// i.e. an entry that has a "type" key equal to "function" and also carries a
// "function" key. Entries that do not match are silently ignored.
static void foreach_function(const json & tools, const std::function<void(const json &)> & fn) {
    for (const auto & entry : tools) {
        // The key-presence checks come first so that at("type") is only
        // evaluated when the key is known to exist.
        const bool skip = !entry.contains("type") ||
                          !entry.contains("function") ||
                          entry.at("type") != "function";
        if (!skip) {
            fn(entry);
        }
    }
}
common_chat_params universal_peg_generator::generate_parser(const common_chat_template & tmpl,
                                                            const struct templates_params & inputs) {
    // Convenience overload: obtain the template structure from the differential
    // analyzer and hand it to the overload that takes a precomputed analysis.
    return generate_parser(tmpl, inputs, differential_analyzer::analyze(tmpl));
}
common_chat_params universal_peg_generator::generate_parser(const common_chat_template & tmpl,
const struct templates_params & inputs,
const diff_analysis_result & analysis) {
// Check for thinking forced open
bool thinking_forced_open = (analysis.reasoning == reasoning_mode::FORCED_OPEN);
bool thinking_forced_closed = (analysis.reasoning == reasoning_mode::FORCED_CLOSED);
// Build the parser using the analysis results
auto parser = build_parser(analysis, inputs, thinking_forced_open, thinking_forced_closed);
// Create the result structure
common_chat_params data;
data.prompt = common_chat_template_direct_apply(tmpl, inputs);
data.format = COMMON_CHAT_FORMAT_PEG_NATIVE;
data.preserved_tokens = analysis.preserved_tokens;
data.parser = parser.save();
try {
LOG_DBG("%s\n", __func__);
// Build grammar if tools are present
bool has_tools = inputs.tools.is_array() && !inputs.tools.empty();
bool include_grammar = has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE;
// Patch messages if template requires non-null content
// Some templates (e.g., iquest) render null as "None" when concatenating strings
std::optional<json> messages_override;
if (analysis.tools.requires_nonnull_content && !inputs.messages.empty()) {
LOG_DBG("Patching null content to empty string (template requires non-null content)\n");
json patched_messages = inputs.messages;
for (auto & msg : patched_messages) {
if (msg.contains("content") && msg["content"].is_null()) {
msg["content"] = "";
}
}
messages_override = patched_messages;
}
if (include_grammar) {
data.grammar_lazy = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_AUTO;
if (inputs.messages.empty()) {
// Some templates don't handle empty messages well - always leave something in
json message = {
{ { "role", "user" }, { "content", "Hello" } }
};
messages_override.emplace(message);
}
// Calculate prompt first to detect forced thinking
data.prompt = common_chat_template_direct_apply(tmpl, inputs, messages_override);
// Determine if thinking is forced open based on prompt ending
bool thinking_forced_open = false;
if (analysis.content.reasoning_mode == content_structure::REASONING_FORCED_OPEN) {
if (inputs.enable_thinking) {
thinking_forced_open = true;
LOG_DBG("Thinking forced open based on template analysis\n");
} else {
// Template ends with reasoning start marker but thinking is disabled
// Append the end marker to close it
data.prompt += analysis.content.reasoning_end;
LOG_DBG("Appended reasoning end marker since thinking is disabled\n");
}
}
data.thinking_forced_open = thinking_forced_open;
// Build the unified parser
auto arena = build_parser(analysis, tmpl, inputs, thinking_forced_open);
data.parser = arena.save();
// Determine format
bool has_tools =
inputs.tools.is_array() && !inputs.tools.empty() && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE;
if (has_tools && analysis.tools.supports_tools) {
// Unified format that handles both JSON and tagged tool calls
data.format = COMMON_CHAT_FORMAT_PEG_NATIVE;
LOG_DBG("Generated unified parser with tool support (format: PEG_NATIVE)\n");
} else if (analysis.content.reasoning_mode != content_structure::REASONING_NONE) {
// Reasoning markers detected - use PEG parser to handle thinking blocks
data.format = COMMON_CHAT_FORMAT_PEG_NATIVE;
LOG_DBG("Generated unified parser for reasoning handling (format: PEG_NATIVE)\n");
} else if (analysis.content.content_mode != content_structure::CONTENT_PLAIN) {
// Content markers detected - use PEG parser to strip them even without tools
data.format = COMMON_CHAT_FORMAT_PEG_NATIVE;
LOG_DBG("Generated unified parser for content marker stripping (format: PEG_NATIVE)\n");
} else if (analysis.tools.function_format == tool_call_structure::FUNC_RECIPIENT_BASED) {
// Recipient-based format (e.g., Functionary v3.2): >>>recipient\n{content}
// Need PEG parser to handle recipient delimiter parsing
data.format = COMMON_CHAT_FORMAT_PEG_NATIVE;
LOG_DBG("Generated unified parser for recipient-based format (format: PEG_NATIVE)\n");
} else if (analysis.tools.function_format == tool_call_structure::FUNC_TAG_WITH_NAME) {
// Tag-with-name format (e.g., func_name\n{args} for Functionary)
// Need PEG parser to handle function name parsing
data.format = COMMON_CHAT_FORMAT_PEG_NATIVE;
LOG_DBG("Generated unified parser for tag-with-name format (format: PEG_NATIVE)\n");
} else if (analysis.tools.function_format == tool_call_structure::FUNC_BRACKET_TAG) {
// Bracket-tag format (e.g., [TOOL_CALLS]name[CALL_ID]id[ARGS]{...} for Mistral Small 3.2)
// Need PEG parser to handle bracket tag parsing
data.format = COMMON_CHAT_FORMAT_PEG_NATIVE;
LOG_DBG("Generated unified parser for bracket-tag format (format: PEG_NATIVE)\n");
} else if (analysis.tools.function_format == tool_call_structure::FUNC_PREFIXED_INDEXED) {
// Prefixed-indexed format (e.g., Kimi-K2)
// Need PEG parser to handle namespace and indexed format
data.format = COMMON_CHAT_FORMAT_PEG_NATIVE;
LOG_DBG("Generated unified parser for prefixed-indexed format (format: PEG_NATIVE)\n");
} else {
data.format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
LOG_DBG("Generated unified parser without tools or content markers (format: CONTENT_ONLY)\n");
}
// Determine trigger word for lazy grammar
std::string trigger_word;
if (!analysis.tools.tool_section_start.empty() ||
analysis.tools.function_format == tool_call_structure::FUNC_RECIPIENT_BASED) {
trigger_word = analysis.tools.tool_section_start;
} else if (analysis.tools.function_format == tool_call_structure::FUNC_TAG_WITH_NAME) {
trigger_word = analysis.tools.function_prefix;
} else if (analysis.tools.function_format == tool_call_structure::FUNC_BRACKET_TAG ||
analysis.tools.function_format == tool_call_structure::FUNC_PREFIXED_INDEXED) {
// For formats with per-call markers, use per_call_start as trigger
trigger_word = analysis.tools.per_call_start;
}
// Build grammar for tool calls
data.grammar_lazy = analysis.tools.supports_tools && has_tools;
// For FUNC_TAG_WITH_NAME with empty prefix (Functionary), disable lazy grammar
// since there's no clear trigger word - constrain from the start
if (analysis.tools.function_format == tool_call_structure::FUNC_TAG_WITH_NAME &&
analysis.tools.function_prefix.empty()) {
data.grammar_lazy = false;
}
if (data.grammar_lazy) {
if (!trigger_word.empty()) {
data.grammar_triggers.push_back({ COMMON_GRAMMAR_TRIGGER_TYPE_WORD, trigger_word });
}
}
// Build grammar
data.grammar = build_grammar([&](const common_grammar_builder & builder) {
if (inputs.tools.is_array()) {
for (const auto & tool : inputs.tools) {
if (!tool.contains("type") || tool.at("type") != "function" || !tool.contains("function")) {
continue;
}
const auto & function = tool.at("function");
if (function.contains("parameters")) {
auto params = function.at("parameters");
builder.resolve_refs(params);
}
}
}
arena.build_grammar(builder, data.grammar_lazy);
foreach_function(inputs.tools, [&](const json & tool) {
const auto & function = tool.at("function");
auto schema = function.at("parameters");
builder.resolve_refs(schema);
});
parser.build_grammar(builder, data.grammar_lazy);
});
// Set preserved tokens from analysis
data.preserved_tokens = analysis.preserved_tokens;
LOG_DBG("=== UNIFIED PEG PARSER GENERATION COMPLETED ===\n");
} catch (const std::exception & e) {
LOG_DBG("Unified parser generation failed: %s\n", e.what());
throw;
// Set grammar triggers based on tool section markers (fall back to per-call markers)
std::string trigger_marker = !analysis.markers.tool_section_start.empty()
? analysis.markers.tool_section_start
: analysis.markers.per_call_start;
if (!trigger_marker.empty()) {
data.grammar_triggers = {
{ COMMON_GRAMMAR_TRIGGER_TYPE_WORD, trigger_marker }
};
}
}
return data;
}
common_peg_arena universal_peg_generator::build_parser(const template_analysis_result & analysis,
const common_chat_template & tmpl,
const struct templates_params & inputs,
bool thinking_forced_open) {
GGML_UNUSED(tmpl);
common_peg_arena universal_peg_generator::build_parser(const diff_analysis_result & analysis,
const struct templates_params & inputs,
bool thinking_forced_open,
bool thinking_forced_closed) {
return build_chat_peg_unified_parser([&](common_chat_peg_unified_builder & p) {
p.set_allow_python_dict_format(true);
const auto & m = analysis.markers;
auto parser = build_chat_peg_unified_parser([&](common_chat_peg_unified_builder & p) {
// Build reasoning block using ContentStructure
auto reasoning = p.build_reasoning_block(analysis.content, inputs.reasoning_format, thinking_forced_open);
common_peg_parser reasoning = p.eps();
bool extract_reasoning = inputs.reasoning_format != COMMON_REASONING_FORMAT_NONE;
bool enable_thinking = inputs.enable_thinking;
// Build content block using ContentStructure
// Note: we don't pass tool_section_start here because content-before-tools handling
// is done inline in each branch below with p.content(p.until(marker))
auto content = p.build_content_block(analysis.content, inputs.reasoning_format);
// Build tool section using ToolCallStructure (if applicable)
bool has_tools =
inputs.tools.is_array() && !inputs.tools.empty() && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE;
if (has_tools && analysis.tools.supports_tools) {
bool force_calls = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED;
auto tool_section =
p.build_tool_section(analysis.tools, inputs.tools, inputs.parallel_tool_calls, force_calls);
// Compose: reasoning -> content before tools -> tool_section -> trailing content
// When thinking is forced open, the reasoning block expects </think>.
// For tool-only messages (no thinking content), the model may output tools directly
// without the </think> tag, so we need to make reasoning optional in that case.
// But if reasoning_format is NONE, the reasoning block is already eps() - don't wrap it
// in optional() as that would generate invalid grammar.
auto reasoning_for_tools =
(thinking_forced_open && inputs.reasoning_format != COMMON_REASONING_FORMAT_NONE) ?
p.optional(reasoning) :
reasoning;
if (!analysis.tools.tool_section_start.empty()) {
// With section markers: look for start marker to delimit content
auto content_before_tools = p.content(p.until(analysis.tools.tool_section_start));
return p.sequence({ reasoning_for_tools, p.space(), content_before_tools, p.space(), tool_section,
p.space(), p.optional(p.content(p.rest())), p.end() });
}
if (analysis.tools.function_format == tool_call_structure::FUNC_TAG_WITH_NAME &&
!analysis.tools.function_prefix.empty()) {
// Tag-with-name format (e.g., >>>func_name): content stops at function prefix
auto content_before_tools = p.content(p.until(analysis.tools.function_prefix));
return p.sequence(
{ reasoning_for_tools, p.space(), content_before_tools, p.space(), tool_section, p.end() });
}
if (analysis.tools.function_format == tool_call_structure::FUNC_TAG_WITH_NAME) {
// Functionary-style format: tool call starts immediately (e.g., func_name\n{args})
// No content before tools in this format - the entire output is the tool call
return p.sequence({ reasoning_for_tools, p.space(), tool_section, p.end() });
}
if (analysis.tools.function_format == tool_call_structure::FUNC_BRACKET_TAG ||
analysis.tools.function_format == tool_call_structure::FUNC_PREFIXED_INDEXED) {
// Bracket-tag (Mistral Small 3.2) or prefixed-indexed (Kimi-K2) format:
// Tool calls start with per_call_start marker (e.g., [TOOL_CALLS], <|tool_call_begin|>)
if (!analysis.tools.per_call_start.empty()) {
auto content_before_tools = p.content(p.until(analysis.tools.per_call_start));
return p.sequence(
{ reasoning_for_tools, p.space(), content_before_tools, p.space(), tool_section, p.end() });
if (extract_reasoning && enable_thinking && analysis.reasoning != reasoning_mode::NONE) {
if (thinking_forced_open || thinking_forced_closed) {
// Thinking is forced open OR forced closed with enable_thinking=true
// In both cases, expect only the closing tag (opening was in template)
reasoning = p.reasoning(p.until(m.reasoning_end)) + m.reasoning_end;
} else if (analysis.reasoning == reasoning_mode::TAG_BASED ||
analysis.reasoning == reasoning_mode::TOOLS_ONLY) {
// Standard tag-based reasoning OR tools-only mode (reasoning appears with tools)
// Both use the same tag-based pattern if markers are available
if (!m.reasoning_start.empty() && !m.reasoning_end.empty()) {
reasoning = p.optional(m.reasoning_start + p.reasoning(p.until(m.reasoning_end)) + m.reasoning_end);
}
// Fallback: no content before tools
return p.sequence({ reasoning_for_tools, p.space(), tool_section, p.end() });
} else if (analysis.reasoning == reasoning_mode::DELIMITER) {
reasoning = p.optional(p.reasoning(p.until(m.reasoning_end)) + m.reasoning_end);
}
if (analysis.tools.function_format == tool_call_structure::FUNC_MARKDOWN_CODE_BLOCK &&
!analysis.tools.code_block_marker.empty()) {
// Markdown code block format (Cohere Command-R Plus):
// Content stops at the code_block_marker (e.g., "Action:")
auto content_before_tools = p.content(p.until(analysis.tools.code_block_marker));
return p.sequence(
{ reasoning_for_tools, p.space(), content_before_tools, p.space(), tool_section, p.end() });
}
// No section markers (raw JSON format): content must stop at JSON object start
// Tool calls start with "{", so use that as a delimiter
auto content_before_tools = p.content(p.until("{"));
return p.sequence(
{ reasoning_for_tools, p.space(), content_before_tools, p.space(), tool_section, p.end() });
}
// No tools - just reasoning (if any) followed by content
return p.sequence({ reasoning, p.space(), content, p.end() });
});
bool has_tools = inputs.tools.is_array() && !inputs.tools.empty();
bool has_response_format = inputs.json_schema.is_object() && !inputs.json_schema.empty();
return parser;
if (has_response_format) {
return reasoning + p.space() + p.content(p.schema(p.json(), "response-format", inputs.json_schema)) + p.end();
}
if (has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE && analysis.supports_tools) {
return build_tool_parser(p, analysis, inputs, reasoning);
}
if (analysis.content == content_mode::ALWAYS_WRAPPED &&
!m.content_start.empty() && !m.content_end.empty()) {
bool extracting_reasoning = extract_reasoning && enable_thinking && analysis.reasoning != reasoning_mode::NONE;
if (extracting_reasoning) {
return reasoning + m.content_start + p.content(p.until(m.content_end)) + m.content_end + p.end();
}
return p.content(p.until(m.content_start)) + m.content_start +
p.content(p.until(m.content_end)) + m.content_end + p.end();
}
return reasoning + p.content(p.rest()) + p.end();
});
}
common_peg_parser universal_peg_generator::build_tool_parser(
common_chat_peg_unified_builder & p,
const diff_analysis_result & analysis,
const templates_params & inputs,
const common_peg_parser & reasoning) {
const auto & m = analysis.markers;
// Build tool choice parser based on format
common_peg_parser tool_choice = p.choice();
if (analysis.tools == tool_format::JSON_NATIVE) {
// Pure JSON format: use standard_json_tools helper
// Build effective field names with dot notation if function_field is set
std::string name_field = analysis.name_field;
std::string args_field = analysis.args_field;
if (!analysis.function_field.empty() &&
analysis.function_field != "function" &&
name_field.find('.') == std::string::npos) {
name_field = analysis.function_field + "." + name_field;
args_field = analysis.function_field + "." + args_field;
}
auto tools_parser = p.standard_json_tools(
m.tool_section_start,
m.tool_section_end,
inputs.tools,
inputs.parallel_tool_calls,
inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED,
name_field,
args_field,
analysis.tools_array_wrapped,
analysis.fun_name_is_key,
analysis.id_field,
analysis.gen_id_field,
analysis.parameter_order
);
// Handle content wrappers if present
if (analysis.content == content_mode::ALWAYS_WRAPPED &&
!m.content_start.empty() && !m.content_end.empty()) {
auto wrapped_content = p.optional(m.content_start + p.content(p.until(m.content_end)) + m.content_end);
return reasoning + wrapped_content + tools_parser + p.end();
}
auto content_before_tools = m.tool_section_start.empty() ? p.eps() : p.until(m.tool_section_start);
return reasoning + p.optional(p.content(content_before_tools)) + tools_parser + p.end();
}
if (analysis.tools == tool_format::TAG_WITH_JSON) {
// Tag-based with JSON args: <function=name>{args}</function>
// With optional call_id: <function=name>[CALL_ID]id[ARGS]{args}</function>
foreach_function(inputs.tools, [&](const json & tool) {
const auto & function = tool.at("function");
std::string name = function.at("name");
const auto & schema = function.at("parameters");
// Build call_id parser based on position (if supported)
common_peg_parser call_id_section = p.eps();
if (analysis.call_id_pos == call_id_position::BETWEEN_FUNC_AND_ARGS &&
!m.call_id_prefix.empty() && !m.call_id_suffix.empty()) {
// Optional call_id followed by required call_id_suffix (which is also args_start)
// Format: optional([CALL_ID] + call_id_value) + [ARGS]
call_id_section = p.optional(m.call_id_prefix + p.tool_id(p.until(m.call_id_suffix))) + m.call_id_suffix;
}
auto func_parser = p.tool_open(m.func_name_prefix + p.tool_name(p.literal(name)) + m.func_name_suffix) +
call_id_section +
p.tool_args(p.schema(p.json(), "tool-" + name + "-schema", schema));
if (!m.func_close.empty()) {
func_parser = func_parser + m.func_close;
}
tool_choice |= p.rule("tool-" + name, func_parser);
});
auto require_calls = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED;
common_peg_parser tool_calls = p.eps();
if (!m.per_call_start.empty()) {
// Per-call wrapping: each call individually wrapped
auto wrapped_call = m.per_call_start + tool_choice + m.per_call_end;
if (inputs.parallel_tool_calls) {
tool_calls = p.trigger_rule("tool-call",
wrapped_call + p.zero_or_more(p.space() + wrapped_call));
} else {
tool_calls = p.trigger_rule("tool-call", wrapped_call);
}
if (!m.tool_section_start.empty()) {
tool_calls = p.trigger_rule("tool-calls", p.literal(m.tool_section_start) + p.space() +
tool_calls + p.space() + (m.tool_section_end.empty() ? p.end() : p.literal(m.tool_section_end)));
}
} else {
std::string separator = m.call_separator;
if (separator.empty()) {
separator = ", "; // Default
}
if (inputs.parallel_tool_calls) {
tool_calls = p.trigger_rule("tool-call",
m.tool_section_start + tool_choice + p.zero_or_more(separator + tool_choice) + m.tool_section_end);
} else {
tool_calls = p.trigger_rule("tool-call",
m.tool_section_start + tool_choice + m.tool_section_end);
}
}
if (!require_calls) {
tool_calls = p.optional(tool_calls);
}
std::string trigger_marker = !m.tool_section_start.empty() ? m.tool_section_start : m.per_call_start;
auto content_before_tools = trigger_marker.empty() ? p.eps() : p.until(trigger_marker);
return reasoning + p.optional(p.content(content_before_tools)) + tool_calls + p.end();
}
if (analysis.tools == tool_format::TAG_WITH_TAGGED) {
// Tag-based with tagged args: <function=name><param=key>value</param></function>
foreach_function(inputs.tools, [&](const json & tool) {
const auto & function = tool.at("function");
std::string name = function.at("name");
const auto & params = function.at("parameters");
if (!params.contains("properties") || !params.at("properties").is_object()) {
return;
}
const auto & properties = params.at("properties");
std::set<std::string> required;
if (params.contains("required") && params.at("required").is_array()) {
params.at("required").get_to(required);
}
// Build parser for each argument
std::vector<common_peg_parser> arg_parsers;
for (const auto & [param_name, param_schema] : properties.items()) {
bool is_required = required.find(param_name) != required.end();
auto type = param_schema.value("type", "object");
auto arg = p.tool_arg(
p.tool_arg_open(m.arg_name_prefix + p.tool_arg_name(p.literal(param_name)) + m.arg_name_suffix) + m.arg_value_prefix +
(type == "string" ?
p.tool_arg_string_value(p.schema(p.until(m.arg_value_suffix),
"tool-" + name + "-arg-" + param_name + "-schema", param_schema, true)) :
p.tool_arg_json_value(p.schema(p.json(),
"tool-" + name + "-arg-" + param_name + "-schema", param_schema)) + p.space()) +
p.tool_arg_close(p.literal(m.arg_value_suffix))
);
if (is_required) {
arg_parsers.push_back(p.rule("tool-" + name + "-arg-" + param_name, arg));
} else {
arg_parsers.push_back(p.optional(p.rule("tool-" + name + "-arg-" + param_name, arg)));
}
}
// Build arg sequence with space() between consecutive args
common_peg_parser args_seq = p.eps();
for (size_t i = 0; i < arg_parsers.size(); i++) {
if (i > 0) {
args_seq = args_seq + p.space();
}
args_seq = args_seq + arg_parsers[i];
}
// Build call_id parser based on position (if supported)
common_peg_parser call_id_section = p.eps();
if (analysis.call_id_pos == call_id_position::BETWEEN_FUNC_AND_ARGS &&
!m.call_id_prefix.empty() && !m.call_id_suffix.empty()) {
// Optional call_id followed by required call_id_suffix
call_id_section = p.optional(m.call_id_prefix + p.tool_id(p.until(m.call_id_suffix))) + m.call_id_suffix;
}
auto func_parser = p.tool_open(m.func_name_prefix + p.tool_name(p.literal(name)) + m.func_name_suffix) +
call_id_section +
p.space() + args_seq;
if (!m.func_close.empty()) {
func_parser = func_parser + p.space() + p.tool_close(p.literal(m.func_close));
} else {
func_parser = func_parser + p.tool_close(p.space()); // force this to process tool closing callbacks in mapper
}
tool_choice |= p.rule("tool-" + name, func_parser);
});
auto require_tools = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED;
common_peg_parser tool_calls = p.eps();
if (!m.per_call_start.empty()) {
// Per-call wrapping: each call individually wrapped (e.g., <tool_call>...</tool_call>)
auto wrapped_call = m.per_call_start + p.space() + tool_choice + p.space() + m.per_call_end;
if (inputs.parallel_tool_calls) {
tool_calls = p.trigger_rule("tool-call", wrapped_call + p.zero_or_more(p.space() + wrapped_call));
} else {
tool_calls = p.trigger_rule("tool-call", wrapped_call);
}
if (!m.tool_section_start.empty()) {
tool_calls = p.trigger_rule("tool-calls", p.literal(m.tool_section_start) + p.space() +
tool_calls + p.space() + (m.tool_section_end.empty() ? p.end() : p.literal(m.tool_section_end)));
}
} else {
std::string separator = m.call_separator;
if (separator.empty()) {
separator = ", "; // Default
}
if (inputs.parallel_tool_calls) {
tool_calls = p.trigger_rule("tool-call",
m.tool_section_start + p.space() + tool_choice + p.zero_or_more(separator + tool_choice) + p.space() + m.tool_section_end);
} else {
tool_calls = p.trigger_rule("tool-call",
m.tool_section_start + p.space() + tool_choice + p.space() + m.tool_section_end);
}
}
if (!require_tools) {
tool_calls = p.optional(tool_calls);
}
std::string trigger_marker = !m.tool_section_start.empty() ? m.tool_section_start : m.per_call_start;
auto content_before_tools = trigger_marker.empty() ? p.eps() : p.until(trigger_marker);
return reasoning + p.optional(p.content(content_before_tools)) + tool_calls + p.end();
}
GGML_ABORT("Unable to create tool parser");
}

File diff suppressed because it is too large Load Diff

View File

@ -1,133 +1,22 @@
#pragma once
#include <optional>
#include "chat-diff-analyzer.h"
#include <string>
#include <vector>
#include "chat.h"
#include "nlohmann/json.hpp"
std::string trim_whitespace(const std::string & str);
std::string trim_leading_whitespace(const std::string & str);
std::string trim_trailing_whitespace(const std::string & str);
std::string trim_trailing_newlines(const std::string & str);
using json = nlohmann::ordered_json;
// calculate a diff split (longest common prefix, longest common suffix excluding prefix,
// mismatched part on the left, mismatched part on the right) between two strings
diff_split calculate_diff_split(const std::string & left, const std::string & right);
namespace minja {
class chat_template;
}
// Returns the prefix of `full` up until the first occurrence of the common prefix of `left` and `right`
std::string until_common_prefix(const std::string & full, const std::string & left, const std::string & right);
void trim_whitespace(std::string & str);
void trim_trailing_newlines(std::string & str);
size_t count_non_whitespace(const std::string & str);
size_t find_last_of_any(const std::string & str, const std::string & chars, size_t start_pos);
// Returns the suffix of `full` after the last occurrence of the common suffix of `left` and `right`
std::string after_common_suffix(const std::string & full, const std::string & left, const std::string & right);
std::string extract_tag_name(const std::string & tag);
std::string create_closing_tag(const std::string & opening_tag);
std::string find_common_prefix(const std::vector<std::string> & strings);
std::string find_common_suffix_generic(const std::vector<std::string> & strings);
std::string find_common_substring_limited(const std::vector<std::string> & strings,
size_t max_length,
const std::string & delimiters);
bool string_ends_with(const std::string & str, const std::string & suffix);
std::string apply_template(common_chat_template & tmpl,
const struct templates_params & inputs,
const std::optional<json> & messages_override = std::nullopt,
const std::optional<json> & tools_override = std::nullopt,
const std::optional<json> & additional_context = std::nullopt);
// Adjust a marker string to ensure it ends at a complete <|...|> token boundary
// This prevents truncation mid-token
std::string adjust_to_token_boundary(const std::string & str);
// Find the position of a token opener (<| or <) in a string
// Returns std::string::npos if not found
size_t find_token_opener(const std::string & str, size_t start_pos = 0);
// Find the position of a token closer (|> or >) in a string
// Returns std::string::npos if not found
size_t find_token_closer(const std::string & str, size_t start_pos = 0);
// Get the length of the token opener at the given position (2 for <| or 4 for <)
// Returns 0 if no valid opener at position
size_t get_token_opener_length(const std::string & str, size_t pos);
// Get the length of the token closer at the given position (2 for |> or 4 for >)
// Returns 0 if no valid closer at position
size_t get_token_closer_length(const std::string & str, size_t pos);
// Strip EOS/end-of-sentence tokens from the end of a string
// Handles both standard (<|eos|>, <|eot_id|>) and fullwidth (<end▁of▁sentence>) formats
std::string strip_eos_token(const std::string & str);
// Internal structure for differential analysis (used during pattern extraction)
struct internal_discovered_pattern {
// Plain data holder: every std::string member below that has no initializer
// starts out empty; only the two JSON field names carry non-empty defaults.
std::string tool_call_opener;
std::string tool_call_closer;
std::string function_opener;
std::string function_closer;
std::string function_name_suffix;
std::string parameter_opener;
std::string parameter_closer;
std::string argument_separator;
std::string parameter_key_prefix;
std::string parameter_key_suffix;
std::string tool_call_start_marker;
std::string tool_call_end_marker;
std::string reasoning_start_marker;
std::string reasoning_end_marker;
std::string content_start_marker;
std::string content_end_marker;
// Default JSON key names for the tool name and its arguments
std::string tool_name_field = "name";
std::string tool_args_field = "arguments";
// No default key for the tool call id (empty unless set)
std::string tool_id_field;
// For markdown code block format (Cohere Command-R Plus)
std::string code_block_marker; // e.g., "Action:"
std::string code_block_language; // e.g., "json"
// Flag: template renders null content as "None" string, requires empty string instead
bool requires_nonnull_content = false;
};
// Internal enum for format classification
// Unscoped enum: the FORMAT_* enumerators are visible in the enclosing scope
// without qualification and take consecutive values starting at 0.
enum internal_tool_format {
FORMAT_JSON_NATIVE,
FORMAT_XML_CONSTRUCTED,
FORMAT_BRACKET_TAG, // [TOOL_CALLS]name[CALL_ID]id[ARGS]{...} (Mistral Small 3.2)
FORMAT_RECIPIENT_BASED, // >>>recipient\n{content} (Functionary v3.2)
FORMAT_MARKDOWN_CODE_BLOCK, // Action:\n```json\n[...]\n``` (Cohere Command-R Plus)
FORMAT_CONTENT_ONLY,
FORMAT_UNKNOWN
};
// Find the suffix that differentiates an extended string from a base string
std::string find_string_difference(const std::string & base, const std::string & extended);
// Extract JSON field name from an opener string
std::string extract_json_field_name(const std::string & opener,
const std::string & default_name,
const std::vector<std::string> & candidates);
// Find a closing pattern in a string starting from a given position
std::string find_closing_pattern(const std::string & diff, size_t func_pos);
// Find the tool call start marker in a difference string
std::string find_tool_call_start(const std::string & diff);
// Find the tool call end marker in a difference string
std::string find_tool_call_end(const std::string & diff, size_t func_pos);
// Infer the tool call opener from multiple difference strings
std::string infer_tool_call_opener(const std::string & diff1, const std::string & diff2, const std::string & diff3);
// Infer the tool call closer from multiple difference strings
std::string infer_tool_call_closer(const std::string & diff1, const std::string & diff2, const std::string & diff3);
// Extract patterns from differences between tool calls
internal_discovered_pattern extract_patterns_from_differences(const std::string & tool1_diff,
const std::string & tool2_diff,
const std::string & tool3_diff,
const std::string & tool1_full = "");
// Determine the format classification from discovered patterns
internal_tool_format determine_format_from_patterns(const internal_discovered_pattern & patterns);
// Analyze template using differential analysis (internal use)
internal_discovered_pattern analyze_by_differential(const common_chat_template & tmpl);
// Segmentize text into markers and non-marker fragments
std::vector<segment> segmentize_markers(const std::string & text);

View File

@ -1,183 +1,54 @@
#pragma once
#include "chat-diff-analyzer.h"
#include "chat.h"
#include "chat-peg-parser.h"
#include "common.h"
#include "jinja/runtime.h"
#include <chrono>
#include <string>
#include <vector>
using json = nlohmann::ordered_json;
// Phase 1 result: Content and reasoning structure (analyzed without tools)
struct content_structure {
// A default-constructed instance describes a template with no reasoning
// markers (REASONING_NONE) and unwrapped content (CONTENT_PLAIN); all four
// marker strings start empty.
// Reasoning handling mode
enum reasoning_mode_type {
REASONING_NONE, // No reasoning markers detected
REASONING_OPTIONAL, // <think>...</think> may appear before content
REASONING_FORCED_OPEN, // Template ends with open reasoning tag (thinking_forced_open)
};
reasoning_mode_type reasoning_mode = REASONING_NONE;
std::string reasoning_start; // e.g., "<think>", "<|START_THINKING|>"
std::string reasoning_end; // e.g., "</think>", "<|END_THINKING|>"
// Content wrapping mode
enum content_mode_type {
CONTENT_PLAIN, // No content markers
CONTENT_ALWAYS_WRAPPED, // <response>...</response> always present
CONTENT_WRAPPED_WITH_REASONING, // Content wrapped only when reasoning present
};
content_mode_type content_mode = CONTENT_PLAIN;
std::string content_start; // e.g., "<response>", "<|START_RESPONSE|>"
std::string content_end; // e.g., "</response>", "<|END_RESPONSE|>"
};
// Phase 2 result: Tool call structure (layered on Phase 1)
struct tool_call_structure {
// Defaults: tools unsupported, FUNC_JSON_OBJECT function format, ARGS_JSON
// argument format, "name"/"arguments" as JSON field names, and every marker
// string empty.
bool supports_tools = false;
// Container markers (what wraps all tool calls)
std::string tool_section_start; // e.g., "<tool_call>", "[TOOL_CALLS]", "<TOOLCALL>", ""
std::string tool_section_end; // e.g., "</tool_call>", "]", "</TOOLCALL>", ""
// Function format (how individual functions are structured)
enum function_format {
FUNC_JSON_OBJECT, // {"name": "X", "arguments": {...}}
FUNC_TAG_WITH_NAME, // <function=X>{...}</function>
FUNC_TAG_NAME_ONLY, // <X>...</X> where X is function name (rare)
FUNC_PREFIXED_INDEXED, // <|tool_call_begin|>functions.X:0<|tool_call_argument_begin|>{...}<|tool_call_end|>
FUNC_NAME_AS_KEY, // [{"function_name": {...arguments...}}] (Apertus-style)
FUNC_BRACKET_TAG, // [TOOL_CALLS]X[CALL_ID]id[ARGS]{...} (Mistral Small 3.2 style)
FUNC_RECIPIENT_BASED, // >>>recipient\n{content} where recipient is "all" (content) or function name (tools)
FUNC_MARKDOWN_CODE_BLOCK, // Action:\n```json\n[...]\n``` (Cohere Command-R Plus style)
};
// NOTE: the data member deliberately shares its name with the enum type above
function_format function_format = FUNC_JSON_OBJECT;
// For FUNC_JSON_OBJECT format - field names (may vary between templates)
std::string name_field = "name"; // Could be "tool_name", "function"
std::string args_field = "arguments"; // Could be "parameters", "params", "input"
std::string id_field; // Optional: "id", "tool_call_id", ""
// For FUNC_TAG_WITH_NAME format
std::string function_prefix; // e.g., "<function="
std::string function_suffix; // e.g., ">"
std::string function_close; // e.g., "</function>"
// For FUNC_PREFIXED_INDEXED format (e.g., Kimi-K2)
std::string per_call_start; // e.g., "<|tool_call_begin|>"
std::string function_namespace; // e.g., "functions." (prefix before function name)
std::string args_marker; // e.g., "<|tool_call_argument_begin|>"
std::string per_call_end; // e.g., "<|tool_call_end|>"
// For FUNC_BRACKET_TAG format (e.g., Mistral Small 3.2)
std::string id_marker; // e.g., "[CALL_ID]" - marker before tool call ID
// For FUNC_MARKDOWN_CODE_BLOCK format (e.g., Cohere Command-R Plus)
std::string code_block_marker; // e.g., "Action:" - text marker before code block
std::string code_block_language; // e.g., "json" - language identifier in code fence
// Argument format (how arguments are structured within a function)
enum argument_format {
ARGS_JSON, // Standard JSON object: {"key": "value", ...}
ARGS_TAGGED, // XML-style: <param=key>value</param>
ARGS_KEY_VALUE_TAGS, // <arg_key>key</arg_key><arg_value>value</arg_value> (GLM-4.6)
};
// NOTE: as with function_format, the member name matches the enum type name
argument_format argument_format = ARGS_JSON;
// For ARGS_TAGGED format
std::string arg_prefix; // e.g., "<param=", "<parameter="
std::string arg_suffix; // e.g., ">"
std::string arg_close; // e.g., "</param>", "</parameter>"
std::string arg_separator; // e.g., "", "\n"
// Flag: template renders null content as "None" string, requires empty string instead
bool requires_nonnull_content = false;
};
// Combined result of unified template analysis
struct template_analysis_result {
// Phase 1 result (content and reasoning structure)
content_structure content;
// Phase 2 result (tool call structure)
tool_call_structure tools;
// Preserved tokens for tokenizer (union of all markers)
std::vector<std::string> preserved_tokens;
};
// Template analyzer that uses two-phase differential analysis
class template_analyzer {
// Every member declared here is a static function; the class has no data members.
public:
// Main entry point: Unified two-phase analysis
static template_analysis_result analyze_template(const common_chat_template & tmpl);
// Phase 1 - Analyze content and reasoning structure (no tools)
static content_structure analyze_content_structure(const common_chat_template & tmpl);
// Phase 2 - Analyze tool call structure (layered on Phase 1)
// Takes the Phase 1 result as its second argument.
static tool_call_structure analyze_tool_structure(const common_chat_template & tmpl,
const content_structure & content);
private:
// Phase 1 detection helpers
// The two void helpers write their findings into the content_structure passed by reference.
static void detect_reasoning_markers(const common_chat_template & tmpl, content_structure & cs);
static void detect_content_markers(const common_chat_template & tmpl, content_structure & cs);
static content_structure::reasoning_mode_type detect_reasoning_mode(const content_structure & cs,
const std::string & prompt);
// Phase 2 detection helpers
// Each takes the tool_call_structure by non-const reference as an in/out parameter.
static void detect_tool_markers(const common_chat_template & tmpl, tool_call_structure & ts);
static void detect_function_format(const common_chat_template & tmpl, tool_call_structure & ts);
static void detect_argument_format(const common_chat_template & tmpl, tool_call_structure & ts);
// Phase 2 helper methods
// One helper per tool format; each receives the discovered pattern read-only
// and the tool_call_structure by non-const reference.
static void analyze_json_format(tool_call_structure & ts, const struct internal_discovered_pattern & discovered);
static void analyze_xml_format(tool_call_structure & ts, const struct internal_discovered_pattern & discovered);
static void analyze_bracket_tag_format(tool_call_structure & ts,
const struct internal_discovered_pattern & discovered);
static void analyze_recipient_based_format(tool_call_structure & ts,
const struct internal_discovered_pattern & discovered);
static void analyze_markdown_code_block_format(tool_call_structure & ts,
const struct internal_discovered_pattern & discovered);
// Helper to collect preserved tokens from analysis result
static void collect_preserved_tokens(template_analysis_result & result);
};
// Parameter bundle handed to apply_template() and to
// universal_peg_generator::generate_parser() / build_parser().
// Every member is declared exactly once; members without an initializer are
// default-constructed.
struct templates_params {
    json                                  messages;
    json                                  tools;
    common_chat_tool_choice               tool_choice           = COMMON_CHAT_TOOL_CHOICE_AUTO;
    json                                  json_schema;
    bool                                  parallel_tool_calls   = true;
    common_reasoning_format               reasoning_format      = COMMON_REASONING_FORMAT_AUTO;
    bool                                  stream                = true;
    std::string                           grammar;
    bool                                  add_generation_prompt = false;
    bool                                  enable_thinking       = true;
    // Captured once, at construction time of this object
    std::chrono::system_clock::time_point now                   = std::chrono::system_clock::now();
    json                                  extra_context;
    bool                                  add_bos               = false;
    bool                                  add_eos               = false;
    bool                                  is_inference          = true;
    bool                                  add_inference         = false;
    bool                                  mark_input            = true;  // whether to mark input strings in the jinja context
};
class universal_peg_generator {
// All members are static. The overload sets below accept either a
// template_analysis_result or a diff_analysis_result as the analysis input.
public:
// Generate parser from analysis result
static common_chat_params generate_parser(const template_analysis_result & analysis,
const common_chat_template & tmpl,
const struct templates_params & inputs);
// Overload that takes only the template and the inputs (no analysis argument)
static common_chat_params generate_parser(const common_chat_template & tmpl,
const struct templates_params & inputs);
// Overload that takes an already computed diff_analysis_result as last argument
static common_chat_params generate_parser(const common_chat_template & tmpl,
const struct templates_params & inputs,
const diff_analysis_result & analysis);
private:
// Build unified parser (single code path for all formats)
static common_peg_arena build_parser(const template_analysis_result & analysis,
const common_chat_template & tmpl,
const struct templates_params & inputs,
bool thinking_forced_open);
// diff_analysis_result variant; thinking_forced_closed defaults to false
static common_peg_arena build_parser(const diff_analysis_result & analysis,
const struct templates_params & inputs,
bool thinking_forced_open,
bool thinking_forced_closed = false);
// Build tool calling parser based on detected format
// `reasoning` is the already built reasoning parser, passed in read-only
static common_peg_parser build_tool_parser(common_chat_peg_unified_builder & p,
const diff_analysis_result & analysis,
const templates_params & inputs,
const common_peg_parser & reasoning);
};

File diff suppressed because it is too large Load Diff

347
common/chat-diff-analyzer.h Normal file
View File

@ -0,0 +1,347 @@
#pragma once
#include "chat.h"
#include "nlohmann/json.hpp"
#include <functional>
#include <optional>
#include <string>
#include <utility>
#include <vector>
using json = nlohmann::ordered_json;
// ============================================================================
// Parameters for template application
// ============================================================================
// Consumed by differential_analyzer::apply_template() and compare_variants().
struct template_params {
json messages;
json tools;
bool add_generation_prompt = false;
bool enable_thinking = true;
// Empty (std::nullopt) unless the caller supplies extra context
std::optional<json> extra_context = std::nullopt;
};
struct diff_split {
std::string prefix;
std::string suffix;
std::string left;
std::string right;
bool operator==(struct diff_split & other) const {
return prefix == other.prefix && suffix == other.suffix && left == other.left && right == other.right;
}
};
// Result of compare_variants containing diff and original outputs
// Returned (wrapped in std::optional) by differential_analyzer::compare_variants().
struct compare_variants_result {
// Diff between the two outputs below
diff_split diff;
// The two original outputs that were compared
std::string output_A;
std::string output_B;
};
// ============================================================================
// Marker Registry: All markers extracted via differential analysis
// ============================================================================
// Markers extracted from differential analysis of template outputs
// Each marker is derived from a specific comparison in the analysis matrix
struct marker_registry {
// Every member is a std::string that starts empty. The tool parser builder
// tests markers with .empty() (e.g. func_close, per_call_start,
// tool_section_start) to decide whether to emit them.
// === Reasoning markers (from Phase 1: R1-R3) ===
std::string reasoning_start; // e.g., "<think>", "[THINK]", "<|START_THINKING|>", ""
std::string reasoning_end; // e.g., "</think>", "[BEGIN FINAL RESPONSE]", "<|END_THINKING|>"
// === Content markers (from Phase 2: C1-C2) ===
std::string content_start; // e.g., "<response>", ">>>all\n", ""
std::string content_end; // e.g., "</response>", ""
// === Tool section markers (from Phase 3: T1-T2) ===
std::string tool_section_start; // e.g., "<tool_call>", "[TOOL_CALLS]", ""
std::string tool_section_end; // e.g., "</tool_call>", ""
std::string per_call_start; // e.g., "<|tool_call_begin|>", "" (for multi-call templates)
std::string per_call_end; // e.g., "<|tool_call_end|>", ""
std::string call_separator; // e.g., ",", "\n", "" (between multiple calls)
// === Function markers (from Phase 3: T3-T5) ===
std::string func_name_prefix; // e.g., "<function=", "\"name\": \"", "functions."
std::string func_name_suffix; // e.g., ">", "\"", ":0"
std::string func_close; // e.g., "</function>", "" (for tag-based)
std::string args_start; // e.g., "{", "<|tool_call_argument_begin|>"
std::string args_end; // e.g., "}", ""
// === Argument markers (from Phase 4: A1-A3, for tagged args format) ===
std::string arg_name_prefix; // e.g., "<param=", "<arg_key>", "\""
std::string arg_name_suffix; // e.g., ">", "</arg_key>", "\":"
std::string arg_value_prefix; // e.g., "", "<arg_value>", ""
std::string arg_value_suffix; // e.g., "</param>", "</arg_value>", ""
std::string arg_separator; // e.g., "", "\n", ","
// === Call ID markers (for non-JSON formats with tool call IDs) ===
std::string call_id_prefix; // e.g., "[CALL_ID]" (marker before call ID value)
std::string call_id_suffix; // e.g., "" (marker after call ID value, before next section)
// === Special markers ===
std::string code_block_marker; // e.g., "Action:" (for markdown code block format)
std::string code_block_language; // e.g., "json"
std::string function_namespace; // e.g., "functions." (for prefixed-indexed format)
};
// ============================================================================
// Analysis Result Enums
// ============================================================================
// How a template exposes model reasoning (derived from the R1-R3 comparisons)
enum class reasoning_mode {
    NONE,           // No reasoning markers detected
    TAG_BASED,      // Standard tag-based: <think>...</think>
    DELIMITER,      // Delimiter-based: [BEGIN FINAL RESPONSE] (reasoning ends at delimiter)
    FORCED_OPEN,    // Template ends with open reasoning tag (empty start, non-empty end)
    FORCED_CLOSED,  // Template ends with open reasoning tag on enabled thinking but
                    // with both opened and closed tag for disabled thinking
    TOOLS_ONLY      // Only reason on tool calls, not on normal content
};

// Writes the enumerator's name to `os`; any value that is not a named
// enumerator is written as "UNKNOWN". Returns `os` to allow chaining.
inline std::ostream & operator<<(std::ostream & os, const reasoning_mode & mode) {
    struct name_entry {
        reasoning_mode value;
        const char *   text;
    };
    static constexpr name_entry known[] = {
        { reasoning_mode::NONE,          "NONE"          },
        { reasoning_mode::TAG_BASED,     "TAG_BASED"     },
        { reasoning_mode::DELIMITER,     "DELIMITER"     },
        { reasoning_mode::FORCED_OPEN,   "FORCED_OPEN"   },
        { reasoning_mode::FORCED_CLOSED, "FORCED_CLOSED" },
        { reasoning_mode::TOOLS_ONLY,    "TOOLS_ONLY"    },
    };
    for (const auto & entry : known) {
        if (entry.value == mode) {
            return os << entry.text;
        }
    }
    return os << "UNKNOWN";
}
// How regular content is wrapped in the output (derived from the C1 comparison)
enum class content_mode {
    PLAIN,                   // No content markers
    ALWAYS_WRAPPED,          // Content always wrapped with markers
    WRAPPED_WITH_REASONING,  // Content wrapped only when reasoning present
};

// Writes the enumerator's name to `os`; any value that is not a named
// enumerator is written as "UNKNOWN". Returns `os` to allow chaining.
inline std::ostream & operator<<(std::ostream & os, const content_mode & mode) {
    struct name_entry {
        content_mode value;
        const char * text;
    };
    static constexpr name_entry known[] = {
        { content_mode::PLAIN,                  "PLAIN"                  },
        { content_mode::ALWAYS_WRAPPED,         "ALWAYS_WRAPPED"         },
        { content_mode::WRAPPED_WITH_REASONING, "WRAPPED_WITH_REASONING" },
    };
    for (const auto & entry : known) {
        if (entry.value == mode) {
            return os << entry.text;
        }
    }
    return os << "UNKNOWN";
}
// Where the call ID sits inside a tool call (for non-JSON formats)
enum class call_id_position {
    NONE,                   // No call ID support detected
    PRE_FUNC_NAME,          // Call ID before function name: [CALL_ID]id[FUNC]name{args}
    BETWEEN_FUNC_AND_ARGS,  // Call ID between function and args: [FUNC]name[CALL_ID]id{args}
    POST_ARGS,              // Call ID after arguments: [FUNC]name{args}[CALL_ID]id
};

// Writes the enumerator's name to `os`; any value that is not a named
// enumerator is written as "UNKNOWN". Returns `os` to allow chaining.
inline std::ostream & operator<<(std::ostream & os, const call_id_position & pos) {
    struct name_entry {
        call_id_position value;
        const char *     text;
    };
    static constexpr name_entry known[] = {
        { call_id_position::NONE,                  "NONE"                  },
        { call_id_position::PRE_FUNC_NAME,         "PRE_FUNC_NAME"         },
        { call_id_position::BETWEEN_FUNC_AND_ARGS, "BETWEEN_FUNC_AND_ARGS" },
        { call_id_position::POST_ARGS,             "POST_ARGS"             },
    };
    for (const auto & entry : known) {
        if (entry.value == pos) {
            return os << entry.text;
        }
    }
    return os << "UNKNOWN";
}
// Classification of the tool call syntax (derived from the T1-T5 and A1-A3 comparisons)
enum class tool_format {
    NONE,              // No tool support detected
    JSON_NATIVE,       // Pure JSON: {"name": "X", "arguments": {...}}
    TAG_WITH_JSON,     // Tag-based with JSON args: <function=X>{...}</function>
    BRACKET_TAG,       // Bracket-tag: [TOOL_CALLS]name[CALL_ID]id[ARGS]{...}
    PREFIXED_INDEXED,  // Prefixed-indexed: functions.X:0{...}
    RECIPIENT_BASED,   // Recipient routing: >>>func_name\n{...}
    TAG_WITH_TAGGED,   // Tag-based with tagged args: <param=key>value</param>
    MARKDOWN_BLOCK,    // Markdown code block: Action:\n```json\n[...]\n```
};

// Writes the enumerator's name to `os`; any value that is not a named
// enumerator is written as "UNKNOWN". Returns `os` to allow chaining.
inline std::ostream & operator<<(std::ostream & os, const tool_format & format) {
    struct name_entry {
        tool_format  value;
        const char * text;
    };
    static constexpr name_entry known[] = {
        { tool_format::NONE,             "NONE"             },
        { tool_format::JSON_NATIVE,      "JSON_NATIVE"      },
        { tool_format::TAG_WITH_JSON,    "TAG_WITH_JSON"    },
        { tool_format::BRACKET_TAG,      "BRACKET_TAG"      },
        { tool_format::PREFIXED_INDEXED, "PREFIXED_INDEXED" },
        { tool_format::RECIPIENT_BASED,  "RECIPIENT_BASED"  },
        { tool_format::TAG_WITH_TAGGED,  "TAG_WITH_TAGGED"  },
        { tool_format::MARKDOWN_BLOCK,   "MARKDOWN_BLOCK"   },
    };
    for (const auto & entry : known) {
        if (entry.value == format) {
            return os << entry.text;
        }
    }
    return os << "UNKNOWN";
}
// Complete result of differential analysis
struct diff_analysis_result {
// A default-constructed result reports no reasoning, plain content and no tool support.
// Classification results
reasoning_mode reasoning = reasoning_mode::NONE;
content_mode content = content_mode::PLAIN;
tool_format tools = tool_format::NONE;
// All extracted markers
marker_registry markers;
// JSON field names (for JSON-based formats)
bool fun_name_is_key = false;
std::string function_field = "function";
std::string name_field = "name";
std::string args_field = "arguments";
// No default names for the id fields (empty unless set)
std::string id_field;
std::string gen_id_field;
std::vector<std::string> parameter_order;
// Call ID position (for non-JSON formats)
call_id_position call_id_pos = call_id_position::NONE;
// Flags
bool supports_tools = false;
bool supports_parallel_calls = false;
bool requires_nonnull_content = false;
bool tools_array_wrapped = false; // Tool calls wrapped in JSON array [...]
// Preserved tokens for tokenizer (union of all non-empty markers)
std::vector<std::string> preserved_tokens;
};
// Performs systematic differential analysis on chat templates
// Uses comparison matrix to extract markers without heuristics
class differential_analyzer {
// Every member is a static function; the class declares no data members.
// Apart from analyze() and compare_variants(), the functions return void and
// receive the diff_analysis_result by non-const reference.
public:
// Main entry point: Run full differential analysis on a template
static diff_analysis_result analyze(const common_chat_template & tmpl);
// Phase-specific analysis (can be called individually for testing)
static void analyze_reasoning(const common_chat_template & tmpl, diff_analysis_result & result);
static void analyze_content(const common_chat_template & tmpl, diff_analysis_result & result);
static void analyze_tools(const common_chat_template & tmpl, diff_analysis_result & result);
static void analyze_arguments(const common_chat_template & tmpl, diff_analysis_result & result);
// Factorized differential comparison function (public for testing)
// Takes base params and a single modifier lambda to create variant B
// Returns compare_variants_result containing diff and both outputs, or std::nullopt on failure
static std::optional<compare_variants_result> compare_variants(
const common_chat_template & tmpl,
const template_params & params_A,
const std::function<void(template_params &)> & params_modifier);
private:
// Comparison helpers (implement the comparison matrix from the plan)
// R1: Extract reasoning markers by comparing with/without reasoning_content
static void compare_reasoning_presence(const common_chat_template & tmpl, diff_analysis_result & result);
// R2: Detect forced-open reasoning by comparing enable_thinking=false vs true
static void compare_thinking_enabled(const common_chat_template & tmpl, diff_analysis_result & result);
// R3: Detect reasoning scope (content-only vs with tools)
static void compare_reasoning_scope(const common_chat_template & tmpl, diff_analysis_result & result);
// C1: Extract content markers by comparing different content values
static void compare_content_values(const common_chat_template & tmpl, diff_analysis_result & result);
// T1: Analyze the tool calls
static void analyze_tool_calls(const common_chat_template & tmpl, diff_analysis_result & result);
// Analyzes a tool call section to determine the format used (pure JSON, function name markers, or full markers)
static void analyze_tool_call_format(const std::string & haystack,
const std::string & fun_name_needle,
const std::string & arg_name_needle,
diff_analysis_result & result);
// Helper functions to handle the two branches of analyze_tool_call_format
static void analyze_tool_call_format_json_native(const std::string & clean_haystack,
const std::string & fun_name_needle,
const std::string & arg_name_needle,
diff_analysis_result & result);
// The non-JSON branch takes no argument-name needle
static void analyze_tool_call_format_non_json(const std::string & clean_haystack,
const std::string & fun_name_needle,
diff_analysis_result & result);
// T2: Check if markers are per call or per section
static void check_per_call_markers(const common_chat_template & tmpl, diff_analysis_result & result);
// T3: Extract call separator; also outputs second_call_content for per-call detection
static void extract_call_separator(const common_chat_template & tmpl, diff_analysis_result & result,
std::string & second_call_content);
// T4: Analyze function name format and extract markers
static void extract_function_markers(const common_chat_template & tmpl,
diff_analysis_result & result);
// T5: Extract argument separator
static void extract_argument_separator(const common_chat_template & tmpl, diff_analysis_result & result);
// T6: Extract args container markers
static void extract_args_markers(const common_chat_template & tmpl, diff_analysis_result & result);
// A1: Extract argument name markers
static void extract_argument_name_markers(const common_chat_template & tmpl, diff_analysis_result & result);
// A2: Extract argument value markers
static void extract_argument_value_markers(const common_chat_template & tmpl, diff_analysis_result & result);
// T7: Extract call ID markers (for non-JSON formats)
static void extract_call_id_markers(const common_chat_template & tmpl, diff_analysis_result & result);
// Classify tool format based on extracted markers
static void classify_tool_format(diff_analysis_result & result);
// Classification helpers
static void collect_preserved_tokens(diff_analysis_result & result);
// Utility: Apply template with given parameters
// Returns the rendered output as a std::string
static std::string apply_template(const common_chat_template & tmpl,
const template_params & params);
};
// Kind of a segment: plain text or a marker.
// Unscoped on purpose, so TEXT and MARKER are usable without qualification.
enum segment_type {
    TEXT,
    MARKER
};

// Writes "TEXT" or "MARKER" to `os`; any other value is written as "UNKNOWN".
// Returns `os` to allow chaining.
inline std::ostream & operator<<(std::ostream & os, const segment_type & type) {
    const char * label = "UNKNOWN";
    if (type == segment_type::TEXT) {
        label = "TEXT";
    } else if (type == segment_type::MARKER) {
        label = "MARKER";
    }
    return os << label;
}
// One element of the sequence returned by segmentize_markers(): a type tag plus its text.
struct segment {
segment_type type;
std::string value;
// `value` is taken by value and moved into the member, so callers can pass
// either an lvalue (copied once) or an rvalue (moved).
segment(segment_type type, std::string value) : type(type), value(std::move(value)) {}
};

View File

@ -148,585 +148,6 @@ common_peg_parser common_chat_peg_builder::tag_with_safe_content(const std::stri
return zero_or_more(choice({ p, content_chunk }));
}
// Builds the parser fragment that consumes a reasoning ("thinking") block.
//
//   cs                   - detected content structure; supplies the reasoning start/end markers
//   reasoning_format     - requested reasoning format; NONE disables reasoning parsing
//   thinking_forced_open - true when the start marker is implicit, so parsing begins
//                          directly inside the reasoning text
//
// Returns eps() when reasoning is disabled or the required markers are missing,
// a mandatory "reasoning" rule when thinking is forced open, and an optional
// "reasoning" rule otherwise.
common_peg_parser common_chat_peg_unified_builder::build_reasoning_block(const content_structure & cs,
                                                                         common_reasoning_format   reasoning_format,
                                                                         bool thinking_forced_open) {
    // Reasoning parsing explicitly switched off
    if (reasoning_format == COMMON_REASONING_FORMAT_NONE) {
        return eps();
    }

    std::string open_marker  = cs.reasoning_start;
    std::string close_marker = cs.reasoning_end;

    // DeepSeek formats fall back to the standard <think> tags for any marker
    // that was not detected
    const bool deepseek_style = reasoning_format == COMMON_REASONING_FORMAT_DEEPSEEK ||
                                reasoning_format == COMMON_REASONING_FORMAT_DEEPSEEK_LEGACY;
    if (deepseek_style) {
        if (open_marker.empty()) {
            open_marker = "<think>";
        }
        if (close_marker.empty()) {
            close_marker = "</think>";
        }
    }

    // An end marker is always required; a start marker is required unless the
    // reasoning block is already open (implicit start)
    const bool start_missing = open_marker.empty() && !thinking_forced_open;
    if (start_missing || close_marker.empty()) {
        return eps();
    }

    if (thinking_forced_open) {
        // Mandatory reasoning: everything up to the end marker, then the marker itself
        auto forced = reasoning(until(close_marker)) + literal(close_marker);
        return rule("reasoning", reasoning_block(forced));
    }

    // Optional reasoning delimited by the detected (or fallback) markers
    auto tagged = reasoning_block(literal(open_marker) + reasoning(until(close_marker)) + literal(close_marker));

    const bool uses_think_tags = open_marker == "<think>" && close_marker == "</think>";
    if (!uses_think_tags) {
        return optional(rule("reasoning", tagged));
    }

    // With the standard <think> tags, also accept <|START_THINKING|> style markers
    auto alternative = reasoning_block(literal("<|START_THINKING|>") + reasoning(until("<|END_THINKING|>")) +
                                       literal("<|END_THINKING|>"));
    return optional(rule("reasoning", choice({ tagged, alternative })));
}
// Builds the parser for the content part of a response.
//
// Three layouts are handled, in this order:
//   1. non-plain content mode with both a start and an end marker,
//   2. non-plain content mode with only a start marker,
//   3. everything else, where the DeepSeek formats may fall back to <response> tags.
// Every layout keeps a plain "rest of input" alternative as its last choice.
common_peg_parser common_chat_peg_unified_builder::build_content_block(const content_structure & cs,
                                                                       common_reasoning_format   reasoning_format,
                                                                       const std::string &       tool_section_start) {
    GGML_UNUSED(tool_section_start);  // leaving for now just in case

    // With reasoning_format == NONE, text in front of the start marker (possibly
    // reasoning markers emitted by the model) is kept as content instead of being rejected.
    const bool keep_preamble = reasoning_format == COMMON_REASONING_FORMAT_NONE;
    const bool marked_mode   = cs.content_mode != content_structure::CONTENT_PLAIN;
    const bool has_start     = !cs.content_start.empty();
    const bool has_end       = !cs.content_end.empty();

    if (marked_mode && has_start && has_end) {
        // Content wrapped in start/end markers.
        auto wrapped = keep_preamble ?
                           content(until(cs.content_start)) + literal(cs.content_start) +
                               content(until(cs.content_end)) + literal(cs.content_end) :
                           literal(cs.content_start) + content(until(cs.content_end)) + literal(cs.content_end);
        // Start marker may already be part of the prompt: accept end marker only.
        auto end_only = content(until(cs.content_end)) + literal(cs.content_end);
        auto plain    = content(rest());
        return choice({ wrapped, end_only, plain });
    }

    if (marked_mode && has_start) {
        // Start marker without an end marker (e.g. recipient-based formats such as
        // Functionary v3.2): the content runs to the end of the input.
        auto prefixed = keep_preamble ?
                            content(until(cs.content_start)) + literal(cs.content_start) + content(rest()) :
                            literal(cs.content_start) + content(rest());
        auto plain = content(rest());
        return choice({ prefixed, plain });
    }

    // Remaining case: use whatever markers the structure carries; for the DeepSeek
    // formats substitute <response>...</response> when either marker is missing.
    std::string open_marker  = cs.content_start;
    std::string close_marker = cs.content_end;
    const bool  deepseek_style = reasoning_format == COMMON_REASONING_FORMAT_DEEPSEEK ||
                                 reasoning_format == COMMON_REASONING_FORMAT_DEEPSEEK_LEGACY;
    if (deepseek_style && (open_marker.empty() || close_marker.empty())) {
        open_marker  = "<response>";
        close_marker = "</response>";
    }

    if (!open_marker.empty() && !close_marker.empty()) {
        auto wrapped = literal(open_marker) + content(until(close_marker)) + literal(close_marker);
        auto plain   = content(rest());
        return choice({ wrapped, plain });
    }

    // Plain content: capture the rest of the input.
    return content(rest());
}
// Builds the parser for the tool-call portion of a response.
//
// Every tool definition that has a "function" entry becomes one named alternative
// (rule "tool-<name>") produced by build_function(). How the alternatives are wrapped
// depends on ts.function_format and on which section markers are set. Returns eps() when
// the structure does not support tools or no tools were supplied; the resulting section
// is wrapped in optional() unless force_tool_calls is set.
common_peg_parser common_chat_peg_unified_builder::build_tool_section(const tool_call_structure & ts,
                                                                      const nlohmann::json &      tools,
                                                                      bool                        parallel_tool_calls,
                                                                      bool                        force_tool_calls) {
    if (!ts.supports_tools || !tools.is_array() || tools.empty()) {
        return eps();
    }

    // One alternative per declared function.
    auto any_tool = choice();
    for (const auto & entry : tools) {
        if (!entry.contains("function")) {
            continue;
        }
        const auto &   fn        = entry.at("function");
        std::string    fn_name   = fn.at("name");
        nlohmann::json fn_params = fn.contains("parameters") ? fn.at("parameters") : nlohmann::json::object();
        any_tool |= rule("tool-" + fn_name, build_function(ts, fn_name, fn_params));
    }

    // JSON array of calls: "[" call ("," call)* "]" when parallel calls are allowed,
    // otherwise "[" call? "]".
    auto bracketed_calls = [&]() -> common_peg_parser {
        auto arr = literal("[") + space();
        if (parallel_tool_calls) {
            arr = arr + any_tool;
            arr = arr + zero_or_more(space() + literal(",") + space() + any_tool);
        } else {
            arr = arr + optional(any_tool);
        }
        return arr + space() + literal("]");
    };

    // Picks the wrapping that matches the detected layout.
    auto assemble = [&]() -> common_peg_parser {
        const auto format = ts.function_format;

        // Markdown code block (Cohere Command-R Plus):
        //   Action:\n```json\n[{...}]\n```
        if (format == tool_call_structure::FUNC_MARKDOWN_CODE_BLOCK) {
            // Opening fence, followed by the language tag if one is set.
            const std::string fence = "```" + ts.code_block_language;
            auto opening = literal(ts.code_block_marker) + literal("\n") + literal(fence) + literal("\n");
            auto closing = literal("\n") + literal(ts.tool_section_end);  // "\n```"
            // The array itself carries no trigger rule because it is nested inside the sequence.
            auto calls = bracketed_calls();
            return trigger_rule("tool-call", opening + calls + closing);
        }

        // Recipient-based (Functionary v3.2): >>>function_name\n{arguments}
        // No array wrapper and no section markers; each call starts with the delimiter.
        if (format == tool_call_structure::FUNC_RECIPIENT_BASED) {
            auto call = trigger_rule("tool-call", any_tool);
            return parallel_tool_calls ? one_or_more(call + space()) : call;
        }

        const bool has_open  = !ts.tool_section_start.empty();
        const bool has_close = !ts.tool_section_end.empty();

        if (has_open && has_close) {
            if (format == tool_call_structure::FUNC_NAME_AS_KEY) {
                // Comma-separated objects, [{"func1": args}, {"func2": args}];
                // the brackets are part of the section markers.
                auto call  = trigger_rule("tool-call", any_tool);
                auto calls = call;
                if (parallel_tool_calls) {
                    calls = call + zero_or_more(space() + literal(",") + space() + call);
                }
                return literal(ts.tool_section_start) + space() + calls + space() + literal(ts.tool_section_end);
            }

            // Decide whether the section markers wrap ALL calls while each call has its own
            // marker (e.g. DeepSeek R1, Kimi-K2), or whether the markers wrap each call.
            // Prefixed-indexed and bracket-tag formats always have separate per-call markers.
            bool section_wraps_all = format == tool_call_structure::FUNC_PREFIXED_INDEXED ||
                                     format == tool_call_structure::FUNC_BRACKET_TAG;
            if (!section_wraps_all && format == tool_call_structure::FUNC_TAG_WITH_NAME &&
                !ts.function_prefix.empty()) {
                // A prefix such as "<tool▁call▁begin>function<tool▁sep>" (DeepSeek) carries its own
                // per-call marker, whereas "<function=" (Nemotron) does not. Detected by the words
                // "call" together with "begin" or "start" in the prefix.
                const auto & prefix   = ts.function_prefix;
                const bool   has_call = prefix.find("call") != std::string::npos;
                const bool   has_open_word =
                    prefix.find("begin") != std::string::npos || prefix.find("start") != std::string::npos;
                section_wraps_all = has_call && has_open_word;
            }

            if (section_wraps_all) {
                // <section_start> <call1> <call2> ... <section_end>
                auto call  = trigger_rule("tool-call", any_tool);
                auto calls = parallel_tool_calls ? one_or_more(call + space()) : call;
                return literal(ts.tool_section_start) + space() + calls + space() + literal(ts.tool_section_end);
            }

            // Each call has its own wrapper: <tool_call>tool</tool_call>
            auto wrapped_call = trigger_rule("tool-call", literal(ts.tool_section_start) + space() + any_tool +
                                                              space() + literal(ts.tool_section_end));
            return parallel_tool_calls ? one_or_more(wrapped_call + space()) : wrapped_call;
        }

        if (has_open) {
            // Start marker only, e.g. <|tool_call|>[...]: an array follows the marker.
            auto calls = bracketed_calls();
            return trigger_rule("tool-call", literal(ts.tool_section_start) + calls);
        }

        // No section markers at all (raw JSON, e.g. Llama 3.1); the trigger rule is what
        // identifies the tool calls in the grammar.
        return trigger_rule("tool-call", parallel_tool_calls ? one_or_more(any_tool + space()) : any_tool);
    };

    auto section = assemble();
    return force_tool_calls ? section : optional(section);
}
// Builds the parser for a single call to the function `name`, laid out according to
// ts.function_format. `schema` is forwarded to build_arguments() to obtain the argument
// parser. Returns eps() when ts.function_format matches none of the cases handled below.
common_peg_parser common_chat_peg_unified_builder::build_function(const tool_call_structure & ts,
const std::string & name,
const nlohmann::json & schema) {
// Built once up front; every case below embeds it via tool_args(args).
auto args = build_arguments(ts, schema);
switch (ts.function_format) {
case tool_call_structure::FUNC_JSON_OBJECT:
{
// Build JSON object parser that accepts id field in either position:
// - Before name: {"id": "...", "name": "X", "arguments": {...}} (R7B style)
// - After args: {"name": "X", "arguments": {...}, "id": "..."} (Mistral style)
auto tool_name_ = json_member(ts.name_field, "\"" + tool_name(literal(name)) + "\"");
auto tool_args_ = json_member(ts.args_field, tool_args(args));
// id can appear before name or after args
// Note: id_member is constructed even when ts.id_field is empty; in that case
// both id_before and id_after are eps() and id_member is not used in the grammar.
auto id_member = json_member(ts.id_field, tool_id(json_string()));
auto id_before = ts.id_field.empty() ? eps() : optional(id_member << space() << "," << space());
auto id_after = ts.id_field.empty() ? eps() : optional(space() << "," << space() << id_member);
// After the known members, zero or more further `, <json_string> : <json>` members
// are accepted before the closing brace.
return tool(tool_open(literal("{")) << space() << id_before // optional id before name (R7B style)
<< tool_name_ << space() << "," << space() << tool_args_
<< id_after // optional id after args (Mistral style)
<< zero_or_more(space() << "," << space() << json_string()
<< space() << ":" << space() << json())
<< space() << "}");
}
case tool_call_structure::FUNC_TAG_WITH_NAME:
{
// Build tag parser: <function=X>{...}</function>
// Combine prefix + name + suffix into tool_open to ensure the tool is only created
// when the FULL opening tag is confirmed. This prevents partial name matches during
// incremental parsing (e.g., matching "special_function" when input is "special_function_")
auto opening = literal(ts.function_prefix) + tool_name(literal(name)) + literal(ts.function_suffix);
// Note: No space() before tool_close because function_close may start with newline
// (e.g., "\n```<close_tag>") and space() would consume it, preventing the literal match
return tool(tool_open(opening) + space() + tool_args(args) + tool_close(literal(ts.function_close)));
}
case tool_call_structure::FUNC_TAG_NAME_ONLY:
{
// Build tag parser: <X>...</X>
// Combine < + name + > into tool_open to prevent partial matches
auto opening = literal("<") + tool_name(literal(name)) + literal(">");
return tool(tool_open(opening) + space() + tool_args(args) + space() +
tool_close(literal("</" + name + ">")));
}
case tool_call_structure::FUNC_PREFIXED_INDEXED:
{
// Build prefixed-indexed parser (e.g., Kimi-K2):
// <|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>{...}<|tool_call_end|>
// The index number after : is ignored (we use zero_or_more(digit) to skip it)
auto opening = literal(ts.per_call_start) + literal(ts.function_namespace) + tool_name(literal(name)) +
literal(":") + zero_or_more(chars("0-9", 1, 1)) + // Skip the index
literal(ts.args_marker);
return tool(tool_open(opening) + space() + tool_args(args) + space() +
tool_close(literal(ts.per_call_end)));
}
case tool_call_structure::FUNC_NAME_AS_KEY:
{
// Build name-as-key parser (e.g., Apertus):
// {"function_name": {...arguments...}}
// The function name IS the JSON key, and arguments are the value directly
auto opening = literal("{\"") + tool_name(literal(name)) + literal("\":");
// The closing brace is a plain literal here, not wrapped in tool_close.
return tool(tool_open(opening) + space() + tool_args(args) + space() + literal("}"));
}
case tool_call_structure::FUNC_BRACKET_TAG:
{
// Build bracket-tag parser (e.g., Mistral Small 3.2):
// [TOOL_CALLS]function_name[CALL_ID]call_id[ARGS]{...}
// per_call_start = "[TOOL_CALLS]"
// id_marker = "[CALL_ID]"
// args_marker = "[ARGS]"
auto opening = literal(ts.per_call_start) + tool_name(literal(name));
if (!ts.id_marker.empty()) {
// Add id_marker + id value (captured as tool_id)
// NOTE(review): the id is delimited by until(ts.args_marker); if id_marker is set
// while args_marker is empty, until() receives an empty delimiter - confirm that
// this combination cannot occur.
opening = opening + literal(ts.id_marker) + tool_id(until(ts.args_marker));
}
if (!ts.args_marker.empty()) {
opening = opening + literal(ts.args_marker);
}
// No explicit closer for this format (EOS terminates)
return tool(tool_open(opening) + space() + tool_args(args));
}
case tool_call_structure::FUNC_RECIPIENT_BASED:
{
// Build recipient-based parser (e.g., Functionary v3.2):
// >>>function_name
// {'param1': 'value1', 'param2': 'value2'}
// tool_section_start = ">>>"
// Function name directly follows ">>>" with newline, arguments are Python dict (parse as JSON)
auto opening = literal(ts.tool_section_start) + tool_name(literal(name));
// No explicit closer (newline + arguments, then EOS or next >>>)
return tool(tool_open(opening) + space() + tool_args(args));
}
case tool_call_structure::FUNC_MARKDOWN_CODE_BLOCK:
{
// Build markdown code block parser (e.g., Cohere Command-R Plus):
// Action:
// ```json
// [
// {
// "tool_name": "function_name",
// "parameters": {...}
// }
// ]
// ```
// The individual function is a JSON object within the array
auto tool_name_ = json_member(ts.name_field, "\"" + tool_name(literal(name)) + "\"");
auto tool_args_ = json_member(ts.args_field, tool_args(args));
// Build the JSON object: {"tool_name": "...", "parameters": {...}}
// Use same pattern as FUNC_JSON_OBJECT: tool_open with atomic wrapper
// Unlike FUNC_JSON_OBJECT, no id member is parsed in this layout.
return tool(tool_open(literal("{")) << space() << tool_name_ << space() << "," << space() << tool_args_
<< zero_or_more(space() << "," << space() << json_string()
<< space() << ":" << space() << json())
<< space() << "}");
}
}
// Reached only when ts.function_format matches none of the cases above.
return eps();
}
// Builds the parser for a tool call's arguments in the layout named by ts.argument_format.
//
//   ARGS_JSON            - a JSON value, validated against `params` when that is an object.
//   ARGS_TAGGED          - <param=key>value</param> style, one alternative per declared property.
//   ARGS_KEY_VALUE_TAGS  - <arg_key>key</arg_key> <arg_value>value</arg_value> (GLM-4.6 style).
//
// The tagged layouts return eps() when `params` declares no properties; any other
// argument_format value also yields eps().
common_peg_parser common_chat_peg_unified_builder::build_arguments(const tool_call_structure & ts,
                                                                   const nlohmann::json &      params) {
    const auto format = ts.argument_format;

    if (format == tool_call_structure::ARGS_JSON) {
        // Standard JSON object arguments.
        return params.is_object() ? schema(json(), "args", params) : json();
    }

    const bool tagged    = format == tool_call_structure::ARGS_TAGGED;
    const bool key_value = format == tool_call_structure::ARGS_KEY_VALUE_TAGS;
    if (!tagged && !key_value) {
        return eps();
    }

    if (!params.contains("properties") || params.at("properties").empty()) {
        return eps();
    }

    auto any_arg = choice();
    for (const auto & prop : params.at("properties").items()) {
        const std::string & key      = prop.key();
        const auto &        key_spec = prop.value();

        // Properties declared as strings use tool_arg_string_value so that a value
        // starting with "[" is not treated as a JSON array.
        const bool declared_string = key_spec.contains("type") && key_spec.at("type") == "string";

        if (tagged) {
            // The key may appear bare, double-quoted or single-quoted.
            auto key_parser =
                choice({ literal(key), literal("\"" + key + "\""), literal("'" + key + "'") });
            auto value_parser = declared_string ? tool_arg_string_value(until(ts.arg_close)) :
                                                  tool_arg_value(until(ts.arg_close));
            any_arg |= tool_arg(tool_arg_open(literal(ts.arg_prefix)) + tool_arg_name(key_parser) +
                                literal(ts.arg_suffix) + value_parser + tool_arg_close(literal(ts.arg_close)) +
                                (ts.arg_separator.empty() ? eps() : optional(literal(ts.arg_separator))));
        } else {
            // <arg_key>key</arg_key>\n<arg_value>value</arg_value>
            // with ts.arg_prefix = "<arg_key>", ts.arg_suffix = "</arg_key>", ts.arg_close = "</arg_value>"
            auto value_parser = declared_string ? tool_arg_string_value(until(ts.arg_close)) :
                                                  tool_arg_value(until(ts.arg_close));
            any_arg |= tool_arg(tool_arg_open(literal(ts.arg_prefix)) + tool_arg_name(literal(key)) +
                                literal(ts.arg_suffix) +  // </arg_key>
                                space() + literal("<arg_value>") + value_parser +
                                tool_arg_close(literal(ts.arg_close)));
        }
    }
    return zero_or_more(any_arg + space());
}
// Builds a tool-call section for the standard JSON layout:
//   <section_start> {"name": "X", "arguments": {...}} [, {...}]* <section_end>
//
// Returns eps() when `tools` is not a non-empty array. The comma-separated repetition is
// only accepted when parallel_tool_calls is set, and the whole section is optional unless
// force_tool_calls is set.
common_peg_parser common_chat_peg_unified_builder::standard_json_tools(const std::string &    section_start,
                                                                       const std::string &    section_end,
                                                                       const nlohmann::json & tools,
                                                                       bool                   parallel_tool_calls,
                                                                       bool                   force_tool_calls) {
    if (!tools.is_array() || tools.empty()) {
        return eps();
    }

    // One alternative per tool definition that carries a "function" entry.
    auto any_tool = choice();
    for (const auto & entry : tools) {
        if (!entry.contains("function")) {
            continue;
        }
        const auto &   fn        = entry.at("function");
        std::string    fn_name   = fn.at("name");
        nlohmann::json fn_params = fn.contains("parameters") ? fn.at("parameters") : nlohmann::json::object();

        // {"name": "X", "arguments": {...}} followed by any number of extra members.
        auto name_member = json_member("name", "\"" + tool_name(literal(fn_name)) + "\"");
        auto args_member =
            json_member("arguments", tool_args(schema(json(), "tool-" + fn_name + "-schema", fn_params)));
        auto call_object =
            tool(tool_open(literal("{")) << space() << name_member << space() << "," << space() << args_member
                                         << zero_or_more(space() << "," << space() << json_string() << space() << ":"
                                                                 << space() << json())
                                         << space() << "}");
        any_tool |= rule("tool-" + fn_name, call_object);
    }

    // Wrap the call(s) in the section markers.
    auto calls = any_tool;
    if (parallel_tool_calls) {
        calls = calls + zero_or_more(space() + literal(",") + space() + any_tool);
    }
    auto section =
        trigger_rule("tool-call", literal(section_start) + space() + calls + space() + literal(section_end));
    if (force_tool_calls) {
        return section;
    }
    return optional(section);
}
// Builds a tool-call section for the tagged ("constructed") layout:
//   <tool_call><function=name><param=key>value</param>...</function></tool_call>
//
// Each marker can be overridden through `markers`; keys that are absent use the defaults
// listed below. Returns eps() when `tools` is not a non-empty array. Several calls inside
// one section are only accepted when parallel_tool_calls is set, and the section is
// optional unless force_tool_calls is set.
common_peg_parser common_chat_peg_unified_builder::standard_constructed_tools(
    const std::map<std::string, std::string> & markers,
    const nlohmann::json &                     tools,
    bool                                       parallel_tool_calls,
    bool                                       force_tool_calls) {
    if (!tools.is_array() || tools.empty()) {
        return eps();
    }

    // Marker lookup with a built-in fallback.
    const auto marker_or = [&markers](const char * key, const char * fallback) -> std::string {
        const auto found = markers.find(key);
        return found == markers.end() ? std::string(fallback) : found->second;
    };

    const std::string section_open  = marker_or("tool_call_start_marker", "<tool_call>");
    const std::string section_close = marker_or("tool_call_end_marker", "</tool_call>");
    const std::string fn_open       = marker_or("function_opener", "<function=");
    const std::string fn_open_tail  = marker_or("function_name_suffix", ">");
    const std::string fn_close      = marker_or("function_closer", "</function>");
    const std::string arg_open      = marker_or("parameter_key_prefix", "<param=");
    const std::string arg_open_tail = marker_or("parameter_key_suffix", ">");
    const std::string arg_close     = marker_or("parameter_closer", "</param>");

    // One alternative per tool definition that carries a "function" entry.
    auto any_tool = choice();
    for (const auto & entry : tools) {
        if (!entry.contains("function")) {
            continue;
        }
        const auto &   fn        = entry.at("function");
        std::string    fn_name   = fn.at("name");
        nlohmann::json fn_params = fn.contains("parameters") ? fn.at("parameters") : nlohmann::json::object();

        // Arguments: any sequence of the declared properties; nothing when none are declared.
        auto args = eps();
        if (fn_params.contains("properties") && !fn_params.at("properties").empty()) {
            auto any_arg = choice();
            for (const auto & prop : fn_params.at("properties").items()) {
                const std::string & key = prop.key();
                // The key may appear bare, double-quoted or single-quoted.
                auto key_parser = choice({ literal(key), literal("\"" + key + "\""), literal("'" + key + "'") });
                any_arg |= tool_arg(tool_arg_open(literal(arg_open)) + tool_arg_name(key_parser) +
                                    literal(arg_open_tail) + tool_arg_value(until(arg_close)) +
                                    tool_arg_close(literal(arg_close)));
            }
            args = zero_or_more(any_arg + space());
        }

        // <function=name>args</function>
        auto call = tool(tool_open(literal(fn_open) + tool_name(literal(fn_name)) + literal(fn_open_tail)) +
                         space() + tool_args(args) + space() + tool_close(literal(fn_close)));
        any_tool |= rule("tool-" + fn_name, call);
    }

    // Wrap the call(s) in the section markers.
    auto head = literal(section_open) + space();
    auto body = parallel_tool_calls ? head + one_or_more(any_tool + space()) + literal(section_close) :
                                      head + any_tool + space() + literal(section_close);
    auto section = trigger_rule("tool-call", body);
    if (force_tool_calls) {
        return section;
    }
    return optional(section);
}
void common_chat_peg_unified_mapper::from_ast(const common_peg_ast_arena & arena,
const common_peg_parse_result & parse_result_arg) {
// Call base class to visit all nodes
@ -734,7 +155,7 @@ void common_chat_peg_unified_mapper::from_ast(const common_peg_ast_arena & ar
// Flush any pending tool call that was started but never got a name
// This happens during partial parsing when the tool call is incomplete
if (pending_tool_call.has_value()) {
if (pending_tool_call.has_value() && !pending_tool_call->name.empty()) {
// Transfer any buffered arguments
if (!args_buffer.empty()) {
pending_tool_call->arguments = args_buffer;
@ -954,7 +375,6 @@ void common_chat_peg_unified_mapper::map(const common_peg_ast_node & node) {
current_tool->arguments += "\"";
needs_closing_quote = false;
}
// Close the arguments object if using tagged format
if (!current_tool->arguments.empty() && current_tool->arguments.back() != '}') {
current_tool->arguments += "}";
}
@ -982,3 +402,352 @@ void common_chat_peg_unified_mapper::map(const common_peg_ast_node & node) {
}
}
}
// Builds a tool-call section for the tagged ("constructed") layout:
//   <tool_call><function=name><param=key>value</param>...</function></tool_call>
//
// Each marker can be overridden through `markers`; keys that are absent use the defaults
// listed below. Returns eps() when `tools` is not a non-empty array. Several calls inside
// one section are only accepted when parallel_tool_calls is set, and the section is
// optional unless force_tool_calls is set.
common_peg_parser common_chat_peg_unified_builder::standard_constructed_tools(
    const std::map<std::string, std::string> & markers,
    const nlohmann::json &                     tools,
    bool                                       parallel_tool_calls,
    bool                                       force_tool_calls) {
    if (!tools.is_array() || tools.empty()) {
        return eps();
    }

    // Marker lookup with a built-in fallback.
    const auto marker_or = [&markers](const char * key, const char * fallback) -> std::string {
        const auto found = markers.find(key);
        return found == markers.end() ? std::string(fallback) : found->second;
    };

    const std::string section_open  = marker_or("tool_call_start_marker", "<tool_call>");
    const std::string section_close = marker_or("tool_call_end_marker", "</tool_call>");
    const std::string fn_open       = marker_or("function_opener", "<function=");
    const std::string fn_open_tail  = marker_or("function_name_suffix", ">");
    const std::string fn_close      = marker_or("function_closer", "</function>");
    const std::string arg_open      = marker_or("parameter_key_prefix", "<param=");
    const std::string arg_open_tail = marker_or("parameter_key_suffix", ">");
    const std::string arg_close     = marker_or("parameter_closer", "</param>");

    // One alternative per tool definition that carries a "function" entry.
    auto any_tool = choice();
    for (const auto & entry : tools) {
        if (!entry.contains("function")) {
            continue;
        }
        const auto &   fn        = entry.at("function");
        std::string    fn_name   = fn.at("name");
        nlohmann::json fn_params = fn.contains("parameters") ? fn.at("parameters") : nlohmann::json::object();

        // Arguments: any sequence of the declared properties; nothing when none are declared.
        auto args = eps();
        if (fn_params.contains("properties") && !fn_params.at("properties").empty()) {
            auto any_arg = choice();
            for (const auto & prop : fn_params.at("properties").items()) {
                const std::string & key = prop.key();
                // The key may appear bare, double-quoted or single-quoted.
                auto key_parser = choice({ literal(key), literal("\"" + key + "\""), literal("'" + key + "'") });
                any_arg |= tool_arg(tool_arg_open(literal(arg_open)) + tool_arg_name(key_parser) +
                                    literal(arg_open_tail) + tool_arg_value(until(arg_close)) +
                                    tool_arg_close(literal(arg_close)));
            }
            args = zero_or_more(any_arg + space());
        }

        // <function=name>args</function>
        auto call = tool(tool_open(literal(fn_open) + tool_name(literal(fn_name)) + literal(fn_open_tail)) +
                         space() + tool_args(args) + space() + tool_close(literal(fn_close)));
        any_tool |= rule("tool-" + fn_name, call);
    }

    // Wrap the call(s) in the section markers.
    auto head = literal(section_open) + space();
    auto body = parallel_tool_calls ? head + one_or_more(any_tool + space()) + literal(section_close) :
                                      head + any_tool + space() + literal(section_close);
    auto section = trigger_rule("tool-call", body);
    if (force_tool_calls) {
        return section;
    }
    return optional(section);
}
// Helper: split a dot-notation key at its FIRST '.' into {prefix, field}.
// A key without any dot is a top-level field and is returned as {"", key}.
static std::pair<std::string, std::string> parse_key_spec(const std::string & key) {
    std::pair<std::string, std::string> spec;
    const std::string::size_type split_at = key.find('.');
    if (split_at != std::string::npos) {
        spec.first  = key.substr(0, split_at);
        spec.second = key.substr(split_at + 1);
    } else {
        spec.second = key;
    }
    return spec;
}
common_peg_parser common_chat_peg_unified_builder::standard_json_tools(
const std::string & section_start,
const std::string & section_end,
const nlohmann::json & tools,
bool parallel_tool_calls,
bool force_tool_calls,
const std::string & name_key,
const std::string & args_key,
bool array_wrapped,
bool function_is_key,
const std::string & call_id_key,
const std::string & gen_call_id_key,
const std::vector<std::string> & parameters_order) {
if (!tools.is_array() || tools.empty()) {
return eps();
}
// Build tool choices for JSON format
auto tool_choices = choice();
// auto other_member = json_string() + space() + literal(":") + space() + json();
// Determine effective field names
std::string effective_name_key = name_key.empty() ? "name" : name_key;
std::string effective_args_key = args_key.empty() ? "arguments" : args_key;
// Check if we have nested keys (dot notation)
auto name_spec = parse_key_spec(effective_name_key);
auto args_spec = parse_key_spec(effective_args_key);
bool has_nested_keys = !name_spec.first.empty() || !args_spec.first.empty();
// Mode 1: function_is_key - parse {"function_name": {...}}
if (function_is_key) {
for (const auto & tool_def : tools) {
if (!tool_def.contains("function")) {
continue;
}
const auto & function = tool_def.at("function");
std::string name = function.at("name");
nlohmann::json params = function.contains("parameters") ? function.at("parameters") : nlohmann::json::object();
// Build inner object fields
std::vector<common_peg_parser> inner_fields;
// Add optional string ID field
if (!call_id_key.empty()) {
auto id_parser = atomic(
literal("\"" + call_id_key + "\"") + space() + literal(":") + space() +
literal("\"") + tool_id(json_string_content()) + literal("\"")
);
inner_fields.push_back(optional(id_parser + space() + optional(literal(",") + space())));
}
// Add optional generated integer ID field
if (!gen_call_id_key.empty()) {
auto gen_id_parser = atomic(
literal("\"" + gen_call_id_key + "\"") + space() + literal(":") + space() +
choice({
literal("\"") + tool_id(json_string_content()) + literal("\""),
tool_id(json_number())
})
);
inner_fields.push_back(optional(gen_id_parser + space() + optional(literal(",") + space())));
}
// Add arguments - either wrapped in args_key or parsed directly
common_peg_parser args_parser = eps();
if (args_key.empty()) {
// Arguments are directly the inner object value: {"func_name": {"arg1": "val"}}
args_parser = tool_args(schema(json(), "tool-" + name + "-schema", params));
} else {
// Arguments are wrapped in a key: {"func_name": {"arguments": {"arg1": "val"}}}
args_parser = literal("\"" + effective_args_key + "\"") + space() + literal(":") + space() +
tool_args(schema(json(), "tool-" + name + "-schema", params));
}
inner_fields.push_back(args_parser);
// Build inner object parser - no greedy other_member skipping to avoid consuming ID
common_peg_parser inner_object = eps();
if (args_key.empty() && inner_fields.size() == 1) {
// Direct arguments: {"func_name": {"arg1": "val"}}
// The args_parser is already the full object schema
inner_object = inner_fields[0];
} else {
// Wrapped arguments: {"func_name": {"arguments": {"arg1": "val"}}}
inner_object = literal("{") + space();
for (size_t i = 0; i < inner_fields.size(); i++) {
inner_object = inner_object + inner_fields[i];
if (i < inner_fields.size() - 1) {
inner_object = inner_object + space();
}
}
inner_object = inner_object + space() + literal("}");
}
// Tool call format: { "function_name": { inner_object } }
auto tool_parser = tool(
tool_open(literal("{")) + space() +
literal("\"") + tool_name(literal(name)) + literal("\"") +
space() + literal(":") + space() +
inner_object +
space() + tool_close(literal("}"))
);
tool_choices |= rule("tool-" + name, tool_parser);
}
}
// Mode 2: Nested keys (dot notation like "function.name")
else if (has_nested_keys) {
// Group fields by prefix
std::string nested_prefix = !name_spec.first.empty() ? name_spec.first : args_spec.first;
std::string nested_name_field = !name_spec.first.empty() ? name_spec.second : effective_name_key;
std::string nested_args_field = !args_spec.first.empty() ? args_spec.second : effective_args_key;
for (const auto & tool_def : tools) {
if (!tool_def.contains("function")) {
continue;
}
const auto & function = tool_def.at("function");
std::string name = function.at("name");
nlohmann::json params = function.contains("parameters") ? function.at("parameters") : nlohmann::json::object();
// Build nested object with name and arguments
auto nested_name = literal("\"" + nested_name_field + "\"") + space() + literal(":") + space() +
literal("\"") + tool_name(literal(name)) + literal("\"");
auto nested_args = literal("\"" + nested_args_field + "\"") + space() + literal(":") + space() +
tool_args(schema(json(), "tool-" + name + "-schema", params));
auto nested_object = literal("{") + space() +
nested_name + space() + literal(",") + space() +
nested_args +
space() + literal("}");
// Build top-level parser - simpler structure without greedy other_member skipping
// Format: { id?, "function": {...} }
auto tool_parser_body = tool_open(literal("{")) + space();
// Add optional string ID field at top level
if (!call_id_key.empty()) {
auto id_spec = parse_key_spec(call_id_key);
if (id_spec.first.empty()) { // Top-level ID field
auto id_parser = atomic(
literal("\"" + call_id_key + "\"") + space() + literal(":") + space() +
literal("\"") + tool_id(json_string_content()) + literal("\"")
);
tool_parser_body = tool_parser_body + optional(id_parser + space() + literal(",") + space());
}
}
// Add optional generated integer ID field at top level
if (!gen_call_id_key.empty()) {
auto gen_id_spec = parse_key_spec(gen_call_id_key);
if (gen_id_spec.first.empty()) { // Top-level gen ID field
auto gen_id_parser = atomic(
literal("\"" + gen_call_id_key + "\"") + space() + literal(":") + space() +
choice({
literal("\"") + tool_id(json_string_content()) + literal("\""),
tool_id(json_number())
})
);
tool_parser_body = tool_parser_body + optional(gen_id_parser + space() + literal(",") + space());
}
}
// Add the nested object field
auto nested_field = literal("\"" + nested_prefix + "\"") + space() + literal(":") + space() + nested_object;
tool_parser_body = tool_parser_body + nested_field + space() + tool_close(literal("}"));
tool_choices |= rule("tool-" + name, tool(tool_parser_body));
}
}
// Mode 3: Flat keys (enhanced with ID fields and parameter ordering)
else {
auto name_key_parser = literal("\"" + name_key + "\"");
auto args_key_parser = literal("\"" + args_key + "\"");
for (const auto & tool_def : tools) {
if (!tool_def.contains("function")) {
continue;
}
const auto & function = tool_def.at("function");
std::string name = function.at("name");
nlohmann::json params = function.contains("parameters") ? function.at("parameters") : nlohmann::json::object();
auto tool_name_ = name_key_parser + space() + literal(":") + space() +
literal("\"") + tool_name(literal(name)) + literal("\"");
auto tool_args_ = args_key_parser + space() + literal(":") + space() +
tool_args(schema(json(), "tool-" + name + "-schema", params));
// Build ID parsers if keys are provided
common_peg_parser id_parser = eps();
if (!call_id_key.empty()) {
id_parser = atomic(
literal("\"" + call_id_key + "\"") + space() + literal(":") + space() +
choice({
literal("\"") + tool_id(json_string_content()) + literal("\""),
tool_id(json_number())
})
);
}
common_peg_parser gen_id_parser = eps();
if (!gen_call_id_key.empty()) {
gen_id_parser = atomic(
literal("\"" + gen_call_id_key + "\"") + space() + literal(":") + space() +
choice({
literal("\"") + tool_id(json_string_content()) + literal("\""),
tool_id(json_number())
})
);
}
common_peg_parser tool_parser = eps();
// Use parameter ordering if provided - parse fields in specified order without greedy skipping
if (!parameters_order.empty()) {
}
// Build parser using parameter ordering (works with or without explicit parameters_order)
// Create list of (parser, key) pairs for all fields
std::vector<std::pair<common_peg_parser, std::string>> parser_pairs;
parser_pairs.emplace_back(tool_name_, effective_name_key);
parser_pairs.emplace_back(tool_args_, effective_args_key);
if (!call_id_key.empty()) {
parser_pairs.emplace_back(optional(id_parser), call_id_key);
}
if (!gen_call_id_key.empty()) {
parser_pairs.emplace_back(optional(gen_id_parser), gen_call_id_key);
}
// Sort by position in parameters_order (or at end if not present)
std::sort(parser_pairs.begin(), parser_pairs.end(),
[&parameters_order](const auto & a, const auto & b) {
auto pos_a = std::find(parameters_order.begin(), parameters_order.end(), a.second);
auto pos_b = std::find(parameters_order.begin(), parameters_order.end(), b.second);
size_t idx_a = (pos_a == parameters_order.end()) ? parameters_order.size() : std::distance(parameters_order.begin(), pos_a);
size_t idx_b = (pos_b == parameters_order.end()) ? parameters_order.size() : std::distance(parameters_order.begin(), pos_b);
return idx_a < idx_b;
});
// Build ordered parser
auto ordered_body = tool_open(literal("{")) + space();
for (size_t i = 0; i < parser_pairs.size(); i++) {
ordered_body = ordered_body + parser_pairs[i].first;
if (i < parser_pairs.size() - 1) {
ordered_body = ordered_body + space() + literal(",") + space();
}
}
ordered_body = ordered_body + space() + tool_close(literal("}"));
tool_parser = tool(ordered_body);
tool_choices |= rule("tool-" + name, tool_parser);
}
}
// Build the section with markers
auto tool_calls = tool_choices;
if (parallel_tool_calls) {
tool_calls = tool_calls + zero_or_more(space() + literal(",") + space() + tool_choices);
}
// Optionally wrap in array brackets
if (array_wrapped) {
tool_calls = literal("[") + space() + tool_calls + space() + literal("]");
}
auto section =
trigger_rule("tool-call", literal(section_start) + space() + tool_calls + space() + literal(section_end));
return force_tool_calls ? section : optional(section);
}

View File

@ -5,6 +5,7 @@
#include <map>
#include <optional>
#include <vector>
class common_chat_peg_builder : public common_peg_parser_builder {
public:
@ -63,65 +64,43 @@ class common_chat_peg_unified_builder : public common_chat_peg_builder {
// Low-level tag methods
// Each helper below wraps the given parser `p` in a tag via tag(); the ones that
// additionally go through atomic() are noted as such.
// TOOL tag around a whole tool call.
common_peg_parser tool(const common_peg_parser & p) { return tag(TOOL, p); }
// TOOL_OPEN tag, wrapped in atomic().
common_peg_parser tool_open(const common_peg_parser & p) { return atomic(tag(TOOL_OPEN, p)); }
// TOOL_CLOSE tag, wrapped in atomic().
common_peg_parser tool_close(const common_peg_parser & p) { return atomic(tag(TOOL_CLOSE, p)); }
// TOOL_ID tag, wrapped in atomic().
common_peg_parser tool_id(const common_peg_parser & p) { return atomic(tag(TOOL_ID, p)); }
// TOOL_NAME tag, wrapped in atomic().
common_peg_parser tool_name(const common_peg_parser & p) { return atomic(tag(TOOL_NAME, p)); }
// TOOL_ARGS tag (not atomic).
common_peg_parser tool_args(const common_peg_parser & p) { return tag(TOOL_ARGS, p); }
// TOOL_ARG tag (not atomic).
common_peg_parser tool_arg(const common_peg_parser & p) { return tag(TOOL_ARG, p); }
// TOOL_ARG_OPEN tag, wrapped in atomic().
common_peg_parser tool_arg_open(const common_peg_parser & p) { return atomic(tag(TOOL_ARG_OPEN, p)); }
// TOOL_ARG_CLOSE tag, wrapped in atomic().
common_peg_parser tool_arg_close(const common_peg_parser & p) { return atomic(tag(TOOL_ARG_CLOSE, p)); }
// TOOL_ARG_NAME tag, wrapped in atomic().
common_peg_parser tool_arg_name(const common_peg_parser & p) { return atomic(tag(TOOL_ARG_NAME, p)); }
// TOOL_ARG_VALUE tag (not atomic).
common_peg_parser tool_arg_value(const common_peg_parser & p) { return tag(TOOL_ARG_VALUE, p); }
// Use for schema-declared string types - won't be treated as potential JSON container
common_peg_parser tool_arg_string_value(const common_peg_parser & p) { return tag(TOOL_ARG_STRING_VALUE, p); }
// NOTE(review): emits the same TOOL_ARG_VALUE tag as tool_arg_value() above - presumably
// intentional (a naming alias for JSON-typed values); confirm against the mapper.
common_peg_parser tool_arg_json_value(const common_peg_parser & p) { return tag(TOOL_ARG_VALUE, p); }
// High-level building methods
// Build reasoning block based on ContentStructure
common_peg_parser build_reasoning_block(const content_structure & cs,
common_reasoning_format reasoning_format,
bool thinking_forced_open);
// Build content block based on ContentStructure
common_peg_parser build_content_block(const content_structure & cs,
common_reasoning_format reasoning_format,
const std::string & tool_section_start = "");
// Build complete tool section based on ToolCallStructure
common_peg_parser build_tool_section(const tool_call_structure & ts,
const nlohmann::json & tools,
bool parallel_tool_calls,
bool force_tool_calls);
// Build single function parser based on ToolCallStructure
common_peg_parser build_function(const tool_call_structure & ts,
const std::string & name,
const nlohmann::json & schema);
// Build arguments parser based on ToolCallStructure
common_peg_parser build_arguments(const tool_call_structure & ts, const nlohmann::json & params);
// Legacy-compatible helper for building standard JSON tool calls
// Used by tests and manual parsers
common_peg_parser standard_json_tools(const std::string & section_start,
const std::string & section_end,
const nlohmann::json & tools,
bool parallel_tool_calls,
bool force_tool_calls);
// name_key/args_key: JSON key names for function name and arguments
// Empty or "name"/"arguments" will accept both common variations
// Supports dot notation for nested objects (e.g., "function.name")
// array_wrapped: if true, tool calls are wrapped in JSON array [...]
// function_is_key: if true, function name is the JSON key (e.g., {"func_name": {...}})
// call_id_key: JSON key for string call ID (e.g., "id")
// gen_call_id_key: JSON key for generated integer call ID (e.g., "tool_call_id")
// parameters_order: order in which JSON fields should be parsed
common_peg_parser standard_json_tools(const std::string & section_start,
const std::string & section_end,
const nlohmann::json & tools,
bool parallel_tool_calls,
bool force_tool_calls,
const std::string & name_key = "",
const std::string & args_key = "",
bool array_wrapped = false,
bool function_is_key = false,
const std::string & call_id_key = "",
const std::string & gen_call_id_key = "",
const std::vector<std::string> & parameters_order = {});
// Legacy-compatible helper for building XML/tagged style tool calls
// Used by tests and manual parsers

View File

@ -1,15 +1,12 @@
#include "chat.h"
#include "chat-auto-parser-helpers.h"
#include "chat-auto-parser.h"
#include "chat-peg-parser.h"
#include "common.h"
#include "ggml.h"
#include "json-schema-to-grammar.h"
#include "log.h"
#include "regex-partial.h"
#include "jinja/parser.h"
#include "jinja/value.h"
#include "jinja/runtime.h"
#include "jinja/caps.h"
@ -1026,6 +1023,114 @@ static common_chat_params common_chat_params_init_gpt_oss(const common_chat_temp
return data;
}
// Functionary v3.2 - uses recipient-based format: >>>recipient\n{content}
//
// Renders the prompt for `tmpl`, builds a PEG parser for the recipient-based
// output and, when tools are available and tool_choice is not NONE, derives a
// grammar (plus its ">>>" trigger word) from that parser.
//
// tmpl:   chat template to apply
// inputs: template parameters (messages, tools, tool_choice, parallel_tool_calls)
// returns the populated common_chat_params
static common_chat_params common_chat_params_init_functionary_v3_2(const common_chat_template & tmpl,
                                                                   const struct templates_params & inputs) {
    common_chat_params data;

    data.prompt           = common_chat_template_direct_apply(tmpl, inputs);
    data.format           = COMMON_CHAT_FORMAT_PEG_NATIVE;
    data.preserved_tokens = {
        ">>>all",
    };

    auto has_tools       = inputs.tools.is_array() && !inputs.tools.empty();
    auto include_grammar = has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE;

    auto parser = build_chat_peg_unified_parser([&](common_chat_peg_unified_builder & p) {
        // Functionary v3.2 format:
        // - Normal content: >>>all\n{content}
        // - Tool calls: >>>function_name\n{json_args}
        // Generation prompt ends with ">>>" so model outputs recipient immediately

        // Content parsers for >>>all\n{content}:
        // - when a tool call may follow, content stops before the next ">>>"
        // - otherwise content runs until the end of the input
        auto content_until_tool = p.literal(">>>all\n") + p.content(p.until(">>>"));
        auto content_until_end  = p.literal(">>>all\n") + p.content(p.rest());

        // No tools, or tool_choice is NONE: match the prefix and capture everything after it
        if (!include_grammar) {
            return content_until_end + p.end();
        }

        // One alternative per available function
        auto tool_choice = p.choice();
        foreach_function(inputs.tools, [&](const json & tool) {
            const auto & function = tool.at("function");
            std::string  name     = function.at("name");
            // "parameters" may be absent from a tool definition; fall back to an
            // empty object instead of throwing from at()
            const json schema = function.contains("parameters") ? function.at("parameters") : json::object();

            // Tool format: >>>function_name\n{json_args}
            auto tool_parser = p.tool(
                p.tool_open(p.literal(">>>") + p.tool_name(p.literal(name)) + p.literal("\n")) +
                p.tool_args(p.schema(p.json(), "tool-" + name + "-schema", schema))
            );

            tool_choice |= p.rule("tool-" + name, tool_parser);
        });

        // The model can output:
        // 1. Just content: >>>all\n{content}
        // 2. Just tool call(s): >>>function_name\n{json_args}
        // 3. Both: >>>all\n{content}>>>function_name\n{json_args}
        auto content_only      = content_until_end;
        auto content_and_tools = content_until_tool + p.one_or_more(tool_choice);
        auto tools_only        = p.one_or_more(tool_choice);
        // Single-call variant used whenever parallel tool calls are disabled
        auto content_and_tool  = content_until_tool + tool_choice;

        if (inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED) {
            // Must have at least one tool call
            if (inputs.parallel_tool_calls) {
                // Multiple tool calls allowed
                return p.choice({ content_and_tools, tools_only }) + p.end();
            }
            // Exactly one tool call: use the single tool_choice here, not
            // one_or_more, so a second call is not accepted
            return p.choice({ content_and_tool, tool_choice }) + p.end();
        }

        // Tool calls are optional (auto mode)
        if (inputs.parallel_tool_calls) {
            // Multiple tool calls allowed
            return p.choice({ content_and_tools, content_only, tools_only }) + p.end();
        }
        // Single tool call at most
        return p.choice({ content_and_tool, content_only, tool_choice }) + p.end();
    });

    data.parser = parser.save();

    if (include_grammar) {
        data.grammar_lazy = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_AUTO;

        data.grammar = build_grammar([&](const common_grammar_builder & builder) {
            foreach_function(inputs.tools, [&](const json & tool) {
                const auto & function = tool.at("function");
                // Same fallback as in the parser: tools without "parameters" get an empty schema
                json schema = function.contains("parameters") ? function.at("parameters") : json::object();
                builder.resolve_refs(schema);
            });
            parser.build_grammar(builder, data.grammar_lazy);
        });

        // Grammar trigger for when the model starts outputting a tool call
        // (after the initial ">>>" in the generation prompt)
        data.grammar_triggers = {
            { COMMON_GRAMMAR_TRIGGER_TYPE_WORD, ">>>" }
        };
    }

    return data;
}
namespace workaround {
// if first message is system and template does not support it, merge it with next message
@ -1074,6 +1179,8 @@ static void func_args_not_string(json & messages) {
}
}
}
static common_chat_params common_chat_templates_apply_jinja(const struct common_chat_templates * tmpls,
const struct common_chat_templates_inputs & inputs) {
templates_params params;
@ -1097,7 +1204,10 @@ static common_chat_params common_chat_templates_apply_jinja(const struct common_
workaround::system_message_not_supported(params.messages);
}
if (!tmpl.original_caps().requires_non_null_content) {
if (tmpl.original_caps().supports_tool_calls) {
// some templates will require the content field in tool call messages
// to still be non-null, this puts an empty string everywhere where the
// content field is null
workaround::requires_non_null_content(params.messages);
}
@ -1132,20 +1242,26 @@ static common_chat_params common_chat_templates_apply_jinja(const struct common_
// Note: Mistral Small 3.2 uses [CALL_ID] which Ministral doesn't have, so we can distinguish them
if (src.find("[SYSTEM_PROMPT]") != std::string::npos && src.find("[TOOL_CALLS]") != std::string::npos &&
src.find("[ARGS]") != std::string::npos && src.find("[CALL_ID]") == std::string::npos) {
LOG_INF("Using specialized template: Ministral/Magistral Large 3\n");
LOG_DBG("Using specialized template: Ministral/Magistral Large 3\n");
return common_chat_params_init_ministral_3(tmpl, params);
}
// GPT-OSS - has unique channel-based structure that needs dedicated handler
if (src.find("<|channel|>") != std::string::npos) {
LOG_INF("Using specialized template: GPT-OSS\n");
LOG_DBG("Using specialized template: GPT-OSS\n");
return common_chat_params_init_gpt_oss(tmpl, params);
}
// Functionary v3.2 - uses recipient-based format with >>>recipient\n{content}
// Detection: template has ">>>all" for content and ">>>" prefix for tool calls
if (src.find(">>>all") != std::string::npos && src.find(">>>${recipient}") != std::string::npos) {
LOG_DBG("Using specialized template: Functionary v3.2\n");
return common_chat_params_init_functionary_v3_2(tmpl, params);
}
try {
LOG_INF("Using autoparser for template analysis\n");
template_analysis_result analysis = template_analyzer::analyze_template(tmpl);
auto auto_params = universal_peg_generator::generate_parser(analysis, tmpl, params);
LOG_DBG("Using differential autoparser\n");
auto auto_params = universal_peg_generator::generate_parser(tmpl, params);
return auto_params;
} catch (const std::exception & e) {
LOG_WRN("Automatic parser generation failed: %s\n", e.what());
@ -1227,22 +1343,24 @@ common_chat_params common_chat_templates_apply(const struct common_chat_template
common_chat_templates_apply_legacy(tmpls, inputs);
}
common_chat_msg common_chat_parse(const std::string & input, bool is_partial, const common_chat_syntax & syntax) {
return common_chat_peg_parse(syntax.parser, input, is_partial, syntax);
// Parse model output `input` into a chat message by forwarding to
// common_chat_peg_parse() with the parser stored in `params.parser`.
// `is_partial` and `params` are passed through unchanged.
common_chat_msg common_chat_parse(const std::string & input,
bool is_partial,
const common_chat_parser_params & params) {
return common_chat_peg_parse(params.parser, input, is_partial, params);
}
common_chat_msg common_chat_peg_parse(const common_peg_arena & parser,
const std::string & input,
bool is_partial,
const common_chat_syntax & syntax) {
common_chat_msg common_chat_peg_parse(const common_peg_arena & parser,
const std::string & input,
bool is_partial,
const common_chat_parser_params & params) {
if (parser.empty()) {
throw std::runtime_error("Failed to parse due to missing parser definition.");
}
LOG_DBG("Parsing PEG input with format %s: %s\n", common_chat_format_name(syntax.format), input.c_str());
LOG_DBG("Parsing PEG input with format %s: %s\n", common_chat_format_name(params.format), input.c_str());
common_peg_parse_context ctx(input, is_partial);
ctx.debug = syntax.debug;
ctx.debug = params.debug;
auto result = parser.parse(ctx);
if (result.fail()) {
@ -1252,13 +1370,9 @@ common_chat_msg common_chat_peg_parse(const common_peg_arena & parser,
// Try to extract any partial results from what was successfully parsed
common_chat_msg msg;
msg.role = "assistant";
if (syntax.format == COMMON_CHAT_FORMAT_PEG_NATIVE) {
auto mapper = common_chat_peg_unified_mapper(msg);
mapper.from_ast(ctx.ast, result);
} else {
auto mapper = common_chat_peg_mapper(msg);
mapper.from_ast(ctx.ast, result);
}
auto mapper = common_chat_peg_unified_mapper(msg);
mapper.from_ast(ctx.ast, result);
if (ctx.debug) {
fprintf(stderr, "\nAST for partial parse (fail):\n%s\n", ctx.ast.dump().c_str());
fflush(stderr);
@ -1272,21 +1386,16 @@ common_chat_msg common_chat_peg_parse(const common_peg_arena & parser,
common_chat_msg msg;
msg.role = "assistant";
if (syntax.format == COMMON_CHAT_FORMAT_PEG_NATIVE) {
auto mapper = common_chat_peg_unified_mapper(msg);
mapper.from_ast(ctx.ast, result);
} else {
// Generic mapper
auto mapper = common_chat_peg_mapper(msg);
mapper.from_ast(ctx.ast, result);
}
auto mapper = common_chat_peg_unified_mapper(msg);
mapper.from_ast(ctx.ast, result);
if (ctx.debug) {
fprintf(stderr, "\nAST for %s parse:\n%s\n", is_partial ? "partial" : "full", ctx.ast.dump().c_str());
fflush(stderr);
}
if (!is_partial) {
LOG_DBG("Parsed message: %s\n", common_chat_msgs_to_json_oaicompat<json>({ msg }).at(0).dump().c_str());
LOG_DBG("Parsed message: %s\n", common_chat_msgs_to_json_oaicompat({ msg }).at(0).dump().c_str());
}
return msg;
}
@ -1296,3 +1405,4 @@ std::map<std::string, bool> common_chat_templates_get_caps(const common_chat_tem
GGML_ASSERT(chat_templates->template_default != nullptr);
return chat_templates->template_default->caps.to_map();
}

View File

@ -267,17 +267,12 @@ std::string common_chat_format_example(const struct common_chat_templates *
const std::map<std::string, std::string> & chat_template_kwargs);
const char * common_chat_format_name(common_chat_format format);
common_chat_msg common_chat_parse(const std::string & input, bool is_partial, const common_chat_parser_params & syntax);
common_chat_msg common_chat_peg_parse(const common_peg_arena & parser, const std::string & input, bool is_partial, const common_chat_parser_params & syntax);
common_chat_msg common_chat_parse(const std::string & input, bool is_partial, const common_chat_parser_params & params);
common_chat_msg common_chat_peg_parse(const common_peg_arena & parser, const std::string & input, bool is_partial, const common_chat_parser_params & params);
// used by arg and server
const char * common_reasoning_format_name(common_reasoning_format format);
common_reasoning_format common_reasoning_format_from_name(const std::string & format);
common_chat_msg common_chat_parse(const std::string & input, bool is_partial, const common_chat_syntax & syntax);
common_chat_msg common_chat_peg_parse(const common_peg_arena & parser,
const std::string & input,
bool is_partial,
const common_chat_syntax & syntax);
common_chat_tool_choice common_chat_tool_choice_parse_oaicompat(const std::string & tool_choice);

View File

@ -1,3 +1,4 @@
#include "log.h"
#include "value.h"
#include "runtime.h"
#include "caps.h"
@ -16,7 +17,7 @@ using json = nlohmann::ordered_json;
namespace jinja {
using caps_json_fn = std::function<json()>;
using caps_analyze_fn = std::function<void(bool, value &, value &)>;
using caps_analyze_fn = std::function<void(bool, value &, value &, const std::string &)>;
static void caps_try_execute(jinja::program & prog,
const caps_json_fn & messages_fn,
@ -36,16 +37,20 @@ static void caps_try_execute(jinja::program & prog,
auto tools = ctx.get_val("tools");
bool success = false;
std::string result;
try {
jinja::runtime runtime(ctx);
runtime.execute(prog);
auto results = runtime.execute(prog);
auto parts = jinja::runtime::gather_string_parts(results);
std::string result = parts->as_string().str();
success = true;
} catch (const std::exception & e) {
JJ_DEBUG("Exception during execution: %s", e.what());
result = "";
// ignore exceptions during capability analysis
}
analyze_fn(success, messages, tools);
analyze_fn(success, messages, tools, result);
}
// for debugging only
@ -64,7 +69,6 @@ static void caps_print_stats(value & v, const std::string & path) {
std::map<std::string, bool> caps::to_map() const {
return {
{"requires_typed_content", requires_typed_content},
{"requires_non_null_content", requires_non_null_content},
{"supports_tools", supports_tools},
{"supports_tool_calls", supports_tool_calls},
{"supports_parallel_tool_calls", supports_parallel_tool_calls},
@ -106,7 +110,7 @@ caps caps_get(jinja::program & prog) {
// tools
return json{nullptr};
},
[&](bool, value & messages, value &) {
[&](bool, value & messages, value &, const std::string &) {
auto & content = messages->at(0)->at("content");
caps_print_stats(content, "messages[0].content");
if (has_op(content, "selectattr") || has_op(content, "array_access")) {
@ -137,7 +141,7 @@ caps caps_get(jinja::program & prog) {
// tools
return json::array();
},
[&](bool, value & messages, value &) {
[&](bool, value & messages, value &, const std::string &) {
auto & content = messages->at(0)->at("content");
caps_print_stats(content, "messages[0].content");
if (!content->stats.used) {
@ -182,6 +186,15 @@ caps caps_get(jinja::program & prog) {
}
})}
},
{
{"role", "tool"},
{"content", "Tool response"},
{"tool_call_id", "call00001"}
},
{
{"role", "assistant"},
{"content", "The tool response was 'tool response'"}
},
{
{"role", "user"},
{"content", "User message"},
@ -211,7 +224,7 @@ caps caps_get(jinja::program & prog) {
},
});
},
[&](bool success, value & messages, value & tools) {
[&](bool success, value & messages, value & tools, const std::string & res) {
if (!success) {
result.supports_tool_calls = false;
result.supports_tools = false;
@ -220,8 +233,11 @@ caps caps_get(jinja::program & prog) {
auto & tool_name = tools->at(0)->at("function")->at("name");
caps_print_stats(tool_name, "tools[0].function.name");
caps_print_stats(tools, "tools");
if (!tool_name->stats.used) {
result.supports_tools = false;
if (!tools->stats.used && res.find(tool_name->as_string().str()) == std::string::npos) {
result.supports_tools = false;
}
}
auto & tool_calls = messages->at(1)->at("tool_calls");;
@ -239,83 +255,6 @@ caps caps_get(jinja::program & prog) {
}
);
// case: requires non-null content in tool calls
if (result.supports_tool_calls) {
caps_try_execute(
prog,
[&]() {
// messages
return json::array({
{
{ "role", "user" },
{ "content", "User message" },
},
{
{ "role", "assistant" },
{ "tool_calls",
json::array({
{
{ "id", "call00001" },
{ "type", "function" },
{ "function",
{
{ "name", "tool1" },
{ "arguments",
{
{ "arg", "value" }
}
}
}
}
},
})
}
},
{
{ "role", "user" },
{ "content", "User message" },
},
});
},
[&]() {
// tools
return json::array({
{
{ "name", "tool" },
{ "type", "function" },
{ "function",
{
{ "name", "tool1" },
{ "description", "Tool description" },
{ "parameters",
{
{ "type", "object" },
{ "properties",
{
{ "arg",
{
{ "type", "string" },
{ "description", "Arg description" },
}
},
}
},
{ "required", json::array({ "arg" }) },
}
},
}
},
},
});
},
[&](bool success, value & /* messages */, value & /* tools */) {
if (!success) {
result.requires_non_null_content = true;
}
}
);
}
// case: preserve reasoning content in chat history
caps_try_execute(
prog,
@ -341,7 +280,7 @@ caps caps_get(jinja::program & prog) {
// tools
return json::array();
},
[&](bool, value & messages, value &) {
[&](bool, value & messages, value &, const std::string &) {
auto & content = messages->at(1)->at("reasoning_content");
caps_print_stats(content, "messages[1].reasoning_content");
if (content->stats.used) {

View File

@ -15,7 +15,6 @@ struct caps {
bool supports_preserve_reasoning = false; // support assistant message with reasoning_content
bool requires_typed_content = false; // default: use string content
bool requires_non_null_content = false; // requires "" instead of null for content in tool calls
// for reporting on server
std::map<std::string, bool> to_map() const;

View File

@ -27,11 +27,11 @@ static std::string build_repetition(const std::string & item_rule, int min_items
if (separator_rule.empty()) {
if (min_items == 1 && !has_max) {
return item_rule + "+";
} else if (min_items == 0 && !has_max) {
}
if (min_items == 0 && !has_max) {
return item_rule + "*";
} else {
return item_rule + "{" + std::to_string(min_items) + "," + (has_max ? std::to_string(max_items) : "") + "}";
}
}
return item_rule + "{" + std::to_string(min_items) + "," + (has_max ? std::to_string(max_items) : "") + "}";
}
auto result = item_rule + " " + build_repetition("(" + separator_rule + " " + item_rule + ")", min_items == 0 ? 0 : min_items - 1, has_max ? max_items - 1 : max_items);
@ -41,7 +41,7 @@ static std::string build_repetition(const std::string & item_rule, int min_items
return result;
}
static void _build_min_max_int(int64_t min_value, int64_t max_value, std::stringstream & out, int decimals_left = 16, bool top_level = true) {
static void build_min_max_int(int64_t min_value, int64_t max_value, std::stringstream & out, int decimals_left = 16, bool top_level = true) {
auto has_min = min_value != std::numeric_limits<int64_t>::min();
auto has_max = max_value != std::numeric_limits<int64_t>::max();
@ -128,14 +128,14 @@ static void _build_min_max_int(int64_t min_value, int64_t max_value, std::string
if (has_min && has_max) {
if (min_value < 0 && max_value < 0) {
out << "\"-\" (";
_build_min_max_int(-max_value, -min_value, out, decimals_left, /* top_level= */ true);
build_min_max_int(-max_value, -min_value, out, decimals_left, /* top_level= */ true);
out << ")";
return;
}
if (min_value < 0) {
out << "\"-\" (";
_build_min_max_int(0, -min_value, out, decimals_left, /* top_level= */ true);
build_min_max_int(0, -min_value, out, decimals_left, /* top_level= */ true);
out << ") | ";
min_value = 0;
}
@ -159,7 +159,7 @@ static void _build_min_max_int(int64_t min_value, int64_t max_value, std::string
if (has_min) {
if (min_value < 0) {
out << "\"-\" (";
_build_min_max_int(std::numeric_limits<int64_t>::min(), -min_value, out, decimals_left, /* top_level= */ false);
build_min_max_int(std::numeric_limits<int64_t>::min(), -min_value, out, decimals_left, /* top_level= */ false);
out << ") | [0] | [1-9] ";
more_digits(0, decimals_left - 1);
} else if (min_value == 0) {
@ -194,7 +194,7 @@ static void _build_min_max_int(int64_t min_value, int64_t max_value, std::string
}
digit_range(c, c);
out << " (";
_build_min_max_int(std::stoll(min_s.substr(1)), std::numeric_limits<int64_t>::max(), out, less_decimals, /* top_level= */ false);
build_min_max_int(std::stoll(min_s.substr(1)), std::numeric_limits<int64_t>::max(), out, less_decimals, /* top_level= */ false);
out << ")";
if (c < '9') {
out << " | ";
@ -213,10 +213,10 @@ static void _build_min_max_int(int64_t min_value, int64_t max_value, std::string
more_digits(0, less_decimals);
out << " | ";
}
_build_min_max_int(0, max_value, out, decimals_left, /* top_level= */ true);
build_min_max_int(0, max_value, out, decimals_left, /* top_level= */ true);
} else {
out << "\"-\" (";
_build_min_max_int(-max_value, std::numeric_limits<int64_t>::max(), out, decimals_left, /* top_level= */ false);
build_min_max_int(-max_value, std::numeric_limits<int64_t>::max(), out, decimals_left, /* top_level= */ false);
out << ")";
}
return;
@ -232,7 +232,7 @@ struct BuiltinRule {
std::vector<std::string> deps;
};
std::unordered_map<std::string, BuiltinRule> PRIMITIVE_RULES = {
static std::unordered_map<std::string, BuiltinRule> PRIMITIVE_RULES = {
{"boolean", {"(\"true\" | \"false\") space", {}}},
{"decimal-part", {"[0-9]{1,16}", {}}},
{"integral-part", {"[0] | [1-9] [0-9]{0,15}", {}}},
@ -247,7 +247,7 @@ std::unordered_map<std::string, BuiltinRule> PRIMITIVE_RULES = {
{"null", {"\"null\" space", {}}},
};
std::unordered_map<std::string, BuiltinRule> STRING_FORMAT_RULES = {
static std::unordered_map<std::string, BuiltinRule> STRING_FORMAT_RULES = {
{"date", {"[0-9]{4} \"-\" ( \"0\" [1-9] | \"1\" [0-2] ) \"-\" ( \"0\" [1-9] | [1-2] [0-9] | \"3\" [0-1] )", {}}},
{"time", {"([01] [0-9] | \"2\" [0-3]) \":\" [0-5] [0-9] \":\" [0-5] [0-9] ( \".\" [0-9]{3} )? ( \"Z\" | ( \"+\" | \"-\" ) ( [01] [0-9] | \"2\" [0-3] ) \":\" [0-5] [0-9] )", {}}},
{"date-time", {"date \"T\" time", {"date", "time"}}},
@ -260,22 +260,26 @@ static bool is_reserved_name(const std::string & name) {
static const std::unordered_set<std::string> RESERVED_NAMES = [] {
std::unordered_set<std::string> s;
s.insert("root");
for (const auto & p : PRIMITIVE_RULES) s.insert(p.first);
for (const auto & p : STRING_FORMAT_RULES) s.insert(p.first);
for (const auto & p : PRIMITIVE_RULES) {
s.insert(p.first);
}
for (const auto & p : STRING_FORMAT_RULES) {
s.insert(p.first);
}
return s;
}();
return RESERVED_NAMES.find(name) != RESERVED_NAMES.end();
}
std::regex INVALID_RULE_CHARS_RE("[^a-zA-Z0-9-]+");
std::regex GRAMMAR_LITERAL_ESCAPE_RE("[\r\n\"\\\\]");
std::regex GRAMMAR_RANGE_LITERAL_ESCAPE_RE("[\r\n\"\\]\\-\\\\]");
std::unordered_map<char, std::string> GRAMMAR_LITERAL_ESCAPES = {
static std::regex INVALID_RULE_CHARS_RE("[^a-zA-Z0-9-]+");
static std::regex GRAMMAR_LITERAL_ESCAPE_RE("[\r\n\"\\\\]");
static std::regex GRAMMAR_RANGE_LITERAL_ESCAPE_RE("[\r\n\"\\]\\-\\\\]");
static std::unordered_map<char, std::string> GRAMMAR_LITERAL_ESCAPES = {
{'\r', "\\r"}, {'\n', "\\n"}, {'"', "\\\""}, {'-', "\\-"}, {']', "\\]"}, {'\\', "\\\\"}
};
std::unordered_set<char> NON_LITERAL_SET = {'|', '.', '(', ')', '[', ']', '{', '}', '*', '+', '?'};
std::unordered_set<char> ESCAPED_IN_REGEXPS_BUT_NOT_IN_LITERALS = {'^', '$', '.', '[', ']', '(', ')', '|', '{', '}', '*', '+', '?'};
static std::unordered_set<char> NON_LITERAL_SET = {'|', '.', '(', ')', '[', ']', '{', '}', '*', '+', '?'};
static std::unordered_set<char> ESCAPED_IN_REGEXPS_BUT_NOT_IN_LITERALS = {'^', '$', '.', '[', ']', '(', ')', '|', '{', '}', '*', '+', '?'};
static std::string replacePattern(const std::string & input, const std::regex & regex, const std::function<std::string(const std::smatch &)> & replacement) {
std::smatch match;
@ -322,19 +326,19 @@ private:
if (_rules.find(esc_name) == _rules.end() || _rules[esc_name] == rule) {
_rules[esc_name] = rule;
return esc_name;
} else {
int i = 0;
while (_rules.find(esc_name + std::to_string(i)) != _rules.end() && _rules[esc_name + std::to_string(i)] != rule) {
i++;
}
std::string key = esc_name + std::to_string(i);
_rules[key] = rule;
return key;
}
int i = 0;
while (_rules.find(esc_name + std::to_string(i)) != _rules.end() && _rules[esc_name + std::to_string(i)] != rule) {
i++;
}
std::string key = esc_name + std::to_string(i);
_rules[key] = rule;
return key;
}
std::string _generate_union_rule(const std::string & name, const std::vector<json> & alt_schemas) {
std::vector<std::string> rules;
rules.reserve(alt_schemas.size());
for (size_t i = 0; i < alt_schemas.size(); i++) {
rules.push_back(visit(alt_schemas[i], name + (name.empty() ? "alternative-" : "-") + std::to_string(i)));
}
@ -398,6 +402,7 @@ private:
flush_literal();
std::vector<std::string> results;
results.reserve(ret.size());
for (const auto & item : ret) {
results.push_back(to_rule(item));
}
@ -551,7 +556,7 @@ private:
TrieNode() : is_end_of_string(false) {}
void insert(const std::string & string) {
auto node = this;
auto *node = this;
for (char c : string) {
node = &node->children[c];
}
@ -676,7 +681,7 @@ private:
if (ks.empty()) {
return res;
}
std::string k = ks[0];
const std::string& k = ks[0];
std::string kv_rule_name = prop_kv_rule_names[k];
std::string comma_ref = "( \",\" space " + kv_rule_name + " )";
if (first_is_optional) {
@ -779,7 +784,7 @@ public:
std::string pointer = ref.substr(ref.find('#') + 1);
std::vector<std::string> tokens = string_split(pointer, "/");
for (size_t i = 1; i < tokens.size(); ++i) {
std::string sel = tokens[i];
const std::string& sel = tokens[i];
if (target.is_object() && target.contains(sel)) {
target = target[sel];
} else if (target.is_array()) {
@ -802,7 +807,7 @@ public:
_refs[ref] = target;
}
} else {
for (auto & kv : n.items()) {
for (const auto & kv : n.items()) {
visit_refs(kv.value());
}
}
@ -812,7 +817,7 @@ public:
visit_refs(schema);
}
std::string _generate_constant_rule(const json & value) {
static std::string _generate_constant_rule(const json & value) {
return format_literal(value.dump());
}
@ -823,10 +828,12 @@ public:
if (schema.contains("$ref")) {
return _add_rule(rule_name, _resolve_ref(schema["$ref"]));
} else if (schema.contains("oneOf") || schema.contains("anyOf")) {
}
if (schema.contains("oneOf") || schema.contains("anyOf")) {
std::vector<json> alt_schemas = schema.contains("oneOf") ? schema["oneOf"].get<std::vector<json>>() : schema["anyOf"].get<std::vector<json>>();
return _add_rule(rule_name, _generate_union_rule(name, alt_schemas));
} else if (schema_type.is_array()) {
}
if (schema_type.is_array()) {
std::vector<json> schema_types;
for (const auto & t : schema_type) {
json schema_copy(schema);
@ -834,15 +841,18 @@ public:
schema_types.push_back(schema_copy);
}
return _add_rule(rule_name, _generate_union_rule(name, schema_types));
} else if (schema.contains("const")) {
}
if (schema.contains("const")) {
return _add_rule(rule_name, _generate_constant_rule(schema["const"]) + " space");
} else if (schema.contains("enum")) {
}
if (schema.contains("enum")) {
std::vector<std::string> enum_values;
for (const auto & v : schema["enum"]) {
enum_values.push_back(_generate_constant_rule(v));
}
return _add_rule(rule_name, "(" + string_join(enum_values, " | ") + ") space");
} else if ((schema_type.is_null() || schema_type == "object")
}
if ((schema_type.is_null() || schema_type == "object")
&& (schema.contains("properties") ||
(schema.contains("additionalProperties") && schema["additionalProperties"] != true))) {
std::unordered_set<std::string> required;
@ -863,11 +873,12 @@ public:
_build_object_rule(
properties, required, name,
schema.contains("additionalProperties") ? schema["additionalProperties"] : json()));
} else if ((schema_type.is_null() || schema_type == "object" || schema_type == "string") && schema.contains("allOf")) {
}
if ((schema_type.is_null() || schema_type == "object" || schema_type == "string") && schema.contains("allOf")) {
std::unordered_set<std::string> required;
std::vector<std::pair<std::string, json>> properties;
std::map<std::string, size_t> enum_values;
std::string hybrid_name = name;
const std::string& hybrid_name = name;
std::function<void(const json &, bool)> add_component = [&](const json & comp_schema, bool is_required) {
if (comp_schema.contains("$ref")) {
add_component(_refs[comp_schema["$ref"]], is_required);
@ -890,9 +901,9 @@ public:
// todo warning
}
};
for (auto & t : schema["allOf"]) {
for (const auto & t : schema["allOf"]) {
if (t.contains("anyOf")) {
for (auto & tt : t["anyOf"]) {
for (const auto & tt : t["anyOf"]) {
add_component(tt, false);
}
} else {
@ -911,7 +922,8 @@ public:
}
}
return _add_rule(rule_name, _build_object_rule(properties, required, hybrid_name, json()));
} else if ((schema_type.is_null() || schema_type == "array") && (schema.contains("items") || schema.contains("prefixItems"))) {
}
if ((schema_type.is_null() || schema_type == "array") && (schema.contains("items") || schema.contains("prefixItems"))) {
json items = schema.contains("items") ? schema["items"] : schema["prefixItems"];
if (items.is_array()) {
std::string rule = "\"[\" space ";
@ -923,27 +935,31 @@ public:
}
rule += " \"]\" space";
return _add_rule(rule_name, rule);
} else {
std::string item_rule_name = visit(items, name + (name.empty() ? "" : "-") + "item");
int min_items = schema.contains("minItems") ? schema["minItems"].get<int>() : 0;
json max_items_json = schema.contains("maxItems") ? schema["maxItems"] : json();
int max_items = max_items_json.is_number_integer() ? max_items_json.get<int>() : std::numeric_limits<int>::max();
return _add_rule(rule_name, "\"[\" space " + build_repetition(item_rule_name, min_items, max_items, "\",\" space") + " \"]\" space");
}
} else if ((schema_type.is_null() || schema_type == "string") && schema.contains("pattern")) {
std::string item_rule_name = visit(items, name + (name.empty() ? "" : "-") + "item");
int min_items = schema.contains("minItems") ? schema["minItems"].get<int>() : 0;
json max_items_json = schema.contains("maxItems") ? schema["maxItems"] : json();
int max_items = max_items_json.is_number_integer() ? max_items_json.get<int>() : std::numeric_limits<int>::max();
return _add_rule(rule_name, "\"[\" space " + build_repetition(item_rule_name, min_items, max_items, "\",\" space") + " \"]\" space");
}
if ((schema_type.is_null() || schema_type == "string") && schema.contains("pattern")) {
return _visit_pattern(schema["pattern"], rule_name);
} else if ((schema_type.is_null() || schema_type == "string") && std::regex_match(schema_format, std::regex("^uuid[1-5]?$"))) {
}
if ((schema_type.is_null() || schema_type == "string") && std::regex_match(schema_format, std::regex("^uuid[1-5]?$"))) {
return _add_primitive(rule_name == "root" ? "root" : schema_format, PRIMITIVE_RULES.at("uuid"));
} else if ((schema_type.is_null() || schema_type == "string") && STRING_FORMAT_RULES.find(schema_format + "-string") != STRING_FORMAT_RULES.end()) {
}
if ((schema_type.is_null() || schema_type == "string") && STRING_FORMAT_RULES.find(schema_format + "-string") != STRING_FORMAT_RULES.end()) {
auto prim_name = schema_format + "-string";
return _add_rule(rule_name, _add_primitive(prim_name, STRING_FORMAT_RULES.at(prim_name)));
} else if (schema_type == "string" && (schema.contains("minLength") || schema.contains("maxLength"))) {
}
if (schema_type == "string" && (schema.contains("minLength") || schema.contains("maxLength"))) {
std::string char_rule = _add_primitive("char", PRIMITIVE_RULES.at("char"));
int min_len = schema.contains("minLength") ? schema["minLength"].get<int>() : 0;
int max_len = schema.contains("maxLength") ? schema["maxLength"].get<int>() : std::numeric_limits<int>::max();
return _add_rule(rule_name, "\"\\\"\" " + build_repetition(char_rule, min_len, max_len) + " \"\\\"\" space");
} else if (schema_type == "integer" && (schema.contains("minimum") || schema.contains("exclusiveMinimum") || schema.contains("maximum") || schema.contains("exclusiveMaximum"))) {
}
if (schema_type == "integer" && (schema.contains("minimum") || schema.contains("exclusiveMinimum") || schema.contains("maximum") || schema.contains("exclusiveMaximum"))) {
int64_t min_value = std::numeric_limits<int64_t>::min();
int64_t max_value = std::numeric_limits<int64_t>::max();
if (schema.contains("minimum")) {
@ -958,19 +974,19 @@ public:
}
std::stringstream out;
out << "(";
_build_min_max_int(min_value, max_value, out);
build_min_max_int(min_value, max_value, out);
out << ") space";
return _add_rule(rule_name, out.str());
} else if (schema.empty() || schema_type == "object") {
}
if (schema.empty() || schema_type == "object") {
return _add_rule(rule_name, _add_primitive("object", PRIMITIVE_RULES.at("object")));
} else {
if (!schema_type.is_string() || PRIMITIVE_RULES.find(schema_type.get<std::string>()) == PRIMITIVE_RULES.end()) {
_errors.push_back("Unrecognized schema: " + schema.dump());
return "";
}
// TODO: support minimum, maximum, exclusiveMinimum, exclusiveMaximum at least for zero
return _add_primitive(rule_name == "root" ? "root" : schema_type.get<std::string>(), PRIMITIVE_RULES.at(schema_type.get<std::string>()));
}
if (!schema_type.is_string() || PRIMITIVE_RULES.find(schema_type.get<std::string>()) == PRIMITIVE_RULES.end()) {
_errors.push_back("Unrecognized schema: " + schema.dump());
return "";
}
// TODO: support minimum, maximum, exclusiveMinimum, exclusiveMaximum at least for zero
return _add_primitive(rule_name == "root" ? "root" : schema_type.get<std::string>(), PRIMITIVE_RULES.at(schema_type.get<std::string>()));
}
void check_errors() {
@ -985,7 +1001,7 @@ public:
std::string format_grammar() {
std::stringstream ss;
for (const auto & kv : _rules) {
ss << kv.first << " ::= " << kv.second << std::endl;
ss << kv.first << " ::= " << kv.second << '\n';
}
return ss.str();
}

View File

@ -692,6 +692,7 @@ struct parser_executor {
switch (ctx.input[pos]) {
case '"':
case '\'':
case '\\':
case '/':
case 'b':
@ -768,6 +769,48 @@ struct parser_executor {
return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_NEED_MORE_INPUT, start_pos, pos);
}
common_peg_parse_result operator()(const common_peg_python_dict_string_parser & /* p */) {
    // Scans the body of a single-quoted string. The surrounding quotes are
    // matched elsewhere; on success the result ends just before the closing
    // quote, which is left unconsumed.
    auto cursor = start_pos;

    for (;;) {
        if (cursor >= ctx.input.size()) {
            // Ran out of input before seeing the closing quote: only
            // acceptable while the input is still being streamed.
            if (ctx.is_partial) {
                return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_NEED_MORE_INPUT, start_pos, cursor);
            }
            return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_FAIL, start_pos, cursor);
        }

        const char current = ctx.input[cursor];

        if (current == '\'') {
            // Unescaped closing quote terminates the content.
            return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_SUCCESS, start_pos, cursor);
        }

        if (current == '\\') {
            // Delegate backslash escapes; any non-success result is passed through as-is.
            auto escape_result = handle_escape_sequence(ctx, start_pos, cursor);
            if (!escape_result.success()) {
                return escape_result;
            }
            continue;
        }

        // Ordinary character: decode one UTF-8 code point and step over it.
        auto decoded = parse_utf8_codepoint(ctx.input, cursor);
        if (decoded.status == utf8_parse_result::INVALID) {
            return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_FAIL, start_pos);
        }
        if (decoded.status == utf8_parse_result::INCOMPLETE) {
            // A truncated multi-byte sequence is tolerated only for partial input.
            if (ctx.is_partial) {
                return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_NEED_MORE_INPUT, start_pos, cursor);
            }
            return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_FAIL, start_pos);
        }
        cursor += decoded.bytes_consumed;
    }
}
common_peg_parse_result operator()(const common_peg_until_parser & p) const {
trie matcher(p.delimiters);
@ -955,6 +998,7 @@ void common_peg_arena::resolve_refs() {
std::is_same_v<T, common_peg_until_parser> ||
std::is_same_v<T, common_peg_literal_parser> ||
std::is_same_v<T, common_peg_json_string_parser> ||
std::is_same_v<T, common_peg_python_dict_string_parser> ||
std::is_same_v<T, common_peg_chars_parser> ||
std::is_same_v<T, common_peg_any_parser> ||
std::is_same_v<T, common_peg_space_parser>) {
@ -1036,6 +1080,8 @@ std::string common_peg_arena::dump_impl(common_peg_parser_id
std::to_string(p.max_count) + ")";
} else if constexpr (std::is_same_v<T, common_peg_json_string_parser>) {
return "JsonString()";
} else if constexpr (std::is_same_v<T, common_peg_python_dict_string_parser>) {
return "PythonDictString()";
} else if constexpr (std::is_same_v<T, common_peg_until_parser>) {
return "Until(" + string_join(p.delimiters, " | ") + ")";
} else if constexpr (std::is_same_v<T, common_peg_schema_parser>) {
@ -1266,10 +1312,28 @@ common_peg_parser common_peg_parser_builder::json_number() {
}
common_peg_parser common_peg_parser_builder::json_string() {
    // Strict mode: only the standard double-quoted JSON string is accepted.
    if (!allow_python_dict_format_) {
        return rule("json-string", [this]() {
            return sequence({ literal("\""), json_string_content(), literal("\""), space() });
        });
    }

    // Python dict format enabled: a string may use either quote style.
    return rule("json-string-flex", [this]() {
        auto double_quoted = sequence({ literal("\""), json_string_content(), literal("\""), space() });
        auto single_quoted = sequence({ literal("'"), python_dict_string_content(), literal("'"), space() });
        return choice({ double_quoted, single_quoted });
    });
}
common_peg_parser common_peg_parser_builder::flexible_string() {
    // Unlike json_string(), this never consults allow_python_dict_format_:
    // both the double-quoted and the single-quoted form are always offered.
    return rule("flexible-string", [this]() {
        return choice({
            sequence({ literal("\""), json_string_content(), literal("\""), space() }),
            sequence({ literal("'"), python_dict_string_content(), literal("'"), space() }),
        });
    });
}
common_peg_parser common_peg_parser_builder::json_bool() {
    // Either of the literals "true" / "false", followed by space().
    return rule("json-bool", [this]() {
        auto keyword = choice({ literal("true"), literal("false") });
        return sequence({ keyword, space() });
    });
}
@ -1305,6 +1369,57 @@ common_peg_parser common_peg_parser_builder::json_string_content() {
return wrap(arena_.add_parser(common_peg_json_string_parser{}));
}
common_peg_parser common_peg_parser_builder::python_dict_string_content() {
    // Register a fresh single-quoted string-content parser in the arena and
    // hand back a builder-bound handle to it.
    const auto content_id = arena_.add_parser(common_peg_python_dict_string_parser{});
    return wrap(content_id);
}
common_peg_parser common_peg_parser_builder::python_dict_string() {
    // A complete single-quoted string: opening quote, content, closing quote,
    // then space().
    return rule("python-dict-string", [this]() {
        auto opening_quote = literal("'");
        auto content       = python_dict_string_content();
        auto closing_quote = literal("'");
        auto trailing      = space();
        return sequence({ opening_quote, content, closing_quote, trailing });
    });
}
// Number value inside a Python dict: no dedicated syntax is defined here,
// the JSON number parser is reused unchanged.
common_peg_parser common_peg_parser_builder::python_dict_number() {
return json_number();
}
// Boolean value inside a Python dict: reuses the JSON bool parser, i.e. the
// lowercase literals "true" / "false".
// NOTE(review): Python's own boolean literals are `True` / `False`; those are
// not accepted by this parser - confirm that JSON spellings are what is wanted.
common_peg_parser common_peg_parser_builder::python_dict_bool() {
return json_bool();
}
// Null value inside a Python dict: reuses the JSON null parser.
// NOTE(review): Python spells this `None`; presumably only the JSON spelling
// is accepted here - confirm against json_null().
common_peg_parser common_peg_parser_builder::python_dict_null() {
return json_null();
}
common_peg_parser common_peg_parser_builder::python_dict_object() {
    // object -> "{" ws ( "}" | members ws "}" )
    // members -> member ( ws "," ws member )*
    // member  -> python_dict_string ws ":" ws python_dict
    return rule("python-dict-object", [this]() {
        auto ws = space();

        // Sub-parsers are created in the same order as they appear in the grammar.
        auto key    = python_dict_string();
        auto colon  = literal(":");
        auto value  = python_dict();
        auto member = sequence({ key, ws, colon, ws, value });

        auto comma   = literal(",");
        auto tail    = zero_or_more(sequence({ ws, comma, ws, member }));
        auto members = sequence({ member, tail });

        auto open_brace   = literal("{");
        auto empty_close  = literal("}");
        auto filled_close = literal("}");
        auto filled       = sequence({ members, ws, filled_close });
        auto rest         = choice({ empty_close, filled });

        return sequence({ open_brace, ws, rest });
    });
}
common_peg_parser common_peg_parser_builder::python_dict_array() {
    // array    -> "[" ws ( "]" | elements ws "]" )
    // elements -> python_dict ( "," ws python_dict )*
    return rule("python-dict-array", [this]() {
        auto ws = space();

        // Sub-parsers are created in the same order as they appear in the grammar.
        auto first_item = python_dict();
        auto comma      = literal(",");
        auto next_item  = python_dict();
        auto more_items = zero_or_more(sequence({ comma, ws, next_item }));
        auto elements   = sequence({ first_item, more_items });

        auto open_bracket = literal("[");
        auto empty_close  = literal("]");
        auto filled_close = literal("]");
        auto filled       = sequence({ elements, ws, filled_close });
        auto rest         = choice({ empty_close, filled });

        return sequence({ open_bracket, ws, rest });
    });
}
common_peg_parser common_peg_parser_builder::python_dict() {
    // value -> object | array | string | number | bool | null
    // Alternatives are listed (and therefore built) in this exact order.
    return rule("python-dict-value", [this]() {
        std::vector<common_peg_parser> alternatives = {
            python_dict_object(),
            python_dict_array(),
            python_dict_string(),
            python_dict_number(),
            python_dict_bool(),
            python_dict_null(),
        };
        return choice(alternatives);
    });
}
common_peg_parser common_peg_parser_builder::json_member(const std::string & key, const common_peg_parser & p) {
auto ws = space();
return sequence({
@ -1435,7 +1550,8 @@ static std::unordered_set<std::string> collect_reachable_rules(const common_peg_
std::is_same_v<T, common_peg_literal_parser> ||
std::is_same_v<T, common_peg_chars_parser> ||
std::is_same_v<T, common_peg_space_parser> || std::is_same_v<T, common_peg_any_parser> ||
std::is_same_v<T, common_peg_json_string_parser>) {
std::is_same_v<T, common_peg_json_string_parser> ||
std::is_same_v<T, common_peg_python_dict_string_parser>) {
// These parsers do not have any children
} else if constexpr (std::is_same_v<T, common_peg_sequence_parser>) {
for (auto child : p.children) {
@ -1579,6 +1695,8 @@ void common_peg_arena::build_grammar(const common_grammar_builder & builder, boo
return result + "{" + std::to_string(p.min_count) + "," + std::to_string(p.max_count) + "}";
} else if constexpr (std::is_same_v<T, common_peg_json_string_parser>) {
return R"(( [^"\\] | "\\" ( ["\\/ bfnrt] | "u" [0-9a-fA-F]{4} ) )*)";
} else if constexpr (std::is_same_v<T, common_peg_python_dict_string_parser>) {
return R"(( [^'\\] | "\\" ( ['"\\/ bfnrt] | "u" [0-9a-fA-F]{4} ) )*)";
} else if constexpr (std::is_same_v<T, common_peg_until_parser>) {
if (p.delimiters.empty()) {
return ".*";
@ -1743,6 +1861,10 @@ static nlohmann::json serialize_parser_variant(const common_peg_parser_variant &
return json{
{ "type", "json_string" }
};
} else if constexpr (std::is_same_v<T, common_peg_python_dict_string_parser>) {
return json{
{ "type", "python_dict_string" }
};
} else if constexpr (std::is_same_v<T, common_peg_until_parser>) {
return json{
{ "type", "until" },
@ -1876,6 +1998,9 @@ static common_peg_parser_variant deserialize_parser_variant(const nlohmann::json
if (type == "json_string") {
return common_peg_json_string_parser{};
}
if (type == "python_dict_string") {
return common_peg_python_dict_string_parser{};
}
if (type == "until") {
if (!j.contains("delimiters") || !j["delimiters"].is_array()) {
throw std::runtime_error("until parser missing or invalid 'delimiters' field");

View File

@ -211,6 +211,7 @@ struct common_peg_chars_parser {
};
// Stateless tag type selecting the JSON string-content parser (the body of a
// double-quoted string, without the surrounding quotes).
struct common_peg_json_string_parser {};
// Stateless tag type selecting the Python-dict string-content parser (the body
// of a single-quoted string, without the surrounding quotes).
struct common_peg_python_dict_string_parser {};
struct common_peg_until_parser {
std::vector<std::string> delimiters;
@ -259,6 +260,7 @@ using common_peg_parser_variant = std::variant<
common_peg_space_parser,
common_peg_chars_parser,
common_peg_json_string_parser,
common_peg_python_dict_string_parser,
common_peg_until_parser,
common_peg_schema_parser,
common_peg_rule_parser,
@ -316,9 +318,16 @@ class common_peg_parser_builder {
common_peg_parser wrap(common_peg_parser_id id) { return common_peg_parser(id, *this); }
common_peg_parser add(const common_peg_parser_variant & p) { return wrap(arena_.add_parser(p)); }
bool allow_python_dict_format_ = false;
public:
common_peg_parser_builder();
// Enable/disable Python dict format support (single-quoted strings).
// When enabled, json_string() also accepts Python dict-style single-quoted strings.
// The flag is read when json_string() is called, so set it before building parsers.
void set_allow_python_dict_format(bool allow) { allow_python_dict_format_ = allow; }
// Returns whether Python dict format support is currently enabled.
bool get_allow_python_dict_format() const { return allow_python_dict_format_; }
// Match nothing, always succeed.
// S -> ε
@ -424,10 +433,29 @@ class common_peg_parser_builder {
// Useful for extracting content within a JSON string.
common_peg_parser json_string_content();
// Matches a string that accepts both JSON double-quoted and Python dict single-quoted styles.
// This is useful when you explicitly want to accept both formats regardless of the allow_python_dict_format flag.
common_peg_parser flexible_string();
// Matches a Python dict-style single-quoted string content without the surrounding quotes.
// Useful for extracting content within a Python dict string.
common_peg_parser python_dict_string_content();
// Matches a JSON object member with a key and associated parser as the
// value.
common_peg_parser json_member(const std::string & key, const common_peg_parser & p);
// Creates a complete Python dict format parser supporting objects, arrays, single-quoted strings,
// numbers, booleans, and null. Similar to JSON but uses single quotes for strings.
// value -> object | array | string | number | true | false | null
common_peg_parser python_dict();
// object -> "{" ( "}" | member ("," member)* "}" ), member -> python_dict_string ":" python_dict
common_peg_parser python_dict_object();
// Single-quoted string including its quotes, followed by space().
common_peg_parser python_dict_string();
// array -> "[" ( "]" | python_dict ("," python_dict)* "]" )
common_peg_parser python_dict_array();
// Delegates to json_number().
common_peg_parser python_dict_number();
// Delegates to json_bool().
common_peg_parser python_dict_bool();
// Delegates to json_null().
common_peg_parser python_dict_null();
// Wraps a parser with JSON schema metadata for grammar generation.
// Used internally to convert JSON schemas to GBNF grammar rules.
common_peg_parser schema(const common_peg_parser & p, const std::string & name, const nlohmann::ordered_json & schema, bool raw = false);

View File

@ -4,7 +4,15 @@ The auto-parser automatically analyzes chat templates to determine how to parse
## Overview
The unified auto-parser uses a two-phase incremental analysis approach:
The unified auto-parser uses a **pure differential, compositional approach** to analyze chat templates:
**Core Philosophy**:
- **Zero Hardcoded Patterns**: All markers extracted through template comparison (the **only heuristic** is JSON detection)
- **Compositional Architecture**: Separate parsers for reasoning, content, and tools that compose cleanly
- **Variant Types**: Structural descriptions (strings) instead of forced enum classification
**Two-Phase Analysis**:
1. **Phase 1: Content & Reasoning Analysis** - Analyzes how the template handles basic content and reasoning, without considering tools
2. **Phase 2: Tool Call Analysis** - Analyzes tool calling patterns, layered on top of Phase 1
@ -40,73 +48,210 @@ struct content_structure {
};
```
### tool_call_structure (Phase 2 Result)
### diff_analysis_result (Analysis Result)
Describes how the template formats tool calls:
The result of differential analysis contains all extracted markers and format classifications:
```cpp
struct tool_call_structure {
bool supports_tools = false;
struct diff_analysis_result {
// Classification results
reasoning_mode reasoning = reasoning_mode::NONE;
content_mode content = content_mode::PLAIN;
tool_format tools = tool_format::NONE;
argument_format args = argument_format::JSON;
// Container markers (what wraps all tool calls)
std::string tool_section_start; // e.g., "<tool_call>", "[TOOL_CALLS]", "<TOOLCALL>", ""
std::string tool_section_end; // e.g., "</tool_call>", "]", "</TOOLCALL>", ""
// All extracted markers (see marker_registry below)
marker_registry markers;
// Function format (how individual functions are structured)
enum function_format {
FUNC_JSON_OBJECT, // {"name": "X", "arguments": {...}}
FUNC_TAG_WITH_NAME, // <function=X>{...}</function>
FUNC_TAG_NAME_ONLY, // <X>...</X> where X is function name (rare)
FUNC_PREFIXED_INDEXED, // <|tool_call_begin|>functions.X:0<|tool_call_argument_begin|>{...}<|tool_call_end|>
FUNC_NAME_AS_KEY, // [{"function_name": {...arguments...}}] (Apertus-style)
FUNC_BRACKET_TAG, // [TOOL_CALLS]X[CALL_ID]id[ARGS]{...} (Mistral Small 3.2 style)
FUNC_RECIPIENT_BASED, // >>>recipient\n{content} where recipient is "all" (content) or function name (tools)
FUNC_MARKDOWN_CODE_BLOCK, // Action:\n```json\n[{"tool_name": "X", ...}]\n``` (Cohere Command-R Plus)
};
function_format function_format = FUNC_JSON_OBJECT;
// JSON field names (for JSON-based formats)
std::string name_field = "name";
std::string args_field = "arguments";
std::string id_field;
// For FUNC_JSON_OBJECT format - field names (may vary between templates)
std::string name_field = "name"; // Could be "tool_name", "function"
std::string args_field = "arguments"; // Could be "parameters", "params", "input"
std::string id_field; // Optional: "id", "tool_call_id", ""
// For FUNC_TAG_WITH_NAME format
std::string function_prefix; // e.g., "<function="
std::string function_suffix; // e.g., ">"
std::string function_close; // e.g., "</function>"
// For FUNC_PREFIXED_INDEXED format (e.g., Kimi-K2)
std::string per_call_start; // e.g., "<|tool_call_begin|>"
std::string function_namespace; // e.g., "functions." (prefix before function name)
std::string args_marker; // e.g., "<|tool_call_argument_begin|>"
std::string per_call_end; // e.g., "<|tool_call_end|>"
// For FUNC_BRACKET_TAG format (e.g., Mistral Small 3.2)
std::string id_marker; // e.g., "[CALL_ID]" - marker before tool call ID
// For FUNC_MARKDOWN_CODE_BLOCK format (Cohere Command-R Plus)
std::string code_block_marker; // e.g., "Action:" - text marker before code block
std::string code_block_language; // e.g., "json" - language identifier in code fence
// Argument format (how arguments are structured within a function)
enum argument_format {
ARGS_JSON, // Standard JSON object: {"key": "value", ...}
ARGS_TAGGED, // XML-style: <param=key>value</param>
ARGS_KEY_VALUE_TAGS, // <arg_key>key</arg_key><arg_value>value</arg_value> (GLM-4.6)
};
argument_format argument_format = ARGS_JSON;
// For ARGS_TAGGED format
std::string arg_prefix; // e.g., "<param=", "<parameter="
std::string arg_suffix; // e.g., ">"
std::string arg_close; // e.g., "</param>", "</parameter>"
std::string arg_separator; // e.g., "", "\n"
// Flag: template renders null content as "None" string, requires empty string instead
// Flags
bool supports_tools = false;
bool supports_parallel_calls = false;
bool requires_nonnull_content = false;
// Preserved tokens for tokenizer
std::vector<std::string> preserved_tokens;
};
```
### marker_registry (Extracted Markers)
All markers are extracted via differential analysis without hardcoded patterns:
```cpp
struct marker_registry {
// === Reasoning markers ===
std::string reasoning_start; // e.g., "<think>", "[THINK]", "<|START_THINKING|>"
std::string reasoning_end; // e.g., "</think>", "[/THINK]", "<|END_THINKING|>"
// === Content markers ===
std::string content_start; // e.g., "<response>", ">>>all\n"
std::string content_end; // e.g., "</response>"
// === Tool section markers ===
std::string tool_section_start; // e.g., "<tool_call>", "[TOOL_CALLS]"
std::string tool_section_end; // e.g., "</tool_call>", "]"
std::string per_call_start; // e.g., "\u2985" (for multi-call templates)
std::string per_call_end; // e.g., " \u2985"
std::string call_separator; // e.g., ",", "\n"
// === Function markers ===
std::string func_name_prefix; // e.g., "<function=", "\"name\": \""
std::string func_name_suffix; // e.g., ">", "\""
std::string func_close; // e.g., "</function>"
std::string args_start; // e.g., "{", " \u300b"
std::string args_end; // e.g., "}", ""
// === Argument markers (for tagged args format) ===
std::string arg_name_prefix; // e.g., "<param=", "<arg_key>"
std::string arg_name_suffix; // e.g., ">", "</arg_key>"
std::string arg_value_prefix; // e.g., "", "<arg_value>"
std::string arg_value_suffix; // e.g., "</param>", "</arg_value>"
std::string arg_separator;
// === Special markers ===
std::string code_block_marker; // e.g., "Action:" (markdown code block format)
std::string id_marker; // e.g., "[CALL_ID]" (bracket-tag format)
std::string function_namespace; // e.g., "functions." (prefixed-indexed format)
};
```
## Tool Calling Formats
The auto-parser recognizes three primary tool calling formats. Other formats may be deprecated in future versions.
### JSON_NATIVE
**Structure**: The entire tool call (function name, arguments, and values) is in JSON format. There may be enclosing tags around the tool calling section.
**Characteristics**:
- Function name is a JSON field: `"name": "function_name"`
- Arguments are a JSON object: `"arguments": {"key": "value"}`
- May be wrapped in section markers like `<tool_call>...</tool_call>` or `[TOOL_CALLS]...]`
**Examples**:
Standard OpenAI-style:
```json
<tool_call>
{"name": "get_weather", "arguments": {"location": "Paris", "unit": "celsius"}}
</tool_call>
```
Mistral Nemo with array wrapper:
```json
[TOOL_CALLS]
[{"name": "calculate", "arguments": {"expr": "2+2"}}]
```
Hermes-style with tool_calls wrapper:
```json
<tool_calls>
{"name": "search", "arguments": {"query": "llama.cpp"}}
</tool_calls>
```
**Detection**: `args_start == "{"`, `args_end == "}"`, no function name prefix markers
---
### TAG_WITH_JSON
**Structure**: The function name is outside the JSON structure, typically within quasi-XML markers. Arguments are still provided as a JSON object.
**Characteristics**:
- Function name appears in tag attributes: `<function=function_name>` or `<tool_call name="function_name">`
- Arguments are a JSON object following the tag
- Has closing tags: `</function>` or `</tool_call>`
- Arguments remain valid JSON
**Examples**:
Nemotron-style:
```xml
<TOOLCALL>get_weather{"location": "Paris"}</TOOLCALL>
```
Functionary v3.1:
```xml
<function=get_weather>{"location": "Paris", "unit": "celsius"}</function>
```
ByteDance Seed-OSS:
```xml
<seed:tool_call>
<tool_name>get_weather</tool_name>
<parameters>{"location": "Paris"}</parameters>
</seed:tool_call>
```
MiniMax:
```xml
<minimax:tool_call>
<tool_name>calculate</tool_name>
<arguments>{"expr": "2+2"}</arguments>
</minimax:tool_call>
```
**Detection**: `func_name_prefix` starts with `<`, `args_start == "{"`, arguments are JSON
---
### TAG_WITH_TAGGED
**Structure**: Both the function name AND argument names are in XML-style tags. Argument values may be JSON or unquoted primitives depending on schema type.
**Characteristics**:
- Function name in tag: `<function=name>` or `<invoke=name>`
- Each argument has its own tag: `<param=key>value</param>`
- String values are **unquoted** (raw text content of the tag)
- Non-string values (objects, arrays, numbers, booleans) are still JSON-formatted
- Supports streaming: partial arguments can be parsed incrementally
**Examples**:
Qwen/Hermes XML format:
```xml
<function=get_weather>
<param=location>Paris</param>
<param=unit>celsius</param>
</function>
```
Note how string values (`Paris`, `celsius`) are unquoted inside the tags.
Mixed types example:
```xml
<function=calculate>
<param=expr>2+2</param>
<param=precision>2</param>
<param=options>{"round": true}</param>
</function>
```
Here:
- `expr` and `precision` are strings (unquoted)
- `options` is an object (JSON-formatted inside the tag)
**Detection**: `arg_name_prefix` is non-empty, arguments use tagged format rather than JSON object
---
### Other Formats (To Be Deprecated)
The following formats are currently supported but will likely be deprecated:
| Format | Description | Example |
|--------|-------------|---------|
| `BRACKET_TAG` | Bracket-based markers | `[TOOL_CALLS]func[ARGS]{...}` |
| `PREFIXED_INDEXED` | Namespace prefix with index | `functions.name:0{...}` |
| `RECIPIENT_BASED` | Recipient routing | `>>>recipient\n{content}` |
| `MARKDOWN_BLOCK` | Markdown code blocks | `Action:\n\`\`\`json\n[...]` |
## Analysis Flow
```console
@ -129,13 +274,13 @@ Phase 2: analyze_tool_structure()
|-- Classify argument format (JSON vs tagged)
|
v
tool_call_structure
diff_analysis_result
|
v
generate_parser(content_structure, tool_call_structure)
|-- build_reasoning_block(content_structure)
|-- build_content_block(content_structure)
|-- build_tool_section(tool_call_structure, tools)
generate_parser(diff_analysis_result)
|-- build_reasoning_block(diff_analysis_result)
|-- build_content_block(diff_analysis_result)
|-- build_tool_section(diff_analysis_result, tools)
|-- Compose into final parser
|
v
@ -148,14 +293,13 @@ The mechanism starts in `common/chat.cpp`, in `common_chat_templates_apply_jinja
```cpp
// 1. Analyze the template (two-phase)
template_analysis_result analysis = template_analyzer::analyze_template(tmpl);
auto analysis = differential_analyzer::analyze(tmpl);
// 2. Generate the parser and grammar
auto auto_params = universal_peg_generator::generate_parser(analysis, tmpl, params);
auto auto_params = universal_peg_generator::generate_parser(tmpl, params);
// 3. Use if it provides more than basic content handling
if (auto_params.format != COMMON_CHAT_FORMAT_CONTENT_ONLY ||
auto_params.thinking_forced_open ||
!auto_params.parser.empty()) {
return auto_params;
}
@ -165,32 +309,32 @@ if (auto_params.format != COMMON_CHAT_FORMAT_CONTENT_ONLY ||
The unified builder (`common_chat_peg_unified_builder`) provides high-level methods:
- `build_reasoning_block(cs, reasoning_format, thinking_forced_open)` - Build reasoning parser
- `build_content_block(cs, reasoning_format)` - Build content parser
- `build_tool_section(ts, tools, parallel_tool_calls, force_tool_calls)` - Build tool section
- `build_function(ts, name, schema)` - Build single function parser
- `build_arguments(ts, schema)` - Build arguments parser
- `build_reasoning_block(analysis, reasoning_format, thinking_forced_open)` - Build reasoning parser
- `build_content_block(analysis, reasoning_format)` - Build content parser
- `build_tool_section(analysis, tools, parallel_tool_calls, force_tool_calls)` - Build tool section
- `build_function(analysis, name, schema)` - Build single function parser
- `build_arguments(analysis, schema)` - Build arguments parser
## Key Templates Supported
- **Granite** - `<think></think>` + `<response></response>` with tool calls
- **Nemotron** - JSON tools with `<TOOLCALL>` wrapper
- **Qwen/Hermes** - XML-style `<function=X><param=key>` format
- **Qwen/Hermes** - XML-style `<function=X><param=key>` format (TAG_WITH_TAGGED)
- **Command-R7B** - `<|START_THINKING|>`/`<|START_RESPONSE|>` + `<|START_ACTION|>` tools
- **DeepSeek R1** - Forced thinking + complex tools
- **Mistral Nemo** - `[TOOL_CALLS]` wrapper
- **MiniMax** - `<minimax:tool_call>` wrapper with XML tools
- **Mistral Nemo** - `[TOOL_CALLS]` wrapper (JSON_NATIVE)
- **MiniMax** - `<minimax:tool_call>` wrapper with JSON args (TAG_WITH_JSON)
- **GLM-4.6** - `<minimax:tool_call>` + `<tool_call>name\n<arg_key>...<arg_value>...` format
- **Kimi-K2** - `FUNC_PREFIXED_INDEXED` format with namespace and indices
- **Mistral Small 3.2** - `FUNC_BRACKET_TAG` format with `[TOOL_CALLS]` markers
- **Functionary v3.2** - `FUNC_RECIPIENT_BASED` format with `>>>` routing
- **Kimi-K2** - `PREFIXED_INDEXED` format with namespace and indices
- **Mistral Small 3.2** - `BRACKET_TAG` format with `[TOOL_CALLS]` markers
- **Functionary v3.2** - `RECIPIENT_BASED` format with `>>>` routing
## Files
| File | Purpose |
|------|---------|
| `common/chat-auto-parser.h` | Data structures and API declarations |
| `common/chat-auto-parser-analyzer.cpp` | Phase 1 and Phase 2 analysis implementation |
| `common/chat-diff-analyzer.h/cpp` | Differential analysis implementation |
| `common/chat-auto-parser-generator.cpp` | PEG parser generator |
| `common/chat-auto-parser-helpers.h/cpp` | Shared helper functions |
| `common/chat-peg-parser.h/cpp` | Unified builder and mapper classes |
@ -205,7 +349,7 @@ The unified builder (`common_chat_peg_unified_builder`) provides high-level meth
**Method 1: Differential Reasoning Content Analysis**
- Render template with `reasoning_content` field present vs absent
- Compare outputs to find markers between `THOUGHT_MARKER` and `CONTENT_MARKER`
- Compare outputs to find markers between reasoning and content
- If only closing tag found, derive opening tag using patterns:
- XML: `</tag>` → `<tag>`
- Special tokens: `<|END_X|>` → `<|START_X|>`, `<|/X|>` → `<|X|>`
@ -260,85 +404,121 @@ The unified builder (`common_chat_peg_unified_builder`) provides high-level meth
### Phase 2: Tool Call Structure Analysis
#### Differential Analysis Algorithm
#### Pure Differential Analysis Algorithm
**Test Payload Strategy**:
**Key Principle**: All patterns are extracted through template comparison. The **only heuristic** is detecting JSON vs marker-based structures (via JSON parse attempt). No hardcoded pattern lists.
1. **Base**: User + Assistant with content only (no tools)
2. **Tool 1**: User + Assistant with tool_calls (empty args)
3. **Tool 2**: User + Assistant with tool_calls (with args)
4. **Tool 3**: User + Assistant with multiple tool calls
**Comparison Matrix**:
**Pattern Extraction Process**:
| Comparison | Purpose | What's Extracted |
|------------|---------|------------------|
| **T1**: No tools vs tools | Tool section markers | `tool_section_start`, `tool_section_end` |
| **T2**: 1 call vs 2 calls | Call separators | `per_call_start`, `call_separator` |
| **T3**: func_alpha vs func_beta | Function boundaries | `func_name_prefix`, `func_name_suffix` |
| **T4**: 1 arg vs 2 args | Argument separator | `arg_separator` |
| **T5**: No args vs args | Args container | `args_start`, `args_end` |
| **A1**: key1 vs key2 | Arg name boundaries | `arg_name_prefix`, `arg_name_suffix` |
| **A2**: value A vs B | Arg value boundaries | `arg_value_prefix`, `arg_value_suffix` |
| **A3**: number vs string | Quoting behavior | Value type handling |
1. Compute string differences between base and tool outputs
2. Use `test_function_name` as reliable search anchor (using `rfind` for last occurrence)
3. Extract structural elements:
- `tool_call_opener`: Common prefix before function name
- `tool_call_closer`: Common suffix after function calls
- `function_opener`: Tag immediately before function name
- `function_closer`: Tag after function content
- `parameter_key_prefix/suffix`: Argument wrapping patterns
**Structural Extraction Helpers**:
#### Format Classification Logic
```cpp
// Extract last structural marker from string (finds last <, [, {, or ")
std::string extract_structural_suffix(const std::string & str);
**FORMAT_JSON_NATIVE**:
// Extract first structural marker from string (finds first >, ], }, or ")
std::string extract_structural_prefix(const std::string & str);
- Detected by `{"name":` pattern in `tool_call_opener`
- Or XML markers with JSON structure
// The only heuristic: detect if content is valid JSON
bool is_json_based(const std::string & content);
```
**FORMAT_XML_CONSTRUCTED**:
**Pattern Extraction Process** (Example - T1: Tool Section Markers):
- `function_opener` starts with `<`
- No substantial parameter markers
1. Render template with/without tool calls
2. Compute diff: `calculate_diff_split(output_no_tools, output_with_tools)`
3. Use controlled function name (`func_alpha`) as anchor in `diff.right`
4. Extract structural prefix before function name → `tool_section_start`
5. Extract structural suffix after tool content → `tool_section_end`
**FORMAT_RECIPIENT_BASED**:
**No Pattern Lists**: Unlike the old approach, there are no hardcoded lists like `["<tool_call>", "[TOOL_CALLS]", ...]`. All markers are discovered through differential comparison.
- `tool_call_start_marker == function_opener`
- No parameter markers
- Opener doesn't start with structural chars
#### Variant Detection Logic
**FORMAT_BRACKET_TAG**:
Instead of forcing patterns into enum types, the analyzer detects **variant types** as strings that describe the structural characteristics:
- `function_name_suffix` contains bracket tags like `[CALL_ID]...[ARGS]`
- `tool_call_start_marker` matches `[TOOL_CALLS]` pattern
**Variant Types**:
**FORMAT_PREFIXED_INDEXED**:
- `"json-native"`: Pure JSON tool calls (Llama, Mistral Nemo)
- `"tagged-json"`: Function name in markers, args in JSON (Functionary v3.1, Nemotron)
- `"tagged-args"`: Full XML-style with tagged arguments (Qwen, Hermes, MiniMax)
- `"bracket-tag"`: Bracket markers (Mistral Small 3.2: `[TOOL_CALLS]func[ARGS]{...}`)
- `"recipient-based"`: Recipient routing (Functionary v3.2: `>>>func_name`)
- `"markdown-block"`: Markdown code blocks (Cohere Command-R Plus)
- `"prefixed-indexed"`: Namespace prefix with indices (Kimi-K2: `functions.name:0`)
- `function_opener` ends with `.` (namespace separator)
- `function_name_suffix` starts with `:` followed by digit
- Example: `functions.name:0<|tool_call_argument_begin|>`
**Detection Strategy** (from most to least distinctive):
#### Specialized Format Handling
```cpp
void detect_tool_variant(diff_analysis_result & result) {
// 1. Check for unique markers (most distinctive)
if (!result.markers.id_marker.empty())
→ "bracket-tag"
**FUNC_PREFIXED_INDEXED (Kimi-K2)**:
if (markers contain ">>>")
→ "recipient-based"
- Splits `function_opener` at last `>` to get `per_call_start` + `function_namespace`
- Extracts `args_marker` from `function_name_suffix`
- Derives `per_call_end` by matching structural patterns in `tool_call_closer`
if (code_block_marker present)
→ "markdown-block"
**FUNC_TAG_WITH_NAME (Functionary/Nemotron)**:
if (function_namespace or suffix contains ':')
→ "prefixed-indexed"
- Detects nested vs non-nested formats
- Uses overlap detection between `tool_section_start` and `function_prefix`
- Handles double-wrapping prevention
// 2. Check argument structure (JSON variants)
if (arg_name_prefix starts with '<')
→ "tagged-args"
**ARGS_KEY_VALUE_TAGS (GLM-4.6)**:
if (func_name_prefix starts with '<')
→ "tagged-json"
- Detects `<arg_key>key</arg_key><arg_value>value</arg_value>` pattern
- Cleans up suffix to extract just the key closer
// 3. Default
→ "json-native"
}
```
**FUNC_RECIPIENT_BASED (Functionary v3.2)**:
#### Compositional Parser Building
- Detects `>>>` recipient delimiter format
- Routes to "all" for content, function name for tools
- Uses same delimiter for both content and tool routing
The analyzer builds separate, composable parsers for each component:
**FUNC_BRACKET_TAG (Mistral Small 3.2/Devstral)**:
**Reasoning Parser**:
- Detects `[TOOL_CALLS]function_name[ARGS]{...}` pattern
- Optional `[CALL_ID]id` marker for tool call identification
- No section wrapper - each call starts independently
- Built from `reasoning_start` and `reasoning_end` markers
- Supports tag-based, delimiter, and forced-open modes
**Content Parser**:
- Built from `content_start` and `content_end` markers
- Supports plain, always-wrapped, and conditionally-wrapped modes
**Tool Parser** (variant-specific):
- Built based on `variant_type` detection
- Each variant has its own builder that uses the extracted markers
- No enum forcing - structure preserved as discovered
**Final Composition**:
```cpp
sequence({
reasoning_parser,
space(),
content_parser,
space(),
tool_parser,
end()
})
```
### Generator Algorithms
@ -386,13 +566,13 @@ The test suite covers:
**Tool Call Formats**:
- JSON: Llama 3.x, Mistral Nemo, Hermes, MiMo-VL
- XML: Nemotron, Qwen3-Coder, MiniMax
- Tagged: GLM-4.6 (key-value tags)
- Bracket-tag: Mistral Small 3.2, Devstral
- Prefixed-indexed: Kimi-K2 variants
- Name-as-key: Apertus-8B
- Recipient-based: Functionary v3.2
- JSON_NATIVE: Llama 3.x, Mistral Nemo, Hermes, MiMo-VL
- TAG_WITH_JSON: Nemotron, Qwen3-Coder, MiniMax
- TAG_WITH_TAGGED: Qwen, Hermes (XML), ByteDance Seed-OSS
- BRACKET_TAG: Mistral Small 3.2, Devstral
- PREFIXED_INDEXED: Kimi-K2 variants
- RECIPIENT_BASED: Functionary v3.2
- MARKDOWN_BLOCK: Cohere Command-R Plus
**Edge Cases**:
@ -433,11 +613,11 @@ tst.test("input")
To support a new template format:
1. **If it follows standard patterns** - The auto-parser should detect it automatically
2. **If it has unique markers** - Add the markers to the detection patterns in:
- `detect_reasoning_markers()` for reasoning tags
- `detect_content_markers()` for content wrappers
- `extract_patterns_from_differences()` for tool call patterns
1. **If it follows standard patterns** - The auto-parser should detect it automatically using the three main formats (JSON_NATIVE, TAG_WITH_JSON, TAG_WITH_TAGGED)
2. **If it has unique markers** - Add differential analysis patterns in:
- `compare_reasoning_presence()` for reasoning tags
- `compare_content_values()` for content wrappers
- `extract_tool_section()` for tool call patterns
3. **If it needs special handling** - Add a dedicated handler in `chat.cpp` before the auto-parser block
## Edge Cases and Quirks
@ -458,28 +638,28 @@ The following templates have active tests in `tests/test-chat.cpp`:
| Template | Format | Notes |
|----------|--------|-------|
| DeepSeek V3.1 | `FUNC_JSON_OBJECT` | Forced thinking mode |
| DeepSeek V3.1 | `JSON_NATIVE` | Forced thinking mode |
| DeepSeek R1 Distill (Llama/Qwen) | Reasoning only | Forced-open thinking |
| llama-cpp-deepseek-r1 | Reasoning only | Forced-open thinking |
| GLM-4.6 | `ARGS_KEY_VALUE_TAGS` | `<tool_call>name\n<arg_key>...<arg_value>...` format |
| Kimi-K2 / Kimi-K2-Instruct / Kimi-K2-Thinking | `FUNC_PREFIXED_INDEXED` | `functions.name:0` with special markers |
| Apertus-8B-Instruct | `FUNC_NAME_AS_KEY` | `{"function_name": {...}}` format |
| MiniMax-M2 | `FUNC_TAG_WITH_NAME` | XML invoke with parameter tags |
| NVIDIA-Nemotron-Nano-v2 | `FUNC_JSON_OBJECT` | `<TOOLCALL>` wrapper (nested) |
| Mistral-Nemo-Instruct-2407 | `FUNC_JSON_OBJECT` | `[TOOL_CALLS]` wrapper with id field |
| Functionary v3.1 | `FUNC_TAG_WITH_NAME` | `<function=X>` non-nested format |
| Functionary v3.2 | `FUNC_RECIPIENT_BASED` | `>>>` recipient delimiter format |
| MiMo-VL / Hermes 3 / Qwen 2.5 | `FUNC_JSON_OBJECT` | `<tool_call>` wrapper |
| Apriel 1.5 | `FUNC_JSON_OBJECT` | `<tool_calls>` wrapper with JSON array |
| GLM-4.6 | `TAGGED` | `<tool_call>name\n<arg_key>...<arg_value>...` format |
| Kimi-K2 / Kimi-K2-Instruct / Kimi-K2-Thinking | `PREFIXED_INDEXED` | `functions.name:0` with special markers |
| Apertus-8B-Instruct | `NAME_AS_KEY` | `{"function_name": {...}}` format |
| MiniMax-M2 | `TAG_WITH_JSON` | XML invoke with parameter tags |
| NVIDIA-Nemotron-Nano-v2 | `JSON_NATIVE` | `<TOOLCALL>` wrapper (nested) |
| Mistral-Nemo-Instruct-2407 | `JSON_NATIVE` | `[TOOL_CALLS]` wrapper with id field |
| Functionary v3.1 | `TAG_WITH_JSON` | `<function=X>` non-nested format |
| Functionary v3.2 | `RECIPIENT_BASED` | `>>>` recipient delimiter format |
| MiMo-VL / Hermes 3 / Qwen 2.5 | `JSON_NATIVE` | `<tool_call>` wrapper |
| Apriel 1.5 | `JSON_NATIVE` | `<tool_calls>` wrapper with JSON array |
| Apriel 1.6 Thinker | Reasoning only | Implicit reasoning start |
| Cohere Command-R7B | `FUNC_JSON_OBJECT` | `START_RESPONSE/ACTION/THINKING` markers |
| Mistral Small 3.2 | `FUNC_BRACKET_TAG` | `[TOOL_CALLS]func[ARGS]{...}` with ID |
| Devstral | `FUNC_BRACKET_TAG` | `[TOOL_CALLS]func[ARGS]{...}` without ID |
| Cohere Command-R7B | `JSON_NATIVE` | START_RESPONSE/ACTION/THINKING markers |
| Mistral Small 3.2 | `BRACKET_TAG` | `[TOOL_CALLS]func[ARGS]{...}` with ID |
| Devstral | `BRACKET_TAG` | `[TOOL_CALLS]func[ARGS]{...}` without ID |
| Ministral-3-14B-Reasoning | Custom reasoning | `[THINK]...[/THINK]` tags |
| IBM Granite | `FUNC_JSON_OBJECT` | `<think></think>` + `<response></response>` |
| ByteDance Seed-OSS | `FUNC_TAG_WITH_NAME` | Custom `<seed:think>` and `<seed:tool_call>` tags |
| Qwen3-Coder | `FUNC_TAG_WITH_NAME` | XML-style tool format |
| Cohere Command-R Plus | `FUNC_MARKDOWN_CODE_BLOCK` | `Action:\n\`\`\`json\n[...]\n\`\`\`` format |
| IBM Granite | `JSON_NATIVE` | `<think></think>` + `<response></response>` |
| ByteDance Seed-OSS | `TAG_WITH_TAGGED` | Custom `<seed:think>` and `<seed:tool_call>` tags |
| Qwen3-Coder | `TAG_WITH_TAGGED` | XML-style tool format |
| Cohere Command-R Plus | `MARKDOWN_BLOCK` | `` Action:\n```json\n[...]\n``` `` format |
### Currently Unsupported Templates
@ -496,18 +676,25 @@ Some templates genuinely don't support tool calls (this is not a detection bug):
### TODO / Roadmap
- [ ] **Fix OpenAI GPT-OSS**: Add `FUNC_CHANNEL_BASED` format for channel marker structure.
- [x] **~~Fix Cohere Command-R Plus~~**: Added `FUNC_MARKDOWN_CODE_BLOCK` format for `Action:\n\`\`\`json` structure.
- [ ] **Fix OpenAI GPT-OSS**: Add handling for channel marker structure.
- [x] **~~Fix Cohere Command-R Plus~~**: Added `MARKDOWN_BLOCK` format for `` Action:\n```json `` structure.
### Recent Additions (Dec 2025 - Jan 2026)
- **FUNC_RECIPIENT_BASED**: Support for Functionary v3.2's `>>>` recipient delimiter format
- **FUNC_BRACKET_TAG**: Support for Mistral Small 3.2 and Devstral's `[TOOL_CALLS]...` format
- **RECIPIENT_BASED**: Support for Functionary v3.2's `>>>` recipient delimiter format
- **BRACKET_TAG**: Support for Mistral Small 3.2 and Devstral's `[TOOL_CALLS]...` format
- **Enhanced Content Detection**: Better handling of custom reasoning tags and content wrappers
- **Improved Streaming Support**: Better handling of partial parsing for all supported formats
- **Custom Tag Support**: Support for non-standard reasoning tags like `<seed:think>` (ByteDance)
- **Multi-line Tool Arguments**: Better parsing of complex tool arguments with code blocks
- **FUNC_MARKDOWN_CODE_BLOCK**: Support for Cohere Command-R Plus markdown code block format
- **MARKDOWN_BLOCK**: Support for Cohere Command-R Plus markdown code block format
- **Implicit Reasoning Support**: Support for templates where reasoning starts implicitly without a start marker.
- **Pure Differential Refactoring (Jan 2026)**: Complete refactoring to eliminate hardcoded patterns:
- Removed all hardcoded pattern lists (previously had `["<tool_call>", "[TOOL_CALLS]", ...]`)
- Added structural extraction helpers (`extract_structural_suffix`, `extract_structural_prefix`)
- Replaced enum-based classification with string-based variant types
- Only remaining heuristic: JSON detection via parse attempt
- All markers now discovered through differential template comparison
- **Three Primary Tool Formats**: Consolidated tool calling formats to JSON_NATIVE, TAG_WITH_JSON, and TAG_WITH_TAGGED for clarity and maintainability
The auto-parser now successfully handles 25+ different template formats across reasoning-only, tool-calling, and hybrid models, with comprehensive test coverage ensuring robust parsing across streaming and non-streaming scenarios.

View File

@ -132,7 +132,7 @@ The following instructions take precedence over instructions in the default prea
{%- elif message.role|lower == 'user' %}
<|START_OF_TURN_TOKEN|><|USER_TOKEN|>{{ message.content }}<|END_OF_TURN_TOKEN|>{%- if documents and not sent_documents.value %}{%- set sent_documents.value = true %}{% set tool_idx.value = tool_idx.value + 1 %}{{ document_turn(documents) }}{% endif %}
{%- elif message.role|lower == 'assistant' or message.role|lower == 'chatbot' %}
<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>{% if message.tool_calls %}<|START_THINKING|>{{message.tool_plan}}<|END_THINKING|><|START_ACTION|>[
<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>{% if message.tool_calls %}<|START_THINKING|>{{message.reasoning_content}}<|END_THINKING|><|START_ACTION|>[
{% for tc in message.tool_calls %}
{"tool_call_id": "{{ tool_idx.value }}", "tool_name": "{{ tc['function']['name'] }}", "parameters": {{ tc['function']['arguments']|tojson }}}{% if not loop.last %},{% endif %}

View File

@ -11,7 +11,7 @@
{%- if message['role'] == 'user' -%}
{%- set ns.is_tool = false -%}{{'<User>' + message['content']}}
{%- endif -%}
{%- if message['role'] == 'assistant' and message['content'] is none -%}
{%- if message['role'] == 'assistant' and message['tool_calls'] -%}
{%- set ns.is_tool = false -%}
{%- for tool in message['tool_calls']-%}
{%- if not ns.is_first -%}

7774
template.ans Normal file

File diff suppressed because it is too large Load Diff

View File

@ -186,6 +186,7 @@ endif()
llama_build_and_test(test-chat-peg-parser.cpp peg-parser/simple-tokenize.cpp)
llama_build_and_test(test-jinja.cpp)
llama_test(test-jinja NAME test-jinja-py ARGS -py LABEL python)
llama_build_and_test(test-chat-auto-parser.cpp)
llama_build_and_test(test-json-partial.cpp)
llama_build_and_test(test-log.cpp)
llama_build_and_test(
@ -195,6 +196,7 @@ llama_build_and_test(
peg-parser/test-gbnf-generation.cpp
peg-parser/test-json-parser.cpp
peg-parser/test-json-serialization.cpp
peg-parser/test-python-dict-parser.cpp
peg-parser/test-unicode.cpp
peg-parser/tests.h
)

View File

@ -0,0 +1,279 @@
#include "tests.h"
// Test suite for the Python-dict flavoured PEG rules: the python_dict() grammar, the
// python_dict_string_content() rule, and the allow_python_dict_format switch consulted by json().
void test_python_dict_parser(testing &t) {
    // Produces a fresh parser whose root rule is python_dict().
    const auto make_dict_parser = [] {
        return build_peg_parser([](common_peg_parser_builder & builder) { return builder.python_dict(); });
    };

    // Parses `text` with a one-argument parse context and requires a successful result that
    // ends exactly at the end of the input.
    const auto expect_full_parse = [make_dict_parser](testing & tc, std::string text) {
        auto grammar = make_dict_parser();
        common_peg_parse_context context(text);
        auto outcome = grammar.parse(context);
        tc.assert_equal("result_is_success", true, outcome.success());
        tc.assert_equal("result_end", text.size(), outcome.end);
    };

    // Parses `text` with the context's second constructor argument set to true and requires
    // the parser to report need_more_input().
    const auto expect_need_more = [make_dict_parser](testing & tc, std::string text) {
        auto grammar = make_dict_parser();
        common_peg_parse_context context(text, true);
        auto outcome = grammar.parse(context);
        tc.assert_equal("result_is_need_more_input", true, outcome.need_more_input());
    };

    // Flat object with single-quoted keys and string values
    t.test("simple Python dict object parsing", [=](testing & tc) {
        expect_full_parse(tc, "{'name': 'test', 'value': 42, 'flag': true}");
    });

    // Top-level array mixing numbers, strings and literals
    t.test("Python dict array with mixed types", [=](testing & tc) {
        expect_full_parse(tc, "[1, 'hello', true, null, 3.14]");
    });

    // Objects and arrays nested inside each other
    t.test("nested Python dict with objects and arrays", [=](testing & tc) {
        expect_full_parse(
            tc,
            "{'users': [{'id': 1, 'name': 'Alice'}, {'id': 2, 'name': 'Bob'}], 'count': 2, 'metadata': {'version': '1.0', 'tags': ['admin', 'user']}}");
    });

    // Backslash-escaped single quote inside a single-quoted string
    t.test("Python dict with escaped single quotes", [=](testing & tc) {
        expect_full_parse(tc, "{'message': 'It\\'s working!'}");
    });

    // Unescaped double quotes inside a single-quoted string
    t.test("Python dict with double quotes inside single quotes", [=](testing & tc) {
        expect_full_parse(tc, "{'quote': 'He said \"Hello\"'}");
    });

    // The reference example from the requirements
    t.test("complex Python dict example from requirements", [=](testing & tc) {
        expect_full_parse(tc, "{ 'obj' : { 'something': 1, 'other \"something\"' : 'foo\\'s bar' } }");
    });

    // Input that stops right after a key's colon
    t.test("need_more_input() parsing - incomplete object", [=](testing & tc) {
        expect_need_more(tc, "{'name': 'test', 'value': ");
    });

    // Input that stops in the middle of a single-quoted string
    t.test("need_more_input() parsing - incomplete single-quoted string", [=](testing & tc) {
        expect_need_more(tc, "{'name': 'test");
    });

    // Raw multi-byte characters inside a string
    t.test("unicode in Python dict strings", [=](testing & tc) {
        expect_full_parse(tc, "{'message': 'Hello, 世界!'}");
    });

    // \uXXXX escape inside a string
    t.test("Python dict with unicode escapes", [=](testing & tc) {
        expect_full_parse(tc, "{'unicode': 'Hello\\u0041'}");
    });

    // Plain JSON (double-quoted) input must be rejected by python_dict()
    t.test("JSON double-quoted strings fail with Python dict parser", [=](testing & tc) {
        auto grammar = make_dict_parser();
        std::string text = "{\"name\": \"test\"}";
        common_peg_parse_context context(text);
        auto outcome = grammar.parse(context);
        tc.assert_equal("result_is_fail", true, outcome.fail());
    });

    // python_dict_string_content() exercised directly between two quote literals
    t.test("python dict string content parser", [](testing & tc) {
        auto grammar = build_peg_parser([](common_peg_parser_builder & builder) {
            return builder.sequence(
                { builder.literal("'"), builder.python_dict_string_content(), builder.literal("'"), builder.space() });
        });

        // Shared check: success and the whole input consumed.
        const auto expect_success = [&grammar](testing & sub, std::string text) {
            common_peg_parse_context context(text);
            auto outcome = grammar.parse(context);
            sub.assert_true("success", outcome.success());
            sub.assert_equal("end", text.size(), outcome.end);
        };

        tc.test("simple string", [&](testing & sub) { expect_success(sub, "'hello'"); });

        tc.test("string with escaped single quote", [&](testing & sub) { expect_success(sub, "'it\\'s'"); });

        tc.test("string with double quotes", [&](testing & sub) { expect_success(sub, "'say \"hello\"'"); });

        tc.test("incomplete string", [&](testing & sub) {
            std::string text = "'hello";
            common_peg_parse_context context(text, true);
            auto outcome = grammar.parse(context);
            sub.assert_true("need_more_input", outcome.need_more_input());
        });
    });

    // Getter/setter round trips for allow_python_dict_format on the builder
    t.test("allow_python_dict_format flag", [](testing & tc) {
        tc.test("flag is false by default", [&](testing & sub) {
            common_peg_parser_builder fresh;
            sub.assert_equal("default_value", false, fresh.get_allow_python_dict_format());
        });

        tc.test("flag can be set to true", [&](testing & sub) {
            common_peg_parser_builder fresh;
            fresh.set_allow_python_dict_format(true);
            sub.assert_equal("after_set", true, fresh.get_allow_python_dict_format());
        });

        tc.test("flag can be set back to false", [&](testing & sub) {
            common_peg_parser_builder fresh;
            fresh.set_allow_python_dict_format(true);
            fresh.set_allow_python_dict_format(false);
            sub.assert_equal("after_reset", false, fresh.get_allow_python_dict_format());
        });
    });

    // The flag must change what json() accepts
    t.test("json() parser with allow_python_dict_format flag", [](testing & tc) {
        // json() parser built with the flag switched on.
        const auto make_lenient_json = [] {
            return build_peg_parser([](common_peg_parser_builder & builder) {
                builder.set_allow_python_dict_format(true);
                return builder.json();
            });
        };

        // Shared check for the flag-on cases: success and the whole input consumed.
        const auto expect_lenient_ok = [&](testing & sub, std::string text) {
            auto grammar = make_lenient_json();
            common_peg_parse_context context(text);
            auto outcome = grammar.parse(context);
            sub.assert_true("success", outcome.success());
            sub.assert_equal("end", text.size(), outcome.end);
        };

        tc.test("json() rejects single quotes when flag is false", [&](testing & sub) {
            auto grammar = build_peg_parser([](common_peg_parser_builder & builder) {
                builder.set_allow_python_dict_format(false);
                return builder.json();
            });
            std::string text = "{'name': 'test'}";
            common_peg_parse_context context(text);
            auto outcome = grammar.parse(context);
            sub.assert_true("fail", outcome.fail());
        });

        tc.test("json() accepts single quotes when flag is true", [&](testing & sub) {
            expect_lenient_ok(sub, "{'name': 'test'}");
        });

        tc.test("json() still accepts double quotes when flag is true", [&](testing & sub) {
            expect_lenient_ok(sub, "{\"name\": \"test\"}");
        });

        tc.test("json() accepts mixed quote styles when flag is true", [&](testing & sub) {
            expect_lenient_ok(sub, "{\"name\": 'test', 'value': \"hello\"}");
        });

        tc.test("complex nested structure with flag true", [&](testing & sub) {
            expect_lenient_ok(sub, "{ 'obj' : { 'something': 1, 'other \"something\"' : 'foo\\'s bar' } }");
        });
    });
}

View File

@ -22,3 +22,4 @@ void test_json_parser(testing &t);
void test_gbnf_generation(testing &t);
void test_unicode(testing &t);
void test_json_serialization(testing &t);
void test_python_dict_parser(testing &t);

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -20,6 +20,7 @@ int main(int argc, char *argv[]) {
t.test("json", test_json_parser);
t.test("gbnf", test_gbnf_generation);
t.test("serialization", test_json_serialization);
t.test("python-dict", test_python_dict_parser);
return t.summary();
}

View File

@ -6,3 +6,12 @@ target_compile_features(${TARGET} PRIVATE cxx_std_17)
if(LLAMA_TOOLS_INSTALL)
install(TARGETS ${TARGET} RUNTIME)
endif()
# llama-template-analysis: executable built from template-analysis.cpp.
set(TARGET llama-template-analysis)
add_executable(${TARGET} template-analysis.cpp)
# Link privately against the common and llama libraries plus the thread libraries.
target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
# The sources require C++17.
target_compile_features(${TARGET} PRIVATE cxx_std_17)
# Install the binary only when LLAMA_TOOLS_INSTALL is set.
if(LLAMA_TOOLS_INSTALL)
install(TARGETS ${TARGET} RUNTIME)
endif()

View File

@ -1,15 +1,19 @@
#include "../src/llama-grammar.h"
#include "chat-auto-parser.h"
#include "chat-diff-analyzer.h"
#include "chat.h"
#include "common.h"
#include "gguf.h"
#include "jinja/runtime.h"
#include "log.h"
#include <fstream>
#include <numeric>
#include <sstream>
#include <string>
#include "nlohmann/json.hpp"
#include "peg-parser.h"
using json = nlohmann::ordered_json;
@ -239,7 +243,7 @@ static json build_tools_definition() {
{ "type", "string" },
{ "description", "Second parameter" }
});
parameters_schema["required"] = json::array({ "param1", "param2" });
parameters_schema["required"] = json::array({ "param1" });
return json::array({
json{ { "type", "function" },
@ -324,68 +328,21 @@ static void render_all_scenarios(const common_chat_template & tmpl,
}
}
// Returns the display label for a reasoning mode; any value other than the three
// handled enumerators yields "UNKNOWN".
static const char * reasoning_mode_to_str(content_structure::reasoning_mode_type mode) {
    if (mode == content_structure::REASONING_NONE) {
        return "NONE";
    }
    if (mode == content_structure::REASONING_OPTIONAL) {
        return "OPTIONAL";
    }
    if (mode == content_structure::REASONING_FORCED_OPEN) {
        return "FORCED_OPEN";
    }
    return "UNKNOWN";
}
// Returns the display label for a content mode; any value other than the three
// handled enumerators yields "UNKNOWN".
static const char * content_mode_to_str(content_structure::content_mode_type mode) {
    if (mode == content_structure::CONTENT_PLAIN) {
        return "PLAIN";
    }
    if (mode == content_structure::CONTENT_ALWAYS_WRAPPED) {
        return "ALWAYS_WRAPPED";
    }
    if (mode == content_structure::CONTENT_WRAPPED_WITH_REASONING) {
        return "WRAPPED_WITH_REASONING";
    }
    return "UNKNOWN";
}
// Returns the display label for a tool-call function format; any value other than the
// eight handled enumerators yields "UNKNOWN".
static const char * function_format_to_str(enum tool_call_structure::function_format fmt) {
    if (fmt == tool_call_structure::FUNC_JSON_OBJECT) {
        return "JSON_OBJECT";
    }
    if (fmt == tool_call_structure::FUNC_TAG_WITH_NAME) {
        return "TAG_WITH_NAME";
    }
    if (fmt == tool_call_structure::FUNC_TAG_NAME_ONLY) {
        return "TAG_NAME_ONLY";
    }
    if (fmt == tool_call_structure::FUNC_PREFIXED_INDEXED) {
        return "PREFIXED_INDEXED";
    }
    if (fmt == tool_call_structure::FUNC_NAME_AS_KEY) {
        return "NAME_AS_KEY";
    }
    if (fmt == tool_call_structure::FUNC_BRACKET_TAG) {
        return "BRACKET_TAG";
    }
    if (fmt == tool_call_structure::FUNC_RECIPIENT_BASED) {
        return "RECIPIENT_BASED";
    }
    if (fmt == tool_call_structure::FUNC_MARKDOWN_CODE_BLOCK) {
        return "MARKDOWN_CODE_BLOCK";
    }
    return "UNKNOWN";
}
static const char * argument_format_to_str(enum tool_call_structure::argument_format fmt) {
switch (fmt) {
case tool_call_structure::ARGS_JSON:
return "JSON";
case tool_call_structure::ARGS_TAGGED:
return "TAGGED";
case tool_call_structure::ARGS_KEY_VALUE_TAGS:
return "KEY_VALUE_TAGS";
}
return "UNKNOWN";
// Renders any value that can be streamed with operator<< as a std::string.
template <typename T>
static std::string mode_to_str(T mode) {
    std::ostringstream rendered;
    rendered << mode;
    std::string text = rendered.str();
    return text;
}
int main(int argc, char ** argv) {
// Set log level to most verbose to capture all debug output
common_log_set_verbosity_thold(99);
if (std::getenv("LLAMA_DEBUG_JINJA") != nullptr) {
jinja::enable_debug(true);
}
debug_options opts;
if (!parse_options(argc, argv, opts)) {
return 1;
@ -434,48 +391,7 @@ int main(int argc, char ** argv) {
LOG_ERR(" TEMPLATE ANALYSIS\n");
LOG_ERR("================================================================================\n");
template_analysis_result analysis = template_analyzer::analyze_template(chat_template);
LOG_ERR("\n=== Analysis Results ===\n");
LOG_ERR("\n--- Content Structure (Phase 1) ---\n");
LOG_ERR("reasoning_mode: %s\n", reasoning_mode_to_str(analysis.content.reasoning_mode));
LOG_ERR("reasoning_start: '%s'\n", analysis.content.reasoning_start.c_str());
LOG_ERR("reasoning_end: '%s'\n", analysis.content.reasoning_end.c_str());
LOG_ERR("content_mode: %s\n", content_mode_to_str(analysis.content.content_mode));
LOG_ERR("content_start: '%s'\n", analysis.content.content_start.c_str());
LOG_ERR("content_end: '%s'\n", analysis.content.content_end.c_str());
LOG_ERR("\n--- Tool Structure (Phase 2) ---\n");
LOG_ERR("supports_tools: %s\n", analysis.tools.supports_tools ? "true" : "false");
LOG_ERR("function_format: %s\n", function_format_to_str(analysis.tools.function_format));
LOG_ERR("argument_format: %s\n", argument_format_to_str(analysis.tools.argument_format));
LOG_ERR("tool_section_start: '%s'\n", analysis.tools.tool_section_start.c_str());
LOG_ERR("tool_section_end: '%s'\n", analysis.tools.tool_section_end.c_str());
LOG_ERR("function_prefix: '%s'\n", analysis.tools.function_prefix.c_str());
LOG_ERR("function_suffix: '%s'\n", analysis.tools.function_suffix.c_str());
LOG_ERR("function_close: '%s'\n", analysis.tools.function_close.c_str());
LOG_ERR("arg_prefix: '%s'\n", analysis.tools.arg_prefix.c_str());
LOG_ERR("arg_suffix: '%s'\n", analysis.tools.arg_suffix.c_str());
LOG_ERR("arg_close: '%s'\n", analysis.tools.arg_close.c_str());
LOG_ERR("name_field: '%s'\n", analysis.tools.name_field.c_str());
LOG_ERR("args_field: '%s'\n", analysis.tools.args_field.c_str());
LOG_ERR("id_field: '%s'\n", analysis.tools.id_field.c_str());
// Additional fields for special formats
if (analysis.tools.function_format == tool_call_structure::FUNC_PREFIXED_INDEXED) {
LOG_ERR("\n--- Prefixed-Indexed Format Details ---\n");
LOG_ERR("per_call_start: '%s'\n", analysis.tools.per_call_start.c_str());
LOG_ERR("function_namespace: '%s'\n", analysis.tools.function_namespace.c_str());
LOG_ERR("args_marker: '%s'\n", analysis.tools.args_marker.c_str());
LOG_ERR("per_call_end: '%s'\n", analysis.tools.per_call_end.c_str());
}
if (analysis.tools.function_format == tool_call_structure::FUNC_BRACKET_TAG) {
LOG_ERR("\n--- Bracket-Tag Format Details ---\n");
LOG_ERR("per_call_start: '%s'\n", analysis.tools.per_call_start.c_str());
LOG_ERR("id_marker: '%s'\n", analysis.tools.id_marker.c_str());
LOG_ERR("args_marker: '%s'\n", analysis.tools.args_marker.c_str());
}
diff_analysis_result analysis = differential_analyzer::analyze(chat_template);
// Generate Parser
templates_params params;
@ -494,10 +410,45 @@ int main(int argc, char ** argv) {
}
params.parallel_tool_calls = false;
auto parser_data = universal_peg_generator::generate_parser(analysis, chat_template, params);
auto parser_data = universal_peg_generator::generate_parser(chat_template, params, analysis);
LOG_ERR("\n=== Differential Analysis Results ===\n");
LOG_ERR("\n--- Reasoning & Content Structure ---\n");
LOG_ERR("reasoning_mode: %s\n", mode_to_str(analysis.reasoning).c_str());
LOG_ERR("reasoning_start: '%s'\n", analysis.markers.reasoning_start.c_str());
LOG_ERR("reasoning_end: '%s'\n", analysis.markers.reasoning_end.c_str());
LOG_ERR("content_mode: %s\n", mode_to_str(analysis.content).c_str());
LOG_ERR("content_start: '%s'\n", analysis.markers.content_start.c_str());
LOG_ERR("content_end: '%s'\n", analysis.markers.content_end.c_str());
LOG_ERR("\n--- Tool Call Structure ---\n");
LOG_ERR("tool_mode: %s\n", mode_to_str(analysis.tools).c_str());
LOG_ERR("supports_tools: %s\n", analysis.supports_tools ? "true" : "false");
LOG_ERR("supports_parallel_calls: %s\n", analysis.supports_parallel_calls ? "true" : "false");
LOG_ERR("tool_section_start: '%s'\n", analysis.markers.tool_section_start.c_str());
LOG_ERR("tool_section_end: '%s'\n", analysis.markers.tool_section_end.c_str());
LOG_ERR("per_call_start: '%s'\n", analysis.markers.per_call_start.c_str());
LOG_ERR("per_call_end: '%s'\n", analysis.markers.per_call_end.c_str());
LOG_ERR("func_name_prefix: '%s'\n", analysis.markers.func_name_prefix.c_str());
LOG_ERR("func_name_suffix: '%s'\n", analysis.markers.func_name_suffix.c_str());
LOG_ERR("func_close: '%s'\n", analysis.markers.func_close.c_str());
LOG_ERR("arg_name_prefix: '%s'\n", analysis.markers.arg_name_prefix.c_str());
LOG_ERR("arg_name_suffix: '%s'\n", analysis.markers.arg_name_suffix.c_str());
LOG_ERR("arg_value_prefix: '%s'\n", analysis.markers.arg_value_prefix.c_str());
LOG_ERR("arg_value_suffix: '%s'\n", analysis.markers.arg_value_suffix.c_str());
LOG_ERR("name_field: '%s'\n", analysis.name_field.c_str());
LOG_ERR("args_field: '%s'\n", analysis.args_field.c_str());
LOG_ERR("id_field: '%s'\n", analysis.id_field.c_str());
LOG_ERR("gen_id_field: '%s'\n", analysis.gen_id_field.c_str());
LOG_ERR("parameter_order: '%s'\n", std::accumulate(analysis.parameter_order.begin(), analysis.parameter_order.end(),
std::string(""), [] (const std::string & a, const std::string & b) { return a.empty() ? b : a + ", " + b; }
).c_str());
LOG_ERR("\n=== Generated Parser ===\n");
LOG_ERR("%s\n", json::parse(parser_data.parser).dump(4).c_str());
common_peg_arena arena;
arena.load(parser_data.parser);
LOG_ERR("%s\n", arena.dump(arena.root()).c_str());
LOG_ERR("\n=== Generated Grammar ===\n");
LOG_ERR("%s\n", parser_data.grammar.c_str());

View File

@ -0,0 +1,610 @@
#include "chat-auto-parser.h"
#include "chat-auto-parser-helpers.h"
#include "chat.h"
#include "log.h"
#include "jinja/caps.h"
#include "jinja/runtime.h"
#include <algorithm>
#include <cctype>
#include <fstream>
#include <sstream>
#include <string>
#include <vector>
#include "nlohmann/json.hpp"
using json = nlohmann::ordered_json;
// ANSI color codes - using 256-color palette for brighter colors (all bold)
// Every color macro is the bold attribute ("\033[1m") followed by a 256-color
// foreground selection ("\x1b[38;5;<n>m"); ANSI_RESET clears both again.
// The macros are string literals meant to be passed as "%s" arguments to the
// logging calls in this file.
#define ANSI_RESET "\033[0m"
#define ANSI_PURPLE "\033[1m\x1b[38;5;126m" // Bold bright purple for main headers
#define ANSI_CYAN "\033[1m\x1b[38;5;81m" // Bold bright cyan for section headers
#define ANSI_BLUE "\033[1m\x1b[38;5;12m" // Bold bright blue for labels
#define ANSI_ORANGE "\033[1m\x1b[38;5;209m" // Bold orange for right differences
#define ANSI_GREEN "\033[1m\x1b[38;5;83m" // Bold bright green for left differences
#define ANSI_GRAY "\033[1m\x1b[38;5;240m" // Bold gray (used for "no variables" message)
#define ANSI_BOLD "\033[1m" // Standalone bold (NOTE(review): not referenced by the code visible here)
#define ANSI_PREFIX "\033[1m\x1b[38;5;176m" // Bold color for common prefix
#define ANSI_SUFFIX "\033[1m\x1b[38;5;61m" // Bold color for common suffix
// All template paths extracted from tests/test-chat.cpp
// This is the list analyzed by --all and searched (case-insensitive substring
// match) by --template. The entries are relative paths and are opened as-is,
// so they resolve against the working directory of the process.
static const std::vector<std::string> ALL_TEMPLATE_PATHS = {
"models/templates/Apertus-8B-Instruct.jinja",
"models/templates/Apriel-1.6-15b-Thinker-fixed.jinja",
"models/templates/ByteDance-Seed-OSS.jinja",
"models/templates/CohereForAI-c4ai-command-r-plus-tool_use.jinja",
"models/templates/CohereForAI-c4ai-command-r7b-12-2024-tool_use.jinja",
"models/templates/GLM-4.6.jinja",
"models/templates/GLM-4.7-Flash.jinja",
"models/templates/Kimi-K2-Instruct.jinja",
"models/templates/Kimi-K2-Thinking.jinja",
"models/templates/MiMo-VL.jinja",
"models/templates/MiniMax-M2.jinja",
"models/templates/Mistral-Small-3.2-24B-Instruct-2506.jinja",
"models/templates/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16.jinja",
"models/templates/NVIDIA-Nemotron-Nano-v2.jinja",
"models/templates/NousResearch-Hermes-2-Pro-Llama-3-8B-tool_use.jinja",
"models/templates/NousResearch-Hermes-3-Llama-3.1-8B-tool_use.jinja",
"models/templates/Qwen-QwQ-32B.jinja",
"models/templates/Qwen-Qwen2.5-7B-Instruct.jinja",
"models/templates/Qwen3-Coder.jinja",
"models/templates/deepseek-ai-DeepSeek-R1-Distill-Llama-8B.jinja",
"models/templates/deepseek-ai-DeepSeek-R1-Distill-Qwen-32B.jinja",
"models/templates/deepseek-ai-DeepSeek-V3.1.jinja",
"models/templates/fireworks-ai-llama-3-firefunction-v2.jinja",
"models/templates/google-gemma-2-2b-it.jinja",
"models/templates/ibm-granite-granite-3.3-2B-Instruct.jinja",
"models/templates/llama-cpp-deepseek-r1.jinja",
"models/templates/meetkai-functionary-medium-v3.1.jinja",
"models/templates/meetkai-functionary-medium-v3.2.jinja",
"models/templates/meta-llama-Llama-3.1-8B-Instruct.jinja",
"models/templates/meta-llama-Llama-3.2-3B-Instruct.jinja",
"models/templates/meta-llama-Llama-3.3-70B-Instruct.jinja",
"models/templates/mistralai-Ministral-3-14B-Reasoning-2512.jinja",
"models/templates/mistralai-Mistral-Nemo-Instruct-2407.jinja",
"models/templates/moonshotai-Kimi-K2.jinja",
"models/templates/openai-gpt-oss-120b.jinja",
"models/templates/unsloth-Apriel-1.5.jinja",
"models/templates/unsloth-mistral-Devstral-Small-2507.jinja",
};
// Command-line selection of what to analyze, filled in by parse_options().
struct analysis_options {
    // Template files to analyze, in the order they will be processed.
    std::vector<std::string> template_paths{};
    // Set by --all: analyze the whole built-in template list.
    bool analyze_all{ false };
};
// Returns the complete contents of the file at `path`, read in binary mode.
// Throws std::runtime_error ("Could not open file: <path>") when the file
// cannot be opened.
static std::string read_file(const std::string & path) {
    std::ifstream input(path, std::ios::binary);
    if (input.is_open()) {
        // Drain the whole stream buffer into memory in one go.
        std::ostringstream contents;
        contents << input.rdbuf();
        return contents.str();
    }
    throw std::runtime_error("Could not open file: " + path);
}
// Prints the command-line help text through LOG_ERR.
// `program_name` is inserted into the usage line and into every example.
static void print_usage(const char * program_name) {
    // One entry per supported option, already formatted as a full output line.
    static const char * const option_lines[] = {
        " --template <name> Analyze specific template from test suite (e.g., 'deepseek' or 'DeepSeek-V3.1')\n",
        " --template-file <path> Analyze custom template file\n",
        " --all Analyze all templates from test suite\n",
    };
    // Argument strings shown after the program name in the examples section.
    static const char * const example_args[] = {
        "--all",
        "--template deepseek",
        "--template-file my-template.jinja",
    };

    LOG_ERR("Usage: %s [options]\n", program_name);

    LOG_ERR("\nOptions:\n");
    for (const char * line : option_lines) {
        LOG_ERR("%s", line);
    }

    LOG_ERR("\nExamples:\n");
    for (const char * args : example_args) {
        LOG_ERR(" %s %s\n", program_name, args);
    }
}
// Parses the command line into `opts`.
//
// Recognized options:
//   --all                   analyze every entry of ALL_TEMPLATE_PATHS
//   --template <name>       add every entry of ALL_TEMPLATE_PATHS whose path
//                           contains <name> (case-insensitive substring match)
//   --template-file <path>  add an arbitrary template file
//
// Options may be repeated and combined; when --all is present the final list
// is replaced by ALL_TEMPLATE_PATHS, discarding anything added explicitly.
//
// Returns true when at least one template was selected. Returns false (after
// logging the reason, and the usage text where appropriate) when there are no
// arguments, an option is unknown or lacks its value, a --template pattern
// matches nothing, or no template ends up selected.
static bool parse_options(int argc, char ** argv, analysis_options & opts) {
    if (argc < 2) {
        print_usage(argv[0]);
        return false;
    }

    // std::tolower has undefined behaviour for negative values other than EOF,
    // so every character is routed through unsigned char first. This matters
    // for user-supplied patterns that contain non-ASCII bytes.
    const auto to_lower = [](std::string text) {
        std::transform(text.begin(), text.end(), text.begin(),
                       [](unsigned char c) { return static_cast<char>(std::tolower(c)); });
        return text;
    };

    for (int i = 1; i < argc; ++i) {
        const std::string arg = argv[i];

        if (arg == "--all") {
            opts.analyze_all = true;
        } else if (arg == "--template") {
            if (i + 1 >= argc) {
                LOG_ERR("--template requires an argument\n");
                return false;
            }
            const std::string pattern = to_lower(argv[++i]);

            // Find matching templates
            bool found = false;
            for (const auto & path : ALL_TEMPLATE_PATHS) {
                if (to_lower(path).find(pattern) != std::string::npos) {
                    opts.template_paths.push_back(path);
                    found = true;
                }
            }
            if (!found) {
                LOG_ERR("No templates found matching: %s\n", pattern.c_str());
                return false;
            }
        } else if (arg == "--template-file") {
            if (i + 1 >= argc) {
                LOG_ERR("--template-file requires an argument\n");
                return false;
            }
            opts.template_paths.push_back(argv[++i]);
        } else {
            LOG_ERR("Unknown option: %s\n", arg.c_str());
            print_usage(argv[0]);
            return false;
        }
    }

    // --all wins over any individually selected templates.
    if (opts.analyze_all) {
        opts.template_paths = ALL_TEMPLATE_PATHS;
    }

    if (opts.template_paths.empty()) {
        LOG_ERR("No templates specified\n");
        print_usage(argv[0]);
        return false;
    }
    return true;
}
// Builds the "tools" array used when rendering templates: a single function
// tool named "test_function_name" that takes two required string parameters,
// "param1" and "param2".
static json build_tools_definition() {
    json first_param = json::object();
    first_param["type"]        = "string";
    first_param["description"] = "First parameter";

    json second_param = json::object();
    second_param["type"]        = "string";
    second_param["description"] = "Second parameter";

    json properties = json::object();
    properties["param1"] = first_param;
    properties["param2"] = second_param;

    json required = json::array();
    required.push_back("param1");
    required.push_back("param2");

    json schema = json::object();
    schema["type"]       = "object";
    schema["properties"] = properties;
    schema["required"]   = required;

    json function_def = json::object();
    function_def["name"]        = "test_function_name";
    function_def["description"] = "A test function for debugging";
    function_def["parameters"]  = schema;

    json tool = json::object();
    tool["type"]     = "function";
    tool["function"] = function_def;

    json tool_list = json::array();
    tool_list.push_back(tool);
    return tool_list;
}
// Creates one tool-call entry of type "function".
//   name        - function name stored under function.name
//   args_object - stored under function.arguments as a JSON value, not as a
//                 serialized string
//   id          - call identifier; defaults to "call_001"
static json build_tool_call(const std::string & name, const json & args_object, const std::string & id = "call_001") {
    json function_part = json::object();
    function_part["name"]      = name;
    function_part["arguments"] = args_object;

    json call = json::object();
    call["id"]       = id;
    call["type"]     = "function";
    call["function"] = function_part;
    return call;
}
// Message factories: each returns one chat message used by the template probes.

// Opening user message.
static json make_user_msg() {
    json message = json::object();
    message["role"]    = "user";
    message["content"] = "Hello, please help me.";
    return message;
}
// Follow-up user message with the content "Thank you.".
static json make_user_msg2() {
    json message = json::object();
    message["role"]    = "user";
    message["content"] = "Thank you.";
    return message;
}
// Follow-up user message with the content "Continue.".
static json make_user_msg2_continue() {
    json message = json::object();
    message["role"]    = "user";
    message["content"] = "Continue.";
    return message;
}
// Assistant message with plain text content and no tool calls.
static json make_assistant_no_tool() {
    json message = json::object();
    message["role"]    = "assistant";
    message["content"] = "Let me help you.";
    return message;
}
// Assistant message with null content and a single call to
// "test_function_name" (param1 = "value1", param2 = "value2").
static json make_assistant_one_tool() {
    json arguments = json::object();
    arguments["param1"] = "value1";
    arguments["param2"] = "value2";

    json calls = json::array();
    calls.push_back(build_tool_call("test_function_name", arguments));

    json message = json::object();
    message["role"]       = "assistant";
    message["content"]    = nullptr;
    message["tool_calls"] = calls;
    return message;
}
// Assistant message with null content and two calls to "test_function_name":
// the first with the default call id, the second with id "call_002".
static json make_assistant_two_tools() {
    json first_arguments = json::object();
    first_arguments["param1"] = "value1";
    first_arguments["param2"] = "value2";

    json second_arguments = json::object();
    second_arguments["param1"] = "value3";
    second_arguments["param2"] = "value4";

    json calls = json::array();
    calls.push_back(build_tool_call("test_function_name", first_arguments));
    calls.push_back(build_tool_call("test_function_name", second_arguments, "call_002"));

    json message = json::object();
    message["role"]       = "assistant";
    message["content"]    = nullptr;
    message["tool_calls"] = calls;
    return message;
}
// Assistant message with text content and no reasoning_content field.
static json make_assistant_no_reasoning() {
    json message = json::object();
    message["role"]    = "assistant";
    message["content"] = "I can help you with that.";
    return message;
}
// Same assistant message as make_assistant_no_reasoning(), plus a
// reasoning_content field.
static json make_assistant_with_reasoning() {
    json message = json::object();
    message["role"]              = "assistant";
    message["content"]           = "I can help you with that.";
    message["reasoning_content"] = "The user is asking for help. I should respond positively.";
    return message;
}
// Assistant message with null content, a single call to "test_function_name"
// and a reasoning_content field.
static json make_assistant_one_tool_with_reasoning() {
    json arguments = json::object();
    arguments["param1"] = "value1";
    arguments["param2"] = "value2";

    json calls = json::array();
    calls.push_back(build_tool_call("test_function_name", arguments));

    json message = json::object();
    message["role"]              = "assistant";
    message["content"]           = nullptr;
    message["tool_calls"]        = calls;
    message["reasoning_content"] = "I need to call the tool first.";
    return message;
}
// Logs the four parts of `diff` (common prefix, common suffix, left-only and
// right-only text) under a colored heading built from `title`.
static void print_diff_split(const std::string & title, const diff_split & diff) {
    struct labelled_part {
        const char * color;
        const char * label;
        const char * text;
    };

    // Printed in this order, one log line per part.
    const labelled_part parts[] = {
        { ANSI_PREFIX, "Common Prefix:",      diff.prefix.c_str() },
        { ANSI_SUFFIX, "Common Suffix:",      diff.suffix.c_str() },
        { ANSI_GREEN,  "Left (difference):",  diff.left.c_str()   },
        { ANSI_ORANGE, "Right (difference):", diff.right.c_str()  },
    };

    LOG_ERR("\n%s=== %s ===%s\n", ANSI_CYAN, title.c_str(), ANSI_RESET);
    for (const labelled_part & part : parts) {
        LOG_ERR("%s%s%s '%s'\n", part.color, part.label, ANSI_RESET, part.text);
    }
}
static void check_reasoning_variables(const common_chat_template & tmpl) {
LOG_ERR("\n%s=== Checking Reasoning Variables ===%s\n", ANSI_CYAN, ANSI_RESET);
try {
// Create a list of candidate reasoning/thinking variable names to probe
std::vector<std::string> candidate_vars = {
"enable_reasoning",
"use_reasoning",
"reasoning_enabled",
"has_reasoning",
"reasoning_mode",
"reasoning_format",
"reasoning_active",
"with_reasoning",
"use_thinking",
"thinking_enabled",
"has_thinking",
"thinking_mode",
"thinking_format",
"thinking_active",
"with_thinking",
"enable_reason",
"reason_enabled",
"enable_think",
"think_enabled",
};
jinja::context ctx;
ctx.is_get_stats = true;
json messages = json::array({
json{
{"role", "user"},
{"content", "Test message"}
},
json{
{"role", "assistant"},
{"content", "Response"},
{"reasoning_content", "Some reasoning"}
}
});
// Set up base context
jinja::global_from_json(ctx, json{
{"messages", messages},
{"tools", json::array()},
{"bos_token", ""},
{"eos_token", ""},
{"add_generation_prompt", false},
{"enable_thinking", true} // Already passed, so we'll exclude this from results
}, true);
// Add candidate variables as undefined to probe which ones are accessed
for (const auto & var_name : candidate_vars) {
ctx.set_val(var_name, jinja::mk_val<jinja::value_undefined_t>(var_name));
}
try {
jinja::runtime runtime(ctx);
runtime.execute(tmpl.prog);
} catch (const std::exception & e) {
// Execution may fail, that's okay - we just want to see what variables were accessed
}
// Check which candidate variables were accessed (stats.used = true)
std::vector<std::string> accessed_vars;
for (const auto & var_name : candidate_vars) {
auto val = ctx.get_val(var_name);
if (!val->is_undefined()) {
// Variable was overwritten, skip it
continue;
}
if (val->stats.used) {
accessed_vars.push_back(var_name);
}
}
if (accessed_vars.empty()) {
LOG_ERR("%sNo reasoning/thinking-related variables were queried by the template%s\n", ANSI_GRAY, ANSI_RESET);
} else {
LOG_ERR("Template queries the following reasoning/thinking-related variables:\n");
for (const auto & var : accessed_vars) {
LOG_ERR(" %s- %s%s\n", ANSI_ORANGE, var.c_str(), ANSI_RESET);
}
}
} catch (const std::exception & e) {
LOG_ERR("Error checking reasoning variables: %s\n", e.what());
}
}
static void analyze_template(const std::string & template_path) {
LOG_ERR("\n");
LOG_ERR("%s", ANSI_PURPLE);
LOG_ERR("================================================================================\n");
LOG_ERR(" ANALYZING TEMPLATE: %s\n", template_path.c_str());
LOG_ERR("================================================================================\n");
LOG_ERR("%s", ANSI_RESET);
std::string template_source;
try {
template_source = read_file(template_path);
} catch (const std::exception & e) {
LOG_ERR("Error reading template: %s\n", e.what());
return;
}
try {
common_chat_template chat_template(template_source, "", "");
json tools = build_tools_definition();
// ===== CAPABILITIES ANALYSIS =====
LOG_ERR("\n%s=== Template Capabilities (from jinja::caps) ===%s\n", ANSI_CYAN, ANSI_RESET);
auto caps = chat_template.original_caps();
LOG_ERR("%ssupports_tools:%s %s\n", ANSI_BLUE, ANSI_RESET, caps.supports_tools ? "true" : "false");
LOG_ERR("%ssupports_tool_calls:%s %s\n", ANSI_BLUE, ANSI_RESET, caps.supports_tool_calls ? "true" : "false");
LOG_ERR("%ssupports_system_role:%s %s\n", ANSI_BLUE, ANSI_RESET, caps.supports_system_role ? "true" : "false");
LOG_ERR("%ssupports_parallel_tool_calls:%s %s\n", ANSI_BLUE, ANSI_RESET, caps.supports_parallel_tool_calls ? "true" : "false");
LOG_ERR("%srequires_typed_content:%s %s\n", ANSI_BLUE, ANSI_RESET, caps.requires_typed_content ? "true" : "false");
// ===== DIFFERENTIAL ANALYSIS =====
// Test 1: With and without tools (single user message)
{
json user_msg = make_user_msg();
templates_params params_no_tools;
params_no_tools.messages = json::array({ user_msg });
params_no_tools.add_generation_prompt = false;
params_no_tools.tools = json::array();
templates_params params_with_tools = params_no_tools;
params_with_tools.tools = tools;
std::string output_no_tools = common_chat_template_direct_apply(chat_template, params_no_tools);
std::string output_with_tools = common_chat_template_direct_apply(chat_template, params_with_tools);
auto diff = calculate_diff_split(output_no_tools, output_with_tools);
print_diff_split("Diff: With vs Without Tools (single user message)", diff);
}
// Test 2: With and without add_generation_prompt (single user message)
{
json user_msg = make_user_msg();
templates_params params_no_prompt;
params_no_prompt.messages = json::array({ user_msg });
params_no_prompt.add_generation_prompt = false;
params_no_prompt.tools = json::array();
templates_params params_with_prompt = params_no_prompt;
params_with_prompt.add_generation_prompt = true;
std::string output_no_prompt = common_chat_template_direct_apply(chat_template, params_no_prompt);
std::string output_with_prompt = common_chat_template_direct_apply(chat_template, params_with_prompt);
auto diff = calculate_diff_split(output_no_prompt, output_with_prompt);
print_diff_split("Diff: With vs Without add_generation_prompt (single user message)", diff);
}
// Test 3: Assistant with reasoning_content (user, assistant)
{
json user_msg = make_user_msg();
templates_params params_no_reasoning;
params_no_reasoning.messages = json::array({ user_msg, make_assistant_no_reasoning() });
params_no_reasoning.add_generation_prompt = false;
params_no_reasoning.enable_thinking = true;
templates_params params_with_reasoning = params_no_reasoning;
params_with_reasoning.messages = json::array({ user_msg, make_assistant_with_reasoning() });
std::string output_no_reasoning = common_chat_template_direct_apply(chat_template, params_no_reasoning);
std::string output_with_reasoning = common_chat_template_direct_apply(chat_template, params_with_reasoning);
auto diff = calculate_diff_split(output_no_reasoning, output_with_reasoning);
print_diff_split("Diff: With vs Without reasoning_content (user, assistant)", diff);
}
// Test 4: Assistant with reasoning_content (user, assistant, user)
{
json user_msg = make_user_msg();
json user_msg2 = make_user_msg2();
templates_params params_no_reasoning;
params_no_reasoning.messages = json::array({ user_msg, make_assistant_no_reasoning(), user_msg2 });
params_no_reasoning.add_generation_prompt = false;
params_no_reasoning.enable_thinking = true;
templates_params params_with_reasoning = params_no_reasoning;
params_with_reasoning.messages = json::array({ user_msg, make_assistant_with_reasoning(), user_msg2 });
std::string output_no_reasoning = common_chat_template_direct_apply(chat_template, params_no_reasoning);
std::string output_with_reasoning = common_chat_template_direct_apply(chat_template, params_with_reasoning);
auto diff = calculate_diff_split(output_no_reasoning, output_with_reasoning);
print_diff_split("Diff: With vs Without reasoning_content (user, assistant, user)", diff);
}
// Test 5: Tool call in last assistant message (user, assistant)
{
json user_msg = make_user_msg();
templates_params params_no_tool;
params_no_tool.messages = json::array({ user_msg, make_assistant_no_tool() });
params_no_tool.add_generation_prompt = false;
params_no_tool.tools = tools;
templates_params params_with_tool = params_no_tool;
params_with_tool.messages = json::array({ user_msg, make_assistant_one_tool() });
std::string output_no_tool = common_chat_template_direct_apply(chat_template, params_no_tool);
std::string output_with_tool = common_chat_template_direct_apply(chat_template, params_with_tool);
auto diff = calculate_diff_split(output_no_tool, output_with_tool);
print_diff_split("Diff: With vs Without tool call (user, assistant)", diff);
}
// Test 6: Tool call in last assistant message (user, assistant, user)
{
json user_msg = make_user_msg();
json user_msg2 = make_user_msg2_continue();
templates_params params_no_tool;
params_no_tool.messages = json::array({ user_msg, make_assistant_no_tool(), user_msg2 });
params_no_tool.add_generation_prompt = false;
params_no_tool.tools = tools;
templates_params params_with_tool = params_no_tool;
params_with_tool.messages = json::array({ user_msg, make_assistant_one_tool(), user_msg2 });
std::string output_no_tool = common_chat_template_direct_apply(chat_template, params_no_tool);
std::string output_with_tool = common_chat_template_direct_apply(chat_template, params_with_tool);
auto diff = calculate_diff_split(output_no_tool, output_with_tool);
print_diff_split("Diff: With vs Without tool call (user, assistant, user)", diff);
}
// Test 7: One vs two tool calls (user, assistant)
{
json user_msg = make_user_msg();
templates_params params_one_tool;
params_one_tool.messages = json::array({ user_msg, make_assistant_one_tool() });
params_one_tool.add_generation_prompt = false;
params_one_tool.tools = tools;
templates_params params_two_tools = params_one_tool;
params_two_tools.messages = json::array({ user_msg, make_assistant_two_tools() });
std::string output_one_tool = common_chat_template_direct_apply(chat_template, params_one_tool);
std::string output_two_tools = common_chat_template_direct_apply(chat_template, params_two_tools);
auto diff = calculate_diff_split(output_one_tool, output_two_tools);
print_diff_split("Diff: One vs Two tool calls (user, assistant)", diff);
}
// Test 8: One vs two tool calls (user, assistant, user)
{
json user_msg = make_user_msg();
json user_msg2 = make_user_msg2_continue();
templates_params params_one_tool;
params_one_tool.messages = json::array({ user_msg, make_assistant_one_tool(), user_msg2 });
params_one_tool.add_generation_prompt = false;
params_one_tool.tools = tools;
templates_params params_two_tools = params_one_tool;
params_two_tools.messages = json::array({ user_msg, make_assistant_two_tools(), user_msg2 });
std::string output_one_tool = common_chat_template_direct_apply(chat_template, params_one_tool);
std::string output_two_tools = common_chat_template_direct_apply(chat_template, params_two_tools);
auto diff = calculate_diff_split(output_one_tool, output_two_tools);
print_diff_split("Diff: One vs Two tool calls (user, assistant, user)", diff);
}
// Test 9: Tool call with vs without reasoning_content (user, assistant)
{
json user_msg = make_user_msg();
templates_params params_no_reasoning;
params_no_reasoning.messages = json::array({ user_msg, make_assistant_one_tool() });
params_no_reasoning.add_generation_prompt = false;
params_no_reasoning.tools = tools;
params_no_reasoning.enable_thinking = true;
templates_params params_with_reasoning = params_no_reasoning;
params_with_reasoning.messages = json::array({ user_msg, make_assistant_one_tool_with_reasoning() });
std::string output_no_reasoning = common_chat_template_direct_apply(chat_template, params_no_reasoning);
std::string output_with_reasoning = common_chat_template_direct_apply(chat_template, params_with_reasoning);
auto diff = calculate_diff_split(output_no_reasoning, output_with_reasoning);
print_diff_split("Diff: Tool call with vs without reasoning_content (user, assistant)", diff);
}
// Check reasoning variables
check_reasoning_variables(chat_template);
} catch (const std::exception & e) {
LOG_ERR("Analysis failed: %s\n", e.what());
}
}
int main(int argc, char ** argv) {
// Set log level to capture all output
common_log_set_verbosity_thold(99);
analysis_options opts;
if (!parse_options(argc, argv, opts)) {
return 1;
}
LOG_ERR("\n");
LOG_ERR("%s", ANSI_PURPLE);
LOG_ERR("================================================================================\n");
LOG_ERR(" TEMPLATE ANALYSIS TOOL\n");
LOG_ERR("================================================================================\n");
LOG_ERR("%s", ANSI_RESET);
LOG_ERR("Analyzing %s%zu%s template(s)\n", ANSI_CYAN, opts.template_paths.size(), ANSI_RESET);
for (const auto & path : opts.template_paths) {
analyze_template(path);
}
LOG_ERR("\n");
LOG_ERR("%s", ANSI_GREEN);
LOG_ERR("================================================================================\n");
LOG_ERR(" ANALYSIS COMPLETE\n");
LOG_ERR("================================================================================\n");
LOG_ERR("%s", ANSI_RESET);
return 0;
}