Refactor into class-based approach

This commit is contained in:
Piotr Wilkin 2026-02-14 00:17:43 +01:00
parent 6415d0f03f
commit 3605e78569
11 changed files with 706 additions and 644 deletions

View File

@ -19,22 +19,23 @@ static void foreach_function(const json & tools, const std::function<void(const
}
}
namespace autoparser {
// Constructs the shared build context used by the build_parser methods.
// Binds the builder `p` and the request `inputs` by reference (neither is copied or owned) and
// seeds `reasoning_parser` with the builder's eps() parser; the remaining members keep their
// in-class defaults until the caller fills them in.
parser_build_context::parser_build_context(common_chat_peg_unified_builder & p, const templates_params & inputs)
: p(p), inputs(inputs), reasoning_parser(p.eps()) {}
common_chat_params universal_peg_generator::generate_parser(const common_chat_template & tmpl,
const struct templates_params & inputs) {
// Run differential analysis to extract template structure
auto analysis = differential_analyzer::analyze(tmpl);
analyze_template analysis(tmpl);
return generate_parser(tmpl, inputs, analysis);
}
common_chat_params universal_peg_generator::generate_parser(const common_chat_template & tmpl,
const struct templates_params & inputs,
const diff_analysis_result & analysis) {
// Check for thinking forced open
bool thinking_forced_open = (analysis.reasoning.mode == reasoning_mode::FORCED_OPEN);
bool thinking_forced_closed = (analysis.reasoning.mode == reasoning_mode::FORCED_CLOSED);
const analyze_template & analysis) {
// Build the parser using the analysis results
auto parser = build_parser(analysis, inputs, thinking_forced_open, thinking_forced_closed);
auto parser = analysis.build_parser(inputs);
// Create the result structure
common_chat_params data;
@ -73,145 +74,177 @@ common_chat_params universal_peg_generator::generate_parser(const common_chat_te
return data;
}
common_peg_arena universal_peg_generator::build_parser(const diff_analysis_result & analysis,
const struct templates_params & inputs,
bool thinking_forced_open,
bool thinking_forced_closed) {
// ============================================================================
// analyze_template::build_parser - orchestrates parser building
// ============================================================================
common_peg_arena analyze_template::build_parser(const templates_params & inputs) const {
return build_chat_peg_unified_parser([&](common_chat_peg_unified_builder & p) {
p.set_allow_python_dict_format(true);
common_peg_parser reasoning = p.eps();
parser_build_context ctx(p, inputs);
bool extract_reasoning = inputs.reasoning_format != COMMON_REASONING_FORMAT_NONE;
bool enable_thinking = inputs.enable_thinking;
if (extract_reasoning && enable_thinking && analysis.reasoning.mode != reasoning_mode::NONE) {
if (thinking_forced_open || thinking_forced_closed) {
// Thinking is forced open OR forced closed with enable_thinking=true
// In both cases, expect only the closing tag (opening was in template)
reasoning = p.reasoning(p.until(analysis.reasoning.end)) + analysis.reasoning.end;
} else if (analysis.reasoning.mode == reasoning_mode::TAG_BASED ||
analysis.reasoning.mode == reasoning_mode::TOOLS_ONLY) {
// Standard tag-based reasoning OR tools-only mode (reasoning appears with tools)
// Both use the same tag-based pattern if markers are available
if (!analysis.reasoning.start.empty() && !analysis.reasoning.end.empty()) {
reasoning = p.optional(analysis.reasoning.start + p.reasoning(p.until(analysis.reasoning.end)) + analysis.reasoning.end);
}
} else if (analysis.reasoning.mode == reasoning_mode::DELIMITER) {
reasoning = p.optional(p.reasoning(p.until(analysis.reasoning.end)) + analysis.reasoning.end);
}
}
ctx.extracting_reasoning = extract_reasoning && enable_thinking && reasoning.mode != reasoning_mode::NONE;
ctx.content = &content;
// Build reasoning parser
ctx.reasoning_parser = reasoning.build_parser(ctx);
bool has_tools = inputs.tools.is_array() && !inputs.tools.empty();
bool has_response_format = inputs.json_schema.is_object() && !inputs.json_schema.empty();
if (has_response_format) {
return reasoning + p.space() + p.content(p.schema(p.json(), "response-format", inputs.json_schema)) + p.end();
return ctx.reasoning_parser + p.space() + p.content(p.schema(p.json(), "response-format", inputs.json_schema)) + p.end();
}
if (has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE && analysis.jinja_caps.supports_tool_calls) {
return build_tool_parser(p, analysis, inputs, reasoning);
if (has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE && jinja_caps.supports_tool_calls) {
return tools.build_parser(ctx);
}
if (analysis.content.mode == content_mode::ALWAYS_WRAPPED &&
!analysis.content.start.empty() && !analysis.content.end.empty()) {
bool extracting_reasoning = extract_reasoning && enable_thinking && analysis.reasoning.mode != reasoning_mode::NONE;
if (extracting_reasoning) {
return reasoning + analysis.content.start + p.content(p.until(analysis.content.end)) + analysis.content.end + p.end();
}
return p.content(p.until(analysis.content.start)) + analysis.content.start + p.content(p.until(analysis.content.end)) + analysis.content.end + p.end();
}
return reasoning + p.content(p.rest()) + p.end();
return content.build_parser(ctx);
});
}
common_peg_parser universal_peg_generator::build_tool_parser(
common_chat_peg_unified_builder & p,
const diff_analysis_result & analysis,
const templates_params & inputs,
const common_peg_parser & reasoning) {
// ============================================================================
// analyze_reasoning::build_parser
// ============================================================================
switch (analysis.tools.format.mode) {
// Builds the parser fragment that captures the reasoning ("thinking") span.
//
// Returns the builder's eps() parser when reasoning extraction is disabled in the context, or
// when the detected mode has no usable markers. Otherwise the shape depends on `mode`:
//   - FORCED_OPEN / FORCED_CLOSED: reasoning body up to `end`, then `end` (mandatory)
//   - TAG_BASED / TOOLS_ONLY:      optional `start` + body + `end`, only if both markers are set
//   - DELIMITER:                   optional body + `end`
common_peg_parser analyze_reasoning::build_parser(parser_build_context & ctx) const {
    auto & builder = ctx.p;

    // Nothing to extract for this request.
    if (!ctx.extracting_reasoning) {
        return builder.eps();
    }

    switch (mode) {
        case reasoning_mode::FORCED_OPEN:
        case reasoning_mode::FORCED_CLOSED:
            // Forced open, or forced closed with thinking enabled: the opening marker was
            // already produced by the template, so only the closing marker is expected.
            return builder.reasoning(builder.until(end)) + end;

        case reasoning_mode::TAG_BASED:
        case reasoning_mode::TOOLS_ONLY:
            // Regular tag-based reasoning and tools-only reasoning share one pattern,
            // but it is only usable when both markers are known.
            if (start.empty() || end.empty()) {
                break;
            }
            return builder.optional(start + builder.reasoning(builder.until(end)) + end);

        case reasoning_mode::DELIMITER:
            return builder.optional(builder.reasoning(builder.until(end)) + end);

        default:
            break;
    }

    return builder.eps();
}
// ============================================================================
// analyze_content::build_parser
// ============================================================================
// Builds the full-message parser for plain (non-tool, non-schema) responses.
//
//   - content not always wrapped: reasoning parser, then everything that remains as content
//   - wrapped, reasoning not extracted: text before `start` is captured as content, followed by
//     the wrapped content (the context's reasoning parser is not used on this path)
//   - wrapped, reasoning extracted: reasoning parser, then the wrapped content
// Every variant is terminated with the builder's end() parser.
common_peg_parser analyze_content::build_parser(parser_build_context & ctx) const {
    auto & builder = ctx.p;

    if (!is_always_wrapped()) {
        return ctx.reasoning_parser + builder.content(builder.rest()) + builder.end();
    }

    if (!ctx.extracting_reasoning) {
        return builder.content(builder.until(start)) + start + builder.content(builder.until(end)) + end + builder.end();
    }

    return ctx.reasoning_parser + start + builder.content(builder.until(end)) + end + builder.end();
}
// Builds an optional `start` + content + `end` fragment for templates whose content is always
// wrapped; for every other template the builder's eps() parser is returned instead.
common_peg_parser analyze_content::build_optional_wrapped(parser_build_context & ctx) const {
    auto & builder = ctx.p;

    if (!is_always_wrapped()) {
        return builder.eps();
    }

    return builder.optional(start + builder.content(builder.until(end)) + end);
}
// ============================================================================
// analyze_tools::build_parser - dispatches to format-specific builders
// ============================================================================
common_peg_parser analyze_tools::build_parser(parser_build_context & ctx) const {
switch (format.mode) {
case tool_format::JSON_NATIVE:
return build_tool_parser_json_native(p, analysis, inputs, reasoning);
return build_tool_parser_json_native(ctx);
case tool_format::TAG_WITH_JSON:
return build_tool_parser_tag_json(p, analysis, inputs, reasoning);
return build_tool_parser_tag_json(ctx);
case tool_format::TAG_WITH_TAGGED:
return build_tool_parser_tag_tagged(p, analysis, inputs, reasoning);
return build_tool_parser_tag_tagged(ctx);
default:
GGML_ABORT("Unable to create tool parser");
}
}
common_peg_parser universal_peg_generator::build_tool_parser_json_native(
common_chat_peg_unified_builder & p,
const diff_analysis_result & analysis,
const templates_params & inputs,
const common_peg_parser & reasoning) {
common_peg_parser analyze_tools::build_tool_parser_json_native(parser_build_context & ctx) const {
auto & p = ctx.p;
const auto & inputs = ctx.inputs;
// Build effective field names with dot notation if function_field is set
std::string name_field = analysis.tools.format.name_field;
std::string args_field = analysis.tools.format.args_field;
std::string name_field = format.name_field;
std::string args_field = format.args_field;
if (!analysis.tools.format.function_field.empty() &&
analysis.tools.format.function_field != "function" &&
if (!format.function_field.empty() &&
format.function_field != "function" &&
name_field.find('.') == std::string::npos) {
name_field = analysis.tools.format.function_field + "." + name_field;
args_field = analysis.tools.format.function_field + "." + args_field;
name_field = format.function_field + "." + name_field;
args_field = format.function_field + "." + args_field;
}
auto tools_parser = p.standard_json_tools(
analysis.tools.format.section_start,
analysis.tools.format.section_end,
format.section_start,
format.section_end,
inputs.tools,
inputs.parallel_tool_calls,
inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED,
name_field,
args_field,
analysis.tools.format.tools_array_wrapped,
analysis.tools.format.fun_name_is_key,
analysis.tools.format.id_field,
analysis.tools.format.gen_id_field,
analysis.tools.format.parameter_order
format.tools_array_wrapped,
format.fun_name_is_key,
format.id_field,
format.gen_id_field,
format.parameter_order
);
// Handle content wrappers if present
if (analysis.content.mode == content_mode::ALWAYS_WRAPPED &&
!analysis.content.start.empty() && !analysis.content.end.empty()) {
auto wrapped_content = p.optional(analysis.content.start + p.content(p.until(analysis.content.end)) + analysis.content.end);
return reasoning + wrapped_content + tools_parser + p.end();
if (ctx.content && ctx.content->is_always_wrapped()) {
auto wrapped_content = ctx.content->build_optional_wrapped(ctx);
return ctx.reasoning_parser + wrapped_content + tools_parser + p.end();
}
auto content_before_tools = analysis.tools.format.section_start.empty() ? p.eps() : p.until(analysis.tools.format.section_start);
return reasoning + p.optional(p.content(content_before_tools)) + tools_parser + p.end();
auto content_before_tools = format.section_start.empty() ? p.eps() : p.until(format.section_start);
return ctx.reasoning_parser + p.optional(p.content(content_before_tools)) + tools_parser + p.end();
}
common_peg_parser universal_peg_generator::build_tool_parser_tag_json(
common_chat_peg_unified_builder & p,
const diff_analysis_result & analysis,
const templates_params & inputs,
const common_peg_parser & reasoning) {
common_peg_parser analyze_tools::build_tool_parser_tag_json(parser_build_context & ctx) const {
auto & p = ctx.p;
const auto & inputs = ctx.inputs;
common_peg_parser tool_choice = p.choice();
foreach_function(inputs.tools, [&](const json & tool) {
const auto & function = tool.at("function");
std::string name = function.at("name");
const auto & schema = function.at("parameters");
const auto & func = tool.at("function");
std::string name = func.at("name");
const auto & schema = func.at("parameters");
// Build call_id parser based on position (if supported)
common_peg_parser call_id_section = p.eps();
if (analysis.tools.call_id.pos == call_id_position::BETWEEN_FUNC_AND_ARGS &&
!analysis.tools.call_id.prefix.empty() && !analysis.tools.call_id.suffix.empty()) {
call_id_section = p.optional(analysis.tools.call_id.prefix + p.tool_id(p.until(analysis.tools.call_id.suffix))) + analysis.tools.call_id.suffix;
if (call_id.pos == call_id_position::BETWEEN_FUNC_AND_ARGS &&
!call_id.prefix.empty() && !call_id.suffix.empty()) {
call_id_section = p.optional(call_id.prefix + p.tool_id(p.until(call_id.suffix))) + call_id.suffix;
}
auto func_parser = p.tool_open(analysis.tools.function.name_prefix + p.tool_name(p.literal(name)) + analysis.tools.function.name_suffix) +
auto func_parser = p.tool_open(function.name_prefix + p.tool_name(p.literal(name)) + function.name_suffix) +
call_id_section +
p.tool_args(p.schema(p.json(), "tool-" + name + "-schema", schema));
if (!analysis.tools.function.close.empty()) {
func_parser = func_parser + analysis.tools.function.close;
if (!function.close.empty()) {
func_parser = func_parser + function.close;
}
tool_choice |= p.rule("tool-" + name, func_parser);
@ -221,26 +254,26 @@ common_peg_parser universal_peg_generator::build_tool_parser_tag_json(
common_peg_parser tool_calls = p.eps();
if (!analysis.tools.format.per_call_start.empty()) {
auto wrapped_call = analysis.tools.format.per_call_start + tool_choice + analysis.tools.format.per_call_end;
if (!format.per_call_start.empty()) {
auto wrapped_call = format.per_call_start + tool_choice + format.per_call_end;
if (inputs.parallel_tool_calls) {
tool_calls = p.trigger_rule("tool-call",
wrapped_call + p.zero_or_more(p.space() + wrapped_call));
} else {
tool_calls = p.trigger_rule("tool-call", wrapped_call);
}
if (!analysis.tools.format.section_start.empty()) {
tool_calls = p.trigger_rule("tool-calls", p.literal(analysis.tools.format.section_start) + p.space() +
tool_calls + p.space() + (analysis.tools.format.section_end.empty() ? p.end() : p.literal(analysis.tools.format.section_end)));
if (!format.section_start.empty()) {
tool_calls = p.trigger_rule("tool-calls", p.literal(format.section_start) + p.space() +
tool_calls + p.space() + (format.section_end.empty() ? p.end() : p.literal(format.section_end)));
}
} else {
std::string separator = ", "; // Default
if (inputs.parallel_tool_calls) {
tool_calls = p.trigger_rule("tool-call",
analysis.tools.format.section_start + tool_choice + p.zero_or_more(separator + tool_choice) + analysis.tools.format.section_end);
format.section_start + tool_choice + p.zero_or_more(separator + tool_choice) + format.section_end);
} else {
tool_calls = p.trigger_rule("tool-call",
analysis.tools.format.section_start + tool_choice + analysis.tools.format.section_end);
format.section_start + tool_choice + format.section_end);
}
}
@ -248,23 +281,21 @@ common_peg_parser universal_peg_generator::build_tool_parser_tag_json(
tool_calls = p.optional(tool_calls);
}
std::string trigger_marker = !analysis.tools.format.section_start.empty() ? analysis.tools.format.section_start : analysis.tools.format.per_call_start;
std::string trigger_marker = !format.section_start.empty() ? format.section_start : format.per_call_start;
auto content_before_tools = trigger_marker.empty() ? p.eps() : p.until(trigger_marker);
return reasoning + p.optional(p.content(content_before_tools)) + tool_calls + p.end();
return ctx.reasoning_parser + p.optional(p.content(content_before_tools)) + tool_calls + p.end();
}
common_peg_parser universal_peg_generator::build_tool_parser_tag_tagged(
common_chat_peg_unified_builder & p,
const diff_analysis_result & analysis,
const templates_params & inputs,
const common_peg_parser & reasoning) {
common_peg_parser analyze_tools::build_tool_parser_tag_tagged(parser_build_context & ctx) const {
auto & p = ctx.p;
const auto & inputs = ctx.inputs;
common_peg_parser tool_choice = p.choice();
foreach_function(inputs.tools, [&](const json & tool) {
const auto & function = tool.at("function");
std::string name = function.at("name");
const auto & params = function.at("parameters");
const auto & func = tool.at("function");
std::string name = func.at("name");
const auto & params = func.at("parameters");
if (!params.contains("properties") || !params.at("properties").is_object()) {
return;
@ -283,13 +314,13 @@ common_peg_parser universal_peg_generator::build_tool_parser_tag_tagged(
auto type = param_schema.value("type", "object");
auto arg = p.tool_arg(
p.tool_arg_open(analysis.tools.arguments.name_prefix + p.tool_arg_name(p.literal(param_name)) + analysis.tools.arguments.name_suffix) + analysis.tools.arguments.value_prefix +
p.tool_arg_open(arguments.name_prefix + p.tool_arg_name(p.literal(param_name)) + arguments.name_suffix) + arguments.value_prefix +
(type == "string" ?
p.tool_arg_string_value(p.schema(p.until(analysis.tools.arguments.value_suffix),
p.tool_arg_string_value(p.schema(p.until(arguments.value_suffix),
"tool-" + name + "-arg-" + param_name + "-schema", param_schema, true)) :
p.tool_arg_json_value(p.schema(p.json(),
"tool-" + name + "-arg-" + param_name + "-schema", param_schema)) + p.space()) +
p.tool_arg_close(p.literal(analysis.tools.arguments.value_suffix))
p.tool_arg_close(p.literal(arguments.value_suffix))
);
if (is_required) {
@ -310,23 +341,23 @@ common_peg_parser universal_peg_generator::build_tool_parser_tag_tagged(
// Build call_id parser based on position (if supported)
common_peg_parser call_id_section = p.eps();
if (analysis.tools.call_id.pos == call_id_position::BETWEEN_FUNC_AND_ARGS &&
!analysis.tools.call_id.prefix.empty() && !analysis.tools.call_id.suffix.empty()) {
call_id_section = p.optional(analysis.tools.call_id.prefix + p.tool_id(p.until(analysis.tools.call_id.suffix))) + analysis.tools.call_id.suffix;
if (call_id.pos == call_id_position::BETWEEN_FUNC_AND_ARGS &&
!call_id.prefix.empty() && !call_id.suffix.empty()) {
call_id_section = p.optional(call_id.prefix + p.tool_id(p.until(call_id.suffix))) + call_id.suffix;
}
auto func_parser = p.tool_open(analysis.tools.function.name_prefix + p.tool_name(p.literal(name)) + analysis.tools.function.name_suffix) +
auto func_parser = p.tool_open(function.name_prefix + p.tool_name(p.literal(name)) + function.name_suffix) +
call_id_section +
p.space() + args_seq;
if (!analysis.tools.function.close.empty()) {
func_parser = func_parser + p.space() + p.tool_close(p.literal(analysis.tools.function.close));
} else if (!analysis.tools.format.per_call_end.empty()) {
if (!function.close.empty()) {
func_parser = func_parser + p.space() + p.tool_close(p.literal(function.close));
} else if (!format.per_call_end.empty()) {
// When there's no func_close but there is a per_call_end marker, use peek() to ensure
// we only emit tool_close when we can actually see the closing marker. This prevents
// premature closing during partial parsing when we've seen e.g. "</" which could be
// either "</tool_call>" (end) or "<arg_key>" prefix that failed to match.
func_parser = func_parser + p.tool_close(p.peek(p.literal(analysis.tools.format.per_call_end)));
func_parser = func_parser + p.tool_close(p.peek(p.literal(format.per_call_end)));
} else {
func_parser = func_parser + p.tool_close(p.space()); // force this to process tool closing callbacks in mapper
}
@ -338,26 +369,26 @@ common_peg_parser universal_peg_generator::build_tool_parser_tag_tagged(
common_peg_parser tool_calls = p.eps();
if (!analysis.tools.format.per_call_start.empty()) {
auto wrapped_call = analysis.tools.format.per_call_start + p.space() + tool_choice + p.space() + analysis.tools.format.per_call_end;
if (!format.per_call_start.empty()) {
auto wrapped_call = format.per_call_start + p.space() + tool_choice + p.space() + format.per_call_end;
if (inputs.parallel_tool_calls) {
tool_calls = p.trigger_rule("tool-call", wrapped_call + p.zero_or_more(p.space() + wrapped_call));
} else {
tool_calls = p.trigger_rule("tool-call", wrapped_call);
}
if (!analysis.tools.format.section_start.empty()) {
tool_calls = p.trigger_rule("tool-calls", p.literal(analysis.tools.format.section_start) + p.space() +
tool_calls + p.space() + (analysis.tools.format.section_end.empty() ? p.end() : p.literal(analysis.tools.format.section_end)));
if (!format.section_start.empty()) {
tool_calls = p.trigger_rule("tool-calls", p.literal(format.section_start) + p.space() +
tool_calls + p.space() + (format.section_end.empty() ? p.end() : p.literal(format.section_end)));
}
} else {
std::string separator = ", "; // Default
if (inputs.parallel_tool_calls) {
tool_calls = p.trigger_rule("tool-call",
analysis.tools.format.section_start + p.space() + tool_choice + p.zero_or_more(separator + tool_choice) + p.space() + analysis.tools.format.section_end);
format.section_start + p.space() + tool_choice + p.zero_or_more(separator + tool_choice) + p.space() + format.section_end);
} else {
tool_calls = p.trigger_rule("tool-call",
analysis.tools.format.section_start + p.space() + tool_choice + p.space() + analysis.tools.format.section_end);
format.section_start + p.space() + tool_choice + p.space() + format.section_end);
}
}
@ -365,7 +396,9 @@ common_peg_parser universal_peg_generator::build_tool_parser_tag_tagged(
tool_calls = p.optional(tool_calls);
}
std::string trigger_marker = !analysis.tools.format.section_start.empty() ? analysis.tools.format.section_start : analysis.tools.format.per_call_start;
std::string trigger_marker = !format.section_start.empty() ? format.section_start : format.per_call_start;
auto content_before_tools = trigger_marker.empty() ? p.eps() : p.until(trigger_marker);
return reasoning + p.optional(p.content(content_before_tools)) + tool_calls + p.end();
return ctx.reasoning_parser + p.optional(p.content(content_before_tools)) + tool_calls + p.end();
}
} // namespace autoparser

View File

@ -1,6 +1,9 @@
#include "chat-auto-parser-helpers.h"
#include "chat-auto-parser.h"
#include "chat-diff-analyzer.h"
#include "chat.h"
#include "log.h"
#include "nlohmann/json.hpp"
#include <cctype>
@ -289,3 +292,57 @@ std::vector<segment> prune_whitespace_segments(const std::vector<segment> & segm
return result;
}
namespace autoparser {
std::string apply_template(const common_chat_template & tmpl, const template_params & params) {
templates_params tmpl_params;
tmpl_params.messages = params.messages;
tmpl_params.tools = params.tools;
tmpl_params.add_generation_prompt = params.add_generation_prompt;
tmpl_params.enable_thinking = params.enable_thinking;
if (params.extra_context) {
tmpl_params.extra_context = *params.extra_context;
}
tmpl_params.extra_context["enable_thinking"] = params.enable_thinking;
try {
return common_chat_template_direct_apply(tmpl, tmpl_params);
} catch (const std::exception & e) {
LOG_DBG("Template application failed: %s\n", e.what());
return "";
}
}
std::optional<compare_variants_result> compare_variants(
const common_chat_template & tmpl,
const template_params & params_A,
const std::function<void(template_params &)> & params_modifier) {
// Create variant B by copying A
template_params params_B = params_A;
// Apply modifier to create variant B
if (params_modifier) {
params_modifier(params_B);
}
// Apply template to both variants
std::string output_A = apply_template(tmpl, params_A);
std::string output_B = apply_template(tmpl, params_B);
// Check for template application failures
if (output_A.empty() || output_B.empty()) {
return std::nullopt;
}
// Calculate diff and return result with both outputs
compare_variants_result result;
result.diff = calculate_diff_split(output_A, output_B);
result.output_A = output_A;
result.output_B = output_B;
return result;
}
} // namespace autoparser

View File

@ -1,6 +1,8 @@
#pragma once
#include "chat-diff-analyzer.h"
#include <functional>
#include <optional>
#include <string>
std::string trim_whitespace(const std::string & str);
@ -54,3 +56,18 @@ std::vector<segment> segmentize_markers(const std::string & text);
// prune_whitespace_segments(X) -> [ (MARKER, "<tool_call>"), (MARKER, "<function=foo>"), (MARKER, "<arg=bar>"), (MARKER, "</arg>"),
// (MARKER, "</function>"), (MARKER, "</tool_call>") ]
std::vector<segment> prune_whitespace_segments(const std::vector<segment> & segments);
namespace autoparser {
// Apply a template with the given parameters and return the rendered string.
// Failures while applying the template are not propagated: an empty string is returned instead.
std::string apply_template(const common_chat_template & tmpl, const template_params & params);
// Differential comparison helper.
// Copies params_A, lets params_modifier (if set) adjust the copy to create variant B, renders the
// template for both variants and diffs the two outputs.
// Returns compare_variants_result containing the diff and both outputs, or std::nullopt when
// either rendering is empty (i.e. template application failed).
std::optional<compare_variants_result> compare_variants(
const common_chat_template & tmpl,
const template_params & params_A,
const std::function<void(template_params &)> & params_modifier);
} // namespace autoparser

View File

@ -10,6 +10,8 @@
using json = nlohmann::ordered_json;
namespace autoparser {
struct templates_params {
json messages;
json tools;
@ -37,34 +39,7 @@ class universal_peg_generator {
static common_chat_params generate_parser(const common_chat_template & tmpl,
const struct templates_params & inputs,
const diff_analysis_result & analysis);
private:
// Build unified parser (single code path for all formats)
static common_peg_arena build_parser(const diff_analysis_result & analysis,
const struct templates_params & inputs,
bool thinking_forced_open,
bool thinking_forced_closed = false);
// Build tool calling parser based on detected format
static common_peg_parser build_tool_parser(common_chat_peg_unified_builder & p,
const diff_analysis_result & analysis,
const templates_params & inputs,
const common_peg_parser & reasoning);
// Per-format tool parser builders
static common_peg_parser build_tool_parser_json_native(common_chat_peg_unified_builder & p,
const diff_analysis_result & analysis,
const templates_params & inputs,
const common_peg_parser & reasoning);
static common_peg_parser build_tool_parser_tag_json(common_chat_peg_unified_builder & p,
const diff_analysis_result & analysis,
const templates_params & inputs,
const common_peg_parser & reasoning);
static common_peg_parser build_tool_parser_tag_tagged(common_chat_peg_unified_builder & p,
const diff_analysis_result & analysis,
const templates_params & inputs,
const common_peg_parser & reasoning);
const analyze_template & analysis);
};
} // namespace autoparser

File diff suppressed because it is too large Load Diff

View File

@ -2,6 +2,7 @@
#include "chat.h"
#include "jinja/caps.h"
#include "peg-parser.h"
#include "nlohmann/json.hpp"
#include <functional>
@ -12,6 +13,8 @@
using json = nlohmann::ordered_json;
class common_chat_peg_unified_builder;
// ============================================================================
// Parameters for template application
// ============================================================================
@ -41,6 +44,10 @@ struct compare_variants_result {
std::string output_B;
};
namespace autoparser {
struct templates_params;
// ============================================================================
// Marker Registry: All markers extracted via differential analysis
// ============================================================================
@ -182,21 +189,9 @@ inline std::ostream & operator<<(std::ostream & os, const tool_format & format)
}
}
struct reasoning_analysis {
reasoning_mode mode = reasoning_mode::NONE;
std::string start; // e.g., "<think>", "[THINK]", "<|START_THINKING|>", ""
std::string end; // e.g., "</think>", "[BEGIN FINAL RESPONSE]", "<|END_THINKING|>"
};
struct content_analysis {
content_mode mode = content_mode::PLAIN;
std::string start; // e.g., "<response>", ">>>all\n", ""
std::string end; // e.g., "</response>", ""
bool requires_nonnull_content = false;
};
// ============================================================================
// Sub-structs for tool analysis
// ============================================================================
struct tool_format_analysis {
tool_format mode = tool_format::NONE;
@ -240,127 +235,176 @@ struct tool_id_analysis {
std::string suffix; // e.g., "" (marker after call ID value, before next section)
};
struct tool_analysis {
// ============================================================================
// Parser build context (shared interface for build_parser methods)
// ============================================================================
struct analyze_content;
// State shared between the build_parser methods of the individual analyzers while a single
// parser is being assembled. The builder and the inputs are held by reference, so the context
// does not own them.
struct parser_build_context {
// Builder on which all parser fragments are created
common_chat_peg_unified_builder & p;
// Request parameters (tools, tool choice, ...) the parser is built for
const templates_params & inputs;
// Reasoning fragment that the content/tool builders prepend to their result;
// the constructor initializes it to p.eps()
common_peg_parser reasoning_parser;
// True when reasoning should be extracted for this request
bool extracting_reasoning = false;
// Content analyzer consulted by the tool builders for content wrappers; may be null
const analyze_content * content = nullptr;
parser_build_context(common_chat_peg_unified_builder & p, const templates_params & inputs);
};
// ============================================================================
// Base class for analyzers with parser building
// ============================================================================
// Common interface of the analyzers: each one can turn its analysis results into a parser
// fragment via build_parser().
struct analyze_base {
virtual ~analyze_base() = default;
// Build the parser fragment for this analyzer using the shared build context
virtual common_peg_parser build_parser(parser_build_context & ctx) const = 0;
protected:
// Non-owning pointer to the analyzed template; stays null for default-constructed analyzers
const common_chat_template * tmpl = nullptr;
analyze_base() = default;
// Stores the address of `tmpl`; the pointer is only valid while the caller keeps the template alive
explicit analyze_base(const common_chat_template & tmpl) : tmpl(&tmpl) {}
};
// ============================================================================
// Reasoning analyzer
// ============================================================================
// Holds the detected reasoning ("thinking") mode and its markers, and builds the matching
// reasoning parser fragment.
struct analyze_reasoning : analyze_base {
reasoning_mode mode = reasoning_mode::NONE;
std::string start; // e.g., "<think>", "[THINK]", "<|START_THINKING|>", ""
std::string end; // e.g., "</think>", "[BEGIN FINAL RESPONSE]", "<|END_THINKING|>"
// Default state: no reasoning detected, empty markers
analyze_reasoning() = default;
// NOTE(review): presumably runs the reasoning analysis on `tmpl` (see the private compare_*
// steps below); the definition is not part of this header - confirm against the implementation
analyze_reasoning(const common_chat_template & tmpl, bool supports_tools);
// Build the reasoning parser fragment from mode/start/end
common_peg_parser build_parser(parser_build_context & ctx) const override;
private:
// Look for reasoning markers in rendered content
void compare_reasoning_presence();
// Compare generation prompt with enable_thinking=true vs false
void compare_thinking_enabled();
// Check if reasoning is always possible or only in tool calls
void compare_reasoning_scope();
};
// ============================================================================
// Content analyzer
// ============================================================================
// Holds the detected content mode and content wrapper markers, and builds the parser fragments
// that capture regular message content.
struct analyze_content : analyze_base {
content_mode mode = content_mode::PLAIN;
std::string start; // e.g., "<response>", ">>>all\n", ""
std::string end; // e.g., "</response>", ""
bool requires_nonnull_content = false;
// Default state: plain content, empty markers
analyze_content() = default;
// NOTE(review): presumably runs the content analysis on `tmpl` using the reasoning results;
// the definition is not part of this header - confirm against the implementation
analyze_content(const common_chat_template & tmpl, const analyze_reasoning & reasoning);
// Build the complete parser for a plain content response (no tools, no response schema)
common_peg_parser build_parser(parser_build_context & ctx) const override;
// NOTE(review): presumably true when content is always enclosed in the start/end markers;
// exact condition is defined in the implementation file - confirm
bool is_always_wrapped() const;
// Build an optional start + content + end fragment (used ahead of tool calls)
common_peg_parser build_optional_wrapped(parser_build_context & ctx) const;
};
// ============================================================================
// Tool analyzer
// ============================================================================
// Holds everything detected about a template's tool-call syntax (overall format, function
// markers, argument markers, call ID markers) and builds the tool-call parser for it.
struct analyze_tools : analyze_base {
// Overall format: mode, section / per-call markers and JSON field names
tool_format_analysis format;
// Markers around the function name and the function close marker
tool_function_analysis function;
// Markers around argument names and values
tool_arguments_analysis arguments;
// Position and markers of the call ID, if any
tool_id_analysis call_id;
// Default state: nothing detected
analyze_tools() = default;
// NOTE(review): presumably runs the tool-call analysis on `tmpl`; the definition is not part
// of this header - confirm against the implementation
analyze_tools(const common_chat_template & tmpl,
const jinja::caps & caps,
const analyze_reasoning & reasoning);
// Build the tool-call parser by dispatching on format.mode to the per-format builders below
common_peg_parser build_parser(parser_build_context & ctx) const override;
private:
// Extract tool calling 'haystack' for further analysis and delegate further analysis based on format
void analyze_tool_calls(const analyze_reasoning & reasoning);
// Analyze format based on position of function and argument name in needle
void analyze_tool_call_format(const std::string & haystack,
const std::string & fun_name_needle,
const std::string & arg_name_needle,
const analyze_reasoning & reasoning);
// Analyze specifics of JSON native format (entire tool call is a JSON object)
void analyze_tool_call_format_json_native(const std::string & clean_haystack,
const std::string & fun_name_needle,
const std::string & arg_name_needle);
// Analyze specifics of non-JSON native format (tags for function name or for function name and arguments)
void analyze_tool_call_format_non_json(const std::string & clean_haystack,
const std::string & fun_name_needle);
// Check for and extract specific per-call markers for non-native-JSON templates with parallel call support
void check_per_call_markers();
// Extract function name markers
void extract_function_markers();
// Delegates to separate functions for: separator analysis, argument name analysis, argument value analysis
void analyze_arguments();
// Extract argument name markers
void extract_argument_name_markers();
// Extract argument value markers
void extract_argument_value_markers();
// Extract argument separator, if specified (eg. <arg=foo>...</arg><sep><arg=bar>...</arg>)
void extract_argument_separator();
// Extract argument wrapper markers, if present (eg. '<args><arg=foo>...</arg><arg=bar>...</arg></args>')
void extract_args_markers();
// Extract call ID markers, if present
void extract_call_id_markers();
// Per-format tool parser builders (one per tool_format handled by build_parser)
common_peg_parser build_tool_parser_json_native(parser_build_context & ctx) const;
common_peg_parser build_tool_parser_tag_json(parser_build_context & ctx) const;
common_peg_parser build_tool_parser_tag_tagged(parser_build_context & ctx) const;
};
// Complete result of differential analysis
struct diff_analysis_result {
// ============================================================================
// Top-level template analyzer (merges differential_analyzer + diff_analysis_result)
// ============================================================================
struct analyze_template {
jinja::caps jinja_caps;
reasoning_analysis reasoning;
content_analysis content;
tool_analysis tools;
analyze_reasoning reasoning;
analyze_content content;
analyze_tools tools;
// Preserved tokens for tokenizer (union of all non-empty markers)
std::vector<std::string> preserved_tokens;
};
// Performs systematic differential analysis on chat templates
// Uses comparison matrix to extract markers without heuristics
class differential_analyzer {
public:
// Main entry point: Run full differential analysis on a template
static diff_analysis_result analyze(const common_chat_template & tmpl);
// Constructor: runs full differential analysis on a template
explicit analyze_template(const common_chat_template & tmpl);
// Phase-specific analysis (can be called individually for testing)
static reasoning_analysis analyze_reasoning(const common_chat_template & tmpl, bool supports_tools);
static content_analysis analyze_content(const common_chat_template & tmpl, const reasoning_analysis & reasoning);
static tool_analysis analyze_tools(const common_chat_template & tmpl,
const jinja::caps & caps,
const reasoning_analysis & reasoning);
// Factorized differential comparison function (public for testing)
// Takes base params and a single modifier lambda to create variant B
// Returns compare_variants_result containing diff and both outputs, or std::nullopt on failure
static std::optional<compare_variants_result> compare_variants(
const common_chat_template & tmpl,
const template_params & params_A,
const std::function<void(template_params &)> & params_modifier);
// Build the unified PEG parser for this template
common_peg_arena build_parser(const templates_params & inputs) const;
private:
// Comparison helpers (implement the comparison matrix from the plan)
// 1. Reasoning analysis:
// Look for reasoning markers in rendered content
static void compare_reasoning_presence(const common_chat_template & tmpl, reasoning_analysis & reasoning);
// Compare generation prompt with enable_thinking=true vs false
static void compare_thinking_enabled(const common_chat_template & tmpl, reasoning_analysis & reasoning);
// Check if reasoning is always possible or only in tool calls
static void compare_reasoning_scope(const common_chat_template & tmpl, reasoning_analysis & reasoning);
// 2. Content (fully inside analyze_content mentioned above)
// 3. Tool calls
// a. format
// Extract tool calling 'haystack' for further analysis and delegate further analysis based on format
static tool_format_analysis analyze_tool_calls(const common_chat_template & tmpl,
const reasoning_analysis & reasoning);
// Analyze format based on position of function and argument name in needle
static tool_format_analysis analyze_tool_call_format(const std::string & haystack,
const std::string & fun_name_needle,
const std::string & arg_name_needle,
const reasoning_analysis & reasoning);
// Analyze specifics of JSON native format (entire tool call is a JSON object)
static void analyze_tool_call_format_json_native(const std::string & clean_haystack,
const std::string & fun_name_needle,
const std::string & arg_name_needle,
tool_format_analysis & format);
// Analyze specifics of non-JSON native format (tags for function name or for function name and arguments)
static void analyze_tool_call_format_non_json(const std::string & clean_haystack,
const std::string & fun_name_needle,
tool_format_analysis & format);
// Check for and extract specific per-call markers for non-native-JSON templates with parallel call support
static void check_per_call_markers(const common_chat_template & tmpl, tool_format_analysis & result);
// Logic below is only for non-JSON-native tool calling formats
// 3. b. function name
// Extract function name markers
static tool_function_analysis extract_function_markers(const common_chat_template & tmpl,
const tool_format_analysis & analysis);
// 3. c. function arguments
// Delegates to separate functions for: separator analysis, argument name analysis, argument value analysis
static tool_arguments_analysis analyze_arguments(const common_chat_template & tmpl,
const tool_analysis & analysis);
// Extract argument name markers
static void extract_argument_name_markers(const common_chat_template & tmpl,
tool_arguments_analysis & args_analysis);
// Extract argument value markers
static void extract_argument_value_markers(const common_chat_template & tmpl,
const tool_analysis & analysis,
tool_arguments_analysis & args_analysis);
// Extract argument separator, if specified (eg. <arg=foo>...</arg><sep><arg=bar>...</arg>)
static void extract_argument_separator(const common_chat_template & tmpl,
tool_arguments_analysis & args_analysis);
// Extract argument wrapper markers, if present (eg. '<args><arg=foo>...</arg><arg=bar>...</arg></args>')
static void extract_args_markers(const common_chat_template & tmpl,
const tool_analysis & analysis,
tool_arguments_analysis & args_analysis);
// 3. d. function call id
// Extract call ID markers, if present
static tool_id_analysis extract_call_id_markers(const common_chat_template & tmpl,
tool_format_analysis & analysis);
// Collect tokens from entire analysis to preserve
static void collect_preserved_tokens(diff_analysis_result & result);
static std::string apply_template(const common_chat_template & tmpl, const template_params & params);
void collect_preserved_tokens();
};
} // namespace autoparser
// Kind tag for a segment: either TEXT or MARKER.
enum segment_type { TEXT, MARKER };
inline std::ostream & operator<<(std::ostream & os, const segment_type & type) {

View File

@ -239,8 +239,8 @@ bool common_chat_templates_support_enable_thinking(const common_chat_templates *
const auto & tmpl = chat_templates->template_tool_use
? *chat_templates->template_tool_use
: *chat_templates->template_default;
diff_analysis_result result = differential_analyzer::analyze(tmpl);
detect |= result.reasoning.mode != reasoning_mode::NONE;
autoparser::analyze_template result(tmpl);
detect |= result.reasoning.mode != autoparser::reasoning_mode::NONE;
return detect;
}
@ -752,7 +752,7 @@ static void foreach_parameter(const json &
std::string common_chat_template_direct_apply(
const common_chat_template & tmpl,
const struct templates_params & inputs,
const autoparser::templates_params & inputs,
const std::optional<json> & messages_override,
const std::optional<json> & tools_override,
const std::optional<json> & additional_context) {
@ -803,7 +803,7 @@ std::string common_chat_template_direct_apply(
}
static common_chat_params common_chat_params_init_ministral_3(const common_chat_template & tmpl,
const struct templates_params & inputs) {
const autoparser::templates_params & inputs) {
common_chat_params data;
// Build up messages to follow the format: https://huggingface.co/mistralai/Ministral-3-14B-Reasoning-2512/blob/main/chat_template.jinja
@ -917,7 +917,7 @@ static common_chat_params common_chat_params_init_ministral_3(const common_chat_
}
static common_chat_params common_chat_params_init_gpt_oss(const common_chat_template & tmpl,
const struct templates_params & inputs) {
const autoparser::templates_params & inputs) {
common_chat_params data;
// Copy reasoning to the "thinking" field as expected by the gpt-oss template
@ -1063,7 +1063,7 @@ static common_chat_params common_chat_params_init_gpt_oss(const common_chat_temp
// Functionary v3.2 - uses recipient-based format: >>>recipient\n{content}
static common_chat_params common_chat_params_init_functionary_v3_2(const common_chat_template & tmpl,
const struct templates_params & inputs) {
const autoparser::templates_params & inputs) {
common_chat_params data;
data.prompt = common_chat_template_direct_apply(tmpl, inputs);
@ -1116,16 +1116,14 @@ static common_chat_params common_chat_params_init_functionary_v3_2(const common_
if (inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED) {
if (inputs.parallel_tool_calls) {
return p.choice({ content_and_tools, tools_only }) + p.end();
} else {
return p.choice({ content_until_tool + tool_choice, tools_only }) + p.end();
}
} else {
if (inputs.parallel_tool_calls) {
return p.choice({ content_and_tools, content_only, tools_only }) + p.end();
}
auto content_and_tool = content_until_tool + tool_choice;
return p.choice({ content_and_tool, content_only, tool_choice }) + p.end();
return p.choice({ content_until_tool + tool_choice, tools_only }) + p.end();
}
if (inputs.parallel_tool_calls) {
return p.choice({ content_and_tools, content_only, tools_only }) + p.end();
}
auto content_and_tool = content_until_tool + tool_choice;
return p.choice({ content_and_tool, content_only, tool_choice }) + p.end();
});
data.parser = parser.save();
@ -1204,7 +1202,7 @@ static void func_args_not_string(json & messages) {
static common_chat_params common_chat_templates_apply_jinja(const struct common_chat_templates * tmpls,
const struct common_chat_templates_inputs & inputs) {
templates_params params;
autoparser::templates_params params;
params.tools = common_chat_tools_to_json_oaicompat(inputs.tools);
const auto & tmpl = params.tools.is_array() && tmpls->template_tool_use
? *tmpls->template_tool_use
@ -1282,7 +1280,7 @@ static common_chat_params common_chat_templates_apply_jinja(const struct common_
try {
LOG_DBG("Using differential autoparser\n");
auto auto_params = universal_peg_generator::generate_parser(tmpl, params);
auto auto_params = autoparser::universal_peg_generator::generate_parser(tmpl, params);
return auto_params;
} catch (const std::exception & e) {
LOG_WRN("Automatic parser generation failed: %s\n", e.what());

View File

@ -23,6 +23,10 @@ using json = nlohmann::ordered_json;
struct common_chat_templates;
// Forward declaration only: declarations in this file take
// autoparser::templates_params by const reference, which does not require the
// full definition to be visible here.
namespace autoparser {
struct templates_params;
} // namespace autoparser
struct common_chat_tool_call {
std::string name;
std::string arguments;
@ -294,7 +298,7 @@ std::map<std::string, bool> common_chat_templates_get_caps(const common_chat_tem
std::string common_chat_template_direct_apply(
const common_chat_template & tmpl,
const struct templates_params & inputs,
const autoparser::templates_params & inputs,
const std::optional<json> & messages_override = std::nullopt,
const std::optional<json> & tools_override = std::nullopt,
const std::optional<json> & additional_context = std::nullopt);

View File

@ -10,6 +10,8 @@
#include <sstream>
#include <string>
using namespace autoparser;
static void test_calculate_diff_split_basic(testing & t);
static void test_calculate_diff_split_identical(testing & t);
static void test_calculate_diff_split_common_prefix(testing & t);
@ -591,7 +593,7 @@ static void test_compare_variants_basic(testing & t) {
p.messages[0]["content"] = "World";
};
auto result = differential_analyzer::compare_variants(tmpl, params, modifier);
auto result = autoparser::compare_variants(tmpl, params, modifier);
if (!t.assert_true("result should have value", result.has_value())) {
return;
@ -614,7 +616,7 @@ static void test_compare_variants_messages_modifier(testing & t) {
p.messages[0]["content"] = "B";
};
std::optional<compare_variants_result> result = differential_analyzer::compare_variants(tmpl, params, modifier);
std::optional<compare_variants_result> result = autoparser::compare_variants(tmpl, params, modifier);
if (!t.assert_true("result should have value", result.has_value())) {
return;
@ -637,7 +639,7 @@ static void test_compare_variants_tools_modifier(testing & t) {
p.tools[0]["name"] = "bar";
};
auto result = differential_analyzer::compare_variants(tmpl, params, modifier);
auto result = autoparser::compare_variants(tmpl, params, modifier);
if (!t.assert_true("result should have value", result.has_value())) {
return;
@ -661,7 +663,7 @@ static void test_compare_variants_both_modifiers(testing & t) {
p.messages[0]["role"] = "newuser";
};
auto result = differential_analyzer::compare_variants(tmpl, params, modifier);
auto result = autoparser::compare_variants(tmpl, params, modifier);
if (!t.assert_true("result should have value", result.has_value())) {
return;
@ -684,7 +686,7 @@ static void test_compare_variants_template_failure(testing & t) {
p.messages[0]["content"] = "World";
};
auto result = differential_analyzer::compare_variants(tmpl, params, modifier);
auto result = autoparser::compare_variants(tmpl, params, modifier);
t.assert_true("result should be nullopt on template failure", !result.has_value());
}
@ -699,7 +701,7 @@ static void test_compare_variants_identity(testing & t) {
});
// No modifier - should use identity
auto result = differential_analyzer::compare_variants(tmpl, params, nullptr);
auto result = autoparser::compare_variants(tmpl, params, nullptr);
if (!t.assert_true("result should have value", result.has_value())) {
return;
@ -810,7 +812,7 @@ static void test_seed_oss_tool_presence(testing & t) {
params_with_tools.add_generation_prompt = false;
params_with_tools.enable_thinking = true;
auto result = differential_analyzer::compare_variants(tmpl, params_no_tools,
auto result = autoparser::compare_variants(tmpl, params_no_tools,
[&](template_params & p) {
p.messages = params_with_tools.messages;
});
@ -872,7 +874,7 @@ static void test_seed_oss_call_count(testing & t) {
params_one.add_generation_prompt = false;
params_one.enable_thinking = true;
auto result = differential_analyzer::compare_variants(tmpl, params_one,
auto result = autoparser::compare_variants(tmpl, params_one,
[&](template_params & p) {
p.messages = json::array({user_msg, assistant_two_calls});
});
@ -964,7 +966,7 @@ static void test_seed_oss_function_names(testing & t) {
params_alpha.add_generation_prompt = false;
params_alpha.enable_thinking = true;
auto result = differential_analyzer::compare_variants(tmpl, params_alpha,
auto result = autoparser::compare_variants(tmpl, params_alpha,
[&](template_params & p) {
p.messages = json::array({user_msg, assistant_func_beta});
});
@ -1068,7 +1070,7 @@ static void test_seed_oss_argument_count(testing & t) {
params_zero.add_generation_prompt = false;
params_zero.enable_thinking = true;
auto result_zero_one = differential_analyzer::compare_variants(tmpl, params_zero,
auto result_zero_one = autoparser::compare_variants(tmpl, params_zero,
[&](template_params & p) {
p.messages = json::array({user_msg, assistant_one_arg});
});
@ -1086,7 +1088,7 @@ static void test_seed_oss_argument_count(testing & t) {
params_one.add_generation_prompt = false;
params_one.enable_thinking = true;
auto result_one_two = differential_analyzer::compare_variants(tmpl, params_one,
auto result_one_two = autoparser::compare_variants(tmpl, params_one,
[&](template_params & p) {
p.messages = json::array({user_msg, assistant_two_args});
});
@ -1144,7 +1146,7 @@ static void test_seed_oss_args_presence(testing & t) {
params_same.enable_thinking = true;
// Test same arg vs other arg
auto result_same_other = differential_analyzer::compare_variants(tmpl, params_same,
auto result_same_other = autoparser::compare_variants(tmpl, params_same,
[&](template_params & p) {
p.messages = json::array({user_msg, assistant_other_arg});
});
@ -1163,7 +1165,7 @@ static void test_seed_oss_args_presence(testing & t) {
diff5a.right.find("value2") != std::string::npos || diff5a.prefix.find("value2") != std::string::npos || diff5a.suffix.find("value2") != std::string::npos);
// Test same arg vs both args
auto result_same_both = differential_analyzer::compare_variants(tmpl, params_same,
auto result_same_both = autoparser::compare_variants(tmpl, params_same,
[&](template_params & p) {
p.messages = json::array({user_msg, assistant_both_args});
});
@ -1212,7 +1214,7 @@ static void test_seed_oss_tool_with_reasoning(testing & t) {
params_tool_only.add_generation_prompt = false;
params_tool_only.enable_thinking = true;
auto result = differential_analyzer::compare_variants(tmpl, params_tool_only,
auto result = autoparser::compare_variants(tmpl, params_tool_only,
[&](template_params & p) {
p.messages = json::array({user_msg, assistant_tool_with_reasoning});
});
@ -1285,7 +1287,7 @@ static void test_nemotron_reasoning_detection(testing & t) {
params.enable_thinking = true;
// Run differential analysis
auto analysis = differential_analyzer::analyze(tmpl);
auto analysis = autoparser::analyze_template(tmpl);
// Check reasoning markers
t.assert_equal("reasoning_start should be '<think>'", "<think>", analysis.reasoning.start);
@ -1306,7 +1308,7 @@ static void test_nemotron_tool_format(testing & t) {
common_chat_template tmpl = load_nemotron_template(t);
// Run differential analysis
auto analysis = differential_analyzer::analyze(tmpl);
auto analysis = autoparser::analyze_template(tmpl);
// Check tool markers - Nemotron uses per-call wrapping (each call individually wrapped)
t.assert_equal("tool_section_start should be empty (per-call format)", "", analysis.tools.format.section_start);
@ -1344,7 +1346,7 @@ static void test_cohere_reasoning_detection(testing & t) {
common_chat_template tmpl = load_cohere_template(t);
// Run differential analysis
auto analysis = differential_analyzer::analyze(tmpl);
auto analysis = autoparser::analyze_template(tmpl);
// Check reasoning markers - Cohere uses special token format
t.assert_equal("reasoning_start should be '<|START_THINKING|>'", "<|START_THINKING|>", analysis.reasoning.start);
@ -1365,7 +1367,7 @@ static void test_tool_format_cohere(testing & t) {
common_chat_template tmpl = load_cohere_template(t);
// Run differential analysis
auto analysis = differential_analyzer::analyze(tmpl);
auto analysis = autoparser::analyze_template(tmpl);
// Check tool section markers - Cohere uses ACTION markers
t.assert_equal("tool_section_start should be '<|START_ACTION|>'", "<|START_ACTION|>", analysis.tools.format.section_start);
@ -1772,12 +1774,12 @@ static void test_tagged_args_with_embedded_quotes(testing & t) {
auto tool_choice = p.choice();
for (const auto & tool_def : tools) {
if (!tool_def.contains("function")) continue;
if (!tool_def.contains("function")) { continue; }
const auto & function = tool_def.at("function");
std::string name = function.at("name");
const auto & params = function.at("parameters");
if (!params.contains("properties") || !params.at("properties").is_object()) continue;
if (!params.contains("properties") || !params.at("properties").is_object()) { continue; }
const auto & properties = params.at("properties");

View File

@ -279,7 +279,7 @@ static void render_scenario(const common_chat_template & tmpl,
LOG_ERR("Messages:\n%s\n", final_messages.dump(2).c_str());
try {
templates_params inputs;
autoparser::templates_params inputs;
inputs.messages = final_messages;
inputs.add_generation_prompt = add_generation_prompt;
inputs.extra_context["enable_thinking"] = enable_thinking;
@ -395,10 +395,10 @@ int main(int argc, char ** argv) {
LOG_ERR(" TEMPLATE ANALYSIS\n");
LOG_ERR("================================================================================\n");
diff_analysis_result analysis = differential_analyzer::analyze(chat_template);
autoparser::analyze_template analysis(chat_template);
// Generate Parser
templates_params params;
autoparser::templates_params params;
params.messages = json::array();
params.reasoning_format =
opts.enable_reasoning ? COMMON_REASONING_FORMAT_DEEPSEEK : COMMON_REASONING_FORMAT_NONE;
@ -414,7 +414,7 @@ int main(int argc, char ** argv) {
}
params.parallel_tool_calls = false;
auto parser_data = universal_peg_generator::generate_parser(chat_template, params, analysis);
auto parser_data = autoparser::universal_peg_generator::generate_parser(chat_template, params, analysis);
LOG_ERR("\n=== Differential Analysis Results ===\n");

View File

@ -400,12 +400,12 @@ static void analyze_template(const std::string & template_path) {
{
json user_msg = make_user_msg();
templates_params params_no_tools;
autoparser::templates_params params_no_tools;
params_no_tools.messages = json::array({ user_msg });
params_no_tools.add_generation_prompt = false;
params_no_tools.tools = json::array();
templates_params params_with_tools = params_no_tools;
autoparser::templates_params params_with_tools = params_no_tools;
params_with_tools.tools = tools;
std::string output_no_tools = common_chat_template_direct_apply(chat_template, params_no_tools);
@ -419,12 +419,12 @@ static void analyze_template(const std::string & template_path) {
{
json user_msg = make_user_msg();
templates_params params_no_prompt;
autoparser::templates_params params_no_prompt;
params_no_prompt.messages = json::array({ user_msg });
params_no_prompt.add_generation_prompt = false;
params_no_prompt.tools = json::array();
templates_params params_with_prompt = params_no_prompt;
autoparser::templates_params params_with_prompt = params_no_prompt;
params_with_prompt.add_generation_prompt = true;
std::string output_no_prompt = common_chat_template_direct_apply(chat_template, params_no_prompt);
@ -438,12 +438,12 @@ static void analyze_template(const std::string & template_path) {
{
json user_msg = make_user_msg();
templates_params params_no_reasoning;
autoparser::templates_params params_no_reasoning;
params_no_reasoning.messages = json::array({ user_msg, make_assistant_no_reasoning() });
params_no_reasoning.add_generation_prompt = false;
params_no_reasoning.enable_thinking = true;
templates_params params_with_reasoning = params_no_reasoning;
autoparser::templates_params params_with_reasoning = params_no_reasoning;
params_with_reasoning.messages = json::array({ user_msg, make_assistant_with_reasoning() });
std::string output_no_reasoning = common_chat_template_direct_apply(chat_template, params_no_reasoning);
@ -458,12 +458,12 @@ static void analyze_template(const std::string & template_path) {
json user_msg = make_user_msg();
json user_msg2 = make_user_msg2();
templates_params params_no_reasoning;
autoparser::templates_params params_no_reasoning;
params_no_reasoning.messages = json::array({ user_msg, make_assistant_no_reasoning(), user_msg2 });
params_no_reasoning.add_generation_prompt = false;
params_no_reasoning.enable_thinking = true;
templates_params params_with_reasoning = params_no_reasoning;
autoparser::templates_params params_with_reasoning = params_no_reasoning;
params_with_reasoning.messages = json::array({ user_msg, make_assistant_with_reasoning(), user_msg2 });
std::string output_no_reasoning = common_chat_template_direct_apply(chat_template, params_no_reasoning);
@ -477,12 +477,12 @@ static void analyze_template(const std::string & template_path) {
{
json user_msg = make_user_msg();
templates_params params_no_tool;
autoparser::templates_params params_no_tool;
params_no_tool.messages = json::array({ user_msg, make_assistant_no_tool() });
params_no_tool.add_generation_prompt = false;
params_no_tool.tools = tools;
templates_params params_with_tool = params_no_tool;
autoparser::templates_params params_with_tool = params_no_tool;
params_with_tool.messages = json::array({ user_msg, make_assistant_one_tool() });
std::string output_no_tool = common_chat_template_direct_apply(chat_template, params_no_tool);
@ -497,12 +497,12 @@ static void analyze_template(const std::string & template_path) {
json user_msg = make_user_msg();
json user_msg2 = make_user_msg2_continue();
templates_params params_no_tool;
autoparser::templates_params params_no_tool;
params_no_tool.messages = json::array({ user_msg, make_assistant_no_tool(), user_msg2 });
params_no_tool.add_generation_prompt = false;
params_no_tool.tools = tools;
templates_params params_with_tool = params_no_tool;
autoparser::templates_params params_with_tool = params_no_tool;
params_with_tool.messages = json::array({ user_msg, make_assistant_one_tool(), user_msg2 });
std::string output_no_tool = common_chat_template_direct_apply(chat_template, params_no_tool);
@ -516,12 +516,12 @@ static void analyze_template(const std::string & template_path) {
{
json user_msg = make_user_msg();
templates_params params_one_tool;
autoparser::templates_params params_one_tool;
params_one_tool.messages = json::array({ user_msg, make_assistant_one_tool() });
params_one_tool.add_generation_prompt = false;
params_one_tool.tools = tools;
templates_params params_two_tools = params_one_tool;
autoparser::templates_params params_two_tools = params_one_tool;
params_two_tools.messages = json::array({ user_msg, make_assistant_two_tools() });
std::string output_one_tool = common_chat_template_direct_apply(chat_template, params_one_tool);
@ -536,12 +536,12 @@ static void analyze_template(const std::string & template_path) {
json user_msg = make_user_msg();
json user_msg2 = make_user_msg2_continue();
templates_params params_one_tool;
autoparser::templates_params params_one_tool;
params_one_tool.messages = json::array({ user_msg, make_assistant_one_tool(), user_msg2 });
params_one_tool.add_generation_prompt = false;
params_one_tool.tools = tools;
templates_params params_two_tools = params_one_tool;
autoparser::templates_params params_two_tools = params_one_tool;
params_two_tools.messages = json::array({ user_msg, make_assistant_two_tools(), user_msg2 });
std::string output_one_tool = common_chat_template_direct_apply(chat_template, params_one_tool);
@ -555,13 +555,13 @@ static void analyze_template(const std::string & template_path) {
{
json user_msg = make_user_msg();
templates_params params_no_reasoning;
autoparser::templates_params params_no_reasoning;
params_no_reasoning.messages = json::array({ user_msg, make_assistant_one_tool() });
params_no_reasoning.add_generation_prompt = false;
params_no_reasoning.tools = tools;
params_no_reasoning.enable_thinking = true;
templates_params params_with_reasoning = params_no_reasoning;
autoparser::templates_params params_with_reasoning = params_no_reasoning;
params_with_reasoning.messages = json::array({ user_msg, make_assistant_one_tool_with_reasoning() });
std::string output_no_reasoning = common_chat_template_direct_apply(chat_template, params_no_reasoning);