Fix case with object inside object, refactor long methods.

This commit is contained in:
Piotr Wilkin 2026-02-07 23:24:29 +01:00
parent 2081e9b056
commit bd549b3b37
6 changed files with 656 additions and 612 deletions

View File

@ -133,234 +133,252 @@ common_peg_parser universal_peg_generator::build_tool_parser(
const templates_params & inputs,
const common_peg_parser & reasoning) {
switch (analysis.tools) {
case tool_format::JSON_NATIVE:
return build_tool_parser_json_native(p, analysis, inputs, reasoning);
case tool_format::TAG_WITH_JSON:
return build_tool_parser_tag_json(p, analysis, inputs, reasoning);
case tool_format::TAG_WITH_TAGGED:
return build_tool_parser_tag_tagged(p, analysis, inputs, reasoning);
default:
GGML_ABORT("Unable to create tool parser");
}
}
// Builds the full response parser for tool_format::JSON_NATIVE.
//
// p         - builder used to create every sub-parser
// analysis  - detected template traits (markers, field names, content mode, ...)
// inputs    - request parameters (tool definitions, tool_choice, parallel_tool_calls)
// reasoning - parser placed at the very front of the returned sequence
//
// Returns: reasoning, then optional content, then the JSON tool calls, then end-of-input.
common_peg_parser universal_peg_generator::build_tool_parser_json_native(
    common_chat_peg_unified_builder & p,
    const diff_analysis_result & analysis,
    const templates_params & inputs,
    const common_peg_parser & reasoning) {
    const auto & markers = analysis.markers;

    // Effective field names: when a non-default function_field is configured and the
    // name field is not already dotted, both fields are addressed as "<function_field>.<field>".
    std::string effective_name = analysis.name_field;
    std::string effective_args = analysis.args_field;

    const bool custom_function_field = !analysis.function_field.empty() && analysis.function_field != "function";
    if (custom_function_field && effective_name.find('.') == std::string::npos) {
        effective_name = analysis.function_field + "." + effective_name;
        effective_args = analysis.function_field + "." + effective_args;
    }

    const bool calls_required = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED;

    auto json_tools = p.standard_json_tools(
        markers.tool_section_start,
        markers.tool_section_end,
        inputs.tools,
        inputs.parallel_tool_calls,
        calls_required,
        effective_name,
        effective_args,
        analysis.tools_array_wrapped,
        analysis.fun_name_is_key,
        analysis.id_field,
        analysis.gen_id_field,
        analysis.parameter_order
    );

    const bool content_is_wrapped = analysis.content == content_mode::ALWAYS_WRAPPED &&
                                    !markers.content_start.empty() &&
                                    !markers.content_end.empty();

    if (!content_is_wrapped) {
        // Unwrapped content: everything up to the tool section start marker (if there is one).
        auto leading_text = markers.tool_section_start.empty() ? p.eps() : p.until(markers.tool_section_start);
        return reasoning + p.optional(p.content(leading_text)) + json_tools + p.end();
    }

    // Wrapped content: an optional <content_start>...<content_end> span before the tools.
    auto wrapped_text = p.optional(markers.content_start + p.content(p.until(markers.content_end)) + markers.content_end);
    return reasoning + wrapped_text + json_tools + p.end();
}
common_peg_parser universal_peg_generator::build_tool_parser_tag_json(
common_chat_peg_unified_builder & p,
const diff_analysis_result & analysis,
const templates_params & inputs,
const common_peg_parser & reasoning) {
const auto & m = analysis.markers;
common_peg_parser tool_choice = p.choice();
if (analysis.tools == tool_format::JSON_NATIVE) {
// Pure JSON format: use standard_json_tools helper
// Build effective field names with dot notation if function_field is set
std::string name_field = analysis.name_field;
std::string args_field = analysis.args_field;
foreach_function(inputs.tools, [&](const json & tool) {
const auto & function = tool.at("function");
std::string name = function.at("name");
const auto & schema = function.at("parameters");
if (!analysis.function_field.empty() &&
analysis.function_field != "function" &&
name_field.find('.') == std::string::npos) {
name_field = analysis.function_field + "." + name_field;
args_field = analysis.function_field + "." + args_field;
// Build call_id parser based on position (if supported)
common_peg_parser call_id_section = p.eps();
if (analysis.call_id_pos == call_id_position::BETWEEN_FUNC_AND_ARGS &&
!m.call_id_prefix.empty() && !m.call_id_suffix.empty()) {
call_id_section = p.optional(m.call_id_prefix + p.tool_id(p.until(m.call_id_suffix))) + m.call_id_suffix;
}
auto tools_parser = p.standard_json_tools(
m.tool_section_start,
m.tool_section_end,
inputs.tools,
inputs.parallel_tool_calls,
inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED,
name_field,
args_field,
analysis.tools_array_wrapped,
analysis.fun_name_is_key,
analysis.id_field,
analysis.gen_id_field,
analysis.parameter_order
);
auto func_parser = p.tool_open(m.func_name_prefix + p.tool_name(p.literal(name)) + m.func_name_suffix) +
call_id_section +
p.tool_args(p.schema(p.json(), "tool-" + name + "-schema", schema));
// Handle content wrappers if present
if (analysis.content == content_mode::ALWAYS_WRAPPED &&
!m.content_start.empty() && !m.content_end.empty()) {
auto wrapped_content = p.optional(m.content_start + p.content(p.until(m.content_end)) + m.content_end);
return reasoning + wrapped_content + tools_parser + p.end();
if (!m.func_close.empty()) {
func_parser = func_parser + m.func_close;
}
auto content_before_tools = m.tool_section_start.empty() ? p.eps() : p.until(m.tool_section_start);
return reasoning + p.optional(p.content(content_before_tools)) + tools_parser + p.end();
}
tool_choice |= p.rule("tool-" + name, func_parser);
});
if (analysis.tools == tool_format::TAG_WITH_JSON) {
// Tag-based with JSON args: <function=name>{args}</function>
// With optional call_id: <function=name>[CALL_ID]id[ARGS]{args}</function>
foreach_function(inputs.tools, [&](const json & tool) {
const auto & function = tool.at("function");
std::string name = function.at("name");
const auto & schema = function.at("parameters");
auto require_calls = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED;
// Build call_id parser based on position (if supported)
common_peg_parser call_id_section = p.eps();
if (analysis.call_id_pos == call_id_position::BETWEEN_FUNC_AND_ARGS &&
!m.call_id_prefix.empty() && !m.call_id_suffix.empty()) {
// Optional call_id followed by required call_id_suffix (which is also args_start)
// Format: optional([CALL_ID] + call_id_value) + [ARGS]
call_id_section = p.optional(m.call_id_prefix + p.tool_id(p.until(m.call_id_suffix))) + m.call_id_suffix;
}
common_peg_parser tool_calls = p.eps();
auto func_parser = p.tool_open(m.func_name_prefix + p.tool_name(p.literal(name)) + m.func_name_suffix) +
call_id_section +
p.tool_args(p.schema(p.json(), "tool-" + name + "-schema", schema));
if (!m.func_close.empty()) {
func_parser = func_parser + m.func_close;
}
tool_choice |= p.rule("tool-" + name, func_parser);
});
auto require_calls = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED;
common_peg_parser tool_calls = p.eps();
if (!m.per_call_start.empty()) {
// Per-call wrapping: each call individually wrapped
auto wrapped_call = m.per_call_start + tool_choice + m.per_call_end;
if (inputs.parallel_tool_calls) {
tool_calls = p.trigger_rule("tool-call",
wrapped_call + p.zero_or_more(p.space() + wrapped_call));
} else {
tool_calls = p.trigger_rule("tool-call", wrapped_call);
}
if (!m.tool_section_start.empty()) {
tool_calls = p.trigger_rule("tool-calls", p.literal(m.tool_section_start) + p.space() +
tool_calls + p.space() + (m.tool_section_end.empty() ? p.end() : p.literal(m.tool_section_end)));
}
if (!m.per_call_start.empty()) {
auto wrapped_call = m.per_call_start + tool_choice + m.per_call_end;
if (inputs.parallel_tool_calls) {
tool_calls = p.trigger_rule("tool-call",
wrapped_call + p.zero_or_more(p.space() + wrapped_call));
} else {
std::string separator = m.call_separator;
if (separator.empty()) {
separator = ", "; // Default
}
if (inputs.parallel_tool_calls) {
tool_calls = p.trigger_rule("tool-call",
m.tool_section_start + tool_choice + p.zero_or_more(separator + tool_choice) + m.tool_section_end);
} else {
tool_calls = p.trigger_rule("tool-call",
m.tool_section_start + tool_choice + m.tool_section_end);
}
tool_calls = p.trigger_rule("tool-call", wrapped_call);
}
if (!m.tool_section_start.empty()) {
tool_calls = p.trigger_rule("tool-calls", p.literal(m.tool_section_start) + p.space() +
tool_calls + p.space() + (m.tool_section_end.empty() ? p.end() : p.literal(m.tool_section_end)));
}
} else {
std::string separator = m.call_separator;
if (separator.empty()) {
separator = ", "; // Default
}
if (!require_calls) {
tool_calls = p.optional(tool_calls);
}
std::string trigger_marker = !m.tool_section_start.empty() ? m.tool_section_start : m.per_call_start;
auto content_before_tools = trigger_marker.empty() ? p.eps() : p.until(trigger_marker);
return reasoning + p.optional(p.content(content_before_tools)) + tool_calls + p.end();
}
if (analysis.tools == tool_format::TAG_WITH_TAGGED) {
// Tag-based with tagged args: <function=name><param=key>value</param></function>
foreach_function(inputs.tools, [&](const json & tool) {
const auto & function = tool.at("function");
std::string name = function.at("name");
const auto & params = function.at("parameters");
if (!params.contains("properties") || !params.at("properties").is_object()) {
return;
}
const auto & properties = params.at("properties");
std::set<std::string> required;
if (params.contains("required") && params.at("required").is_array()) {
params.at("required").get_to(required);
}
// Build parser for each argument
std::vector<common_peg_parser> arg_parsers;
for (const auto & [param_name, param_schema] : properties.items()) {
bool is_required = required.find(param_name) != required.end();
auto type = param_schema.value("type", "object");
auto arg = p.tool_arg(
p.tool_arg_open(m.arg_name_prefix + p.tool_arg_name(p.literal(param_name)) + m.arg_name_suffix) + m.arg_value_prefix +
(type == "string" ?
p.tool_arg_string_value(p.schema(p.until(m.arg_value_suffix),
"tool-" + name + "-arg-" + param_name + "-schema", param_schema, true)) :
p.tool_arg_json_value(p.schema(p.json(),
"tool-" + name + "-arg-" + param_name + "-schema", param_schema)) + p.space()) +
p.tool_arg_close(p.literal(m.arg_value_suffix))
);
if (is_required) {
arg_parsers.push_back(p.rule("tool-" + name + "-arg-" + param_name, arg));
} else {
arg_parsers.push_back(p.optional(p.rule("tool-" + name + "-arg-" + param_name, arg)));
}
}
// Build arg sequence with space() between consecutive args
common_peg_parser args_seq = p.eps();
for (size_t i = 0; i < arg_parsers.size(); i++) {
if (i > 0) {
args_seq = args_seq + p.space();
}
args_seq = args_seq + arg_parsers[i];
}
// Build call_id parser based on position (if supported)
common_peg_parser call_id_section = p.eps();
if (analysis.call_id_pos == call_id_position::BETWEEN_FUNC_AND_ARGS &&
!m.call_id_prefix.empty() && !m.call_id_suffix.empty()) {
// Optional call_id followed by required call_id_suffix
call_id_section = p.optional(m.call_id_prefix + p.tool_id(p.until(m.call_id_suffix))) + m.call_id_suffix;
}
auto func_parser = p.tool_open(m.func_name_prefix + p.tool_name(p.literal(name)) + m.func_name_suffix) +
call_id_section +
p.space() + args_seq;
if (!m.func_close.empty()) {
func_parser = func_parser + p.space() + p.tool_close(p.literal(m.func_close));
} else if (!m.per_call_end.empty()) {
// When there's no func_close but there is a per_call_end marker, use peek() to ensure
// we only emit tool_close when we can actually see the closing marker. This prevents
// premature closing during partial parsing when we've seen e.g. "</" which could be
// either "</tool_call>" (end) or "<arg_key>" prefix that failed to match.
func_parser = func_parser + p.tool_close(p.peek(p.literal(m.per_call_end)));
} else {
func_parser = func_parser + p.tool_close(p.space()); // force this to process tool closing callbacks in mapper
}
tool_choice |= p.rule("tool-" + name, func_parser);
});
auto require_tools = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED;
common_peg_parser tool_calls = p.eps();
if (!m.per_call_start.empty()) {
// Per-call wrapping: each call individually wrapped (e.g., <tool_call>...</tool_call>)
auto wrapped_call = m.per_call_start + p.space() + tool_choice + p.space() + m.per_call_end;
if (inputs.parallel_tool_calls) {
tool_calls = p.trigger_rule("tool-call", wrapped_call + p.zero_or_more(p.space() + wrapped_call));
} else {
tool_calls = p.trigger_rule("tool-call", wrapped_call);
}
if (!m.tool_section_start.empty()) {
tool_calls = p.trigger_rule("tool-calls", p.literal(m.tool_section_start) + p.space() +
tool_calls + p.space() + (m.tool_section_end.empty() ? p.end() : p.literal(m.tool_section_end)));
}
if (inputs.parallel_tool_calls) {
tool_calls = p.trigger_rule("tool-call",
m.tool_section_start + tool_choice + p.zero_or_more(separator + tool_choice) + m.tool_section_end);
} else {
std::string separator = m.call_separator;
if (separator.empty()) {
separator = ", "; // Default
}
if (inputs.parallel_tool_calls) {
tool_calls = p.trigger_rule("tool-call",
m.tool_section_start + p.space() + tool_choice + p.zero_or_more(separator + tool_choice) + p.space() + m.tool_section_end);
} else {
tool_calls = p.trigger_rule("tool-call",
m.tool_section_start + p.space() + tool_choice + p.space() + m.tool_section_end);
}
tool_calls = p.trigger_rule("tool-call",
m.tool_section_start + tool_choice + m.tool_section_end);
}
if (!require_tools) {
tool_calls = p.optional(tool_calls);
}
std::string trigger_marker = !m.tool_section_start.empty() ? m.tool_section_start : m.per_call_start;
auto content_before_tools = trigger_marker.empty() ? p.eps() : p.until(trigger_marker);
return reasoning + p.optional(p.content(content_before_tools)) + tool_calls + p.end();
}
GGML_ABORT("Unable to create tool parser");
if (!require_calls) {
tool_calls = p.optional(tool_calls);
}
std::string trigger_marker = !m.tool_section_start.empty() ? m.tool_section_start : m.per_call_start;
auto content_before_tools = trigger_marker.empty() ? p.eps() : p.until(trigger_marker);
return reasoning + p.optional(p.content(content_before_tools)) + tool_calls + p.end();
}
// Builds the full response parser for tool_format::TAG_WITH_TAGGED, where every
// argument of a call is emitted as its own tagged name/value pair.
//
// p         - builder used to create every sub-parser
// analysis  - detected template traits (markers, call-id position, ...)
// inputs    - request parameters (tool definitions, tool_choice, parallel_tool_calls)
// reasoning - parser placed at the very front of the returned sequence
//
// Returns: reasoning, then optional content, then the tool calls, then end-of-input.
common_peg_parser universal_peg_generator::build_tool_parser_tag_tagged(
    common_chat_peg_unified_builder & p,
    const diff_analysis_result & analysis,
    const templates_params & inputs,
    const common_peg_parser & reasoning) {
    const auto & markers = analysis.markers;

    // Accumulates one alternative per declared function.
    common_peg_parser any_tool = p.choice();

    foreach_function(inputs.tools, [&](const json & tool_def) {
        const auto & fn = tool_def.at("function");
        std::string fn_name = fn.at("name");
        const auto & fn_params = fn.at("parameters");

        // A function whose schema has no object-valued "properties" gets no alternative.
        const bool has_properties = fn_params.contains("properties") && fn_params.at("properties").is_object();
        if (!has_properties) {
            return;
        }
        const auto & props = fn_params.at("properties");

        std::set<std::string> required_names;
        if (fn_params.contains("required") && fn_params.at("required").is_array()) {
            fn_params.at("required").get_to(required_names);
        }

        const std::string tool_rule = "tool-" + fn_name;

        // One parser per declared property; properties not listed as required become optional.
        std::vector<common_peg_parser> per_arg;
        for (const auto & [key, key_schema] : props.items()) {
            const std::string arg_rule    = tool_rule + "-arg-" + key;
            const std::string schema_rule = arg_rule + "-schema";
            const auto declared_type = key_schema.value("type", "object");

            // String-typed values are read raw up to the value suffix; everything else is parsed as JSON.
            auto arg_body = p.tool_arg(
                p.tool_arg_open(markers.arg_name_prefix + p.tool_arg_name(p.literal(key)) + markers.arg_name_suffix) +
                markers.arg_value_prefix +
                (declared_type == "string" ?
                    p.tool_arg_string_value(p.schema(p.until(markers.arg_value_suffix), schema_rule, key_schema, true)) :
                    p.tool_arg_json_value(p.schema(p.json(), schema_rule, key_schema)) + p.space()) +
                p.tool_arg_close(p.literal(markers.arg_value_suffix))
            );

            auto named_arg = p.rule(arg_rule, arg_body);
            if (required_names.count(key) > 0) {
                per_arg.push_back(named_arg);
            } else {
                per_arg.push_back(p.optional(named_arg));
            }
        }

        // Chain the argument parsers, allowing whitespace between neighbours.
        common_peg_parser all_args = p.eps();
        bool is_first = true;
        for (const auto & one_arg : per_arg) {
            if (!is_first) {
                all_args = all_args + p.space();
            }
            all_args = all_args + one_arg;
            is_first = false;
        }

        // Optional call id sitting between the function name and the arguments;
        // the suffix marker itself is mandatory once this layout is in use.
        common_peg_parser id_part = p.eps();
        const bool id_between_name_and_args = analysis.call_id_pos == call_id_position::BETWEEN_FUNC_AND_ARGS &&
                                              !markers.call_id_prefix.empty() &&
                                              !markers.call_id_suffix.empty();
        if (id_between_name_and_args) {
            id_part = p.optional(markers.call_id_prefix + p.tool_id(p.until(markers.call_id_suffix))) + markers.call_id_suffix;
        }

        auto one_call = p.tool_open(markers.func_name_prefix + p.tool_name(p.literal(fn_name)) + markers.func_name_suffix) +
                        id_part +
                        p.space() + all_args;

        if (!markers.func_close.empty()) {
            one_call = one_call + p.space() + p.tool_close(p.literal(markers.func_close));
        } else if (!markers.per_call_end.empty()) {
            // No dedicated function-close marker, but a per-call end marker exists: only peek at it,
            // so tool_close is emitted solely when the closing marker is actually visible. During
            // partial parsing a fragment such as "</" could still turn out to be either
            // "</tool_call>" (end) or an "<arg_key>" prefix that failed to match.
            one_call = one_call + p.tool_close(p.peek(p.literal(markers.per_call_end)));
        } else {
            one_call = one_call + p.tool_close(p.space()); // force this to process tool closing callbacks in mapper
        }

        any_tool |= p.rule(tool_rule, one_call);
    });

    const bool tools_mandatory = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED;

    common_peg_parser calls = p.eps();
    if (markers.per_call_start.empty()) {
        // Calls share a single section and are split by a separator (", " when none was detected).
        std::string call_sep = markers.call_separator;
        if (call_sep.empty()) {
            call_sep = ", "; // Default
        }
        if (inputs.parallel_tool_calls) {
            calls = p.trigger_rule("tool-call",
                markers.tool_section_start + p.space() + any_tool + p.zero_or_more(call_sep + any_tool) + p.space() + markers.tool_section_end);
        } else {
            calls = p.trigger_rule("tool-call",
                markers.tool_section_start + p.space() + any_tool + p.space() + markers.tool_section_end);
        }
    } else {
        // Every call carries its own start/end wrapper.
        auto framed_call = markers.per_call_start + p.space() + any_tool + p.space() + markers.per_call_end;
        if (inputs.parallel_tool_calls) {
            calls = p.trigger_rule("tool-call", framed_call + p.zero_or_more(p.space() + framed_call));
        } else {
            calls = p.trigger_rule("tool-call", framed_call);
        }
        // An enclosing section, when present, wraps the per-call sequence.
        if (!markers.tool_section_start.empty()) {
            calls = p.trigger_rule("tool-calls", p.literal(markers.tool_section_start) + p.space() +
                calls + p.space() + (markers.tool_section_end.empty() ? p.end() : p.literal(markers.tool_section_end)));
        }
    }

    if (!tools_mandatory) {
        calls = p.optional(calls);
    }

    // Free-form content runs up to the first marker that can open a tool call.
    std::string first_marker = markers.tool_section_start.empty() ? markers.per_call_start : markers.tool_section_start;
    auto leading_text = first_marker.empty() ? p.eps() : p.until(first_marker);
    return reasoning + p.optional(p.content(leading_text)) + calls + p.end();
}

View File

@ -51,4 +51,20 @@ class universal_peg_generator {
const diff_analysis_result & analysis,
const templates_params & inputs,
const common_peg_parser & reasoning);
// Per-format tool parser builders
// Builder for tool_format::JSON_NATIVE (tool calls expressed as pure JSON).
//   p         - builder used to construct the parsers
//   analysis  - template analysis result (markers, field names, content mode, ...)
//   inputs    - request parameters (tools, tool_choice, parallel_tool_calls)
//   reasoning - parser placed in front of the content/tool-call parsers
static common_peg_parser build_tool_parser_json_native(common_chat_peg_unified_builder & p,
const diff_analysis_result & analysis,
const templates_params & inputs,
const common_peg_parser & reasoning);
// Builder for tool_format::TAG_WITH_JSON (tag-delimited call whose arguments are a JSON object).
// Takes the same parameters as the other per-format builders: the parser builder,
// the template analysis result, the request inputs and the leading reasoning parser.
static common_peg_parser build_tool_parser_tag_json(common_chat_peg_unified_builder & p,
const diff_analysis_result & analysis,
const templates_params & inputs,
const common_peg_parser & reasoning);
// Builder for tool_format::TAG_WITH_TAGGED (tag-delimited call whose arguments are
// individually tagged name/value pairs).
// Takes the same parameters as the other per-format builders: the parser builder,
// the template analysis result, the request inputs and the leading reasoning parser.
static common_peg_parser build_tool_parser_tag_tagged(common_chat_peg_unified_builder & p,
const diff_analysis_result & analysis,
const templates_params & inputs,
const common_peg_parser & reasoning);
};

View File

@ -169,11 +169,7 @@ enum class tool_format {
NONE, // No tool support detected
JSON_NATIVE, // Pure JSON: {"name": "X", "arguments": {...}}
TAG_WITH_JSON, // Tag-based with JSON args: <function=X>{...}</function>
BRACKET_TAG, // Bracket-tag: [TOOL_CALLS]name[CALL_ID]id[ARGS]{...}
PREFIXED_INDEXED, // Prefixed-indexed: functions.X:0{...}
RECIPIENT_BASED, // Recipient routing: >>>func_name\n{...}
TAG_WITH_TAGGED, // Tag-based with tagged args: <param=key>value</param>
MARKDOWN_BLOCK, // Markdown code block: Action:\n```json\n[...]\n```
};
inline std::ostream & operator<<(std::ostream & os, const tool_format & format) {
@ -184,16 +180,8 @@ inline std::ostream & operator<<(std::ostream & os, const tool_format & format)
return os << "JSON_NATIVE";
case tool_format::TAG_WITH_JSON:
return os << "TAG_WITH_JSON";
case tool_format::BRACKET_TAG:
return os << "BRACKET_TAG";
case tool_format::PREFIXED_INDEXED:
return os << "PREFIXED_INDEXED";
case tool_format::RECIPIENT_BASED:
return os << "RECIPIENT_BASED";
case tool_format::TAG_WITH_TAGGED:
return os << "TAG_WITH_TAGGED";
case tool_format::MARKDOWN_BLOCK:
return os << "MARKDOWN_BLOCK";
default:
return os << "UNKNOWN";
}

View File

@ -35,6 +35,45 @@ static std::string_view trim(std::string_view sv) {
return trim_trailing_space(trim_leading_space(sv, 1));
}
// Returns the net '{' nesting level of a JSON-like string: +1 for every '{' and -1 for
// every '}' that appears outside a double-quoted string. Inside a quoted string a
// backslash hides the following character, so an escaped quote does not end the string.
// The result is negative when closing braces outnumber opening ones.
static int json_brace_depth(const std::string & s) {
    int  open_braces   = 0;
    bool inside_quotes = false;
    bool skip_next     = false;  // set by a backslash seen inside a quoted string

    for (const char ch : s) {
        if (inside_quotes) {
            if (skip_next) {
                skip_next = false;
            } else if (ch == '\\') {
                skip_next = true;
            } else if (ch == '"') {
                inside_quotes = false;
            }
            continue;
        }

        switch (ch) {
            case '"':
                inside_quotes = true;
                break;
            case '{':
                ++open_braces;
                break;
            case '}':
                --open_braces;
                break;
            default:
                break;
        }
    }

    return open_braces;
}
// Serializes `s` as a JSON string and returns the text between the surrounding
// double quotes. If the serialized form is not wrapped in quotes it is returned as-is.
static std::string escape_json_string_inner(const std::string & s) {
    std::string dumped = json(s).dump();

    const bool is_quoted = dumped.size() >= 2 && dumped.front() == '"' && dumped.back() == '"';
    if (!is_quoted) {
        return dumped;
    }
    return dumped.substr(1, dumped.size() - 2);
}
// Convert Python-style single-quoted strings to JSON double-quoted strings
// Only converts outer string delimiters, properly handling escape sequences:
// - {'key': 'value'} -> {"key": "value"}
@ -148,6 +187,10 @@ common_peg_parser common_chat_peg_builder::tag_with_safe_content(const std::stri
return zero_or_more(choice({ p, content_chunk }));
}
// Picks the string that argument text should currently be written to: the current
// tool call's `arguments` once that call has a non-empty name, otherwise `args_buffer`.
std::string & common_chat_peg_unified_mapper::args_target() {
    if (current_tool && !current_tool->name.empty()) {
        return current_tool->arguments;
    }
    return args_buffer;
}
void common_chat_peg_unified_mapper::from_ast(const common_peg_ast_arena & arena,
const common_peg_parse_result & parse_result_arg) {
// Call base class to visit all nodes
@ -156,15 +199,12 @@ void common_chat_peg_unified_mapper::from_ast(const common_peg_ast_arena & ar
// Flush any pending tool call that was started but never got a name
// This happens during partial parsing when the tool call is incomplete
if (pending_tool_call.has_value() && !pending_tool_call->name.empty()) {
// Transfer any buffered arguments
if (!args_buffer.empty()) {
pending_tool_call->arguments = args_buffer;
}
// Close any open quotes in buffered args
if (buffer_needs_closing_quote && !pending_tool_call->arguments.empty()) {
if (closing_quote_pending && !pending_tool_call->arguments.empty()) {
pending_tool_call->arguments += "\"";
}
// Add the incomplete tool call to results
result.tool_calls.push_back(pending_tool_call.value());
pending_tool_call.reset();
}
@ -187,15 +227,11 @@ void common_chat_peg_unified_mapper::map(const common_peg_ast_node & node) {
bool is_arg_string_value = node.tag == common_chat_peg_unified_builder::TOOL_ARG_STRING_VALUE;
if (is_tool_open) {
// Don't create tool call yet - wait for name to be known
// This prevents sending incomplete tool calls in streaming mode
pending_tool_call = common_chat_tool_call();
current_tool = &pending_tool_call.value();
arg_count = 0;
// Clear the arguments buffer for the new tool
pending_tool_call = common_chat_tool_call();
current_tool = &pending_tool_call.value();
arg_count = 0;
args_buffer.clear();
needs_closing_quote = false;
buffer_needs_closing_quote = false;
closing_quote_pending = false;
}
if (is_tool_id && current_tool) {
@ -208,15 +244,14 @@ void common_chat_peg_unified_mapper::map(const common_peg_ast_node & node) {
if (is_tool_name && current_tool) {
current_tool->name = std::string(trim_trailing_space(node.text));
// Now that we have the name, we can populate the arguments from the buffer
// Now that we have the name, populate the arguments from the buffer
if (!args_buffer.empty()) {
current_tool->arguments = args_buffer;
args_buffer.clear();
} else if (current_tool->arguments.empty()) {
// Initialize arguments if we're using tagged format and no buffered args
current_tool->arguments = "{";
}
// Now that we have the name, add the tool call to the result
// Add the tool call to results so streaming can see it
if (pending_tool_call.has_value()) {
result.tool_calls.push_back(pending_tool_call.value());
pending_tool_call.reset();
@ -225,28 +260,16 @@ void common_chat_peg_unified_mapper::map(const common_peg_ast_node & node) {
}
if (is_tool_args && current_tool) {
// For JSON format, the arguments come as a complete JSON object
// For tagged format, we build up arguments from individual arg_name/arg_value nodes
// Check if this looks like JSON (starts with {) vs tagged format (starts with <)
// For JSON format: arguments come as a complete JSON object
// For tagged format: built up from individual arg_name/arg_value nodes
auto text = trim_trailing_space(node.text);
if (!text.empty() && text.front() == '{') {
// If we have the tool name, populate directly; otherwise buffer
if (!current_tool->name.empty()) {
current_tool->arguments = std::string(text);
} else {
args_buffer = std::string(text);
}
args_target() = std::string(text);
}
// If it's tagged format, we ignore this and let arg_name/arg_value build up the JSON
}
if (is_arg_open) {
// Reset for new argument
if (!current_tool->name.empty()) {
needs_closing_quote = false;
} else {
buffer_needs_closing_quote = false;
}
closing_quote_pending = false;
}
if (is_arg_name && current_tool) {
@ -257,15 +280,11 @@ void common_chat_peg_unified_mapper::map(const common_peg_ast_node & node) {
arg_entry += json(trim(node.text)).dump() + ":";
++arg_count;
// If we have the tool name, add directly; otherwise buffer
if (!current_tool->name.empty()) {
current_tool->arguments += arg_entry;
} else {
if (args_buffer.empty()) {
args_buffer = "{";
}
args_buffer += arg_entry;
auto & target = args_target();
if (target.empty()) {
target = "{";
}
target += arg_entry;
}
if ((is_arg_value || is_arg_string_value) && current_tool) {
@ -273,160 +292,83 @@ void common_chat_peg_unified_mapper::map(const common_peg_ast_node & node) {
std::string value_to_add;
if (value_content.empty() && is_arg_string_value) {
// Empty string value - start with opening quote
// arg_close will add the closing quote
if (!current_tool->name.empty()) {
value_to_add = "\"";
needs_closing_quote = true;
} else {
value_to_add = "\"";
buffer_needs_closing_quote = true;
}
// Empty string value - arg_close will add the closing quote
value_to_add = "\"";
closing_quote_pending = true;
} else if (!value_content.empty() && is_arg_string_value) {
// Schema declares this as string type - always treat as literal string value
// Never try to parse as JSON (this ensures consistent handling of quoted strings
// like "foo" which would otherwise be parsed as JSON string 'foo')
if (!current_tool->name.empty()) {
if (!needs_closing_quote) {
value_to_add = "\"";
needs_closing_quote = true;
}
} else {
if (!buffer_needs_closing_quote) {
value_to_add = "\"";
buffer_needs_closing_quote = true;
}
if (!closing_quote_pending) {
value_to_add = "\"";
closing_quote_pending = true;
}
// Escape special characters in the string content
std::string escaped = json(value_content).dump();
// Remove the surrounding quotes from the escaped string
if (escaped.size() >= 2 && escaped.front() == '"' && escaped.back() == '"') {
escaped = escaped.substr(1, escaped.size() - 2);
}
value_to_add += escaped;
value_to_add += escape_json_string_inner(value_content);
} else if (!value_content.empty()) {
// For potential containers, normalize Python-style single quotes to JSON double quotes first
// This ensures consistent output during both partial and final parsing
// For potential containers, normalize Python-style single quotes to JSON double quotes
bool is_potential_container = value_content[0] == '[' || value_content[0] == '{';
if (is_potential_container) {
value_content = normalize_quotes_to_json(value_content);
}
// Try to parse as JSON value (number, bool, null, object, array)
// For strings, we need special handling to support incremental parsing
try {
json parsed = json::parse(value_content);
if (parsed.is_string()) {
// For string values, don't add closing quote yet (added by arg_close)
// This ensures incremental parsing produces monotonic arguments
// Don't add closing quote yet (added by arg_close) for monotonic streaming
std::string escaped = parsed.dump();
// Remove the trailing quote
if (!escaped.empty() && escaped.back() == '"') {
escaped.pop_back();
}
value_to_add = escaped;
if (!current_tool->name.empty()) {
needs_closing_quote = true;
} else {
buffer_needs_closing_quote = true;
}
value_to_add = escaped;
closing_quote_pending = true;
} else {
// For non-string values (number, bool, null, object, array), add raw value content
// Using raw content instead of dump() ensures monotonicity for streaming
// (prevents issues with spaces being removed by dump())
// Non-string values: use raw content to preserve whitespace for monotonicity
value_to_add = value_content;
}
} catch (...) {
// JSON parsing failed - content is either incomplete (partial) or not valid JSON
// Note: potential containers were already normalized above, so value_content
// already has double quotes if it started with [ or {
if (node.is_partial && is_potential_container) {
// During incremental parsing, if it looks like a JSON container, don't wrap in quotes yet
// and don't escape. Just pass through the (already normalized) content.
// Partial container: pass through the already-normalized content
value_to_add = value_content;
} else {
// Not valid JSON and NOT a potential partial container - treat as string value
// Add opening quote if not already in a string
if (!current_tool->name.empty()) {
if (!needs_closing_quote) {
value_to_add = "\"";
needs_closing_quote = true;
}
} else {
if (!buffer_needs_closing_quote) {
value_to_add = "\"";
buffer_needs_closing_quote = true;
}
// Not valid JSON - treat as string value
if (!closing_quote_pending) {
value_to_add = "\"";
closing_quote_pending = true;
}
// Escape special characters in the string content
std::string escaped = json(value_content).dump();
// Remove the surrounding quotes from the escaped string
if (escaped.size() >= 2 && escaped.front() == '"' && escaped.back() == '"') {
escaped = escaped.substr(1, escaped.size() - 2);
}
value_to_add += escaped;
value_to_add += escape_json_string_inner(value_content);
}
}
}
// If we have the tool name, add directly; otherwise buffer
if (!current_tool->name.empty()) {
current_tool->arguments += value_to_add;
} else {
if (args_buffer.empty()) {
args_buffer = "{";
}
args_buffer += value_to_add;
}
args_target() += value_to_add;
}
if (is_arg_close && current_tool) {
if (!current_tool->name.empty()) {
if (needs_closing_quote) {
current_tool->arguments += "\"";
needs_closing_quote = false;
}
} else {
if (buffer_needs_closing_quote) {
if (args_buffer.empty()) {
args_buffer = "{";
}
args_buffer += "\"";
buffer_needs_closing_quote = false;
}
if (closing_quote_pending) {
args_target() += "\"";
closing_quote_pending = false;
}
}
if (is_tool_close && current_tool) {
if (!current_tool->name.empty()) {
if (needs_closing_quote) {
current_tool->arguments += "\"";
needs_closing_quote = false;
}
if (!current_tool->arguments.empty() && current_tool->arguments.back() != '}') {
current_tool->arguments += "}";
}
// If we have a pending tool call that wasn't added yet, add it now
if (pending_tool_call.has_value()) {
// Flush buffer to arguments if tool name was never seen
if (current_tool->name.empty() && !args_buffer.empty()) {
current_tool->arguments = args_buffer;
args_buffer.clear();
}
// Close any pending string quote
if (closing_quote_pending) {
current_tool->arguments += "\"";
closing_quote_pending = false;
}
// Close any unclosed braces (accounts for nested objects)
for (int d = json_brace_depth(current_tool->arguments); d > 0; d--) {
current_tool->arguments += "}";
}
// Add tool call to results if named; otherwise discard
if (pending_tool_call.has_value()) {
if (!current_tool->name.empty()) {
result.tool_calls.push_back(pending_tool_call.value());
pending_tool_call.reset();
}
} else {
// We're closing a tool without a name - flush the buffer
if (!args_buffer.empty()) {
current_tool->arguments = args_buffer;
args_buffer.clear();
}
if (buffer_needs_closing_quote) {
current_tool->arguments += "\"";
buffer_needs_closing_quote = false;
}
// Close the arguments object if using tagged format
if (!current_tool->arguments.empty() && current_tool->arguments.back() != '}') {
current_tool->arguments += "}";
}
// Don't add to result if no name - this prevents incomplete tool calls
pending_tool_call.reset();
}
}
@ -511,6 +453,241 @@ static std::pair<std::string, std::string> parse_key_spec(const std::string & ke
return {key.substr(0, dot_pos), key.substr(dot_pos + 1)};
}
// Mode 1: function_is_key — the tool name is itself the JSON member key: {"function_name": {...}}
//
// Builds a choice with one "tool-<name>" rule for every entry of `tools` that has a
// "function" member (entries without one are skipped).
//
//   tools              - array of tool definitions; each "function" needs "name",
//                        "parameters" defaults to an empty object when absent
//   args_key           - when empty, the arguments schema is used directly as the value;
//                        otherwise the arguments sit under "<effective_args_key>"
//   effective_args_key - member key used for the arguments when args_key is non-empty
//   call_id_key        - when non-empty, adds an optional member with a quoted string ID
//   gen_call_id_key    - when non-empty, adds an optional member whose ID is either a
//                        quoted string or a JSON number
common_peg_parser common_chat_peg_unified_builder::build_json_tools_function_is_key(
        const nlohmann::json & tools,
        const std::string &    args_key,
        const std::string &    effective_args_key,
        const std::string &    call_id_key,
        const std::string &    gen_call_id_key) {
    // Shared prefix of every keyed member: "<key>" space() ":" space()
    const auto member_key = [this](const std::string & key) {
        return literal("\"" + key + "\"") + space() + literal(":") + space();
    };

    auto alternatives = choice();

    for (const auto & entry : tools) {
        if (!entry.contains("function")) {
            continue;
        }
        const auto &      fn      = entry.at("function");
        const std::string fn_name = fn.at("name").get<std::string>();

        nlohmann::json fn_schema = nlohmann::json::object();
        if (fn.contains("parameters")) {
            fn_schema = fn.at("parameters");
        }

        // Members of the inner object, in the order they are expected to appear
        std::vector<common_peg_parser> members;

        if (!call_id_key.empty()) {
            auto string_id = atomic(
                member_key(call_id_key) +
                literal("\"") + tool_id(json_string_content()) + literal("\""));
            // The ID and its trailing comma are both optional
            members.push_back(optional(string_id + space() + optional(literal(",") + space())));
        }

        if (!gen_call_id_key.empty()) {
            auto string_or_number_id = atomic(
                member_key(gen_call_id_key) +
                choice({
                    literal("\"") + tool_id(json_string_content()) + literal("\""),
                    tool_id(json_number())
                }));
            members.push_back(optional(string_or_number_id + space() + optional(literal(",") + space())));
        }

        // Arguments: bare schema value, or the schema value under the arguments key
        common_peg_parser arguments = eps();
        if (args_key.empty()) {
            arguments = tool_args(schema(json(), "tool-" + fn_name + "-schema", fn_schema));
        } else {
            arguments = member_key(effective_args_key) +
                        tool_args(schema(json(), "tool-" + fn_name + "-schema", fn_schema));
        }
        members.push_back(arguments);

        // Value stored under the function-name key
        common_peg_parser body = eps();
        const bool bare_arguments = args_key.empty() && members.size() == 1;
        if (bare_arguments) {
            // Only the arguments parser exists and it already matches a whole value
            body = members.front();
        } else {
            body = literal("{") + space();
            size_t remaining = members.size();
            for (const auto & member : members) {
                body = body + member;
                if (--remaining > 0) {
                    body = body + space();
                }
            }
            body = body + space() + literal("}");
        }

        // Full call: { "<fn_name>" : <body> }
        auto call = tool(
            tool_open(literal("{")) + space() +
            literal("\"") + tool_name(literal(fn_name)) + literal("\"") +
            space() + literal(":") + space() +
            body +
            space() + tool_close(literal("}")));

        alternatives |= rule("tool-" + fn_name, call);
    }

    return alternatives;
}
// Mode 2: Nested keys — name/arguments live inside a wrapper object selected with
// dot notation (e.g. "function.name"), giving: { id?, "<prefix>": { "<name>": "...", "<args>": ... } }
//
// Builds a choice with one "tool-<name>" rule for every entry of `tools` that has a
// "function" member (entries without one are skipped).
//
//   effective_name_key / effective_args_key - key specs split with parse_key_spec(); the
//                        wrapper key is the name's prefix when it has one, otherwise the
//                        arguments' prefix
//   call_id_key        - when non-empty and not itself dotted, adds an optional top-level
//                        member with a quoted string ID (followed by a comma)
//   gen_call_id_key    - when non-empty and not itself dotted, adds an optional top-level
//                        member whose ID is a quoted string or a JSON number
common_peg_parser common_chat_peg_unified_builder::build_json_tools_nested_keys(
        const nlohmann::json & tools,
        const std::string &    effective_name_key,
        const std::string &    effective_args_key,
        const std::string &    call_id_key,
        const std::string &    gen_call_id_key) {
    // Shared prefix of every keyed member: "<key>" space() ":" space()
    const auto member_key = [this](const std::string & key) {
        return literal("\"" + key + "\"") + space() + literal(":") + space();
    };

    auto alternatives = choice();

    const auto name_parts = parse_key_spec(effective_name_key);
    const auto args_parts = parse_key_spec(effective_args_key);

    const bool name_is_nested = !name_parts.first.empty();
    const bool args_is_nested = !args_parts.first.empty();

    const std::string wrapper_key    = name_is_nested ? name_parts.first : args_parts.first;
    const std::string inner_name_key = name_is_nested ? name_parts.second : effective_name_key;
    const std::string inner_args_key = args_is_nested ? args_parts.second : effective_args_key;

    for (const auto & entry : tools) {
        if (!entry.contains("function")) {
            continue;
        }
        const auto &      fn      = entry.at("function");
        const std::string fn_name = fn.at("name").get<std::string>();

        nlohmann::json fn_schema = nlohmann::json::object();
        if (fn.contains("parameters")) {
            fn_schema = fn.at("parameters");
        }

        // Wrapper object contents: name first, then arguments, comma-separated
        auto name_member = member_key(inner_name_key) +
                           literal("\"") + tool_name(literal(fn_name)) + literal("\"");
        auto args_member = member_key(inner_args_key) +
                           tool_args(schema(json(), "tool-" + fn_name + "-schema", fn_schema));
        auto wrapper_object = literal("{") + space() +
                              name_member + space() + literal(",") + space() +
                              args_member +
                              space() + literal("}");

        auto call_body = tool_open(literal("{")) + space();

        // Top-level string ID; skipped when the ID key is itself a dotted spec
        if (!call_id_key.empty() && parse_key_spec(call_id_key).first.empty()) {
            auto string_id = atomic(
                member_key(call_id_key) +
                literal("\"") + tool_id(json_string_content()) + literal("\""));
            call_body = call_body + optional(string_id + space() + literal(",") + space());
        }

        // Top-level generated ID (string or number); same dotted-spec restriction
        if (!gen_call_id_key.empty() && parse_key_spec(gen_call_id_key).first.empty()) {
            auto string_or_number_id = atomic(
                member_key(gen_call_id_key) +
                choice({
                    literal("\"") + tool_id(json_string_content()) + literal("\""),
                    tool_id(json_number())
                }));
            call_body = call_body + optional(string_or_number_id + space() + literal(",") + space());
        }

        auto wrapper_member = member_key(wrapper_key) + wrapper_object;
        call_body = call_body + wrapper_member + space() + tool_close(literal("}"));

        alternatives |= rule("tool-" + fn_name, tool(call_body));
    }

    return alternatives;
}
// Mode 3: Flat keys with optional ID fields and parameter ordering:
//   { "<name_key>": "<tool>", "<args_key>": {...}, "<id_key>"?: ..., "<gen_id_key>"?: ... }
//
// Builds a choice with one "tool-<name>" rule for every entry of `tools` that has a
// "function" member (entries without one are skipped).
//
//   effective_name_key / effective_args_key - member keys of the (required) name and
//                        arguments fields
//   call_id_key / gen_call_id_key - when non-empty, each adds an optional member whose
//                        value is a quoted string or a JSON number, captured via tool_id
//   parameters_order   - keys in the order the fields are expected; fields whose key is
//                        not listed come last, keeping their declaration order
//                        (name, arguments, call id, generated call id)
//
// The comma separators are attached to the fields themselves, so omitting an optional ID
// field also omits its comma; a fixed comma between every pair of slots would otherwise
// demand a dangling "," whenever an optional field is left out.
common_peg_parser common_chat_peg_unified_builder::build_json_tools_flat_keys(
        const nlohmann::json &           tools,
        const std::string &              effective_name_key,
        const std::string &              effective_args_key,
        const std::string &              call_id_key,
        const std::string &              gen_call_id_key,
        const std::vector<std::string> & parameters_order) {
    // One member of the tool-call object
    struct field_entry {
        common_peg_parser parser;
        std::string       key;
        bool              is_optional;
    };

    // Position of `key` in parameters_order, or parameters_order.size() when absent
    const auto rank_of = [&parameters_order](const std::string & key) {
        const auto pos = std::find(parameters_order.begin(), parameters_order.end(), key);
        return static_cast<size_t>(std::distance(parameters_order.begin(), pos));
    };

    // ID value: quoted string or bare JSON number, preceded by its quoted key and colon
    const auto make_id_field = [this](const std::string & key) {
        return atomic(
            literal("\"" + key + "\"") + space() + literal(":") + space() +
            choice({
                literal("\"") + tool_id(json_string_content()) + literal("\""),
                tool_id(json_number())
            }));
    };

    auto tool_choices = choice();

    auto name_key_parser = literal("\"" + effective_name_key + "\"");
    auto args_key_parser = literal("\"" + effective_args_key + "\"");

    for (const auto & tool_def : tools) {
        if (!tool_def.contains("function")) {
            continue;
        }
        const auto &   function = tool_def.at("function");
        std::string    name     = function.at("name");
        nlohmann::json params   = function.contains("parameters") ? function.at("parameters") : nlohmann::json::object();

        auto tool_name_ = name_key_parser + space() + literal(":") + space() +
                          literal("\"") + tool_name(literal(name)) + literal("\"");
        auto tool_args_ = args_key_parser + space() + literal(":") + space() +
                          tool_args(schema(json(), "tool-" + name + "-schema", params));

        // Collect every field in declaration order, then order by parameters_order
        std::vector<field_entry> fields;
        fields.push_back({ tool_name_, effective_name_key, false });
        fields.push_back({ tool_args_, effective_args_key, false });
        if (!call_id_key.empty()) {
            fields.push_back({ make_id_field(call_id_key), call_id_key, true });
        }
        if (!gen_call_id_key.empty()) {
            fields.push_back({ make_id_field(gen_call_id_key), gen_call_id_key, true });
        }

        // stable_sort: fields missing from parameters_order all share the same rank and
        // must keep their declaration order (std::sort leaves that order unspecified)
        std::stable_sort(fields.begin(), fields.end(),
            [&rank_of](const field_entry & a, const field_entry & b) {
                return rank_of(a.key) < rank_of(b.key);
            });

        auto ordered_body  = tool_open(literal("{")) + space();
        bool seen_required = false;
        for (const auto & field : fields) {
            if (!field.is_optional) {
                // Required field: comma before it unless it is the first required one
                if (seen_required) {
                    ordered_body = ordered_body + space() + literal(",") + space();
                }
                ordered_body  = ordered_body + field.parser;
                seen_required = true;
            } else if (seen_required) {
                // Optional field after a required one carries its own leading comma
                ordered_body = ordered_body + optional(space() + literal(",") + space() + field.parser);
            } else {
                // Optional field before any required one carries its own trailing comma
                ordered_body = ordered_body + optional(field.parser + space() + literal(",") + space());
            }
        }
        ordered_body = ordered_body + space() + tool_close(literal("}"));

        tool_choices |= rule("tool-" + name, tool(ordered_body));
    }

    return tool_choices;
}
common_peg_parser common_chat_peg_unified_builder::standard_json_tools(
const std::string & section_start,
const std::string & section_end,
@ -528,239 +705,20 @@ common_peg_parser common_chat_peg_unified_builder::standard_json_tools(
return eps();
}
// Build tool choices for JSON format
auto tool_choices = choice();
// auto other_member = json_string() + space() + literal(":") + space() + json();
// Determine effective field names
std::string effective_name_key = name_key.empty() ? "name" : name_key;
std::string effective_args_key = args_key.empty() ? "arguments" : args_key;
// Check if we have nested keys (dot notation)
auto name_spec = parse_key_spec(effective_name_key);
auto args_spec = parse_key_spec(effective_args_key);
bool has_nested_keys = !name_spec.first.empty() || !args_spec.first.empty();
// Mode 1: function_is_key - parse {"function_name": {...}}
// Dispatch to the appropriate builder based on the JSON layout mode
common_peg_parser tool_choices = eps();
if (function_is_key) {
for (const auto & tool_def : tools) {
if (!tool_def.contains("function")) {
continue;
}
const auto & function = tool_def.at("function");
std::string name = function.at("name");
nlohmann::json params = function.contains("parameters") ? function.at("parameters") : nlohmann::json::object();
// Build inner object fields
std::vector<common_peg_parser> inner_fields;
// Add optional string ID field
if (!call_id_key.empty()) {
auto id_parser = atomic(
literal("\"" + call_id_key + "\"") + space() + literal(":") + space() +
literal("\"") + tool_id(json_string_content()) + literal("\"")
);
inner_fields.push_back(optional(id_parser + space() + optional(literal(",") + space())));
}
// Add optional generated integer ID field
if (!gen_call_id_key.empty()) {
auto gen_id_parser = atomic(
literal("\"" + gen_call_id_key + "\"") + space() + literal(":") + space() +
choice({
literal("\"") + tool_id(json_string_content()) + literal("\""),
tool_id(json_number())
})
);
inner_fields.push_back(optional(gen_id_parser + space() + optional(literal(",") + space())));
}
// Add arguments - either wrapped in args_key or parsed directly
common_peg_parser args_parser = eps();
if (args_key.empty()) {
// Arguments are directly the inner object value: {"func_name": {"arg1": "val"}}
args_parser = tool_args(schema(json(), "tool-" + name + "-schema", params));
} else {
// Arguments are wrapped in a key: {"func_name": {"arguments": {"arg1": "val"}}}
args_parser = literal("\"" + effective_args_key + "\"") + space() + literal(":") + space() +
tool_args(schema(json(), "tool-" + name + "-schema", params));
}
inner_fields.push_back(args_parser);
// Build inner object parser - no greedy other_member skipping to avoid consuming ID
common_peg_parser inner_object = eps();
if (args_key.empty() && inner_fields.size() == 1) {
// Direct arguments: {"func_name": {"arg1": "val"}}
// The args_parser is already the full object schema
inner_object = inner_fields[0];
} else {
// Wrapped arguments: {"func_name": {"arguments": {"arg1": "val"}}}
inner_object = literal("{") + space();
for (size_t i = 0; i < inner_fields.size(); i++) {
inner_object = inner_object + inner_fields[i];
if (i < inner_fields.size() - 1) {
inner_object = inner_object + space();
}
}
inner_object = inner_object + space() + literal("}");
}
// Tool call format: { "function_name": { inner_object } }
auto tool_parser = tool(
tool_open(literal("{")) + space() +
literal("\"") + tool_name(literal(name)) + literal("\"") +
space() + literal(":") + space() +
inner_object +
space() + tool_close(literal("}"))
);
tool_choices |= rule("tool-" + name, tool_parser);
}
}
// Mode 2: Nested keys (dot notation like "function.name")
else if (has_nested_keys) {
// Group fields by prefix
std::string nested_prefix = !name_spec.first.empty() ? name_spec.first : args_spec.first;
std::string nested_name_field = !name_spec.first.empty() ? name_spec.second : effective_name_key;
std::string nested_args_field = !args_spec.first.empty() ? args_spec.second : effective_args_key;
for (const auto & tool_def : tools) {
if (!tool_def.contains("function")) {
continue;
}
const auto & function = tool_def.at("function");
std::string name = function.at("name");
nlohmann::json params = function.contains("parameters") ? function.at("parameters") : nlohmann::json::object();
// Build nested object with name and arguments
auto nested_name = literal("\"" + nested_name_field + "\"") + space() + literal(":") + space() +
literal("\"") + tool_name(literal(name)) + literal("\"");
auto nested_args = literal("\"" + nested_args_field + "\"") + space() + literal(":") + space() +
tool_args(schema(json(), "tool-" + name + "-schema", params));
auto nested_object = literal("{") + space() +
nested_name + space() + literal(",") + space() +
nested_args +
space() + literal("}");
// Build top-level parser - simpler structure without greedy other_member skipping
// Format: { id?, "function": {...} }
auto tool_parser_body = tool_open(literal("{")) + space();
// Add optional string ID field at top level
if (!call_id_key.empty()) {
auto id_spec = parse_key_spec(call_id_key);
if (id_spec.first.empty()) { // Top-level ID field
auto id_parser = atomic(
literal("\"" + call_id_key + "\"") + space() + literal(":") + space() +
literal("\"") + tool_id(json_string_content()) + literal("\"")
);
tool_parser_body = tool_parser_body + optional(id_parser + space() + literal(",") + space());
}
}
// Add optional generated integer ID field at top level
if (!gen_call_id_key.empty()) {
auto gen_id_spec = parse_key_spec(gen_call_id_key);
if (gen_id_spec.first.empty()) { // Top-level gen ID field
auto gen_id_parser = atomic(
literal("\"" + gen_call_id_key + "\"") + space() + literal(":") + space() +
choice({
literal("\"") + tool_id(json_string_content()) + literal("\""),
tool_id(json_number())
})
);
tool_parser_body = tool_parser_body + optional(gen_id_parser + space() + literal(",") + space());
}
}
// Add the nested object field
auto nested_field = literal("\"" + nested_prefix + "\"") + space() + literal(":") + space() + nested_object;
tool_parser_body = tool_parser_body + nested_field + space() + tool_close(literal("}"));
tool_choices |= rule("tool-" + name, tool(tool_parser_body));
}
}
// Mode 3: Flat keys (enhanced with ID fields and parameter ordering)
else {
auto name_key_parser = literal("\"" + effective_name_key + "\"");
auto args_key_parser = literal("\"" + effective_args_key + "\"");
for (const auto & tool_def : tools) {
if (!tool_def.contains("function")) {
continue;
}
const auto & function = tool_def.at("function");
std::string name = function.at("name");
nlohmann::json params = function.contains("parameters") ? function.at("parameters") : nlohmann::json::object();
auto tool_name_ = name_key_parser + space() + literal(":") + space() +
literal("\"") + tool_name(literal(name)) + literal("\"");
auto tool_args_ = args_key_parser + space() + literal(":") + space() +
tool_args(schema(json(), "tool-" + name + "-schema", params));
// Build ID parsers if keys are provided
common_peg_parser id_parser = eps();
if (!call_id_key.empty()) {
id_parser = atomic(
literal("\"" + call_id_key + "\"") + space() + literal(":") + space() +
choice({
literal("\"") + tool_id(json_string_content()) + literal("\""),
tool_id(json_number())
})
);
}
common_peg_parser gen_id_parser = eps();
if (!gen_call_id_key.empty()) {
gen_id_parser = atomic(
literal("\"" + gen_call_id_key + "\"") + space() + literal(":") + space() +
choice({
literal("\"") + tool_id(json_string_content()) + literal("\""),
tool_id(json_number())
})
);
}
common_peg_parser tool_parser = eps();
// Use parameter ordering if provided - parse fields in specified order without greedy skipping
if (!parameters_order.empty()) {
}
// Build parser using parameter ordering (works with or without explicit parameters_order)
// Create list of (parser, key) pairs for all fields
std::vector<std::pair<common_peg_parser, std::string>> parser_pairs;
parser_pairs.emplace_back(tool_name_, effective_name_key);
parser_pairs.emplace_back(tool_args_, effective_args_key);
if (!call_id_key.empty()) {
parser_pairs.emplace_back(optional(id_parser), call_id_key);
}
if (!gen_call_id_key.empty()) {
parser_pairs.emplace_back(optional(gen_id_parser), gen_call_id_key);
}
// Sort by position in parameters_order (or at end if not present)
std::sort(parser_pairs.begin(), parser_pairs.end(),
[&parameters_order](const auto & a, const auto & b) {
auto pos_a = std::find(parameters_order.begin(), parameters_order.end(), a.second);
auto pos_b = std::find(parameters_order.begin(), parameters_order.end(), b.second);
size_t idx_a = (pos_a == parameters_order.end()) ? parameters_order.size() : std::distance(parameters_order.begin(), pos_a);
size_t idx_b = (pos_b == parameters_order.end()) ? parameters_order.size() : std::distance(parameters_order.begin(), pos_b);
return idx_a < idx_b;
});
// Build ordered parser
auto ordered_body = tool_open(literal("{")) + space();
for (size_t i = 0; i < parser_pairs.size(); i++) {
ordered_body = ordered_body + parser_pairs[i].first;
if (i < parser_pairs.size() - 1) {
ordered_body = ordered_body + space() + literal(",") + space();
}
}
ordered_body = ordered_body + space() + tool_close(literal("}"));
tool_parser = tool(ordered_body);
tool_choices |= rule("tool-" + name, tool_parser);
tool_choices = build_json_tools_function_is_key(tools, args_key, effective_args_key, call_id_key, gen_call_id_key);
} else {
auto name_spec = parse_key_spec(effective_name_key);
auto args_spec = parse_key_spec(effective_args_key);
if (!name_spec.first.empty() || !args_spec.first.empty()) {
tool_choices = build_json_tools_nested_keys(tools, effective_name_key, effective_args_key, call_id_key, gen_call_id_key);
} else {
tool_choices = build_json_tools_flat_keys(tools, effective_name_key, effective_args_key, call_id_key, gen_call_id_key, parameters_order);
}
}
@ -770,7 +728,6 @@ common_peg_parser common_chat_peg_unified_builder::standard_json_tools(
tool_calls = tool_calls + zero_or_more(space() + literal(",") + space() + tool_choices);
}
// Optionally wrap in array brackets
if (array_wrapped) {
tool_calls = literal("[") + space() + tool_calls + space() + literal("]");
}

View File

@ -108,6 +108,27 @@ class common_chat_peg_unified_builder : public common_chat_peg_builder {
const nlohmann::json & tools,
bool parallel_tool_calls,
bool force_tool_calls);
private:
// Implementation helpers for standard_json_tools — one per JSON tool call layout mode
common_peg_parser build_json_tools_function_is_key(const nlohmann::json & tools,
const std::string & args_key,
const std::string & effective_args_key,
const std::string & call_id_key,
const std::string & gen_call_id_key);
common_peg_parser build_json_tools_nested_keys(const nlohmann::json & tools,
const std::string & effective_name_key,
const std::string & effective_args_key,
const std::string & call_id_key,
const std::string & gen_call_id_key);
common_peg_parser build_json_tools_flat_keys(const nlohmann::json & tools,
const std::string & effective_name_key,
const std::string & effective_args_key,
const std::string & call_id_key,
const std::string & gen_call_id_key,
const std::vector<std::string> & parameters_order);
};
inline common_peg_arena build_chat_peg_unified_parser(
@ -119,11 +140,14 @@ inline common_peg_arena build_chat_peg_unified_parser(
class common_chat_peg_unified_mapper : public common_chat_peg_mapper {
std::optional<common_chat_tool_call> pending_tool_call; // Tool call waiting for name
common_chat_tool_call * current_tool = nullptr;
int arg_count = 0;
bool needs_closing_quote = false;
common_chat_tool_call * current_tool = nullptr;
int arg_count = 0;
bool closing_quote_pending = false;
std::string args_buffer; // Buffer to delay arguments until tool name is known
bool buffer_needs_closing_quote = false; // Track quote state for buffered args
// Returns a reference to the active argument destination string.
// Before tool_name is known, writes go to args_buffer; after, to current_tool->arguments.
std::string & args_target();
public:
common_chat_peg_unified_mapper(common_chat_msg & msg) : common_chat_peg_mapper(msg) {}

View File

@ -369,6 +369,28 @@ static common_chat_tool amount_tool{
})",
};
// Test fixture: a tool whose single required parameter "number" is itself an object
// with two required numeric properties, "real" and "imaginary" (i.e. an object-valued
// argument nested inside the arguments object).
static common_chat_tool imaginary_number_tool{
/* .name = */ "imaginary_number",
/* .description = */ "Imaginary number converter",
/* .parameters = */ R"({
"type": "object",
"properties": {
"number": {
"type": "object",
"properties": {
"real": {
"type": "number"
},
"imaginary": {
"type": "number"
}
},
"required": ["real", "imaginary"]
}
},
"required": ["number"]
})",
};
static common_chat_tool string_param_tool{
/* .name = */ "string_param",
@ -394,7 +416,7 @@ static common_chat_tool quoted_unquoted_tool{
"quoted": {
"type": "string",
"description": "Quoted value"
},
},
"unquoted": {
"type": "string",
"description": "Unquoted value"
@ -2323,6 +2345,25 @@ static void test_template_output_peg_parsers(bool detailed_debug) {
})
.run();
tst.test(
"Test imaginary number\n"
"</think>\n"
"<tool_call>\n"
"<function=imaginary_number>\n"
"<parameter=number>\n"
"{ \"real\": 3.14, \"imaginary\": 2.71 }\n"
"</parameter>\n"
"</function>\n"
"</tool_call>")
.enable_thinking(true)
.reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
.tools({ imaginary_number_tool })
.expect_reasoning("Test imaginary number")
.expect_tool_calls({
{ "imaginary_number", R"({ "number" : {"real":3.14,"imaginary":2.71 } })", {} }
})
.run();
}
}