diff --git a/common/chat-auto-parser-generator.cpp b/common/chat-auto-parser-generator.cpp
index 87d431add3..e9fe71c1d6 100644
--- a/common/chat-auto-parser-generator.cpp
+++ b/common/chat-auto-parser-generator.cpp
@@ -133,234 +133,252 @@ common_peg_parser universal_peg_generator::build_tool_parser(
const templates_params & inputs,
const common_peg_parser & reasoning) {
+ switch (analysis.tools) {
+ case tool_format::JSON_NATIVE:
+ return build_tool_parser_json_native(p, analysis, inputs, reasoning);
+ case tool_format::TAG_WITH_JSON:
+ return build_tool_parser_tag_json(p, analysis, inputs, reasoning);
+ case tool_format::TAG_WITH_TAGGED:
+ return build_tool_parser_tag_tagged(p, analysis, inputs, reasoning);
+ default:
+ GGML_ABORT("Unable to create tool parser");
+ }
+}
+
+// Builds the tool-call parser for tool_format::JSON_NATIVE.
+// The call syntax itself is delegated to p.standard_json_tools(); this function only works out
+// the effective name/arguments keys and how the content in front of the tool section is parsed.
+// Returns: reasoning + content parser + tools parser + p.end().
+common_peg_parser universal_peg_generator::build_tool_parser_json_native(
+    common_chat_peg_unified_builder & p,
+    const diff_analysis_result & analysis,
+    const templates_params & inputs,
+    const common_peg_parser & reasoning) {
+
const auto & m = analysis.markers;
- // Build tool choice parser based on format
+    // When calls are nested under a wrapper field other than "function", address the
+    // name/arguments keys as "<wrapper>.<key>" - unless the name key is already dotted.
+    const bool qualify_keys = !analysis.function_field.empty() &&
+                              analysis.function_field != "function" &&
+                              analysis.name_field.find('.') == std::string::npos;
+    const std::string key_prefix = qualify_keys ? analysis.function_field + "." : std::string();
+    const std::string name_key   = key_prefix + analysis.name_field;
+    const std::string args_key   = key_prefix + analysis.args_field;
+
+    const bool calls_required = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED;
+
+    auto json_tools = p.standard_json_tools(
+        m.tool_section_start,
+        m.tool_section_end,
+        inputs.tools,
+        inputs.parallel_tool_calls,
+        calls_required,
+        name_key,
+        args_key,
+        analysis.tools_array_wrapped,
+        analysis.fun_name_is_key,
+        analysis.id_field,
+        analysis.gen_id_field,
+        analysis.parameter_order
+    );
+
+    const bool content_is_wrapped = analysis.content == content_mode::ALWAYS_WRAPPED &&
+                                    !m.content_start.empty() && !m.content_end.empty();
+
+    if (!content_is_wrapped) {
+        // Free-form content: everything up to the tool section marker (nothing when no marker is known)
+        auto leading_text = m.tool_section_start.empty() ? p.eps() : p.until(m.tool_section_start);
+        return reasoning + p.optional(p.content(leading_text)) + json_tools + p.end();
+    }
+
+    // Content always sits between its own start/end markers
+    auto enclosed_text = p.optional(m.content_start + p.content(p.until(m.content_end)) + m.content_end);
+    return reasoning + enclosed_text + json_tools + p.end();
+}
+
+common_peg_parser universal_peg_generator::build_tool_parser_tag_json(
+ common_chat_peg_unified_builder & p,
+ const diff_analysis_result & analysis,
+ const templates_params & inputs,
+ const common_peg_parser & reasoning) {
+ // Parser for tool_format::TAG_WITH_JSON: function name between marker strings, arguments as one schema-checked JSON value.
+ const auto & m = analysis.markers;
common_peg_parser tool_choice = p.choice();
- if (analysis.tools == tool_format::JSON_NATIVE) {
- // Pure JSON format: use standard_json_tools helper
- // Build effective field names with dot notation if function_field is set
- std::string name_field = analysis.name_field;
- std::string args_field = analysis.args_field;
+ foreach_function(inputs.tools, [&](const json & tool) { // each invocation adds one alternative to tool_choice
+ const auto & function = tool.at("function");
+ std::string name = function.at("name");
+ const auto & schema = function.at("parameters"); // at() is used, so the "parameters" key must be present
- if (!analysis.function_field.empty() &&
- analysis.function_field != "function" &&
- name_field.find('.') == std::string::npos) {
- name_field = analysis.function_field + "." + name_field;
- args_field = analysis.function_field + "." + args_field;
+ // Call id between function name and arguments; only built when that position was detected and both id markers are known
+ common_peg_parser call_id_section = p.eps();
+ if (analysis.call_id_pos == call_id_position::BETWEEN_FUNC_AND_ARGS &&
+ !m.call_id_prefix.empty() && !m.call_id_suffix.empty()) {
+ call_id_section = p.optional(m.call_id_prefix + p.tool_id(p.until(m.call_id_suffix))) + m.call_id_suffix; // prefix + id are optional, the suffix marker is always required
}
- auto tools_parser = p.standard_json_tools(
- m.tool_section_start,
- m.tool_section_end,
- inputs.tools,
- inputs.parallel_tool_calls,
- inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED,
- name_field,
- args_field,
- analysis.tools_array_wrapped,
- analysis.fun_name_is_key,
- analysis.id_field,
- analysis.gen_id_field,
- analysis.parameter_order
- );
+ auto func_parser = p.tool_open(m.func_name_prefix + p.tool_name(p.literal(name)) + m.func_name_suffix) + // literal tool name between the name markers
+ call_id_section +
+ p.tool_args(p.schema(p.json(), "tool-" + name + "-schema", schema)); // JSON arguments tied to this tool's schema
- // Handle content wrappers if present
- if (analysis.content == content_mode::ALWAYS_WRAPPED &&
- !m.content_start.empty() && !m.content_end.empty()) {
- auto wrapped_content = p.optional(m.content_start + p.content(p.until(m.content_end)) + m.content_end);
- return reasoning + wrapped_content + tools_parser + p.end();
+ if (!m.func_close.empty()) { // closing marker is appended only when one is known
+ func_parser = func_parser + m.func_close;
}
- auto content_before_tools = m.tool_section_start.empty() ? p.eps() : p.until(m.tool_section_start);
- return reasoning + p.optional(p.content(content_before_tools)) + tools_parser + p.end();
- }
+ tool_choice |= p.rule("tool-" + name, func_parser); // named rule "tool-<name>" becomes one choice alternative
+ });
- if (analysis.tools == tool_format::TAG_WITH_JSON) {
- // Tag-based with JSON args: {args}
- // With optional call_id: [CALL_ID]id[ARGS]{args}
- foreach_function(inputs.tools, [&](const json & tool) {
- const auto & function = tool.at("function");
- std::string name = function.at("name");
- const auto & schema = function.at("parameters");
+ auto require_calls = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED; // decides below whether tool_calls is made optional
- // Build call_id parser based on position (if supported)
- common_peg_parser call_id_section = p.eps();
- if (analysis.call_id_pos == call_id_position::BETWEEN_FUNC_AND_ARGS &&
- !m.call_id_prefix.empty() && !m.call_id_suffix.empty()) {
- // Optional call_id followed by required call_id_suffix (which is also args_start)
- // Format: optional([CALL_ID] + call_id_value) + [ARGS]
- call_id_section = p.optional(m.call_id_prefix + p.tool_id(p.until(m.call_id_suffix))) + m.call_id_suffix;
- }
+ common_peg_parser tool_calls = p.eps();
- auto func_parser = p.tool_open(m.func_name_prefix + p.tool_name(p.literal(name)) + m.func_name_suffix) +
- call_id_section +
- p.tool_args(p.schema(p.json(), "tool-" + name + "-schema", schema));
-
- if (!m.func_close.empty()) {
- func_parser = func_parser + m.func_close;
- }
-
- tool_choice |= p.rule("tool-" + name, func_parser);
- });
-
- auto require_calls = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED;
-
- common_peg_parser tool_calls = p.eps();
-
- if (!m.per_call_start.empty()) {
- // Per-call wrapping: each call individually wrapped
- auto wrapped_call = m.per_call_start + tool_choice + m.per_call_end;
- if (inputs.parallel_tool_calls) {
- tool_calls = p.trigger_rule("tool-call",
- wrapped_call + p.zero_or_more(p.space() + wrapped_call));
- } else {
- tool_calls = p.trigger_rule("tool-call", wrapped_call);
- }
- if (!m.tool_section_start.empty()) {
- tool_calls = p.trigger_rule("tool-calls", p.literal(m.tool_section_start) + p.space() +
- tool_calls + p.space() + (m.tool_section_end.empty() ? p.end() : p.literal(m.tool_section_end)));
- }
+ if (!m.per_call_start.empty()) { // per-call wrapping: every call sits between per_call_start and per_call_end
+ auto wrapped_call = m.per_call_start + tool_choice + m.per_call_end;
+ if (inputs.parallel_tool_calls) {
+ tool_calls = p.trigger_rule("tool-call",
+ wrapped_call + p.zero_or_more(p.space() + wrapped_call)); // first call, then any number of p.space()-separated calls
} else {
- std::string separator = m.call_separator;
- if (separator.empty()) {
- separator = ", "; // Default
- }
-
- if (inputs.parallel_tool_calls) {
- tool_calls = p.trigger_rule("tool-call",
- m.tool_section_start + tool_choice + p.zero_or_more(separator + tool_choice) + m.tool_section_end);
- } else {
- tool_calls = p.trigger_rule("tool-call",
- m.tool_section_start + tool_choice + m.tool_section_end);
- }
+ tool_calls = p.trigger_rule("tool-call", wrapped_call); // exactly one wrapped call
+ }
+ if (!m.tool_section_start.empty()) { // additionally enclose the calls in the section markers
+ tool_calls = p.trigger_rule("tool-calls", p.literal(m.tool_section_start) + p.space() +
+ tool_calls + p.space() + (m.tool_section_end.empty() ? p.end() : p.literal(m.tool_section_end))); // with no end marker, p.end() takes its place
+ }
+ } else { // no per-call markers: calls are placed directly between the section markers
+ std::string separator = m.call_separator;
+ if (separator.empty()) {
+ separator = ", "; // Default separator, used when m.call_separator is empty
}
- if (!require_calls) {
- tool_calls = p.optional(tool_calls);
- }
-
- std::string trigger_marker = !m.tool_section_start.empty() ? m.tool_section_start : m.per_call_start;
- auto content_before_tools = trigger_marker.empty() ? p.eps() : p.until(trigger_marker);
- return reasoning + p.optional(p.content(content_before_tools)) + tool_calls + p.end();
- }
-
- if (analysis.tools == tool_format::TAG_WITH_TAGGED) {
- // Tag-based with tagged args: value
- foreach_function(inputs.tools, [&](const json & tool) {
- const auto & function = tool.at("function");
- std::string name = function.at("name");
- const auto & params = function.at("parameters");
-
- if (!params.contains("properties") || !params.at("properties").is_object()) {
- return;
- }
-
- const auto & properties = params.at("properties");
- std::set required;
- if (params.contains("required") && params.at("required").is_array()) {
- params.at("required").get_to(required);
- }
-
- // Build parser for each argument
- std::vector arg_parsers;
- for (const auto & [param_name, param_schema] : properties.items()) {
- bool is_required = required.find(param_name) != required.end();
- auto type = param_schema.value("type", "object");
-
- auto arg = p.tool_arg(
- p.tool_arg_open(m.arg_name_prefix + p.tool_arg_name(p.literal(param_name)) + m.arg_name_suffix) + m.arg_value_prefix +
- (type == "string" ?
- p.tool_arg_string_value(p.schema(p.until(m.arg_value_suffix),
- "tool-" + name + "-arg-" + param_name + "-schema", param_schema, true)) :
- p.tool_arg_json_value(p.schema(p.json(),
- "tool-" + name + "-arg-" + param_name + "-schema", param_schema)) + p.space()) +
- p.tool_arg_close(p.literal(m.arg_value_suffix))
- );
-
- if (is_required) {
- arg_parsers.push_back(p.rule("tool-" + name + "-arg-" + param_name, arg));
- } else {
- arg_parsers.push_back(p.optional(p.rule("tool-" + name + "-arg-" + param_name, arg)));
- }
- }
-
- // Build arg sequence with space() between consecutive args
- common_peg_parser args_seq = p.eps();
- for (size_t i = 0; i < arg_parsers.size(); i++) {
- if (i > 0) {
- args_seq = args_seq + p.space();
- }
- args_seq = args_seq + arg_parsers[i];
- }
-
- // Build call_id parser based on position (if supported)
- common_peg_parser call_id_section = p.eps();
- if (analysis.call_id_pos == call_id_position::BETWEEN_FUNC_AND_ARGS &&
- !m.call_id_prefix.empty() && !m.call_id_suffix.empty()) {
- // Optional call_id followed by required call_id_suffix
- call_id_section = p.optional(m.call_id_prefix + p.tool_id(p.until(m.call_id_suffix))) + m.call_id_suffix;
- }
-
- auto func_parser = p.tool_open(m.func_name_prefix + p.tool_name(p.literal(name)) + m.func_name_suffix) +
- call_id_section +
- p.space() + args_seq;
-
- if (!m.func_close.empty()) {
- func_parser = func_parser + p.space() + p.tool_close(p.literal(m.func_close));
- } else if (!m.per_call_end.empty()) {
- // When there's no func_close but there is a per_call_end marker, use peek() to ensure
- // we only emit tool_close when we can actually see the closing marker. This prevents
- // premature closing during partial parsing when we've seen e.g. "" which could be
- // either "" (end) or "" prefix that failed to match.
- func_parser = func_parser + p.tool_close(p.peek(p.literal(m.per_call_end)));
- } else {
- func_parser = func_parser + p.tool_close(p.space()); // force this to process tool closing callbacks in mapper
- }
-
- tool_choice |= p.rule("tool-" + name, func_parser);
- });
-
- auto require_tools = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED;
-
- common_peg_parser tool_calls = p.eps();
-
- if (!m.per_call_start.empty()) {
- // Per-call wrapping: each call individually wrapped (e.g., ...)
- auto wrapped_call = m.per_call_start + p.space() + tool_choice + p.space() + m.per_call_end;
- if (inputs.parallel_tool_calls) {
- tool_calls = p.trigger_rule("tool-call", wrapped_call + p.zero_or_more(p.space() + wrapped_call));
- } else {
- tool_calls = p.trigger_rule("tool-call", wrapped_call);
- }
- if (!m.tool_section_start.empty()) {
- tool_calls = p.trigger_rule("tool-calls", p.literal(m.tool_section_start) + p.space() +
- tool_calls + p.space() + (m.tool_section_end.empty() ? p.end() : p.literal(m.tool_section_end)));
- }
+ if (inputs.parallel_tool_calls) {
+ tool_calls = p.trigger_rule("tool-call",
+ m.tool_section_start + tool_choice + p.zero_or_more(separator + tool_choice) + m.tool_section_end); // calls joined by the separator
} else {
- std::string separator = m.call_separator;
- if (separator.empty()) {
- separator = ", "; // Default
- }
-
- if (inputs.parallel_tool_calls) {
- tool_calls = p.trigger_rule("tool-call",
- m.tool_section_start + p.space() + tool_choice + p.zero_or_more(separator + tool_choice) + p.space() + m.tool_section_end);
- } else {
- tool_calls = p.trigger_rule("tool-call",
- m.tool_section_start + p.space() + tool_choice + p.space() + m.tool_section_end);
- }
+ tool_calls = p.trigger_rule("tool-call",
+ m.tool_section_start + tool_choice + m.tool_section_end); // a single call between the section markers
}
-
- if (!require_tools) {
- tool_calls = p.optional(tool_calls);
- }
-
- std::string trigger_marker = !m.tool_section_start.empty() ? m.tool_section_start : m.per_call_start;
- auto content_before_tools = trigger_marker.empty() ? p.eps() : p.until(trigger_marker);
- return reasoning + p.optional(p.content(content_before_tools)) + tool_calls + p.end();
}
- GGML_ABORT("Unable to create tool parser");
+ if (!require_calls) { // tool calls may be absent unless tool_choice is COMMON_CHAT_TOOL_CHOICE_REQUIRED
+ tool_calls = p.optional(tool_calls);
+ }
+
+ std::string trigger_marker = !m.tool_section_start.empty() ? m.tool_section_start : m.per_call_start; // section marker preferred, per-call marker as fallback
+ auto content_before_tools = trigger_marker.empty() ? p.eps() : p.until(trigger_marker); // content runs up to that marker; p.eps() when neither marker is known
+ return reasoning + p.optional(p.content(content_before_tools)) + tool_calls + p.end();
+}
+
+// Builds the tool-call parser for tool_format::TAG_WITH_TAGGED, where every argument is emitted
+// as its own name/value pair delimited by the analysed markers instead of one JSON object.
+//
+//   p         - builder used to create all parser nodes and rules
+//   analysis  - template analysis result; supplies the markers and the call-id position
+//   inputs    - request parameters: tool definitions, tool_choice, parallel_tool_calls
+//   reasoning - parser placed in front of the content and the tool-call section
+//
+// Returns: reasoning + optional content + tool calls + p.end().
+common_peg_parser universal_peg_generator::build_tool_parser_tag_tagged(
+    common_chat_peg_unified_builder & p,
+    const diff_analysis_result & analysis,
+    const templates_params & inputs,
+    const common_peg_parser & reasoning) {
+
+    const auto & m = analysis.markers;
+    common_peg_parser tool_choice = p.choice();
+
+    foreach_function(inputs.tools, [&](const json & tool) {
+        const auto & function = tool.at("function");
+        std::string name = function.at("name");
+        const auto & params = function.at("parameters");
+
+        // NOTE(review): a tool whose schema has no "properties" object contributes no alternative
+        // to tool_choice, so this format cannot express a call to it - confirm that is intended.
+        if (!params.contains("properties") || !params.at("properties").is_object()) {
+            return;
+        }
+
+        const auto & properties = params.at("properties");
+        // Names listed in the schema's "required" array; everything else becomes optional
+        std::set<std::string> required;
+        if (params.contains("required") && params.at("required").is_array()) {
+            params.at("required").get_to(required);
+        }
+
+        // Build parser for each argument
+        std::vector<common_peg_parser> arg_parsers;
+        for (const auto & [param_name, param_schema] : properties.items()) {
+            bool is_required = required.find(param_name) != required.end();
+            // A property without an explicit "type" is handled like a JSON (non-string) value
+            auto type = param_schema.value("type", "object");
+
+            // String values are taken verbatim up to the value suffix; all other types are parsed as JSON
+            auto arg = p.tool_arg(
+                p.tool_arg_open(m.arg_name_prefix + p.tool_arg_name(p.literal(param_name)) + m.arg_name_suffix) + m.arg_value_prefix +
+                (type == "string" ?
+                    p.tool_arg_string_value(p.schema(p.until(m.arg_value_suffix),
+                        "tool-" + name + "-arg-" + param_name + "-schema", param_schema, true)) :
+                    p.tool_arg_json_value(p.schema(p.json(),
+                        "tool-" + name + "-arg-" + param_name + "-schema", param_schema)) + p.space()) +
+                p.tool_arg_close(p.literal(m.arg_value_suffix))
+            );
+
+            if (is_required) {
+                arg_parsers.push_back(p.rule("tool-" + name + "-arg-" + param_name, arg));
+            } else {
+                arg_parsers.push_back(p.optional(p.rule("tool-" + name + "-arg-" + param_name, arg)));
+            }
+        }
+
+        // Build arg sequence with space() between consecutive args
+        common_peg_parser args_seq = p.eps();
+        for (size_t i = 0; i < arg_parsers.size(); i++) {
+            if (i > 0) {
+                args_seq = args_seq + p.space();
+            }
+            args_seq = args_seq + arg_parsers[i];
+        }
+
+        // Build call_id parser based on position (if supported): the prefix + id part is optional,
+        // the suffix marker is always required
+        common_peg_parser call_id_section = p.eps();
+        if (analysis.call_id_pos == call_id_position::BETWEEN_FUNC_AND_ARGS &&
+            !m.call_id_prefix.empty() && !m.call_id_suffix.empty()) {
+            call_id_section = p.optional(m.call_id_prefix + p.tool_id(p.until(m.call_id_suffix))) + m.call_id_suffix;
+        }
+
+        auto func_parser = p.tool_open(m.func_name_prefix + p.tool_name(p.literal(name)) + m.func_name_suffix) +
+                           call_id_section +
+                           p.space() + args_seq;
+
+        if (!m.func_close.empty()) {
+            func_parser = func_parser + p.space() + p.tool_close(p.literal(m.func_close));
+        } else if (!m.per_call_end.empty()) {
+            // When there's no func_close but there is a per_call_end marker, use peek() to ensure
+            // we only emit tool_close when we can actually see the closing marker. This prevents
+            // premature closing during partial parsing, when only an ambiguous prefix of the
+            // marker has been seen so far. The marker itself is matched later by wrapped_call.
+            func_parser = func_parser + p.tool_close(p.peek(p.literal(m.per_call_end)));
+        } else {
+            func_parser = func_parser + p.tool_close(p.space()); // force this to process tool closing callbacks in mapper
+        }
+
+        tool_choice |= p.rule("tool-" + name, func_parser);
+    });
+
+    auto require_tools = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED;
+
+    common_peg_parser tool_calls = p.eps();
+
+    if (!m.per_call_start.empty()) {
+        // Per-call wrapping: every call sits between per_call_start and per_call_end
+        auto wrapped_call = m.per_call_start + p.space() + tool_choice + p.space() + m.per_call_end;
+        if (inputs.parallel_tool_calls) {
+            tool_calls = p.trigger_rule("tool-call", wrapped_call + p.zero_or_more(p.space() + wrapped_call));
+        } else {
+            tool_calls = p.trigger_rule("tool-call", wrapped_call);
+        }
+        if (!m.tool_section_start.empty()) {
+            // Additionally enclose the calls in the section markers; p.end() replaces a missing end marker
+            tool_calls = p.trigger_rule("tool-calls", p.literal(m.tool_section_start) + p.space() +
+                tool_calls + p.space() + (m.tool_section_end.empty() ? p.end() : p.literal(m.tool_section_end)));
+        }
+    } else {
+        // No per-call markers: calls are placed directly between the section markers
+        std::string separator = m.call_separator;
+        if (separator.empty()) {
+            separator = ", "; // Default
+        }
+
+        if (inputs.parallel_tool_calls) {
+            tool_calls = p.trigger_rule("tool-call",
+                m.tool_section_start + p.space() + tool_choice + p.zero_or_more(separator + tool_choice) + p.space() + m.tool_section_end);
+        } else {
+            tool_calls = p.trigger_rule("tool-call",
+                m.tool_section_start + p.space() + tool_choice + p.space() + m.tool_section_end);
+        }
+    }
+
+    // Tool calls may be absent unless tool_choice is COMMON_CHAT_TOOL_CHOICE_REQUIRED
+    if (!require_tools) {
+        tool_calls = p.optional(tool_calls);
+    }
+
+    // Content runs up to the first marker that can open a tool call (section marker preferred)
+    std::string trigger_marker = !m.tool_section_start.empty() ? m.tool_section_start : m.per_call_start;
+    auto content_before_tools = trigger_marker.empty() ? p.eps() : p.until(trigger_marker);
+    return reasoning + p.optional(p.content(content_before_tools)) + tool_calls + p.end();
}
diff --git a/common/chat-auto-parser.h b/common/chat-auto-parser.h
index c6587667d1..40f1fbe1bb 100644
--- a/common/chat-auto-parser.h
+++ b/common/chat-auto-parser.h
@@ -51,4 +51,20 @@ class universal_peg_generator {
const diff_analysis_result & analysis,
const templates_params & inputs,
const common_peg_parser & reasoning);
+
+ // Per-format tool parser builders
+ static common_peg_parser build_tool_parser_json_native(common_chat_peg_unified_builder & p, // selected by build_tool_parser for tool_format::JSON_NATIVE
+ const diff_analysis_result & analysis,
+ const templates_params & inputs,
+ const common_peg_parser & reasoning);
+
+ static common_peg_parser build_tool_parser_tag_json(common_chat_peg_unified_builder & p, // selected by build_tool_parser for tool_format::TAG_WITH_JSON
+ const diff_analysis_result & analysis,
+ const templates_params & inputs,
+ const common_peg_parser & reasoning);
+
+ static common_peg_parser build_tool_parser_tag_tagged(common_chat_peg_unified_builder & p, // selected by build_tool_parser for tool_format::TAG_WITH_TAGGED
+ const diff_analysis_result & analysis,
+ const templates_params & inputs,
+ const common_peg_parser & reasoning);
};
diff --git a/common/chat-diff-analyzer.h b/common/chat-diff-analyzer.h
index 7933de5ce3..ce729df0e6 100644
--- a/common/chat-diff-analyzer.h
+++ b/common/chat-diff-analyzer.h
@@ -169,11 +169,7 @@ enum class tool_format {
NONE, // No tool support detected
JSON_NATIVE, // Pure JSON: {"name": "X", "arguments": {...}}
TAG_WITH_JSON, // Tag-based with JSON args: function name between marker tags, arguments as one JSON object {...}
- BRACKET_TAG, // Bracket-tag: [TOOL_CALLS]name[CALL_ID]id[ARGS]{...}
- PREFIXED_INDEXED, // Prefixed-indexed: functions.X:0{...}
- RECIPIENT_BASED, // Recipient routing: >>>func_name\n{...}
TAG_WITH_TAGGED, // Tag-based with tagged args: each argument name and value wrapped in its own marker tags
- MARKDOWN_BLOCK, // Markdown code block: Action:\n```json\n[...]\n```
};
inline std::ostream & operator<<(std::ostream & os, const tool_format & format) {
@@ -184,16 +180,8 @@ inline std::ostream & operator<<(std::ostream & os, const tool_format & format)
return os << "JSON_NATIVE";
case tool_format::TAG_WITH_JSON:
return os << "TAG_WITH_JSON";
- case tool_format::BRACKET_TAG:
- return os << "BRACKET_TAG";
- case tool_format::PREFIXED_INDEXED:
- return os << "PREFIXED_INDEXED";
- case tool_format::RECIPIENT_BASED:
- return os << "RECIPIENT_BASED";
case tool_format::TAG_WITH_TAGGED:
return os << "TAG_WITH_TAGGED";
- case tool_format::MARKDOWN_BLOCK:
- return os << "MARKDOWN_BLOCK";
default:
return os << "UNKNOWN";
}
diff --git a/common/chat-peg-parser.cpp b/common/chat-peg-parser.cpp
index 2922c8d582..cb38fb160f 100644
--- a/common/chat-peg-parser.cpp
+++ b/common/chat-peg-parser.cpp
@@ -35,6 +35,45 @@ static std::string_view trim(std::string_view sv) {
return trim_trailing_space(trim_leading_space(sv, 1));
}
+// Net curly-brace nesting of a JSON-like string: +1 for every '{' and -1 for every '}'
+// found outside double-quoted strings. Inside a quoted string a backslash makes the
+// following character inert, so an escaped quote does not end the string.
+// The result is negative when the text closes more braces than it opens.
+static int json_brace_depth(const std::string & s) {
+    int  open_braces  = 0;
+    bool inside_quote = false;
+    bool skip_next    = false;
+    for (const char ch : s) {
+        if (skip_next) {
+            // Character following a backslash inside a string: never structural
+            skip_next = false;
+        } else if (inside_quote) {
+            if (ch == '\\') {
+                skip_next = true;
+            } else if (ch == '"') {
+                inside_quote = false;
+            }
+        } else {
+            switch (ch) {
+                case '"':
+                    inside_quote = true;
+                    break;
+                case '{':
+                    ++open_braces;
+                    break;
+                case '}':
+                    --open_braces;
+                    break;
+                default:
+                    break;
+            }
+        }
+    }
+    return open_braces;
+}
+
+// Serialises s as a JSON string via json(s).dump() and returns the text between the
+// enclosing double quotes; if the dump is not quote-delimited it is returned unchanged.
+static std::string escape_json_string_inner(const std::string & s) {
+    const std::string dumped = json(s).dump();
+    const bool quoted = dumped.size() >= 2 && dumped.front() == '"' && dumped.back() == '"';
+    if (!quoted) {
+        return dumped;
+    }
+    return dumped.substr(1, dumped.size() - 2);
+}
+
// Convert Python-style single-quoted strings to JSON double-quoted strings
// Only converts outer string delimiters, properly handling escape sequences:
// - {'key': 'value'} -> {"key": "value"}
@@ -148,6 +187,10 @@ common_peg_parser common_chat_peg_builder::tag_with_safe_content(const std::stri
return zero_or_more(choice({ p, content_chunk }));
}
+// Destination for argument text: the current tool's arguments once that tool has a name,
+// otherwise the shared args_buffer.
+std::string & common_chat_peg_unified_mapper::args_target() {
+    if (!current_tool || current_tool->name.empty()) {
+        return args_buffer;
+    }
+    return current_tool->arguments;
+}
+
void common_chat_peg_unified_mapper::from_ast(const common_peg_ast_arena & arena,
const common_peg_parse_result & parse_result_arg) {
// Call base class to visit all nodes
@@ -156,15 +199,12 @@ void common_chat_peg_unified_mapper::from_ast(const common_peg_ast_arena & ar
// Flush any pending tool call that was started but never got a name
// This happens during partial parsing when the tool call is incomplete
if (pending_tool_call.has_value() && !pending_tool_call->name.empty()) {
- // Transfer any buffered arguments
if (!args_buffer.empty()) {
pending_tool_call->arguments = args_buffer;
}
- // Close any open quotes in buffered args
- if (buffer_needs_closing_quote && !pending_tool_call->arguments.empty()) {
+ if (closing_quote_pending && !pending_tool_call->arguments.empty()) {
pending_tool_call->arguments += "\"";
}
- // Add the incomplete tool call to results
result.tool_calls.push_back(pending_tool_call.value());
pending_tool_call.reset();
}
@@ -187,15 +227,11 @@ void common_chat_peg_unified_mapper::map(const common_peg_ast_node & node) {
bool is_arg_string_value = node.tag == common_chat_peg_unified_builder::TOOL_ARG_STRING_VALUE;
if (is_tool_open) {
- // Don't create tool call yet - wait for name to be known
- // This prevents sending incomplete tool calls in streaming mode
- pending_tool_call = common_chat_tool_call();
- current_tool = &pending_tool_call.value();
- arg_count = 0;
- // Clear the arguments buffer for the new tool
+ pending_tool_call = common_chat_tool_call();
+ current_tool = &pending_tool_call.value();
+ arg_count = 0;
args_buffer.clear();
- needs_closing_quote = false;
- buffer_needs_closing_quote = false;
+ closing_quote_pending = false;
}
if (is_tool_id && current_tool) {
@@ -208,15 +244,14 @@ void common_chat_peg_unified_mapper::map(const common_peg_ast_node & node) {
if (is_tool_name && current_tool) {
current_tool->name = std::string(trim_trailing_space(node.text));
- // Now that we have the name, we can populate the arguments from the buffer
+ // Now that we have the name, populate the arguments from the buffer
if (!args_buffer.empty()) {
current_tool->arguments = args_buffer;
args_buffer.clear();
} else if (current_tool->arguments.empty()) {
- // Initialize arguments if we're using tagged format and no buffered args
current_tool->arguments = "{";
}
- // Now that we have the name, add the tool call to the result
+ // Add the tool call to results so streaming can see it
if (pending_tool_call.has_value()) {
result.tool_calls.push_back(pending_tool_call.value());
pending_tool_call.reset();
@@ -225,28 +260,16 @@ void common_chat_peg_unified_mapper::map(const common_peg_ast_node & node) {
}
if (is_tool_args && current_tool) {
- // For JSON format, the arguments come as a complete JSON object
- // For tagged format, we build up arguments from individual arg_name/arg_value nodes
- // Check if this looks like JSON (starts with {) vs tagged format (starts with <)
+ // For JSON format: arguments come as a complete JSON object
+ // For tagged format: built up from individual arg_name/arg_value nodes
auto text = trim_trailing_space(node.text);
if (!text.empty() && text.front() == '{') {
- // If we have the tool name, populate directly; otherwise buffer
- if (!current_tool->name.empty()) {
- current_tool->arguments = std::string(text);
- } else {
- args_buffer = std::string(text);
- }
+ args_target() = std::string(text);
}
- // If it's tagged format, we ignore this and let arg_name/arg_value build up the JSON
}
if (is_arg_open) {
- // Reset for new argument
- if (!current_tool->name.empty()) {
- needs_closing_quote = false;
- } else {
- buffer_needs_closing_quote = false;
- }
+ closing_quote_pending = false;
}
if (is_arg_name && current_tool) {
@@ -257,15 +280,11 @@ void common_chat_peg_unified_mapper::map(const common_peg_ast_node & node) {
arg_entry += json(trim(node.text)).dump() + ":";
++arg_count;
- // If we have the tool name, add directly; otherwise buffer
- if (!current_tool->name.empty()) {
- current_tool->arguments += arg_entry;
- } else {
- if (args_buffer.empty()) {
- args_buffer = "{";
- }
- args_buffer += arg_entry;
+ auto & target = args_target();
+ if (target.empty()) {
+ target = "{";
}
+ target += arg_entry;
}
if ((is_arg_value || is_arg_string_value) && current_tool) {
@@ -273,160 +292,83 @@ void common_chat_peg_unified_mapper::map(const common_peg_ast_node & node) {
std::string value_to_add;
if (value_content.empty() && is_arg_string_value) {
- // Empty string value - start with opening quote
- // arg_close will add the closing quote
- if (!current_tool->name.empty()) {
- value_to_add = "\"";
- needs_closing_quote = true;
- } else {
- value_to_add = "\"";
- buffer_needs_closing_quote = true;
- }
+ // Empty string value - arg_close will add the closing quote
+ value_to_add = "\"";
+ closing_quote_pending = true;
} else if (!value_content.empty() && is_arg_string_value) {
// Schema declares this as string type - always treat as literal string value
- // Never try to parse as JSON (this ensures consistent handling of quoted strings
- // like "foo" which would otherwise be parsed as JSON string 'foo')
- if (!current_tool->name.empty()) {
- if (!needs_closing_quote) {
- value_to_add = "\"";
- needs_closing_quote = true;
- }
- } else {
- if (!buffer_needs_closing_quote) {
- value_to_add = "\"";
- buffer_needs_closing_quote = true;
- }
+ if (!closing_quote_pending) {
+ value_to_add = "\"";
+ closing_quote_pending = true;
}
- // Escape special characters in the string content
- std::string escaped = json(value_content).dump();
- // Remove the surrounding quotes from the escaped string
- if (escaped.size() >= 2 && escaped.front() == '"' && escaped.back() == '"') {
- escaped = escaped.substr(1, escaped.size() - 2);
- }
- value_to_add += escaped;
+ value_to_add += escape_json_string_inner(value_content);
} else if (!value_content.empty()) {
- // For potential containers, normalize Python-style single quotes to JSON double quotes first
- // This ensures consistent output during both partial and final parsing
+ // For potential containers, normalize Python-style single quotes to JSON double quotes
bool is_potential_container = value_content[0] == '[' || value_content[0] == '{';
if (is_potential_container) {
value_content = normalize_quotes_to_json(value_content);
}
// Try to parse as JSON value (number, bool, null, object, array)
- // For strings, we need special handling to support incremental parsing
try {
json parsed = json::parse(value_content);
if (parsed.is_string()) {
- // For string values, don't add closing quote yet (added by arg_close)
- // This ensures incremental parsing produces monotonic arguments
+ // Don't add closing quote yet (added by arg_close) for monotonic streaming
std::string escaped = parsed.dump();
- // Remove the trailing quote
if (!escaped.empty() && escaped.back() == '"') {
escaped.pop_back();
}
- value_to_add = escaped;
- if (!current_tool->name.empty()) {
- needs_closing_quote = true;
- } else {
- buffer_needs_closing_quote = true;
- }
+ value_to_add = escaped;
+ closing_quote_pending = true;
} else {
- // For non-string values (number, bool, null, object, array), add raw value content
- // Using raw content instead of dump() ensures monotonicity for streaming
- // (prevents issues with spaces being removed by dump())
+ // Non-string values: use raw content to preserve whitespace for monotonicity
value_to_add = value_content;
}
} catch (...) {
- // JSON parsing failed - content is either incomplete (partial) or not valid JSON
- // Note: potential containers were already normalized above, so value_content
- // already has double quotes if it started with [ or {
-
if (node.is_partial && is_potential_container) {
- // During incremental parsing, if it looks like a JSON container, don't wrap in quotes yet
- // and don't escape. Just pass through the (already normalized) content.
+ // Partial container: pass through the already-normalized content
value_to_add = value_content;
} else {
- // Not valid JSON and NOT a potential partial container - treat as string value
- // Add opening quote if not already in a string
- if (!current_tool->name.empty()) {
- if (!needs_closing_quote) {
- value_to_add = "\"";
- needs_closing_quote = true;
- }
- } else {
- if (!buffer_needs_closing_quote) {
- value_to_add = "\"";
- buffer_needs_closing_quote = true;
- }
+ // Not valid JSON - treat as string value
+ if (!closing_quote_pending) {
+ value_to_add = "\"";
+ closing_quote_pending = true;
}
- // Escape special characters in the string content
- std::string escaped = json(value_content).dump();
- // Remove the surrounding quotes from the escaped string
- if (escaped.size() >= 2 && escaped.front() == '"' && escaped.back() == '"') {
- escaped = escaped.substr(1, escaped.size() - 2);
- }
- value_to_add += escaped;
+ value_to_add += escape_json_string_inner(value_content);
}
}
}
- // If we have the tool name, add directly; otherwise buffer
- if (!current_tool->name.empty()) {
- current_tool->arguments += value_to_add;
- } else {
- if (args_buffer.empty()) {
- args_buffer = "{";
- }
- args_buffer += value_to_add;
- }
+ args_target() += value_to_add;
}
if (is_arg_close && current_tool) {
- if (!current_tool->name.empty()) {
- if (needs_closing_quote) {
- current_tool->arguments += "\"";
- needs_closing_quote = false;
- }
- } else {
- if (buffer_needs_closing_quote) {
- if (args_buffer.empty()) {
- args_buffer = "{";
- }
- args_buffer += "\"";
- buffer_needs_closing_quote = false;
- }
+ if (closing_quote_pending) {
+ args_target() += "\"";
+ closing_quote_pending = false;
}
}
if (is_tool_close && current_tool) {
- if (!current_tool->name.empty()) {
- if (needs_closing_quote) {
- current_tool->arguments += "\"";
- needs_closing_quote = false;
- }
- if (!current_tool->arguments.empty() && current_tool->arguments.back() != '}') {
- current_tool->arguments += "}";
- }
- // If we have a pending tool call that wasn't added yet, add it now
- if (pending_tool_call.has_value()) {
+ // Flush buffer to arguments if tool name was never seen
+ if (current_tool->name.empty() && !args_buffer.empty()) {
+ current_tool->arguments = args_buffer;
+ args_buffer.clear();
+ }
+ // Close any pending string quote
+ if (closing_quote_pending) {
+ current_tool->arguments += "\"";
+ closing_quote_pending = false;
+ }
+ // Close any unclosed braces (accounts for nested objects)
+ for (int d = json_brace_depth(current_tool->arguments); d > 0; d--) {
+ current_tool->arguments += "}";
+ }
+ // Add tool call to results if named; otherwise discard
+ if (pending_tool_call.has_value()) {
+ if (!current_tool->name.empty()) {
result.tool_calls.push_back(pending_tool_call.value());
- pending_tool_call.reset();
}
- } else {
- // We're closing a tool without a name - flush the buffer
- if (!args_buffer.empty()) {
- current_tool->arguments = args_buffer;
- args_buffer.clear();
- }
- if (buffer_needs_closing_quote) {
- current_tool->arguments += "\"";
- buffer_needs_closing_quote = false;
- }
- // Close the arguments object if using tagged format
- if (!current_tool->arguments.empty() && current_tool->arguments.back() != '}') {
- current_tool->arguments += "}";
- }
- // Don't add to result if no name - this prevents incomplete tool calls
pending_tool_call.reset();
}
}
@@ -511,6 +453,241 @@ static std::pair parse_key_spec(const std::string & ke
return {key.substr(0, dot_pos), key.substr(dot_pos + 1)};
}
+// Mode 1: function_is_key — parse {"function_name": {...}}; yields one alternative per tool that has a "function" entry
+common_peg_parser common_chat_peg_unified_builder::build_json_tools_function_is_key(
+ const nlohmann::json & tools,
+ const std::string & args_key, // empty: the arguments value is parsed directly, without a key
+ const std::string & effective_args_key, // key literal matched when args_key is non-empty
+ const std::string & call_id_key, // optional string id field; skipped when empty
+ const std::string & gen_call_id_key) { // optional string-or-number id field; skipped when empty
+
+ auto tool_choices = choice();
+
+ for (const auto & tool_def : tools) {
+ if (!tool_def.contains("function")) {
+ continue;
+ }
+ const auto & function = tool_def.at("function");
+ std::string name = function.at("name");
+ nlohmann::json params = function.contains("parameters") ? function.at("parameters") : nlohmann::json::object();
+
+ // Build inner object fields (element type restored: every pushed value is a common_peg_parser)
+ std::vector<common_peg_parser> inner_fields;
+
+ if (!call_id_key.empty()) {
+ auto id_parser = atomic(
+ literal("\"" + call_id_key + "\"") + space() + literal(":") + space() +
+ literal("\"") + tool_id(json_string_content()) + literal("\"")
+ );
+ inner_fields.push_back(optional(id_parser + space() + optional(literal(",") + space()))); // the comma after the id is itself optional
+ }
+
+ if (!gen_call_id_key.empty()) {
+ auto gen_id_parser = atomic(
+ literal("\"" + gen_call_id_key + "\"") + space() + literal(":") + space() +
+ choice({
+ literal("\"") + tool_id(json_string_content()) + literal("\""),
+ tool_id(json_number())
+ })
+ );
+ inner_fields.push_back(optional(gen_id_parser + space() + optional(literal(",") + space())));
+ }
+
+ // Arguments — either wrapped in args_key or parsed directly
+ common_peg_parser args_parser = eps();
+ if (args_key.empty()) {
+ args_parser = tool_args(schema(json(), "tool-" + name + "-schema", params));
+ } else {
+ args_parser = literal("\"" + effective_args_key + "\"") + space() + literal(":") + space() +
+ tool_args(schema(json(), "tool-" + name + "-schema", params));
+ }
+ inner_fields.push_back(args_parser); // arguments always come last, after any id fields
+
+ // Build inner object parser
+ common_peg_parser inner_object = eps();
+ if (args_key.empty() && inner_fields.size() == 1) { // bare arguments and no id fields: the schema value is the whole inner value
+ inner_object = inner_fields[0];
+ } else { // NOTE(review): with args_key empty but id keys set, the schema value sits keyless inside the braces — confirm intended
+ inner_object = literal("{") + space();
+ for (size_t i = 0; i < inner_fields.size(); i++) {
+ inner_object = inner_object + inner_fields[i];
+ if (i < inner_fields.size() - 1) {
+ inner_object = inner_object + space();
+ }
+ }
+ inner_object = inner_object + space() + literal("}");
+ }
+
+ auto tool_parser = tool(
+ tool_open(literal("{")) + space() +
+ literal("\"") + tool_name(literal(name)) + literal("\"") +
+ space() + literal(":") + space() +
+ inner_object +
+ space() + tool_close(literal("}"))
+ );
+
+ tool_choices |= rule("tool-" + name, tool_parser);
+ }
+
+ return tool_choices; // choice over all per-tool rules
+}
+
+// Mode 2: Nested keys (dot notation like "function.name") — name and arguments are parsed inside a sub-object under the shared prefix key
+common_peg_parser common_chat_peg_unified_builder::build_json_tools_nested_keys(
+ const nlohmann::json & tools,
+ const std::string & effective_name_key,
+ const std::string & effective_args_key,
+ const std::string & call_id_key,
+ const std::string & gen_call_id_key) {
+
+ auto tool_choices = choice();
+
+ auto name_spec = parse_key_spec(effective_name_key); // {text before the dot, text after the dot}
+ auto args_spec = parse_key_spec(effective_args_key); // presumably .first is empty when the key has no dot — confirm in parse_key_spec
+
+ std::string nested_prefix = !name_spec.first.empty() ? name_spec.first : args_spec.first; // prefix from the name key if it has one, else from the args key
+ std::string nested_name_field = !name_spec.first.empty() ? name_spec.second : effective_name_key;
+ std::string nested_args_field = !args_spec.first.empty() ? args_spec.second : effective_args_key;
+
+ for (const auto & tool_def : tools) {
+ if (!tool_def.contains("function")) {
+ continue; // entries without a "function" object produce no alternative
+ }
+ const auto & function = tool_def.at("function");
+ std::string name = function.at("name");
+ nlohmann::json params = function.contains("parameters") ? function.at("parameters") : nlohmann::json::object();
+
+ auto nested_name = literal("\"" + nested_name_field + "\"") + space() + literal(":") + space() +
+ literal("\"") + tool_name(literal(name)) + literal("\"");
+ auto nested_args = literal("\"" + nested_args_field + "\"") + space() + literal(":") + space() +
+ tool_args(schema(json(), "tool-" + name + "-schema", params));
+
+ auto nested_object = literal("{") + space() + // fixed order: name, mandatory comma, then arguments
+ nested_name + space() + literal(",") + space() +
+ nested_args +
+ space() + literal("}");
+
+ // Format: { id?, gen_id?, "<prefix>": { "<name field>": "...", "<args field>": ... } }
+ auto tool_parser_body = tool_open(literal("{")) + space();
+
+ if (!call_id_key.empty()) {
+ auto id_spec = parse_key_spec(call_id_key);
+ if (id_spec.first.empty()) { // only a top-level (non-dotted) id key is parsed; a dotted one adds nothing here
+ auto id_parser = atomic(
+ literal("\"" + call_id_key + "\"") + space() + literal(":") + space() +
+ literal("\"") + tool_id(json_string_content()) + literal("\"")
+ );
+ tool_parser_body = tool_parser_body + optional(id_parser + space() + literal(",") + space()); // id is optional, but when present a comma must follow
+ }
+ }
+
+ if (!gen_call_id_key.empty()) {
+ auto gen_id_spec = parse_key_spec(gen_call_id_key);
+ if (gen_id_spec.first.empty()) { // same top-level-only rule as for call_id_key
+ auto gen_id_parser = atomic(
+ literal("\"" + gen_call_id_key + "\"") + space() + literal(":") + space() +
+ choice({
+ literal("\"") + tool_id(json_string_content()) + literal("\""), // quoted string id
+ tool_id(json_number()) // or a bare JSON number
+ })
+ );
+ tool_parser_body = tool_parser_body + optional(gen_id_parser + space() + literal(",") + space());
+ }
+ }
+
+ auto nested_field = literal("\"" + nested_prefix + "\"") + space() + literal(":") + space() + nested_object;
+ tool_parser_body = tool_parser_body + nested_field + space() + tool_close(literal("}"));
+
+ tool_choices |= rule("tool-" + name, tool(tool_parser_body)); // one named rule per tool, added as an alternative
+ }
+
+ return tool_choices;
+}
+
+// Mode 3: Flat keys with optional ID fields and parameter ordering; yields one alternative per tool that has a "function" entry
+common_peg_parser common_chat_peg_unified_builder::build_json_tools_flat_keys(
+ const nlohmann::json & tools,
+ const std::string & effective_name_key,
+ const std::string & effective_args_key,
+ const std::string & call_id_key, // optional id field; skipped when empty
+ const std::string & gen_call_id_key, // optional id field; skipped when empty
+ const std::vector<std::string> & parameters_order) { // desired key order; keys not listed go last
+
+ auto tool_choices = choice();
+ auto name_key_parser = literal("\"" + effective_name_key + "\"");
+ auto args_key_parser = literal("\"" + effective_args_key + "\"");
+
+ for (const auto & tool_def : tools) {
+ if (!tool_def.contains("function")) {
+ continue;
+ }
+ const auto & function = tool_def.at("function");
+ std::string name = function.at("name");
+ nlohmann::json params = function.contains("parameters") ? function.at("parameters") : nlohmann::json::object();
+
+ auto tool_name_ = name_key_parser + space() + literal(":") + space() +
+ literal("\"") + tool_name(literal(name)) + literal("\"");
+ auto tool_args_ = args_key_parser + space() + literal(":") + space() +
+ tool_args(schema(json(), "tool-" + name + "-schema", params));
+
+ // Build ID parsers if keys are provided (both accept a quoted string or a bare JSON number)
+ common_peg_parser id_parser = eps();
+ if (!call_id_key.empty()) {
+ id_parser = atomic(
+ literal("\"" + call_id_key + "\"") + space() + literal(":") + space() +
+ choice({
+ literal("\"") + tool_id(json_string_content()) + literal("\""),
+ tool_id(json_number())
+ })
+ );
+ }
+
+ common_peg_parser gen_id_parser = eps();
+ if (!gen_call_id_key.empty()) {
+ gen_id_parser = atomic(
+ literal("\"" + gen_call_id_key + "\"") + space() + literal(":") + space() +
+ choice({
+ literal("\"") + tool_id(json_string_content()) + literal("\""),
+ tool_id(json_number())
+ })
+ );
+ }
+
+ // Create (parser, key) pairs for all fields, then sort by parameters_order
+ std::vector<std::pair<common_peg_parser, std::string>> parser_pairs;
+ parser_pairs.emplace_back(tool_name_, effective_name_key);
+ parser_pairs.emplace_back(tool_args_, effective_args_key);
+ if (!call_id_key.empty()) {
+ parser_pairs.emplace_back(optional(id_parser), call_id_key);
+ }
+ if (!gen_call_id_key.empty()) {
+ parser_pairs.emplace_back(optional(gen_id_parser), gen_call_id_key);
+ }
+ // stable_sort: keys missing from parameters_order all compare equal and must keep their insertion order
+ std::stable_sort(parser_pairs.begin(), parser_pairs.end(),
+ [&parameters_order](const auto & a, const auto & b) {
+ auto pos_a = std::find(parameters_order.begin(), parameters_order.end(), a.second);
+ auto pos_b = std::find(parameters_order.begin(), parameters_order.end(), b.second);
+ size_t idx_a = (pos_a == parameters_order.end()) ? parameters_order.size() : std::distance(parameters_order.begin(), pos_a);
+ size_t idx_b = (pos_b == parameters_order.end()) ? parameters_order.size() : std::distance(parameters_order.begin(), pos_b);
+ return idx_a < idx_b;
+ });
+
+ auto ordered_body = tool_open(literal("{")) + space();
+ for (size_t i = 0; i < parser_pairs.size(); i++) {
+ ordered_body = ordered_body + parser_pairs[i].first;
+ if (i < parser_pairs.size() - 1) {
+ ordered_body = ordered_body + space() + literal(",") + space(); // NOTE(review): comma is mandatory even next to an absent optional id field — confirm intended
+ }
+ }
+ ordered_body = ordered_body + space() + tool_close(literal("}"));
+
+ tool_choices |= rule("tool-" + name, tool(ordered_body));
+ }
+
+ return tool_choices; // choice over all per-tool rules
+}
+
common_peg_parser common_chat_peg_unified_builder::standard_json_tools(
const std::string & section_start,
const std::string & section_end,
@@ -528,239 +705,20 @@ common_peg_parser common_chat_peg_unified_builder::standard_json_tools(
return eps();
}
- // Build tool choices for JSON format
- auto tool_choices = choice();
- // auto other_member = json_string() + space() + literal(":") + space() + json();
-
- // Determine effective field names
std::string effective_name_key = name_key.empty() ? "name" : name_key;
std::string effective_args_key = args_key.empty() ? "arguments" : args_key;
- // Check if we have nested keys (dot notation)
- auto name_spec = parse_key_spec(effective_name_key);
- auto args_spec = parse_key_spec(effective_args_key);
- bool has_nested_keys = !name_spec.first.empty() || !args_spec.first.empty();
-
- // Mode 1: function_is_key - parse {"function_name": {...}}
+ // Dispatch to the appropriate builder based on the JSON layout mode
+ common_peg_parser tool_choices = eps();
if (function_is_key) {
- for (const auto & tool_def : tools) {
- if (!tool_def.contains("function")) {
- continue;
- }
- const auto & function = tool_def.at("function");
- std::string name = function.at("name");
- nlohmann::json params = function.contains("parameters") ? function.at("parameters") : nlohmann::json::object();
-
- // Build inner object fields
- std::vector inner_fields;
-
- // Add optional string ID field
- if (!call_id_key.empty()) {
- auto id_parser = atomic(
- literal("\"" + call_id_key + "\"") + space() + literal(":") + space() +
- literal("\"") + tool_id(json_string_content()) + literal("\"")
- );
- inner_fields.push_back(optional(id_parser + space() + optional(literal(",") + space())));
- }
-
- // Add optional generated integer ID field
- if (!gen_call_id_key.empty()) {
- auto gen_id_parser = atomic(
- literal("\"" + gen_call_id_key + "\"") + space() + literal(":") + space() +
- choice({
- literal("\"") + tool_id(json_string_content()) + literal("\""),
- tool_id(json_number())
- })
- );
- inner_fields.push_back(optional(gen_id_parser + space() + optional(literal(",") + space())));
- }
-
- // Add arguments - either wrapped in args_key or parsed directly
- common_peg_parser args_parser = eps();
- if (args_key.empty()) {
- // Arguments are directly the inner object value: {"func_name": {"arg1": "val"}}
- args_parser = tool_args(schema(json(), "tool-" + name + "-schema", params));
- } else {
- // Arguments are wrapped in a key: {"func_name": {"arguments": {"arg1": "val"}}}
- args_parser = literal("\"" + effective_args_key + "\"") + space() + literal(":") + space() +
- tool_args(schema(json(), "tool-" + name + "-schema", params));
- }
- inner_fields.push_back(args_parser);
-
- // Build inner object parser - no greedy other_member skipping to avoid consuming ID
- common_peg_parser inner_object = eps();
- if (args_key.empty() && inner_fields.size() == 1) {
- // Direct arguments: {"func_name": {"arg1": "val"}}
- // The args_parser is already the full object schema
- inner_object = inner_fields[0];
- } else {
- // Wrapped arguments: {"func_name": {"arguments": {"arg1": "val"}}}
- inner_object = literal("{") + space();
- for (size_t i = 0; i < inner_fields.size(); i++) {
- inner_object = inner_object + inner_fields[i];
- if (i < inner_fields.size() - 1) {
- inner_object = inner_object + space();
- }
- }
- inner_object = inner_object + space() + literal("}");
- }
-
- // Tool call format: { "function_name": { inner_object } }
- auto tool_parser = tool(
- tool_open(literal("{")) + space() +
- literal("\"") + tool_name(literal(name)) + literal("\"") +
- space() + literal(":") + space() +
- inner_object +
- space() + tool_close(literal("}"))
- );
-
- tool_choices |= rule("tool-" + name, tool_parser);
- }
- }
- // Mode 2: Nested keys (dot notation like "function.name")
- else if (has_nested_keys) {
- // Group fields by prefix
- std::string nested_prefix = !name_spec.first.empty() ? name_spec.first : args_spec.first;
- std::string nested_name_field = !name_spec.first.empty() ? name_spec.second : effective_name_key;
- std::string nested_args_field = !args_spec.first.empty() ? args_spec.second : effective_args_key;
-
- for (const auto & tool_def : tools) {
- if (!tool_def.contains("function")) {
- continue;
- }
- const auto & function = tool_def.at("function");
- std::string name = function.at("name");
- nlohmann::json params = function.contains("parameters") ? function.at("parameters") : nlohmann::json::object();
-
- // Build nested object with name and arguments
- auto nested_name = literal("\"" + nested_name_field + "\"") + space() + literal(":") + space() +
- literal("\"") + tool_name(literal(name)) + literal("\"");
- auto nested_args = literal("\"" + nested_args_field + "\"") + space() + literal(":") + space() +
- tool_args(schema(json(), "tool-" + name + "-schema", params));
-
- auto nested_object = literal("{") + space() +
- nested_name + space() + literal(",") + space() +
- nested_args +
- space() + literal("}");
-
- // Build top-level parser - simpler structure without greedy other_member skipping
- // Format: { id?, "function": {...} }
- auto tool_parser_body = tool_open(literal("{")) + space();
-
- // Add optional string ID field at top level
- if (!call_id_key.empty()) {
- auto id_spec = parse_key_spec(call_id_key);
- if (id_spec.first.empty()) { // Top-level ID field
- auto id_parser = atomic(
- literal("\"" + call_id_key + "\"") + space() + literal(":") + space() +
- literal("\"") + tool_id(json_string_content()) + literal("\"")
- );
- tool_parser_body = tool_parser_body + optional(id_parser + space() + literal(",") + space());
- }
- }
-
- // Add optional generated integer ID field at top level
- if (!gen_call_id_key.empty()) {
- auto gen_id_spec = parse_key_spec(gen_call_id_key);
- if (gen_id_spec.first.empty()) { // Top-level gen ID field
- auto gen_id_parser = atomic(
- literal("\"" + gen_call_id_key + "\"") + space() + literal(":") + space() +
- choice({
- literal("\"") + tool_id(json_string_content()) + literal("\""),
- tool_id(json_number())
- })
- );
- tool_parser_body = tool_parser_body + optional(gen_id_parser + space() + literal(",") + space());
- }
- }
-
- // Add the nested object field
- auto nested_field = literal("\"" + nested_prefix + "\"") + space() + literal(":") + space() + nested_object;
- tool_parser_body = tool_parser_body + nested_field + space() + tool_close(literal("}"));
-
- tool_choices |= rule("tool-" + name, tool(tool_parser_body));
- }
- }
- // Mode 3: Flat keys (enhanced with ID fields and parameter ordering)
- else {
- auto name_key_parser = literal("\"" + effective_name_key + "\"");
- auto args_key_parser = literal("\"" + effective_args_key + "\"");
-
- for (const auto & tool_def : tools) {
- if (!tool_def.contains("function")) {
- continue;
- }
- const auto & function = tool_def.at("function");
- std::string name = function.at("name");
- nlohmann::json params = function.contains("parameters") ? function.at("parameters") : nlohmann::json::object();
-
- auto tool_name_ = name_key_parser + space() + literal(":") + space() +
- literal("\"") + tool_name(literal(name)) + literal("\"");
- auto tool_args_ = args_key_parser + space() + literal(":") + space() +
- tool_args(schema(json(), "tool-" + name + "-schema", params));
-
- // Build ID parsers if keys are provided
- common_peg_parser id_parser = eps();
- if (!call_id_key.empty()) {
- id_parser = atomic(
- literal("\"" + call_id_key + "\"") + space() + literal(":") + space() +
- choice({
- literal("\"") + tool_id(json_string_content()) + literal("\""),
- tool_id(json_number())
- })
- );
- }
-
- common_peg_parser gen_id_parser = eps();
- if (!gen_call_id_key.empty()) {
- gen_id_parser = atomic(
- literal("\"" + gen_call_id_key + "\"") + space() + literal(":") + space() +
- choice({
- literal("\"") + tool_id(json_string_content()) + literal("\""),
- tool_id(json_number())
- })
- );
- }
-
- common_peg_parser tool_parser = eps();
-
- // Use parameter ordering if provided - parse fields in specified order without greedy skipping
- if (!parameters_order.empty()) {
- }
- // Build parser using parameter ordering (works with or without explicit parameters_order)
- // Create list of (parser, key) pairs for all fields
- std::vector> parser_pairs;
- parser_pairs.emplace_back(tool_name_, effective_name_key);
- parser_pairs.emplace_back(tool_args_, effective_args_key);
- if (!call_id_key.empty()) {
- parser_pairs.emplace_back(optional(id_parser), call_id_key);
- }
- if (!gen_call_id_key.empty()) {
- parser_pairs.emplace_back(optional(gen_id_parser), gen_call_id_key);
- }
-
- // Sort by position in parameters_order (or at end if not present)
- std::sort(parser_pairs.begin(), parser_pairs.end(),
- [¶meters_order](const auto & a, const auto & b) {
- auto pos_a = std::find(parameters_order.begin(), parameters_order.end(), a.second);
- auto pos_b = std::find(parameters_order.begin(), parameters_order.end(), b.second);
- size_t idx_a = (pos_a == parameters_order.end()) ? parameters_order.size() : std::distance(parameters_order.begin(), pos_a);
- size_t idx_b = (pos_b == parameters_order.end()) ? parameters_order.size() : std::distance(parameters_order.begin(), pos_b);
- return idx_a < idx_b;
- });
-
- // Build ordered parser
- auto ordered_body = tool_open(literal("{")) + space();
- for (size_t i = 0; i < parser_pairs.size(); i++) {
- ordered_body = ordered_body + parser_pairs[i].first;
- if (i < parser_pairs.size() - 1) {
- ordered_body = ordered_body + space() + literal(",") + space();
- }
- }
- ordered_body = ordered_body + space() + tool_close(literal("}"));
- tool_parser = tool(ordered_body);
-
- tool_choices |= rule("tool-" + name, tool_parser);
+ tool_choices = build_json_tools_function_is_key(tools, args_key, effective_args_key, call_id_key, gen_call_id_key);
+ } else {
+ auto name_spec = parse_key_spec(effective_name_key);
+ auto args_spec = parse_key_spec(effective_args_key);
+ if (!name_spec.first.empty() || !args_spec.first.empty()) {
+ tool_choices = build_json_tools_nested_keys(tools, effective_name_key, effective_args_key, call_id_key, gen_call_id_key);
+ } else {
+ tool_choices = build_json_tools_flat_keys(tools, effective_name_key, effective_args_key, call_id_key, gen_call_id_key, parameters_order);
}
}
@@ -770,7 +728,6 @@ common_peg_parser common_chat_peg_unified_builder::standard_json_tools(
tool_calls = tool_calls + zero_or_more(space() + literal(",") + space() + tool_choices);
}
- // Optionally wrap in array brackets
if (array_wrapped) {
tool_calls = literal("[") + space() + tool_calls + space() + literal("]");
}
diff --git a/common/chat-peg-parser.h b/common/chat-peg-parser.h
index f5d49a403a..c0392f0c5d 100644
--- a/common/chat-peg-parser.h
+++ b/common/chat-peg-parser.h
@@ -108,6 +108,27 @@ class common_chat_peg_unified_builder : public common_chat_peg_builder {
const nlohmann::json & tools,
bool parallel_tool_calls,
bool force_tool_calls);
+
+ private:
+ // Implementation helpers for standard_json_tools — one per JSON tool call layout mode; each returns the choice over per-tool rules
+ common_peg_parser build_json_tools_function_is_key(const nlohmann::json & tools, // Mode 1: {"<tool name>": {...}}
+ const std::string & args_key,
+ const std::string & effective_args_key,
+ const std::string & call_id_key,
+ const std::string & gen_call_id_key);
+
+ common_peg_parser build_json_tools_nested_keys(const nlohmann::json & tools, // Mode 2: dotted keys such as "function.name"
+ const std::string & effective_name_key,
+ const std::string & effective_args_key,
+ const std::string & call_id_key,
+ const std::string & gen_call_id_key);
+
+ common_peg_parser build_json_tools_flat_keys(const nlohmann::json & tools, // Mode 3: flat keys ordered by parameters_order
+ const std::string & effective_name_key,
+ const std::string & effective_args_key,
+ const std::string & call_id_key,
+ const std::string & gen_call_id_key,
+ const std::vector<std::string> & parameters_order);
};
inline common_peg_arena build_chat_peg_unified_parser(
@@ -119,11 +140,14 @@ inline common_peg_arena build_chat_peg_unified_parser(
class common_chat_peg_unified_mapper : public common_chat_peg_mapper {
std::optional pending_tool_call; // Tool call waiting for name
- common_chat_tool_call * current_tool = nullptr;
- int arg_count = 0;
- bool needs_closing_quote = false;
+ common_chat_tool_call * current_tool = nullptr;
+ int arg_count = 0;
+ bool closing_quote_pending = false;
std::string args_buffer; // Buffer to delay arguments until tool name is known
- bool buffer_needs_closing_quote = false; // Track quote state for buffered args
+
+ // Returns a reference to the active argument destination string.
+ // Before tool_name is known, writes go to args_buffer; after, to current_tool->arguments.
+ std::string & args_target();
public:
common_chat_peg_unified_mapper(common_chat_msg & msg) : common_chat_peg_mapper(msg) {}
diff --git a/tests/test-chat.cpp b/tests/test-chat.cpp
index e64e362129..d9f1eea2f2 100644
--- a/tests/test-chat.cpp
+++ b/tests/test-chat.cpp
@@ -369,6 +369,28 @@ static common_chat_tool amount_tool{
})",
};
+static common_chat_tool imaginary_number_tool{ // test fixture: single required argument "number" that is itself an object with required numeric "real" and "imaginary"
+ /* .name = */ "imaginary_number",
+ /* .description = */ "Imaginary number converter",
+ /* .parameters = */ R"({
+ "type": "object",
+ "properties": {
+ "number": {
+ "type": "object",
+ "properties": {
+ "real": {
+ "type": "number"
+ },
+ "imaginary": {
+ "type": "number"
+ }
+ },
+ "required": ["real", "imaginary"]
+ }
+ },
+ "required": ["number"]
+ })",
+};
static common_chat_tool string_param_tool{
/* .name = */ "string_param",
@@ -394,7 +416,7 @@ static common_chat_tool quoted_unquoted_tool{
"quoted": {
"type": "string",
"description": "Quoted value"
- },
+ },
"unquoted": {
"type": "string",
"description": "Unquoted value"
@@ -2323,6 +2345,25 @@ static void test_template_output_peg_parsers(bool detailed_debug) {
})
.run();
+ tst.test(
+ "Test imaginary number\n"
+ "\n"
+ "\n"
+ "\n"
+ "\n"
+ "{ \"real\": 3.14, \"imaginary\": 2.71 }\n"
+ "\n"
+ "\n"
+ "")
+ .enable_thinking(true)
+ .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+ .tools({ imaginary_number_tool })
+ .expect_reasoning("Test imaginary number")
+ .expect_tool_calls({
+ { "imaginary_number", R"({ "number" : {"real":3.14,"imaginary":2.71 } })", {} }
+ })
+ .run();
+
}
}