diff --git a/common/chat-auto-parser-generator.cpp b/common/chat-auto-parser-generator.cpp index 87d431add3..e9fe71c1d6 100644 --- a/common/chat-auto-parser-generator.cpp +++ b/common/chat-auto-parser-generator.cpp @@ -133,234 +133,252 @@ common_peg_parser universal_peg_generator::build_tool_parser( const templates_params & inputs, const common_peg_parser & reasoning) { + switch (analysis.tools) { + case tool_format::JSON_NATIVE: + return build_tool_parser_json_native(p, analysis, inputs, reasoning); + case tool_format::TAG_WITH_JSON: + return build_tool_parser_tag_json(p, analysis, inputs, reasoning); + case tool_format::TAG_WITH_TAGGED: + return build_tool_parser_tag_tagged(p, analysis, inputs, reasoning); + default: + GGML_ABORT("Unable to create tool parser"); + } +} + +common_peg_parser universal_peg_generator::build_tool_parser_json_native( + common_chat_peg_unified_builder & p, + const diff_analysis_result & analysis, + const templates_params & inputs, + const common_peg_parser & reasoning) { + const auto & m = analysis.markers; - // Build tool choice parser based on format + // Build effective field names with dot notation if function_field is set + std::string name_field = analysis.name_field; + std::string args_field = analysis.args_field; + + if (!analysis.function_field.empty() && + analysis.function_field != "function" && + name_field.find('.') == std::string::npos) { + name_field = analysis.function_field + "." + name_field; + args_field = analysis.function_field + "." + args_field; + } + + auto tools_parser = p.standard_json_tools( + m.tool_section_start, + m.tool_section_end, + inputs.tools, + inputs.parallel_tool_calls, + inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED, + name_field, + args_field, + analysis.tools_array_wrapped, + analysis.fun_name_is_key, + analysis.id_field, + analysis.gen_id_field, + analysis.parameter_order + ); + + // Handle content wrappers if present + if (analysis.content == content_mode::ALWAYS_WRAPPED && + !m.content_start.empty() && !m.content_end.empty()) { + auto wrapped_content = p.optional(m.content_start + p.content(p.until(m.content_end)) + m.content_end); + return reasoning + wrapped_content + tools_parser + p.end(); + } + + auto content_before_tools = m.tool_section_start.empty() ? p.eps() : p.until(m.tool_section_start); + return reasoning + p.optional(p.content(content_before_tools)) + tools_parser + p.end(); +} + +common_peg_parser universal_peg_generator::build_tool_parser_tag_json( + common_chat_peg_unified_builder & p, + const diff_analysis_result & analysis, + const templates_params & inputs, + const common_peg_parser & reasoning) { + + const auto & m = analysis.markers; common_peg_parser tool_choice = p.choice(); - if (analysis.tools == tool_format::JSON_NATIVE) { - // Pure JSON format: use standard_json_tools helper - // Build effective field names with dot notation if function_field is set - std::string name_field = analysis.name_field; - std::string args_field = analysis.args_field; + foreach_function(inputs.tools, [&](const json & tool) { + const auto & function = tool.at("function"); + std::string name = function.at("name"); + const auto & schema = function.at("parameters"); - if (!analysis.function_field.empty() && - analysis.function_field != "function" && - name_field.find('.') == std::string::npos) { - name_field = analysis.function_field + "." + name_field; - args_field = analysis.function_field + "." + args_field; + // Build call_id parser based on position (if supported) + common_peg_parser call_id_section = p.eps(); + if (analysis.call_id_pos == call_id_position::BETWEEN_FUNC_AND_ARGS && + !m.call_id_prefix.empty() && !m.call_id_suffix.empty()) { + call_id_section = p.optional(m.call_id_prefix + p.tool_id(p.until(m.call_id_suffix))) + m.call_id_suffix; } - auto tools_parser = p.standard_json_tools( - m.tool_section_start, - m.tool_section_end, - inputs.tools, - inputs.parallel_tool_calls, - inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED, - name_field, - args_field, - analysis.tools_array_wrapped, - analysis.fun_name_is_key, - analysis.id_field, - analysis.gen_id_field, - analysis.parameter_order - ); + auto func_parser = p.tool_open(m.func_name_prefix + p.tool_name(p.literal(name)) + m.func_name_suffix) + + call_id_section + + p.tool_args(p.schema(p.json(), "tool-" + name + "-schema", schema)); - // Handle content wrappers if present - if (analysis.content == content_mode::ALWAYS_WRAPPED && - !m.content_start.empty() && !m.content_end.empty()) { - auto wrapped_content = p.optional(m.content_start + p.content(p.until(m.content_end)) + m.content_end); - return reasoning + wrapped_content + tools_parser + p.end(); + if (!m.func_close.empty()) { + func_parser = func_parser + m.func_close; } - auto content_before_tools = m.tool_section_start.empty() ? p.eps() : p.until(m.tool_section_start); - return reasoning + p.optional(p.content(content_before_tools)) + tools_parser + p.end(); - } + tool_choice |= p.rule("tool-" + name, func_parser); + }); - if (analysis.tools == tool_format::TAG_WITH_JSON) { - // Tag-based with JSON args: {args} - // With optional call_id: [CALL_ID]id[ARGS]{args} - foreach_function(inputs.tools, [&](const json & tool) { - const auto & function = tool.at("function"); - std::string name = function.at("name"); - const auto & schema = function.at("parameters"); + auto require_calls = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED; - // Build call_id parser based on position (if supported) - common_peg_parser call_id_section = p.eps(); - if (analysis.call_id_pos == call_id_position::BETWEEN_FUNC_AND_ARGS && - !m.call_id_prefix.empty() && !m.call_id_suffix.empty()) { - // Optional call_id followed by required call_id_suffix (which is also args_start) - // Format: optional([CALL_ID] + call_id_value) + [ARGS] - call_id_section = p.optional(m.call_id_prefix + p.tool_id(p.until(m.call_id_suffix))) + m.call_id_suffix; - } + common_peg_parser tool_calls = p.eps(); - auto func_parser = p.tool_open(m.func_name_prefix + p.tool_name(p.literal(name)) + m.func_name_suffix) + - call_id_section + - p.tool_args(p.schema(p.json(), "tool-" + name + "-schema", schema)); - - if (!m.func_close.empty()) { - func_parser = func_parser + m.func_close; - } - - tool_choice |= p.rule("tool-" + name, func_parser); - }); - - auto require_calls = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED; - - common_peg_parser tool_calls = p.eps(); - - if (!m.per_call_start.empty()) { - // Per-call wrapping: each call individually wrapped - auto wrapped_call = m.per_call_start + tool_choice + m.per_call_end; - if (inputs.parallel_tool_calls) { - tool_calls = p.trigger_rule("tool-call", - wrapped_call + p.zero_or_more(p.space() + wrapped_call)); - } else { - tool_calls = p.trigger_rule("tool-call", wrapped_call); - } - if (!m.tool_section_start.empty()) { - tool_calls = p.trigger_rule("tool-calls", p.literal(m.tool_section_start) + p.space() + - tool_calls + p.space() + (m.tool_section_end.empty() ? p.end() : p.literal(m.tool_section_end))); - } + if (!m.per_call_start.empty()) { + auto wrapped_call = m.per_call_start + tool_choice + m.per_call_end; + if (inputs.parallel_tool_calls) { + tool_calls = p.trigger_rule("tool-call", + wrapped_call + p.zero_or_more(p.space() + wrapped_call)); } else { - std::string separator = m.call_separator; - if (separator.empty()) { - separator = ", "; // Default - } - - if (inputs.parallel_tool_calls) { - tool_calls = p.trigger_rule("tool-call", - m.tool_section_start + tool_choice + p.zero_or_more(separator + tool_choice) + m.tool_section_end); - } else { - tool_calls = p.trigger_rule("tool-call", - m.tool_section_start + tool_choice + m.tool_section_end); - } + tool_calls = p.trigger_rule("tool-call", wrapped_call); + } + if (!m.tool_section_start.empty()) { + tool_calls = p.trigger_rule("tool-calls", p.literal(m.tool_section_start) + p.space() + + tool_calls + p.space() + (m.tool_section_end.empty() ? p.end() : p.literal(m.tool_section_end))); + } + } else { + std::string separator = m.call_separator; + if (separator.empty()) { + separator = ", "; // Default } - if (!require_calls) { - tool_calls = p.optional(tool_calls); - } - - std::string trigger_marker = !m.tool_section_start.empty() ? m.tool_section_start : m.per_call_start; - auto content_before_tools = trigger_marker.empty() ? p.eps() : p.until(trigger_marker); - return reasoning + p.optional(p.content(content_before_tools)) + tool_calls + p.end(); - } - - if (analysis.tools == tool_format::TAG_WITH_TAGGED) { - // Tag-based with tagged args: value - foreach_function(inputs.tools, [&](const json & tool) { - const auto & function = tool.at("function"); - std::string name = function.at("name"); - const auto & params = function.at("parameters"); - - if (!params.contains("properties") || !params.at("properties").is_object()) { - return; - } - - const auto & properties = params.at("properties"); - std::set required; - if (params.contains("required") && params.at("required").is_array()) { - params.at("required").get_to(required); - } - - // Build parser for each argument - std::vector arg_parsers; - for (const auto & [param_name, param_schema] : properties.items()) { - bool is_required = required.find(param_name) != required.end(); - auto type = param_schema.value("type", "object"); - - auto arg = p.tool_arg( - p.tool_arg_open(m.arg_name_prefix + p.tool_arg_name(p.literal(param_name)) + m.arg_name_suffix) + m.arg_value_prefix + - (type == "string" ? - p.tool_arg_string_value(p.schema(p.until(m.arg_value_suffix), - "tool-" + name + "-arg-" + param_name + "-schema", param_schema, true)) : - p.tool_arg_json_value(p.schema(p.json(), - "tool-" + name + "-arg-" + param_name + "-schema", param_schema)) + p.space()) + - p.tool_arg_close(p.literal(m.arg_value_suffix)) - ); - - if (is_required) { - arg_parsers.push_back(p.rule("tool-" + name + "-arg-" + param_name, arg)); - } else { - arg_parsers.push_back(p.optional(p.rule("tool-" + name + "-arg-" + param_name, arg))); - } - } - - // Build arg sequence with space() between consecutive args - common_peg_parser args_seq = p.eps(); - for (size_t i = 0; i < arg_parsers.size(); i++) { - if (i > 0) { - args_seq = args_seq + p.space(); - } - args_seq = args_seq + arg_parsers[i]; - } - - // Build call_id parser based on position (if supported) - common_peg_parser call_id_section = p.eps(); - if (analysis.call_id_pos == call_id_position::BETWEEN_FUNC_AND_ARGS && - !m.call_id_prefix.empty() && !m.call_id_suffix.empty()) { - // Optional call_id followed by required call_id_suffix - call_id_section = p.optional(m.call_id_prefix + p.tool_id(p.until(m.call_id_suffix))) + m.call_id_suffix; - } - - auto func_parser = p.tool_open(m.func_name_prefix + p.tool_name(p.literal(name)) + m.func_name_suffix) + - call_id_section + - p.space() + args_seq; - - if (!m.func_close.empty()) { - func_parser = func_parser + p.space() + p.tool_close(p.literal(m.func_close)); - } else if (!m.per_call_end.empty()) { - // When there's no func_close but there is a per_call_end marker, use peek() to ensure - // we only emit tool_close when we can actually see the closing marker. This prevents - // premature closing during partial parsing when we've seen e.g. "" (end) or "" prefix that failed to match. - func_parser = func_parser + p.tool_close(p.peek(p.literal(m.per_call_end))); - } else { - func_parser = func_parser + p.tool_close(p.space()); // force this to process tool closing callbacks in mapper - } - - tool_choice |= p.rule("tool-" + name, func_parser); - }); - - auto require_tools = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED; - - common_peg_parser tool_calls = p.eps(); - - if (!m.per_call_start.empty()) { - // Per-call wrapping: each call individually wrapped (e.g., ...) - auto wrapped_call = m.per_call_start + p.space() + tool_choice + p.space() + m.per_call_end; - if (inputs.parallel_tool_calls) { - tool_calls = p.trigger_rule("tool-call", wrapped_call + p.zero_or_more(p.space() + wrapped_call)); - } else { - tool_calls = p.trigger_rule("tool-call", wrapped_call); - } - if (!m.tool_section_start.empty()) { - tool_calls = p.trigger_rule("tool-calls", p.literal(m.tool_section_start) + p.space() + - tool_calls + p.space() + (m.tool_section_end.empty() ? p.end() : p.literal(m.tool_section_end))); - } + if (inputs.parallel_tool_calls) { + tool_calls = p.trigger_rule("tool-call", + m.tool_section_start + tool_choice + p.zero_or_more(separator + tool_choice) + m.tool_section_end); } else { - std::string separator = m.call_separator; - if (separator.empty()) { - separator = ", "; // Default - } - - if (inputs.parallel_tool_calls) { - tool_calls = p.trigger_rule("tool-call", - m.tool_section_start + p.space() + tool_choice + p.zero_or_more(separator + tool_choice) + p.space() + m.tool_section_end); - } else { - tool_calls = p.trigger_rule("tool-call", - m.tool_section_start + p.space() + tool_choice + p.space() + m.tool_section_end); - } + tool_calls = p.trigger_rule("tool-call", + m.tool_section_start + tool_choice + m.tool_section_end); } - - if (!require_tools) { - tool_calls = p.optional(tool_calls); - } - - std::string trigger_marker = !m.tool_section_start.empty() ? m.tool_section_start : m.per_call_start; - auto content_before_tools = trigger_marker.empty() ? p.eps() : p.until(trigger_marker); - return reasoning + p.optional(p.content(content_before_tools)) + tool_calls + p.end(); } - GGML_ABORT("Unable to create tool parser"); + if (!require_calls) { + tool_calls = p.optional(tool_calls); + } + + std::string trigger_marker = !m.tool_section_start.empty() ? m.tool_section_start : m.per_call_start; + auto content_before_tools = trigger_marker.empty() ? p.eps() : p.until(trigger_marker); + return reasoning + p.optional(p.content(content_before_tools)) + tool_calls + p.end(); +} + +common_peg_parser universal_peg_generator::build_tool_parser_tag_tagged( + common_chat_peg_unified_builder & p, + const diff_analysis_result & analysis, + const templates_params & inputs, + const common_peg_parser & reasoning) { + + const auto & m = analysis.markers; + common_peg_parser tool_choice = p.choice(); + + foreach_function(inputs.tools, [&](const json & tool) { + const auto & function = tool.at("function"); + std::string name = function.at("name"); + const auto & params = function.at("parameters"); + + if (!params.contains("properties") || !params.at("properties").is_object()) { + return; + } + + const auto & properties = params.at("properties"); + std::set required; + if (params.contains("required") && params.at("required").is_array()) { + params.at("required").get_to(required); + } + + // Build parser for each argument + std::vector arg_parsers; + for (const auto & [param_name, param_schema] : properties.items()) { + bool is_required = required.find(param_name) != required.end(); + auto type = param_schema.value("type", "object"); + + auto arg = p.tool_arg( + p.tool_arg_open(m.arg_name_prefix + p.tool_arg_name(p.literal(param_name)) + m.arg_name_suffix) + m.arg_value_prefix + + (type == "string" ? + p.tool_arg_string_value(p.schema(p.until(m.arg_value_suffix), + "tool-" + name + "-arg-" + param_name + "-schema", param_schema, true)) : + p.tool_arg_json_value(p.schema(p.json(), + "tool-" + name + "-arg-" + param_name + "-schema", param_schema)) + p.space()) + + p.tool_arg_close(p.literal(m.arg_value_suffix)) + ); + + if (is_required) { + arg_parsers.push_back(p.rule("tool-" + name + "-arg-" + param_name, arg)); + } else { + arg_parsers.push_back(p.optional(p.rule("tool-" + name + "-arg-" + param_name, arg))); + } + } + + // Build arg sequence with space() between consecutive args + common_peg_parser args_seq = p.eps(); + for (size_t i = 0; i < arg_parsers.size(); i++) { + if (i > 0) { + args_seq = args_seq + p.space(); + } + args_seq = args_seq + arg_parsers[i]; + } + + // Build call_id parser based on position (if supported) + common_peg_parser call_id_section = p.eps(); + if (analysis.call_id_pos == call_id_position::BETWEEN_FUNC_AND_ARGS && + !m.call_id_prefix.empty() && !m.call_id_suffix.empty()) { + call_id_section = p.optional(m.call_id_prefix + p.tool_id(p.until(m.call_id_suffix))) + m.call_id_suffix; + } + + auto func_parser = p.tool_open(m.func_name_prefix + p.tool_name(p.literal(name)) + m.func_name_suffix) + + call_id_section + + p.space() + args_seq; + + if (!m.func_close.empty()) { + func_parser = func_parser + p.space() + p.tool_close(p.literal(m.func_close)); + } else if (!m.per_call_end.empty()) { + // When there's no func_close but there is a per_call_end marker, use peek() to ensure + // we only emit tool_close when we can actually see the closing marker. This prevents + // premature closing during partial parsing when we've seen e.g. "" (end) or "" prefix that failed to match. + func_parser = func_parser + p.tool_close(p.peek(p.literal(m.per_call_end))); + } else { + func_parser = func_parser + p.tool_close(p.space()); // force this to process tool closing callbacks in mapper + } + + tool_choice |= p.rule("tool-" + name, func_parser); + }); + + auto require_tools = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED; + + common_peg_parser tool_calls = p.eps(); + + if (!m.per_call_start.empty()) { + auto wrapped_call = m.per_call_start + p.space() + tool_choice + p.space() + m.per_call_end; + if (inputs.parallel_tool_calls) { + tool_calls = p.trigger_rule("tool-call", wrapped_call + p.zero_or_more(p.space() + wrapped_call)); + } else { + tool_calls = p.trigger_rule("tool-call", wrapped_call); + } + if (!m.tool_section_start.empty()) { + tool_calls = p.trigger_rule("tool-calls", p.literal(m.tool_section_start) + p.space() + + tool_calls + p.space() + (m.tool_section_end.empty() ? p.end() : p.literal(m.tool_section_end))); + } + } else { + std::string separator = m.call_separator; + if (separator.empty()) { + separator = ", "; // Default + } + + if (inputs.parallel_tool_calls) { + tool_calls = p.trigger_rule("tool-call", + m.tool_section_start + p.space() + tool_choice + p.zero_or_more(separator + tool_choice) + p.space() + m.tool_section_end); + } else { + tool_calls = p.trigger_rule("tool-call", + m.tool_section_start + p.space() + tool_choice + p.space() + m.tool_section_end); + } + } + + if (!require_tools) { + tool_calls = p.optional(tool_calls); + } + + std::string trigger_marker = !m.tool_section_start.empty() ? m.tool_section_start : m.per_call_start; + auto content_before_tools = trigger_marker.empty() ? p.eps() : p.until(trigger_marker); + return reasoning + p.optional(p.content(content_before_tools)) + tool_calls + p.end(); } diff --git a/common/chat-auto-parser.h b/common/chat-auto-parser.h index c6587667d1..40f1fbe1bb 100644 --- a/common/chat-auto-parser.h +++ b/common/chat-auto-parser.h @@ -51,4 +51,20 @@ class universal_peg_generator { const diff_analysis_result & analysis, const templates_params & inputs, const common_peg_parser & reasoning); + + // Per-format tool parser builders + static common_peg_parser build_tool_parser_json_native(common_chat_peg_unified_builder & p, + const diff_analysis_result & analysis, + const templates_params & inputs, + const common_peg_parser & reasoning); + + static common_peg_parser build_tool_parser_tag_json(common_chat_peg_unified_builder & p, + const diff_analysis_result & analysis, + const templates_params & inputs, + const common_peg_parser & reasoning); + + static common_peg_parser build_tool_parser_tag_tagged(common_chat_peg_unified_builder & p, + const diff_analysis_result & analysis, + const templates_params & inputs, + const common_peg_parser & reasoning); }; diff --git a/common/chat-diff-analyzer.h b/common/chat-diff-analyzer.h index 7933de5ce3..ce729df0e6 100644 --- a/common/chat-diff-analyzer.h +++ b/common/chat-diff-analyzer.h @@ -169,11 +169,7 @@ enum class tool_format { NONE, // No tool support detected JSON_NATIVE, // Pure JSON: {"name": "X", "arguments": {...}} TAG_WITH_JSON, // Tag-based with JSON args: {...} - BRACKET_TAG, // Bracket-tag: [TOOL_CALLS]name[CALL_ID]id[ARGS]{...} - PREFIXED_INDEXED, // Prefixed-indexed: functions.X:0{...} - RECIPIENT_BASED, // Recipient routing: >>>func_name\n{...} TAG_WITH_TAGGED, // Tag-based with tagged args: value - MARKDOWN_BLOCK, // Markdown code block: Action:\n```json\n[...]\n``` }; inline std::ostream & operator<<(std::ostream & os, const tool_format & format) { @@ -184,16 +180,8 @@ inline std::ostream & operator<<(std::ostream & os, const tool_format & format) return os << "JSON_NATIVE"; case tool_format::TAG_WITH_JSON: return os << "TAG_WITH_JSON"; - case tool_format::BRACKET_TAG: - return os << "BRACKET_TAG"; - case tool_format::PREFIXED_INDEXED: - return os << "PREFIXED_INDEXED"; - case tool_format::RECIPIENT_BASED: - return os << "RECIPIENT_BASED"; case tool_format::TAG_WITH_TAGGED: return os << "TAG_WITH_TAGGED"; - case tool_format::MARKDOWN_BLOCK: - return os << "MARKDOWN_BLOCK"; default: return os << "UNKNOWN"; } diff --git a/common/chat-peg-parser.cpp b/common/chat-peg-parser.cpp index 2922c8d582..cb38fb160f 100644 --- a/common/chat-peg-parser.cpp +++ b/common/chat-peg-parser.cpp @@ -35,6 +35,45 @@ static std::string_view trim(std::string_view sv) { return trim_trailing_space(trim_leading_space(sv, 1)); } +// Count the number of unclosed '{' braces in a JSON-like string, +// properly skipping braces inside quoted strings. +static int json_brace_depth(const std::string & s) { + int depth = 0; + bool in_string = false; + bool escaped = false; + for (char c : s) { + if (escaped) { + escaped = false; + continue; + } + if (c == '\\' && in_string) { + escaped = true; + continue; + } + if (c == '"') { + in_string = !in_string; + continue; + } + if (!in_string) { + if (c == '{') { + depth++; + } else if (c == '}') { + depth--; + } + } + } + return depth; +} + +// JSON-escape a string and return the inner content (without surrounding quotes). +static std::string escape_json_string_inner(const std::string & s) { + std::string escaped = json(s).dump(); + if (escaped.size() >= 2 && escaped.front() == '"' && escaped.back() == '"') { + return escaped.substr(1, escaped.size() - 2); + } + return escaped; +} + // Convert Python-style single-quoted strings to JSON double-quoted strings // Only converts outer string delimiters, properly handling escape sequences: // - {'key': 'value'} -> {"key": "value"} @@ -148,6 +187,10 @@ common_peg_parser common_chat_peg_builder::tag_with_safe_content(const std::stri return zero_or_more(choice({ p, content_chunk })); } +std::string & common_chat_peg_unified_mapper::args_target() { + return (current_tool && !current_tool->name.empty()) ? current_tool->arguments : args_buffer; +} + void common_chat_peg_unified_mapper::from_ast(const common_peg_ast_arena & arena, const common_peg_parse_result & parse_result_arg) { // Call base class to visit all nodes @@ -156,15 +199,12 @@ void common_chat_peg_unified_mapper::from_ast(const common_peg_ast_arena & ar // Flush any pending tool call that was started but never got a name // This happens during partial parsing when the tool call is incomplete if (pending_tool_call.has_value() && !pending_tool_call->name.empty()) { - // Transfer any buffered arguments if (!args_buffer.empty()) { pending_tool_call->arguments = args_buffer; } - // Close any open quotes in buffered args - if (buffer_needs_closing_quote && !pending_tool_call->arguments.empty()) { + if (closing_quote_pending && !pending_tool_call->arguments.empty()) { pending_tool_call->arguments += "\""; } - // Add the incomplete tool call to results result.tool_calls.push_back(pending_tool_call.value()); pending_tool_call.reset(); } @@ -187,15 +227,11 @@ void common_chat_peg_unified_mapper::map(const common_peg_ast_node & node) { bool is_arg_string_value = node.tag == common_chat_peg_unified_builder::TOOL_ARG_STRING_VALUE; if (is_tool_open) { - // Don't create tool call yet - wait for name to be known - // This prevents sending incomplete tool calls in streaming mode - pending_tool_call = common_chat_tool_call(); - current_tool = &pending_tool_call.value(); - arg_count = 0; - // Clear the arguments buffer for the new tool + pending_tool_call = common_chat_tool_call(); + current_tool = &pending_tool_call.value(); + arg_count = 0; args_buffer.clear(); - needs_closing_quote = false; - buffer_needs_closing_quote = false; + closing_quote_pending = false; } if (is_tool_id && current_tool) { @@ -208,15 +244,14 @@ void common_chat_peg_unified_mapper::map(const common_peg_ast_node & node) { if (is_tool_name && current_tool) { current_tool->name = std::string(trim_trailing_space(node.text)); - // Now that we have the name, we can populate the arguments from the buffer + // Now that we have the name, populate the arguments from the buffer if (!args_buffer.empty()) { current_tool->arguments = args_buffer; args_buffer.clear(); } else if (current_tool->arguments.empty()) { - // Initialize arguments if we're using tagged format and no buffered args current_tool->arguments = "{"; } - // Now that we have the name, add the tool call to the result + // Add the tool call to results so streaming can see it if (pending_tool_call.has_value()) { result.tool_calls.push_back(pending_tool_call.value()); pending_tool_call.reset(); @@ -225,28 +260,16 @@ void common_chat_peg_unified_mapper::map(const common_peg_ast_node & node) { } if (is_tool_args && current_tool) { - // For JSON format, the arguments come as a complete JSON object - // For tagged format, we build up arguments from individual arg_name/arg_value nodes - // Check if this looks like JSON (starts with {) vs tagged format (starts with <) + // For JSON format: arguments come as a complete JSON object + // For tagged format: built up from individual arg_name/arg_value nodes auto text = trim_trailing_space(node.text); if (!text.empty() && text.front() == '{') { - // If we have the tool name, populate directly; otherwise buffer - if (!current_tool->name.empty()) { - current_tool->arguments = std::string(text); - } else { - args_buffer = std::string(text); - } + args_target() = std::string(text); } - // If it's tagged format, we ignore this and let arg_name/arg_value build up the JSON } if (is_arg_open) { - // Reset for new argument - if (!current_tool->name.empty()) { - needs_closing_quote = false; - } else { - buffer_needs_closing_quote = false; - } + closing_quote_pending = false; } if (is_arg_name && current_tool) { @@ -257,15 +280,11 @@ void common_chat_peg_unified_mapper::map(const common_peg_ast_node & node) { arg_entry += json(trim(node.text)).dump() + ":"; ++arg_count; - // If we have the tool name, add directly; otherwise buffer - if (!current_tool->name.empty()) { - current_tool->arguments += arg_entry; - } else { - if (args_buffer.empty()) { - args_buffer = "{"; - } - args_buffer += arg_entry; + auto & target = args_target(); + if (target.empty()) { + target = "{"; } + target += arg_entry; } if ((is_arg_value || is_arg_string_value) && current_tool) { @@ -273,160 +292,83 @@ void common_chat_peg_unified_mapper::map(const common_peg_ast_node & node) { std::string value_to_add; if (value_content.empty() && is_arg_string_value) { - // Empty string value - start with opening quote - // arg_close will add the closing quote - if (!current_tool->name.empty()) { - value_to_add = "\""; - needs_closing_quote = true; - } else { - value_to_add = "\""; - buffer_needs_closing_quote = true; - } + // Empty string value - arg_close will add the closing quote + value_to_add = "\""; + closing_quote_pending = true; } else if (!value_content.empty() && is_arg_string_value) { // Schema declares this as string type - always treat as literal string value - // Never try to parse as JSON (this ensures consistent handling of quoted strings - // like "foo" which would otherwise be parsed as JSON string 'foo') - if (!current_tool->name.empty()) { - if (!needs_closing_quote) { - value_to_add = "\""; - needs_closing_quote = true; - } - } else { - if (!buffer_needs_closing_quote) { - value_to_add = "\""; - buffer_needs_closing_quote = true; - } + if (!closing_quote_pending) { + value_to_add = "\""; + closing_quote_pending = true; } - // Escape special characters in the string content - std::string escaped = json(value_content).dump(); - // Remove the surrounding quotes from the escaped string - if (escaped.size() >= 2 && escaped.front() == '"' && escaped.back() == '"') { - escaped = escaped.substr(1, escaped.size() - 2); - } - value_to_add += escaped; + value_to_add += escape_json_string_inner(value_content); } else if (!value_content.empty()) { - // For potential containers, normalize Python-style single quotes to JSON double quotes first - // This ensures consistent output during both partial and final parsing + // For potential containers, normalize Python-style single quotes to JSON double quotes bool is_potential_container = value_content[0] == '[' || value_content[0] == '{'; if (is_potential_container) { value_content = normalize_quotes_to_json(value_content); } // Try to parse as JSON value (number, bool, null, object, array) - // For strings, we need special handling to support incremental parsing try { json parsed = json::parse(value_content); if (parsed.is_string()) { - // For string values, don't add closing quote yet (added by arg_close) - // This ensures incremental parsing produces monotonic arguments + // Don't add closing quote yet (added by arg_close) for monotonic streaming std::string escaped = parsed.dump(); - // Remove the trailing quote if (!escaped.empty() && escaped.back() == '"') { escaped.pop_back(); } - value_to_add = escaped; - if (!current_tool->name.empty()) { - needs_closing_quote = true; - } else { - buffer_needs_closing_quote = true; - } + value_to_add = escaped; + closing_quote_pending = true; } else { - // For non-string values (number, bool, null, object, array), add raw value content - // Using raw content instead of dump() ensures monotonicity for streaming - // (prevents issues with spaces being removed by dump()) + // Non-string values: use raw content to preserve whitespace for monotonicity value_to_add = value_content; } } catch (...) { - // JSON parsing failed - content is either incomplete (partial) or not valid JSON - // Note: potential containers were already normalized above, so value_content - // already has double quotes if it started with [ or { - if (node.is_partial && is_potential_container) { - // During incremental parsing, if it looks like a JSON container, don't wrap in quotes yet - // and don't escape. Just pass through the (already normalized) content. + // Partial container: pass through the already-normalized content value_to_add = value_content; } else { - // Not valid JSON and NOT a potential partial container - treat as string value - // Add opening quote if not already in a string - if (!current_tool->name.empty()) { - if (!needs_closing_quote) { - value_to_add = "\""; - needs_closing_quote = true; - } - } else { - if (!buffer_needs_closing_quote) { - value_to_add = "\""; - buffer_needs_closing_quote = true; - } + // Not valid JSON - treat as string value + if (!closing_quote_pending) { + value_to_add = "\""; + closing_quote_pending = true; } - // Escape special characters in the string content - std::string escaped = json(value_content).dump(); - // Remove the surrounding quotes from the escaped string - if (escaped.size() >= 2 && escaped.front() == '"' && escaped.back() == '"') { - escaped = escaped.substr(1, escaped.size() - 2); - } - value_to_add += escaped; + value_to_add += escape_json_string_inner(value_content); } } } - // If we have the tool name, add directly; otherwise buffer - if (!current_tool->name.empty()) { - current_tool->arguments += value_to_add; - } else { - if (args_buffer.empty()) { - args_buffer = "{"; - } - args_buffer += value_to_add; - } + args_target() += value_to_add; } if (is_arg_close && current_tool) { - if (!current_tool->name.empty()) { - if (needs_closing_quote) { - current_tool->arguments += "\""; - needs_closing_quote = false; - } - } else { - if (buffer_needs_closing_quote) { - if (args_buffer.empty()) { - args_buffer = "{"; - } - args_buffer += "\""; - buffer_needs_closing_quote = false; - } + if (closing_quote_pending) { + args_target() += "\""; + closing_quote_pending = false; } } if (is_tool_close && current_tool) { - if (!current_tool->name.empty()) { - if (needs_closing_quote) { - current_tool->arguments += "\""; - needs_closing_quote = false; - } - if (!current_tool->arguments.empty() && current_tool->arguments.back() != '}') { - current_tool->arguments += "}"; - } - // If we have a pending tool call that wasn't added yet, add it now - if (pending_tool_call.has_value()) { + // Flush buffer to arguments if tool name was never seen + if (current_tool->name.empty() && !args_buffer.empty()) { + current_tool->arguments = args_buffer; + args_buffer.clear(); + } + // Close any pending string quote + if (closing_quote_pending) { + current_tool->arguments += "\""; + closing_quote_pending = false; + } + // Close any unclosed braces (accounts for nested objects) + for (int d = json_brace_depth(current_tool->arguments); d > 0; d--) { + current_tool->arguments += "}"; + } + // Add tool call to results if named; otherwise discard + if (pending_tool_call.has_value()) { + if (!current_tool->name.empty()) { result.tool_calls.push_back(pending_tool_call.value()); - pending_tool_call.reset(); } - } else { - // We're closing a tool without a name - flush the buffer - if (!args_buffer.empty()) { - current_tool->arguments = args_buffer; - args_buffer.clear(); - } - if (buffer_needs_closing_quote) { - current_tool->arguments += "\""; - buffer_needs_closing_quote = false; - } - // Close the arguments object if using tagged format - if (!current_tool->arguments.empty() && current_tool->arguments.back() != '}') { - current_tool->arguments += "}"; - } - // Don't add to result if no name - this prevents incomplete tool calls pending_tool_call.reset(); } } @@ -511,6 +453,241 @@ static std::pair parse_key_spec(const std::string & ke return {key.substr(0, dot_pos), key.substr(dot_pos + 1)}; } +// Mode 1: function_is_key — parse {"function_name": {...}} +common_peg_parser common_chat_peg_unified_builder::build_json_tools_function_is_key( + const nlohmann::json & tools, + const std::string & args_key, + const std::string & effective_args_key, + const std::string & call_id_key, + const std::string & gen_call_id_key) { + + auto tool_choices = choice(); + + for (const auto & tool_def : tools) { + if (!tool_def.contains("function")) { + continue; + } + const auto & function = tool_def.at("function"); + std::string name = function.at("name"); + nlohmann::json params = function.contains("parameters") ? function.at("parameters") : nlohmann::json::object(); + + // Build inner object fields + std::vector inner_fields; + + if (!call_id_key.empty()) { + auto id_parser = atomic( + literal("\"" + call_id_key + "\"") + space() + literal(":") + space() + + literal("\"") + tool_id(json_string_content()) + literal("\"") + ); + inner_fields.push_back(optional(id_parser + space() + optional(literal(",") + space()))); + } + + if (!gen_call_id_key.empty()) { + auto gen_id_parser = atomic( + literal("\"" + gen_call_id_key + "\"") + space() + literal(":") + space() + + choice({ + literal("\"") + tool_id(json_string_content()) + literal("\""), + tool_id(json_number()) + }) + ); + inner_fields.push_back(optional(gen_id_parser + space() + optional(literal(",") + space()))); + } + + // Arguments — either wrapped in args_key or parsed directly + common_peg_parser args_parser = eps(); + if (args_key.empty()) { + args_parser = tool_args(schema(json(), "tool-" + name + "-schema", params)); + } else { + args_parser = literal("\"" + effective_args_key + "\"") + space() + literal(":") + space() + + tool_args(schema(json(), "tool-" + name + "-schema", params)); + } + inner_fields.push_back(args_parser); + + // Build inner object parser + common_peg_parser inner_object = eps(); + if (args_key.empty() && inner_fields.size() == 1) { + inner_object = inner_fields[0]; + } else { + inner_object = literal("{") + space(); + for (size_t i = 0; i < inner_fields.size(); i++) { + inner_object = inner_object + inner_fields[i]; + if (i < inner_fields.size() - 1) { + inner_object = inner_object + space(); + } + } + inner_object = inner_object + space() + literal("}"); + } + + auto tool_parser = tool( + tool_open(literal("{")) + space() + + literal("\"") + tool_name(literal(name)) + literal("\"") + + space() + literal(":") + space() + + inner_object + + space() + tool_close(literal("}")) + ); + + tool_choices |= rule("tool-" + name, tool_parser); + } + + return tool_choices; +} + +// Mode 2: Nested keys (dot notation like "function.name") +common_peg_parser common_chat_peg_unified_builder::build_json_tools_nested_keys( + const nlohmann::json & tools, + const std::string & effective_name_key, + const std::string & effective_args_key, + const std::string & call_id_key, + const std::string & gen_call_id_key) { + + auto tool_choices = choice(); + + auto name_spec = parse_key_spec(effective_name_key); + auto args_spec = parse_key_spec(effective_args_key); + + std::string nested_prefix = !name_spec.first.empty() ? name_spec.first : args_spec.first; + std::string nested_name_field = !name_spec.first.empty() ? name_spec.second : effective_name_key; + std::string nested_args_field = !args_spec.first.empty() ? args_spec.second : effective_args_key; + + for (const auto & tool_def : tools) { + if (!tool_def.contains("function")) { + continue; + } + const auto & function = tool_def.at("function"); + std::string name = function.at("name"); + nlohmann::json params = function.contains("parameters") ? function.at("parameters") : nlohmann::json::object(); + + auto nested_name = literal("\"" + nested_name_field + "\"") + space() + literal(":") + space() + + literal("\"") + tool_name(literal(name)) + literal("\""); + auto nested_args = literal("\"" + nested_args_field + "\"") + space() + literal(":") + space() + + tool_args(schema(json(), "tool-" + name + "-schema", params)); + + auto nested_object = literal("{") + space() + + nested_name + space() + literal(",") + space() + + nested_args + + space() + literal("}"); + + // Format: { id?, "function": {...} } + auto tool_parser_body = tool_open(literal("{")) + space(); + + if (!call_id_key.empty()) { + auto id_spec = parse_key_spec(call_id_key); + if (id_spec.first.empty()) { + auto id_parser = atomic( + literal("\"" + call_id_key + "\"") + space() + literal(":") + space() + + literal("\"") + tool_id(json_string_content()) + literal("\"") + ); + tool_parser_body = tool_parser_body + optional(id_parser + space() + literal(",") + space()); + } + } + + if (!gen_call_id_key.empty()) { + auto gen_id_spec = parse_key_spec(gen_call_id_key); + if (gen_id_spec.first.empty()) { + auto gen_id_parser = atomic( + literal("\"" + gen_call_id_key + "\"") + space() + literal(":") + space() + + choice({ + literal("\"") + tool_id(json_string_content()) + literal("\""), + tool_id(json_number()) + }) + ); + tool_parser_body = tool_parser_body + optional(gen_id_parser + space() + literal(",") + space()); + } + } + + auto nested_field = literal("\"" + nested_prefix + "\"") + space() + literal(":") + space() + nested_object; + tool_parser_body = tool_parser_body + nested_field + space() + tool_close(literal("}")); + + tool_choices |= rule("tool-" + name, tool(tool_parser_body)); + } + + return tool_choices; +} + +// Mode 3: Flat keys with optional ID fields and parameter ordering +common_peg_parser common_chat_peg_unified_builder::build_json_tools_flat_keys( + const nlohmann::json & tools, + const std::string & effective_name_key, + const std::string & effective_args_key, + const std::string & call_id_key, + const std::string & gen_call_id_key, + const std::vector & parameters_order) { + + auto tool_choices = choice(); + auto name_key_parser = literal("\"" + effective_name_key + "\""); + auto args_key_parser = literal("\"" + effective_args_key + "\""); + + for (const auto & tool_def : tools) { + if (!tool_def.contains("function")) { + continue; + } + const auto & function = tool_def.at("function"); + std::string name = function.at("name"); + nlohmann::json params = function.contains("parameters") ? function.at("parameters") : nlohmann::json::object(); + + auto tool_name_ = name_key_parser + space() + literal(":") + space() + + literal("\"") + tool_name(literal(name)) + literal("\""); + auto tool_args_ = args_key_parser + space() + literal(":") + space() + + tool_args(schema(json(), "tool-" + name + "-schema", params)); + + // Build ID parsers if keys are provided + common_peg_parser id_parser = eps(); + if (!call_id_key.empty()) { + id_parser = atomic( + literal("\"" + call_id_key + "\"") + space() + literal(":") + space() + + choice({ + literal("\"") + tool_id(json_string_content()) + literal("\""), + tool_id(json_number()) + }) + ); + } + + common_peg_parser gen_id_parser = eps(); + if (!gen_call_id_key.empty()) { + gen_id_parser = atomic( + literal("\"" + gen_call_id_key + "\"") + space() + literal(":") + space() + + choice({ + literal("\"") + tool_id(json_string_content()) + literal("\""), + tool_id(json_number()) + }) + ); + } + + // Create (parser, key) pairs for all fields, then sort by parameters_order + std::vector> parser_pairs; + parser_pairs.emplace_back(tool_name_, effective_name_key); + parser_pairs.emplace_back(tool_args_, effective_args_key); + if (!call_id_key.empty()) { + parser_pairs.emplace_back(optional(id_parser), call_id_key); + } + if (!gen_call_id_key.empty()) { + parser_pairs.emplace_back(optional(gen_id_parser), gen_call_id_key); + } + + std::sort(parser_pairs.begin(), parser_pairs.end(), + [¶meters_order](const auto & a, const auto & b) { + auto pos_a = std::find(parameters_order.begin(), parameters_order.end(), a.second); + auto pos_b = std::find(parameters_order.begin(), parameters_order.end(), b.second); + size_t idx_a = (pos_a == parameters_order.end()) ? parameters_order.size() : std::distance(parameters_order.begin(), pos_a); + size_t idx_b = (pos_b == parameters_order.end()) ? parameters_order.size() : std::distance(parameters_order.begin(), pos_b); + return idx_a < idx_b; + }); + + auto ordered_body = tool_open(literal("{")) + space(); + for (size_t i = 0; i < parser_pairs.size(); i++) { + ordered_body = ordered_body + parser_pairs[i].first; + if (i < parser_pairs.size() - 1) { + ordered_body = ordered_body + space() + literal(",") + space(); + } + } + ordered_body = ordered_body + space() + tool_close(literal("}")); + + tool_choices |= rule("tool-" + name, tool(ordered_body)); + } + + return tool_choices; +} + common_peg_parser common_chat_peg_unified_builder::standard_json_tools( const std::string & section_start, const std::string & section_end, @@ -528,239 +705,20 @@ common_peg_parser common_chat_peg_unified_builder::standard_json_tools( return eps(); } - // Build tool choices for JSON format - auto tool_choices = choice(); - // auto other_member = json_string() + space() + literal(":") + space() + json(); - - // Determine effective field names std::string effective_name_key = name_key.empty() ? "name" : name_key; std::string effective_args_key = args_key.empty() ? "arguments" : args_key; - // Check if we have nested keys (dot notation) - auto name_spec = parse_key_spec(effective_name_key); - auto args_spec = parse_key_spec(effective_args_key); - bool has_nested_keys = !name_spec.first.empty() || !args_spec.first.empty(); - - // Mode 1: function_is_key - parse {"function_name": {...}} + // Dispatch to the appropriate builder based on the JSON layout mode + common_peg_parser tool_choices = eps(); if (function_is_key) { - for (const auto & tool_def : tools) { - if (!tool_def.contains("function")) { - continue; - } - const auto & function = tool_def.at("function"); - std::string name = function.at("name"); - nlohmann::json params = function.contains("parameters") ? function.at("parameters") : nlohmann::json::object(); - - // Build inner object fields - std::vector inner_fields; - - // Add optional string ID field - if (!call_id_key.empty()) { - auto id_parser = atomic( - literal("\"" + call_id_key + "\"") + space() + literal(":") + space() + - literal("\"") + tool_id(json_string_content()) + literal("\"") - ); - inner_fields.push_back(optional(id_parser + space() + optional(literal(",") + space()))); - } - - // Add optional generated integer ID field - if (!gen_call_id_key.empty()) { - auto gen_id_parser = atomic( - literal("\"" + gen_call_id_key + "\"") + space() + literal(":") + space() + - choice({ - literal("\"") + tool_id(json_string_content()) + literal("\""), - tool_id(json_number()) - }) - ); - inner_fields.push_back(optional(gen_id_parser + space() + optional(literal(",") + space()))); - } - - // Add arguments - either wrapped in args_key or parsed directly - common_peg_parser args_parser = eps(); - if (args_key.empty()) { - // Arguments are directly the inner object value: {"func_name": {"arg1": "val"}} - args_parser = tool_args(schema(json(), "tool-" + name + "-schema", params)); - } else { - // Arguments are wrapped in a key: {"func_name": {"arguments": {"arg1": "val"}}} - args_parser = literal("\"" + effective_args_key + "\"") + space() + literal(":") + space() + - tool_args(schema(json(), "tool-" + name + "-schema", params)); - } - inner_fields.push_back(args_parser); - - // Build inner object parser - no greedy other_member skipping to avoid consuming ID - common_peg_parser inner_object = eps(); - if (args_key.empty() && inner_fields.size() == 1) { - // Direct arguments: {"func_name": {"arg1": "val"}} - // The args_parser is already the full object schema - inner_object = inner_fields[0]; - } else { - // Wrapped arguments: {"func_name": {"arguments": {"arg1": "val"}}} - inner_object = literal("{") + space(); - for (size_t i = 0; i < inner_fields.size(); i++) { - inner_object = inner_object + inner_fields[i]; - if (i < inner_fields.size() - 1) { - inner_object = inner_object + space(); - } - } - inner_object = inner_object + space() + literal("}"); - } - - // Tool call format: { "function_name": { inner_object } } - auto tool_parser = tool( - tool_open(literal("{")) + space() + - literal("\"") + tool_name(literal(name)) + literal("\"") + - space() + literal(":") + space() + - inner_object + - space() + tool_close(literal("}")) - ); - - tool_choices |= rule("tool-" + name, tool_parser); - } - } - // Mode 2: Nested keys (dot notation like "function.name") - else if (has_nested_keys) { - // Group fields by prefix - std::string nested_prefix = !name_spec.first.empty() ? name_spec.first : args_spec.first; - std::string nested_name_field = !name_spec.first.empty() ? name_spec.second : effective_name_key; - std::string nested_args_field = !args_spec.first.empty() ? args_spec.second : effective_args_key; - - for (const auto & tool_def : tools) { - if (!tool_def.contains("function")) { - continue; - } - const auto & function = tool_def.at("function"); - std::string name = function.at("name"); - nlohmann::json params = function.contains("parameters") ? function.at("parameters") : nlohmann::json::object(); - - // Build nested object with name and arguments - auto nested_name = literal("\"" + nested_name_field + "\"") + space() + literal(":") + space() + - literal("\"") + tool_name(literal(name)) + literal("\""); - auto nested_args = literal("\"" + nested_args_field + "\"") + space() + literal(":") + space() + - tool_args(schema(json(), "tool-" + name + "-schema", params)); - - auto nested_object = literal("{") + space() + - nested_name + space() + literal(",") + space() + - nested_args + - space() + literal("}"); - - // Build top-level parser - simpler structure without greedy other_member skipping - // Format: { id?, "function": {...} } - auto tool_parser_body = tool_open(literal("{")) + space(); - - // Add optional string ID field at top level - if (!call_id_key.empty()) { - auto id_spec = parse_key_spec(call_id_key); - if (id_spec.first.empty()) { // Top-level ID field - auto id_parser = atomic( - literal("\"" + call_id_key + "\"") + space() + literal(":") + space() + - literal("\"") + tool_id(json_string_content()) + literal("\"") - ); - tool_parser_body = tool_parser_body + optional(id_parser + space() + literal(",") + space()); - } - } - - // Add optional generated integer ID field at top level - if (!gen_call_id_key.empty()) { - auto gen_id_spec = parse_key_spec(gen_call_id_key); - if (gen_id_spec.first.empty()) { // Top-level gen ID field - auto gen_id_parser = atomic( - literal("\"" + gen_call_id_key + "\"") + space() + literal(":") + space() + - choice({ - literal("\"") + tool_id(json_string_content()) + literal("\""), - tool_id(json_number()) - }) - ); - tool_parser_body = tool_parser_body + optional(gen_id_parser + space() + literal(",") + space()); - } - } - - // Add the nested object field - auto nested_field = literal("\"" + nested_prefix + "\"") + space() + literal(":") + space() + nested_object; - tool_parser_body = tool_parser_body + nested_field + space() + tool_close(literal("}")); - - tool_choices |= rule("tool-" + name, tool(tool_parser_body)); - } - } - // Mode 3: Flat keys (enhanced with ID fields and parameter ordering) - else { - auto name_key_parser = literal("\"" + effective_name_key + "\""); - auto args_key_parser = literal("\"" + effective_args_key + "\""); - - for (const auto & tool_def : tools) { - if (!tool_def.contains("function")) { - continue; - } - const auto & function = tool_def.at("function"); - std::string name = function.at("name"); - nlohmann::json params = function.contains("parameters") ? function.at("parameters") : nlohmann::json::object(); - - auto tool_name_ = name_key_parser + space() + literal(":") + space() + - literal("\"") + tool_name(literal(name)) + literal("\""); - auto tool_args_ = args_key_parser + space() + literal(":") + space() + - tool_args(schema(json(), "tool-" + name + "-schema", params)); - - // Build ID parsers if keys are provided - common_peg_parser id_parser = eps(); - if (!call_id_key.empty()) { - id_parser = atomic( - literal("\"" + call_id_key + "\"") + space() + literal(":") + space() + - choice({ - literal("\"") + tool_id(json_string_content()) + literal("\""), - tool_id(json_number()) - }) - ); - } - - common_peg_parser gen_id_parser = eps(); - if (!gen_call_id_key.empty()) { - gen_id_parser = atomic( - literal("\"" + gen_call_id_key + "\"") + space() + literal(":") + space() + - choice({ - literal("\"") + tool_id(json_string_content()) + literal("\""), - tool_id(json_number()) - }) - ); - } - - common_peg_parser tool_parser = eps(); - - // Use parameter ordering if provided - parse fields in specified order without greedy skipping - if (!parameters_order.empty()) { - } - // Build parser using parameter ordering (works with or without explicit parameters_order) - // Create list of (parser, key) pairs for all fields - std::vector> parser_pairs; - parser_pairs.emplace_back(tool_name_, effective_name_key); - parser_pairs.emplace_back(tool_args_, effective_args_key); - if (!call_id_key.empty()) { - parser_pairs.emplace_back(optional(id_parser), call_id_key); - } - if (!gen_call_id_key.empty()) { - parser_pairs.emplace_back(optional(gen_id_parser), gen_call_id_key); - } - - // Sort by position in parameters_order (or at end if not present) - std::sort(parser_pairs.begin(), parser_pairs.end(), - [¶meters_order](const auto & a, const auto & b) { - auto pos_a = std::find(parameters_order.begin(), parameters_order.end(), a.second); - auto pos_b = std::find(parameters_order.begin(), parameters_order.end(), b.second); - size_t idx_a = (pos_a == parameters_order.end()) ? parameters_order.size() : std::distance(parameters_order.begin(), pos_a); - size_t idx_b = (pos_b == parameters_order.end()) ? parameters_order.size() : std::distance(parameters_order.begin(), pos_b); - return idx_a < idx_b; - }); - - // Build ordered parser - auto ordered_body = tool_open(literal("{")) + space(); - for (size_t i = 0; i < parser_pairs.size(); i++) { - ordered_body = ordered_body + parser_pairs[i].first; - if (i < parser_pairs.size() - 1) { - ordered_body = ordered_body + space() + literal(",") + space(); - } - } - ordered_body = ordered_body + space() + tool_close(literal("}")); - tool_parser = tool(ordered_body); - - tool_choices |= rule("tool-" + name, tool_parser); + tool_choices = build_json_tools_function_is_key(tools, args_key, effective_args_key, call_id_key, gen_call_id_key); + } else { + auto name_spec = parse_key_spec(effective_name_key); + auto args_spec = parse_key_spec(effective_args_key); + if (!name_spec.first.empty() || !args_spec.first.empty()) { + tool_choices = build_json_tools_nested_keys(tools, effective_name_key, effective_args_key, call_id_key, gen_call_id_key); + } else { + tool_choices = build_json_tools_flat_keys(tools, effective_name_key, effective_args_key, call_id_key, gen_call_id_key, parameters_order); } } @@ -770,7 +728,6 @@ common_peg_parser common_chat_peg_unified_builder::standard_json_tools( tool_calls = tool_calls + zero_or_more(space() + literal(",") + space() + tool_choices); } - // Optionally wrap in array brackets if (array_wrapped) { tool_calls = literal("[") + space() + tool_calls + space() + literal("]"); } diff --git a/common/chat-peg-parser.h b/common/chat-peg-parser.h index f5d49a403a..c0392f0c5d 100644 --- a/common/chat-peg-parser.h +++ b/common/chat-peg-parser.h @@ -108,6 +108,27 @@ class common_chat_peg_unified_builder : public common_chat_peg_builder { const nlohmann::json & tools, bool parallel_tool_calls, bool force_tool_calls); + + private: + // Implementation helpers for standard_json_tools — one per JSON tool call layout mode + common_peg_parser build_json_tools_function_is_key(const nlohmann::json & tools, + const std::string & args_key, + const std::string & effective_args_key, + const std::string & call_id_key, + const std::string & gen_call_id_key); + + common_peg_parser build_json_tools_nested_keys(const nlohmann::json & tools, + const std::string & effective_name_key, + const std::string & effective_args_key, + const std::string & call_id_key, + const std::string & gen_call_id_key); + + common_peg_parser build_json_tools_flat_keys(const nlohmann::json & tools, + const std::string & effective_name_key, + const std::string & effective_args_key, + const std::string & call_id_key, + const std::string & gen_call_id_key, + const std::vector & parameters_order); }; inline common_peg_arena build_chat_peg_unified_parser( @@ -119,11 +140,14 @@ inline common_peg_arena build_chat_peg_unified_parser( class common_chat_peg_unified_mapper : public common_chat_peg_mapper { std::optional pending_tool_call; // Tool call waiting for name - common_chat_tool_call * current_tool = nullptr; - int arg_count = 0; - bool needs_closing_quote = false; + common_chat_tool_call * current_tool = nullptr; + int arg_count = 0; + bool closing_quote_pending = false; std::string args_buffer; // Buffer to delay arguments until tool name is known - bool buffer_needs_closing_quote = false; // Track quote state for buffered args + + // Returns a reference to the active argument destination string. + // Before tool_name is known, writes go to args_buffer; after, to current_tool->arguments. + std::string & args_target(); public: common_chat_peg_unified_mapper(common_chat_msg & msg) : common_chat_peg_mapper(msg) {} diff --git a/tests/test-chat.cpp b/tests/test-chat.cpp index e64e362129..d9f1eea2f2 100644 --- a/tests/test-chat.cpp +++ b/tests/test-chat.cpp @@ -369,6 +369,28 @@ static common_chat_tool amount_tool{ })", }; +static common_chat_tool imaginary_number_tool{ + /* .name = */ "imaginary_number", + /* .description = */ "Imaginary number converter", + /* .parameters = */ R"({ + "type": "object", + "properties": { + "number": { + "type": "object", + "properties": { + "real": { + "type": "number" + }, + "imaginary": { + "type": "number" + } + }, + "required": ["real", "imaginary"] + } + }, + "required": ["number"] + })", +}; static common_chat_tool string_param_tool{ /* .name = */ "string_param", @@ -394,7 +416,7 @@ static common_chat_tool quoted_unquoted_tool{ "quoted": { "type": "string", "description": "Quoted value" - }, + }, "unquoted": { "type": "string", "description": "Unquoted value" @@ -2323,6 +2345,25 @@ static void test_template_output_peg_parsers(bool detailed_debug) { }) .run(); + tst.test( + "Test imaginary number\n" + "\n" + "\n" + "\n" + "\n" + "{ \"real\": 3.14, \"imaginary\": 2.71 }\n" + "\n" + "\n" + "") + .enable_thinking(true) + .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK) + .tools({ imaginary_number_tool }) + .expect_reasoning("Test imaginary number") + .expect_tool_calls({ + { "imaginary_number", R"({ "number" : {"real":3.14,"imaginary":2.71 } })", {} } + }) + .run(); + } }