From 870e5d526b29acc54d2ebc78946a3cfb8be06469 Mon Sep 17 00:00:00 2001 From: Alde Rojas Date: Tue, 31 Mar 2026 03:03:15 -0500 Subject: [PATCH 1/6] common : simplify autoparser tagged parser rules --- common/chat-auto-parser-generator.cpp | 27 +++++++++++---------------- 1 file changed, 11 insertions(+), 16 deletions(-) diff --git a/common/chat-auto-parser-generator.cpp b/common/chat-auto-parser-generator.cpp index 3f036bb5b2..3a0d7bf6d8 100644 --- a/common/chat-auto-parser-generator.cpp +++ b/common/chat-auto-parser-generator.cpp @@ -279,36 +279,32 @@ common_peg_parser analyze_tools::build_tool_parser_tag_tagged(parser_build_conte const auto & inputs = ctx.inputs; bool force_tools = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED; + auto until_suffix = p.rule("until-suffix", p.until(arguments.value_suffix)); + common_peg_parser tool_choice = p.choice(); foreach_function(inputs.tools, [&](const json & tool) { const auto & func = tool.at("function"); std::string name = func.at("name"); - const auto & params = func.contains("parameters") ? func.at("parameters") : json::object(); + auto params = func.contains("parameters") ? func.at("parameters") : json::object(); const auto & properties = params.contains("properties") ? params.at("properties") : json::object(); std::set required; + auto schema_info = common_schema_info(); + schema_info.resolve_refs(params); + // Build parser for each argument, separating required and optional std::vector required_parsers; std::vector optional_parsers; for (const auto & [param_name, param_schema] : properties.items()) { - bool is_required = required.find(param_name) != required.end(); - std::string type = "object"; - auto type_obj = param_schema.contains("type") ? param_schema.at("type") : json::object(); - if (type_obj.is_string()) { - type_obj.get_to(type); - } else if (type_obj.is_object()) { - if (type_obj.contains("type") && type_obj.at("type").is_string()) { - type_obj.at("type").get_to(type); - } - } + bool is_required = required.find(param_name) != required.end(); auto arg = p.tool_arg(p.tool_arg_open(arguments.name_prefix + p.tool_arg_name(p.literal(param_name)) + arguments.name_suffix) + arguments.value_prefix + - (type == "string" ? - p.tool_arg_string_value(p.schema(p.until(arguments.value_suffix), + (schema_info.resolves_to_string(param_schema) ? + p.tool_arg_string_value(p.schema(until_suffix, "tool-" + name + "-arg-" + param_name + "-schema", param_schema, true)) : p.tool_arg_json_value(p.schema( @@ -405,7 +401,7 @@ common_peg_parser analyze_tools::build_tool_parser_tag_tagged(parser_build_conte if (!format.section_start.empty()) { tool_calls = p.trigger_rule("tool-calls", p.literal(format.section_start) + p.space() + tool_calls + p.space() + - (format.section_end.empty() ? p.end() : p.literal(format.section_end))); + (format.section_end.empty() ? p.eps() : p.literal(format.section_end))); } } else { std::string separator = ", "; // Default @@ -426,8 +422,7 @@ common_peg_parser analyze_tools::build_tool_parser_tag_tagged(parser_build_conte std::string trigger_marker = !format.section_start.empty() ? format.section_start : format.per_call_start; auto content_before_tools = trigger_marker.empty() ? p.eps() : p.until(trigger_marker); - return ctx.reasoning_parser + (force_tools ? p.eps() : p.optional(p.content(content_before_tools))) + tool_calls + - p.end(); + return ctx.reasoning_parser + (force_tools ? p.eps() : p.optional(p.content(content_before_tools))) + tool_calls; } } // namespace autoparser From d4e7f58f79cebb9cf7738cdab367efde425d3fa6 Mon Sep 17 00:00:00 2001 From: Alde Rojas Date: Tue, 31 Mar 2026 22:11:47 -0500 Subject: [PATCH 2/6] common/peg-parser : fix parenthesization of wrapped parsers --- common/peg-parser.cpp | 47 +++++++++++++----- tests/peg-parser/test-gbnf-generation.cpp | 60 +++++++++++++++++++++++ 2 files changed, 96 insertions(+), 11 deletions(-) diff --git a/common/peg-parser.cpp b/common/peg-parser.cpp index a6d9a4c27c..694f9b850a 100644 --- a/common/peg-parser.cpp +++ b/common/peg-parser.cpp @@ -1557,6 +1557,36 @@ static std::unordered_set collect_reachable_rules( // GBNF generation implementation void common_peg_arena::build_grammar(const common_grammar_builder & builder, bool lazy) const { + auto schema_delegates = [](const common_peg_schema_parser & s) -> bool { + if (!s.schema) { + return true; + } + if (s.raw && s.schema->contains("type") && s.schema->at("type").is_string() && s.schema->at("type") == "string") { + return true; + } + return false; + }; + + // Unwrap the parser so we can properly check if it's a sequence or choice + auto effective_parser = [&](common_peg_parser_id id) -> const common_peg_parser_variant & { + while (true) { + const auto & p = parsers_.at(id); + if (const auto * tag = std::get_if(&p)) { + id = tag->child; + } else if (const auto * atomic = std::get_if(&p)) { + id = atomic->child; + } else if (const auto * schema = std::get_if(&p)) { + if (schema_delegates(*schema)) { + id = schema->child; + } else { + return p; + } + } else { + return p; + } + } + }; + // Generate GBNF for a parser std::function to_gbnf = [&](common_peg_parser_id id) -> std::string { const auto & parser = parsers_.at(id); @@ -1577,7 +1607,7 @@ void common_peg_arena::build_grammar(const common_grammar_builder & builder, boo s += " "; } auto child_gbnf = to_gbnf(child); - const auto & child_parser = parsers_.at(child); + const auto & child_parser = effective_parser(child); if (std::holds_alternative(child_parser) || std::holds_alternative(child_parser)) { s += "(" + child_gbnf + ")"; @@ -1593,7 +1623,7 @@ void common_peg_arena::build_grammar(const common_grammar_builder & builder, boo s += " | "; } auto child_gbnf = to_gbnf(child); - const auto & child_parser = parsers_.at(child); + const auto & child_parser = effective_parser(child); if (std::holds_alternative(child_parser)) { s += "(" + child_gbnf + ")"; } else { @@ -1603,7 +1633,7 @@ void common_peg_arena::build_grammar(const common_grammar_builder & builder, boo return s; } else if constexpr (std::is_same_v) { auto child_gbnf = to_gbnf(p.child); - const auto & child_parser = parsers_.at(p.child); + const auto & child_parser = effective_parser(p.child); if (std::holds_alternative(child_parser) || std::holds_alternative(child_parser)) { child_gbnf = "(" + child_gbnf + ")"; @@ -1663,15 +1693,10 @@ void common_peg_arena::build_grammar(const common_grammar_builder & builder, boo } return gbnf_excluding_pattern(p.delimiters); } else if constexpr (std::is_same_v) { - if (p.schema) { - if (p.raw && p.schema->contains("type") && p.schema->at("type").is_string() && p.schema->at("type") == "string") { - // TODO: Implement more comprehensive grammar generation for raw strings. - // For now, use the grammar emitted from the underlying parser. - return to_gbnf(p.child); - } - return builder.add_schema(p.name, *p.schema); + if (schema_delegates(p)) { + return to_gbnf(p.child); } - return to_gbnf(p.child); + return builder.add_schema(p.name, *p.schema); } else if constexpr (std::is_same_v) { return p.name; } else if constexpr (std::is_same_v) { diff --git a/tests/peg-parser/test-gbnf-generation.cpp b/tests/peg-parser/test-gbnf-generation.cpp index 68857a5e88..1ab9a7ede3 100644 --- a/tests/peg-parser/test-gbnf-generation.cpp +++ b/tests/peg-parser/test-gbnf-generation.cpp @@ -213,6 +213,66 @@ void test_gbnf_generation(testing &t) { )""", gbnf); }); + t.test("tagged choice inside sequence gets parenthesized", [](testing &t) { + auto parser = build_peg_parser([](common_peg_parser_builder & p) { + return p.literal("a") + p.tag("t", p.literal("b") | p.literal("c")); + }); + + auto gbnf = build_grammar([&](const common_grammar_builder & builder) { + parser.build_grammar(builder); + }); + + assert_gbnf_equal(t, R"""( + root ::= "a" ("b" | "c") + space ::= | " " | "\n"{1,2} [ \t]{0,20} + )""", gbnf); + }); + + t.test("tagged sequence inside choice gets parenthesized", [](testing &t) { + auto parser = build_peg_parser([](common_peg_parser_builder & p) { + return p.tag("t", p.literal("a") + p.literal("b")) | p.literal("c"); + }); + + auto gbnf = build_grammar([&](const common_grammar_builder & builder) { + parser.build_grammar(builder); + }); + + assert_gbnf_equal(t, R"""( + root ::= "a" "b" | "c" + space ::= | " " | "\n"{1,2} [ \t]{0,20} + )""", gbnf); + }); + + t.test("atomic choice inside repetition gets parenthesized", [](testing &t) { + auto parser = build_peg_parser([](common_peg_parser_builder & p) { + return p.one_or_more(p.atomic(p.literal("a") | p.literal("b"))); + }); + + auto gbnf = build_grammar([&](const common_grammar_builder & builder) { + parser.build_grammar(builder); + }); + + assert_gbnf_equal(t, R"""( + root ::= ("a" | "b")+ + space ::= | " " | "\n"{1,2} [ \t]{0,20} + )""", gbnf); + }); + + t.test("nested transparent wrappers get parenthesized", [](testing &t) { + auto parser = build_peg_parser([](common_peg_parser_builder & p) { + return p.literal("x") + p.tag("outer", p.atomic(p.literal("a") | p.literal("b"))); + }); + + auto gbnf = build_grammar([&](const common_grammar_builder & builder) { + parser.build_grammar(builder); + }); + + assert_gbnf_equal(t, R"""( + root ::= "x" ("a" | "b") + space ::= | " " | "\n"{1,2} [ \t]{0,20} + )""", gbnf); + }); + t.test("emit only trigger rules (and references)", [](testing &t) { auto parser = build_peg_parser([](common_peg_parser_builder & p) { auto rule1 = p.rule("rule-1", p.literal("a") + p.ref("rule-2")); From aff47d5a3bb17b14d16a2219cdeb6a0ee8f6238b Mon Sep 17 00:00:00 2001 From: Alde Rojas Date: Tue, 31 Mar 2026 22:27:26 -0500 Subject: [PATCH 3/6] cont : remove upper limit on optional args --- common/chat-auto-parser-generator.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/chat-auto-parser-generator.cpp b/common/chat-auto-parser-generator.cpp index 3a0d7bf6d8..6266861a10 100644 --- a/common/chat-auto-parser-generator.cpp +++ b/common/chat-auto-parser-generator.cpp @@ -335,7 +335,7 @@ common_peg_parser analyze_tools::build_tool_parser_tag_tagged(parser_build_conte for (const auto & opt : optional_parsers) { any_opt |= opt; } - args_seq = args_seq + p.repeat(p.space() + any_opt, 0, (int) optional_parsers.size()); + args_seq = args_seq + p.repeat(p.space() + any_opt, 0, -1); } // Build call_id parser based on position (if supported) From 731a3f9c6b7f9cbe806dff71fd21701a9f66cef4 Mon Sep 17 00:00:00 2001 From: Alde Rojas Date: Tue, 31 Mar 2026 22:31:05 -0500 Subject: [PATCH 4/6] cont : revert changes to parsing at the end --- common/chat-auto-parser-generator.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/common/chat-auto-parser-generator.cpp b/common/chat-auto-parser-generator.cpp index 6266861a10..3461566677 100644 --- a/common/chat-auto-parser-generator.cpp +++ b/common/chat-auto-parser-generator.cpp @@ -401,7 +401,7 @@ common_peg_parser analyze_tools::build_tool_parser_tag_tagged(parser_build_conte if (!format.section_start.empty()) { tool_calls = p.trigger_rule("tool-calls", p.literal(format.section_start) + p.space() + tool_calls + p.space() + - (format.section_end.empty() ? p.eps() : p.literal(format.section_end))); + (format.section_end.empty() ? p.end() : p.literal(format.section_end))); } } else { std::string separator = ", "; // Default @@ -422,7 +422,8 @@ common_peg_parser analyze_tools::build_tool_parser_tag_tagged(parser_build_conte std::string trigger_marker = !format.section_start.empty() ? format.section_start : format.per_call_start; auto content_before_tools = trigger_marker.empty() ? p.eps() : p.until(trigger_marker); - return ctx.reasoning_parser + (force_tools ? p.eps() : p.optional(p.content(content_before_tools))) + tool_calls; + return ctx.reasoning_parser + (force_tools ? p.eps() : p.optional(p.content(content_before_tools))) + tool_calls + + p.end(); } } // namespace autoparser From d0000c1150d7e39f72ac4a9858f672b9118877ee Mon Sep 17 00:00:00 2001 From: Alde Rojas Date: Wed, 1 Apr 2026 00:04:57 -0500 Subject: [PATCH 5/6] cont : undo arbitrary ordering of optional args --- common/chat-auto-parser-generator.cpp | 34 +++++---------------- tests/test-chat.cpp | 44 ++------------------------- 2 files changed, 11 insertions(+), 67 deletions(-) diff --git a/common/chat-auto-parser-generator.cpp b/common/chat-auto-parser-generator.cpp index 3461566677..4eed6f27b6 100644 --- a/common/chat-auto-parser-generator.cpp +++ b/common/chat-auto-parser-generator.cpp @@ -293,9 +293,7 @@ common_peg_parser analyze_tools::build_tool_parser_tag_tagged(parser_build_conte auto schema_info = common_schema_info(); schema_info.resolve_refs(params); - // Build parser for each argument, separating required and optional - std::vector required_parsers; - std::vector optional_parsers; + std::vector args; for (const auto & [param_name, param_schema] : properties.items()) { bool is_required = required.find(param_name) != required.end(); @@ -312,31 +310,15 @@ common_peg_parser analyze_tools::build_tool_parser_tag_tagged(parser_build_conte p.space()) + p.tool_arg_close(p.literal(arguments.value_suffix))); - auto named_arg = p.rule("tool-" + name + "-arg-" + param_name, arg); - if (is_required) { - required_parsers.push_back(named_arg); - } else { - optional_parsers.push_back(named_arg); + auto named_arg = p.repeat(p.rule("tool-" + name + "-arg-" + param_name, arg), is_required ? 1 : 0, 1); + + if (!args.empty()) { + args.push_back(p.space()); } + args.push_back(named_arg); } - // Build required arg sequence in definition order - common_peg_parser args_seq = p.eps(); - for (size_t i = 0; i < required_parsers.size(); i++) { - if (i > 0) { - args_seq = args_seq + p.space(); - } - args_seq = args_seq + required_parsers[i]; - } - - // Build optional args with flexible ordering - if (!optional_parsers.empty()) { - common_peg_parser any_opt = p.choice(); - for (const auto & opt : optional_parsers) { - any_opt |= opt; - } - args_seq = args_seq + p.repeat(p.space() + any_opt, 0, -1); - } + common_peg_parser args_seq = p.sequence(args); // Build call_id parser based on position (if supported) common_peg_parser call_id_section = p.eps(); @@ -357,7 +339,7 @@ common_peg_parser analyze_tools::build_tool_parser_tag_tagged(parser_build_conte func_parser = p.atomic(p.tool_open(function.name_prefix + p.tool_name(p.literal(name)) + function.name_suffix) + call_id_section) + p.space() + args_seq; matched_atomic = true; - } else if (!arguments.name_prefix.empty() && !required_parsers.empty()) { + } else if (!arguments.name_prefix.empty() && !required.empty()) { // Only peek for an arg tag when there are required args that must follow. // When all args are optional, the model may emit no arg tags at all (#20650). func_parser = p.atomic(p.tool_open(function.name_prefix + p.tool_name(p.literal(name)) + function.name_suffix) + diff --git a/tests/test-chat.cpp b/tests/test-chat.cpp index 1c4da68195..563a4a6605 100644 --- a/tests/test-chat.cpp +++ b/tests/test-chat.cpp @@ -2144,57 +2144,19 @@ static void test_template_output_peg_parsers(bool detailed_debug) { .expect_reconstruction() .run(); - // Test flexible optional argument ordering (2 required + 4 optional, reversed optional order) + // Test optional argument ordering (2 required + 4 optional) tst.test( "\n" "\n" "\nhello\n\n" "\n42\n\n" - "\n100\n\n" "\n200\n\n" + "\n100\n\n" "\n" "") .tools({ tool_2req_4opt }) .expect_tool_calls({ - { "tool_2req_4opt", R"({"req1": "hello", "req2": 42, "opt4": 100, "opt2": 200})", {} }, - }) - .expect_reconstruction() - .run(); - - // Test flexible optional argument ordering (2 required + 5 optional, reversed optional order) - tst.test( - "\n" - "\n" - "\nworld\n\n" - "\n7\n\n" - "\nlast\n\n" - "\nmiddle\n\n" - "\nfirst\n\n" - "\n" - "") - .tools({ tool_2req_5opt }) - .expect_tool_calls({ - { "tool_2req_5opt", R"({"req1": "world", "req2": 7, "opt5": "last", "opt3": "middle", "opt1": "first"})", {} }, - }) - .expect_reconstruction() - .run(); - - // Test flexible optional argument ordering (2 required + 5 optional, all 5 in shuffled order) - tst.test( - "\n" - "\n" - "\ntest\n\n" - "\n99\n\n" - "\nc\n\n" - "\na\n\n" - "\ne\n\n" - "\n4\n\n" - "\n2\n\n" - "\n" - "") - .tools({ tool_2req_5opt }) - .expect_tool_calls({ - { "tool_2req_5opt", R"({"req1": "test", "req2": 99, "opt3": "c", "opt1": "a", "opt5": "e", "opt4": 4, "opt2": 2})", {} }, + { "tool_2req_4opt", R"({"req1": "hello", "req2": 42, "opt2": 200, "opt4": 100})", {} }, }) .expect_reconstruction() .run(); From c3430c4e341b1015d9ae1be6b6162395b8c62e5e Mon Sep 17 00:00:00 2001 From: Alde Rojas Date: Wed, 1 Apr 2026 00:42:59 -0500 Subject: [PATCH 6/6] cont : fix uninitialized required parameters --- common/chat-auto-parser-generator.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/common/chat-auto-parser-generator.cpp b/common/chat-auto-parser-generator.cpp index 4eed6f27b6..d83079bba0 100644 --- a/common/chat-auto-parser-generator.cpp +++ b/common/chat-auto-parser-generator.cpp @@ -288,7 +288,11 @@ common_peg_parser analyze_tools::build_tool_parser_tag_tagged(parser_build_conte std::string name = func.at("name"); auto params = func.contains("parameters") ? func.at("parameters") : json::object(); const auto & properties = params.contains("properties") ? params.at("properties") : json::object(); + std::set required; + if (params.contains("required")) { + params.at("required").get_to(required); + } auto schema_info = common_schema_info(); schema_info.resolve_refs(params);