Merge c3430c4e34 into 4951250235
This commit is contained in:
commit
a3adb63ef7
|
|
@ -279,36 +279,34 @@ common_peg_parser analyze_tools::build_tool_parser_tag_tagged(parser_build_conte
|
|||
const auto & inputs = ctx.inputs;
|
||||
bool force_tools = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED;
|
||||
|
||||
auto until_suffix = p.rule("until-suffix", p.until(arguments.value_suffix));
|
||||
|
||||
common_peg_parser tool_choice = p.choice();
|
||||
|
||||
foreach_function(inputs.tools, [&](const json & tool) {
|
||||
const auto & func = tool.at("function");
|
||||
std::string name = func.at("name");
|
||||
const auto & params = func.contains("parameters") ? func.at("parameters") : json::object();
|
||||
auto params = func.contains("parameters") ? func.at("parameters") : json::object();
|
||||
const auto & properties = params.contains("properties") ? params.at("properties") : json::object();
|
||||
std::set<std::string> required;
|
||||
|
||||
// Build parser for each argument, separating required and optional
|
||||
std::vector<common_peg_parser> required_parsers;
|
||||
std::vector<common_peg_parser> optional_parsers;
|
||||
std::set<std::string> required;
|
||||
if (params.contains("required")) {
|
||||
params.at("required").get_to(required);
|
||||
}
|
||||
|
||||
auto schema_info = common_schema_info();
|
||||
schema_info.resolve_refs(params);
|
||||
|
||||
std::vector<common_peg_parser> args;
|
||||
for (const auto & [param_name, param_schema] : properties.items()) {
|
||||
bool is_required = required.find(param_name) != required.end();
|
||||
std::string type = "object";
|
||||
auto type_obj = param_schema.contains("type") ? param_schema.at("type") : json::object();
|
||||
if (type_obj.is_string()) {
|
||||
type_obj.get_to(type);
|
||||
} else if (type_obj.is_object()) {
|
||||
if (type_obj.contains("type") && type_obj.at("type").is_string()) {
|
||||
type_obj.at("type").get_to(type);
|
||||
}
|
||||
}
|
||||
bool is_required = required.find(param_name) != required.end();
|
||||
|
||||
auto arg =
|
||||
p.tool_arg(p.tool_arg_open(arguments.name_prefix + p.tool_arg_name(p.literal(param_name)) +
|
||||
arguments.name_suffix) +
|
||||
arguments.value_prefix +
|
||||
(type == "string" ?
|
||||
p.tool_arg_string_value(p.schema(p.until(arguments.value_suffix),
|
||||
(schema_info.resolves_to_string(param_schema) ?
|
||||
p.tool_arg_string_value(p.schema(until_suffix,
|
||||
"tool-" + name + "-arg-" + param_name + "-schema",
|
||||
param_schema, true)) :
|
||||
p.tool_arg_json_value(p.schema(
|
||||
|
|
@ -316,31 +314,15 @@ common_peg_parser analyze_tools::build_tool_parser_tag_tagged(parser_build_conte
|
|||
p.space()) +
|
||||
p.tool_arg_close(p.literal(arguments.value_suffix)));
|
||||
|
||||
auto named_arg = p.rule("tool-" + name + "-arg-" + param_name, arg);
|
||||
if (is_required) {
|
||||
required_parsers.push_back(named_arg);
|
||||
} else {
|
||||
optional_parsers.push_back(named_arg);
|
||||
auto named_arg = p.repeat(p.rule("tool-" + name + "-arg-" + param_name, arg), is_required ? 1 : 0, 1);
|
||||
|
||||
if (!args.empty()) {
|
||||
args.push_back(p.space());
|
||||
}
|
||||
args.push_back(named_arg);
|
||||
}
|
||||
|
||||
// Build required arg sequence in definition order
|
||||
common_peg_parser args_seq = p.eps();
|
||||
for (size_t i = 0; i < required_parsers.size(); i++) {
|
||||
if (i > 0) {
|
||||
args_seq = args_seq + p.space();
|
||||
}
|
||||
args_seq = args_seq + required_parsers[i];
|
||||
}
|
||||
|
||||
// Build optional args with flexible ordering
|
||||
if (!optional_parsers.empty()) {
|
||||
common_peg_parser any_opt = p.choice();
|
||||
for (const auto & opt : optional_parsers) {
|
||||
any_opt |= opt;
|
||||
}
|
||||
args_seq = args_seq + p.repeat(p.space() + any_opt, 0, (int) optional_parsers.size());
|
||||
}
|
||||
common_peg_parser args_seq = p.sequence(args);
|
||||
|
||||
// Build call_id parser based on position (if supported)
|
||||
common_peg_parser call_id_section = p.eps();
|
||||
|
|
@ -361,7 +343,7 @@ common_peg_parser analyze_tools::build_tool_parser_tag_tagged(parser_build_conte
|
|||
func_parser = p.atomic(p.tool_open(function.name_prefix + p.tool_name(p.literal(name)) + function.name_suffix) +
|
||||
call_id_section) + p.space() + args_seq;
|
||||
matched_atomic = true;
|
||||
} else if (!arguments.name_prefix.empty() && !required_parsers.empty()) {
|
||||
} else if (!arguments.name_prefix.empty() && !required.empty()) {
|
||||
// Only peek for an arg tag when there are required args that must follow.
|
||||
// When all args are optional, the model may emit no arg tags at all (#20650).
|
||||
func_parser = p.atomic(p.tool_open(function.name_prefix + p.tool_name(p.literal(name)) + function.name_suffix) +
|
||||
|
|
|
|||
|
|
@ -1557,6 +1557,36 @@ static std::unordered_set<std::string> collect_reachable_rules(
|
|||
|
||||
// GBNF generation implementation
|
||||
void common_peg_arena::build_grammar(const common_grammar_builder & builder, bool lazy) const {
|
||||
auto schema_delegates = [](const common_peg_schema_parser & s) -> bool {
|
||||
if (!s.schema) {
|
||||
return true;
|
||||
}
|
||||
if (s.raw && s.schema->contains("type") && s.schema->at("type").is_string() && s.schema->at("type") == "string") {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
};
|
||||
|
||||
// Unwrap the parser so we can properly check if it's a sequence or choice
|
||||
auto effective_parser = [&](common_peg_parser_id id) -> const common_peg_parser_variant & {
|
||||
while (true) {
|
||||
const auto & p = parsers_.at(id);
|
||||
if (const auto * tag = std::get_if<common_peg_tag_parser>(&p)) {
|
||||
id = tag->child;
|
||||
} else if (const auto * atomic = std::get_if<common_peg_atomic_parser>(&p)) {
|
||||
id = atomic->child;
|
||||
} else if (const auto * schema = std::get_if<common_peg_schema_parser>(&p)) {
|
||||
if (schema_delegates(*schema)) {
|
||||
id = schema->child;
|
||||
} else {
|
||||
return p;
|
||||
}
|
||||
} else {
|
||||
return p;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// Generate GBNF for a parser
|
||||
std::function<std::string(common_peg_parser_id)> to_gbnf = [&](common_peg_parser_id id) -> std::string {
|
||||
const auto & parser = parsers_.at(id);
|
||||
|
|
@ -1577,7 +1607,7 @@ void common_peg_arena::build_grammar(const common_grammar_builder & builder, boo
|
|||
s += " ";
|
||||
}
|
||||
auto child_gbnf = to_gbnf(child);
|
||||
const auto & child_parser = parsers_.at(child);
|
||||
const auto & child_parser = effective_parser(child);
|
||||
if (std::holds_alternative<common_peg_choice_parser>(child_parser) ||
|
||||
std::holds_alternative<common_peg_sequence_parser>(child_parser)) {
|
||||
s += "(" + child_gbnf + ")";
|
||||
|
|
@ -1593,7 +1623,7 @@ void common_peg_arena::build_grammar(const common_grammar_builder & builder, boo
|
|||
s += " | ";
|
||||
}
|
||||
auto child_gbnf = to_gbnf(child);
|
||||
const auto & child_parser = parsers_.at(child);
|
||||
const auto & child_parser = effective_parser(child);
|
||||
if (std::holds_alternative<common_peg_choice_parser>(child_parser)) {
|
||||
s += "(" + child_gbnf + ")";
|
||||
} else {
|
||||
|
|
@ -1603,7 +1633,7 @@ void common_peg_arena::build_grammar(const common_grammar_builder & builder, boo
|
|||
return s;
|
||||
} else if constexpr (std::is_same_v<T, common_peg_repetition_parser>) {
|
||||
auto child_gbnf = to_gbnf(p.child);
|
||||
const auto & child_parser = parsers_.at(p.child);
|
||||
const auto & child_parser = effective_parser(p.child);
|
||||
if (std::holds_alternative<common_peg_choice_parser>(child_parser) ||
|
||||
std::holds_alternative<common_peg_sequence_parser>(child_parser)) {
|
||||
child_gbnf = "(" + child_gbnf + ")";
|
||||
|
|
@ -1663,15 +1693,10 @@ void common_peg_arena::build_grammar(const common_grammar_builder & builder, boo
|
|||
}
|
||||
return gbnf_excluding_pattern(p.delimiters);
|
||||
} else if constexpr (std::is_same_v<T, common_peg_schema_parser>) {
|
||||
if (p.schema) {
|
||||
if (p.raw && p.schema->contains("type") && p.schema->at("type").is_string() && p.schema->at("type") == "string") {
|
||||
// TODO: Implement more comprehensive grammar generation for raw strings.
|
||||
// For now, use the grammar emitted from the underlying parser.
|
||||
return to_gbnf(p.child);
|
||||
}
|
||||
return builder.add_schema(p.name, *p.schema);
|
||||
if (schema_delegates(p)) {
|
||||
return to_gbnf(p.child);
|
||||
}
|
||||
return to_gbnf(p.child);
|
||||
return builder.add_schema(p.name, *p.schema);
|
||||
} else if constexpr (std::is_same_v<T, common_peg_rule_parser>) {
|
||||
return p.name;
|
||||
} else if constexpr (std::is_same_v<T, common_peg_ref_parser>) {
|
||||
|
|
|
|||
|
|
@ -213,6 +213,66 @@ void test_gbnf_generation(testing &t) {
|
|||
)""", gbnf);
|
||||
});
|
||||
|
||||
t.test("tagged choice inside sequence gets parenthesized", [](testing &t) {
|
||||
auto parser = build_peg_parser([](common_peg_parser_builder & p) {
|
||||
return p.literal("a") + p.tag("t", p.literal("b") | p.literal("c"));
|
||||
});
|
||||
|
||||
auto gbnf = build_grammar([&](const common_grammar_builder & builder) {
|
||||
parser.build_grammar(builder);
|
||||
});
|
||||
|
||||
assert_gbnf_equal(t, R"""(
|
||||
root ::= "a" ("b" | "c")
|
||||
space ::= | " " | "\n"{1,2} [ \t]{0,20}
|
||||
)""", gbnf);
|
||||
});
|
||||
|
||||
t.test("tagged sequence inside choice gets parenthesized", [](testing &t) {
|
||||
auto parser = build_peg_parser([](common_peg_parser_builder & p) {
|
||||
return p.tag("t", p.literal("a") + p.literal("b")) | p.literal("c");
|
||||
});
|
||||
|
||||
auto gbnf = build_grammar([&](const common_grammar_builder & builder) {
|
||||
parser.build_grammar(builder);
|
||||
});
|
||||
|
||||
assert_gbnf_equal(t, R"""(
|
||||
root ::= "a" "b" | "c"
|
||||
space ::= | " " | "\n"{1,2} [ \t]{0,20}
|
||||
)""", gbnf);
|
||||
});
|
||||
|
||||
t.test("atomic choice inside repetition gets parenthesized", [](testing &t) {
|
||||
auto parser = build_peg_parser([](common_peg_parser_builder & p) {
|
||||
return p.one_or_more(p.atomic(p.literal("a") | p.literal("b")));
|
||||
});
|
||||
|
||||
auto gbnf = build_grammar([&](const common_grammar_builder & builder) {
|
||||
parser.build_grammar(builder);
|
||||
});
|
||||
|
||||
assert_gbnf_equal(t, R"""(
|
||||
root ::= ("a" | "b")+
|
||||
space ::= | " " | "\n"{1,2} [ \t]{0,20}
|
||||
)""", gbnf);
|
||||
});
|
||||
|
||||
t.test("nested transparent wrappers get parenthesized", [](testing &t) {
|
||||
auto parser = build_peg_parser([](common_peg_parser_builder & p) {
|
||||
return p.literal("x") + p.tag("outer", p.atomic(p.literal("a") | p.literal("b")));
|
||||
});
|
||||
|
||||
auto gbnf = build_grammar([&](const common_grammar_builder & builder) {
|
||||
parser.build_grammar(builder);
|
||||
});
|
||||
|
||||
assert_gbnf_equal(t, R"""(
|
||||
root ::= "x" ("a" | "b")
|
||||
space ::= | " " | "\n"{1,2} [ \t]{0,20}
|
||||
)""", gbnf);
|
||||
});
|
||||
|
||||
t.test("emit only trigger rules (and references)", [](testing &t) {
|
||||
auto parser = build_peg_parser([](common_peg_parser_builder & p) {
|
||||
auto rule1 = p.rule("rule-1", p.literal("a") + p.ref("rule-2"));
|
||||
|
|
|
|||
|
|
@ -2144,57 +2144,19 @@ static void test_template_output_peg_parsers(bool detailed_debug) {
|
|||
.expect_reconstruction()
|
||||
.run();
|
||||
|
||||
// Test flexible optional argument ordering (2 required + 4 optional, reversed optional order)
|
||||
// Test optional argument ordering (2 required + 4 optional)
|
||||
tst.test(
|
||||
"<tool_call>\n"
|
||||
"<function=tool_2req_4opt>\n"
|
||||
"<parameter=req1>\nhello\n</parameter>\n"
|
||||
"<parameter=req2>\n42\n</parameter>\n"
|
||||
"<parameter=opt4>\n100\n</parameter>\n"
|
||||
"<parameter=opt2>\n200\n</parameter>\n"
|
||||
"<parameter=opt4>\n100\n</parameter>\n"
|
||||
"</function>\n"
|
||||
"</tool_call>")
|
||||
.tools({ tool_2req_4opt })
|
||||
.expect_tool_calls({
|
||||
{ "tool_2req_4opt", R"({"req1": "hello", "req2": 42, "opt4": 100, "opt2": 200})", {} },
|
||||
})
|
||||
.expect_reconstruction()
|
||||
.run();
|
||||
|
||||
// Test flexible optional argument ordering (2 required + 5 optional, reversed optional order)
|
||||
tst.test(
|
||||
"<tool_call>\n"
|
||||
"<function=tool_2req_5opt>\n"
|
||||
"<parameter=req1>\nworld\n</parameter>\n"
|
||||
"<parameter=req2>\n7\n</parameter>\n"
|
||||
"<parameter=opt5>\nlast\n</parameter>\n"
|
||||
"<parameter=opt3>\nmiddle\n</parameter>\n"
|
||||
"<parameter=opt1>\nfirst\n</parameter>\n"
|
||||
"</function>\n"
|
||||
"</tool_call>")
|
||||
.tools({ tool_2req_5opt })
|
||||
.expect_tool_calls({
|
||||
{ "tool_2req_5opt", R"({"req1": "world", "req2": 7, "opt5": "last", "opt3": "middle", "opt1": "first"})", {} },
|
||||
})
|
||||
.expect_reconstruction()
|
||||
.run();
|
||||
|
||||
// Test flexible optional argument ordering (2 required + 5 optional, all 5 in shuffled order)
|
||||
tst.test(
|
||||
"<tool_call>\n"
|
||||
"<function=tool_2req_5opt>\n"
|
||||
"<parameter=req1>\ntest\n</parameter>\n"
|
||||
"<parameter=req2>\n99\n</parameter>\n"
|
||||
"<parameter=opt3>\nc\n</parameter>\n"
|
||||
"<parameter=opt1>\na\n</parameter>\n"
|
||||
"<parameter=opt5>\ne\n</parameter>\n"
|
||||
"<parameter=opt4>\n4\n</parameter>\n"
|
||||
"<parameter=opt2>\n2\n</parameter>\n"
|
||||
"</function>\n"
|
||||
"</tool_call>")
|
||||
.tools({ tool_2req_5opt })
|
||||
.expect_tool_calls({
|
||||
{ "tool_2req_5opt", R"({"req1": "test", "req2": 99, "opt3": "c", "opt1": "a", "opt5": "e", "opt4": 4, "opt2": 2})", {} },
|
||||
{ "tool_2req_4opt", R"({"req1": "hello", "req2": 42, "opt2": 200, "opt4": 100})", {} },
|
||||
})
|
||||
.expect_reconstruction()
|
||||
.run();
|
||||
|
|
|
|||
Loading…
Reference in New Issue