common : fix tool call type detection for nullable and enum schemas (#21327)
* common : fix tool call type detection for nullable and enum schemas
* common, tests : fix grammar delegation for nullable/enum schemas and add tests
Fix enum type inference to scan all enum values (not just index 0) so
schemas like {"enum": [0, "celsius"]} correctly detect string type.
Fix schema_delegates in peg-parser to handle nullable type arrays
(["string", "null"]) and typeless enum schemas in raw mode, allowing
the tagged parser to use raw text instead of JSON-formatted strings.
Add test cases for Qwen3-Coder (TAG_WITH_TAGGED format):
- nullable string ["string", "null"]
- nullable string with null first ["null", "string"]
- nullable integer ["integer", "null"]
- enum without explicit type key
This commit is contained in:
parent
277ff5fff7
commit
af5c13841f
|
|
@ -400,12 +400,34 @@ common_peg_parser analyze_tools::build_tool_parser_tag_tagged(parser_build_conte
|
||||||
for (const auto & [param_name, param_schema] : properties.items()) {
|
for (const auto & [param_name, param_schema] : properties.items()) {
|
||||||
bool is_required = required.find(param_name) != required.end();
|
bool is_required = required.find(param_name) != required.end();
|
||||||
std::string type = "object";
|
std::string type = "object";
|
||||||
auto type_obj = param_schema.contains("type") ? param_schema.at("type") : json::object();
|
if (param_schema.contains("type")) {
|
||||||
if (type_obj.is_string()) {
|
const auto & type_obj = param_schema.at("type");
|
||||||
type_obj.get_to(type);
|
if (type_obj.is_string()) {
|
||||||
} else if (type_obj.is_object()) {
|
type_obj.get_to(type);
|
||||||
if (type_obj.contains("type") && type_obj.at("type").is_string()) {
|
} else if (type_obj.is_array()) {
|
||||||
type_obj.at("type").get_to(type);
|
// Handle nullable types like ["string", "null"]
|
||||||
|
for (const auto & t : type_obj) {
|
||||||
|
if (t.is_string() && t.get<std::string>() != "null") {
|
||||||
|
type = t.get<std::string>();
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else if (type_obj.is_object()) {
|
||||||
|
if (type_obj.contains("type") && type_obj.at("type").is_string()) {
|
||||||
|
type_obj.at("type").get_to(type);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Infer string type from enum values when type is unspecified
|
||||||
|
if (type == "object" && param_schema.contains("enum")) {
|
||||||
|
const auto & enum_vals = param_schema.at("enum");
|
||||||
|
if (enum_vals.is_array()) {
|
||||||
|
for (const auto & v : enum_vals) {
|
||||||
|
if (v.is_string()) {
|
||||||
|
type = "string";
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -574,9 +596,33 @@ common_peg_parser analyze_tools::build_tool_parser_tag_gemma4_dict(parser_build_
|
||||||
std::vector<arg_entry> arg_entries;
|
std::vector<arg_entry> arg_entries;
|
||||||
|
|
||||||
for (const auto & [param_name, param_schema] : properties.items()) {
|
for (const auto & [param_name, param_schema] : properties.items()) {
|
||||||
std::string type = "object";
|
std::string type = "object";
|
||||||
auto type_v = param_schema.contains("type") ? param_schema.at("type") : json::object();
|
if (param_schema.contains("type")) {
|
||||||
if (type_v.is_string()) type_v.get_to(type);
|
const auto & type_v = param_schema.at("type");
|
||||||
|
if (type_v.is_string()) {
|
||||||
|
type_v.get_to(type);
|
||||||
|
} else if (type_v.is_array()) {
|
||||||
|
// Handle nullable types like ["string", "null"]
|
||||||
|
for (const auto & t : type_v) {
|
||||||
|
if (t.is_string() && t.get<std::string>() != "null") {
|
||||||
|
type = t.get<std::string>();
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Infer string type from enum values when type is unspecified
|
||||||
|
if (type == "object" && param_schema.contains("enum")) {
|
||||||
|
const auto & enum_vals = param_schema.at("enum");
|
||||||
|
if (enum_vals.is_array()) {
|
||||||
|
for (const auto & v : enum_vals) {
|
||||||
|
if (v.is_string()) {
|
||||||
|
type = "string";
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
common_peg_parser value_parser = p.eps();
|
common_peg_parser value_parser = p.eps();
|
||||||
if (type == "string") {
|
if (type == "string") {
|
||||||
|
|
|
||||||
|
|
@ -1561,7 +1561,23 @@ void common_peg_arena::build_grammar(const common_grammar_builder & builder, boo
|
||||||
if (!s.schema) {
|
if (!s.schema) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
if (s.raw && s.schema->contains("type") && s.schema->at("type").is_string() && s.schema->at("type") == "string") {
|
if (s.raw && s.schema->contains("type")) {
|
||||||
|
const auto & type_val = s.schema->at("type");
|
||||||
|
if (type_val.is_string() && type_val == "string") {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
// Handle nullable types like ["string", "null"] - delegate when the
|
||||||
|
// non-null type is string, since the tagged format uses raw text
|
||||||
|
if (type_val.is_array()) {
|
||||||
|
for (const auto & t : type_val) {
|
||||||
|
if (t.is_string() && t.get<std::string>() != "null") {
|
||||||
|
return t.get<std::string>() == "string";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Delegate for enum schemas in raw mode - enum values are literal strings
|
||||||
|
if (s.raw && !s.schema->contains("type") && s.schema->contains("enum")) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
|
|
|
||||||
|
|
@ -657,6 +657,66 @@ static common_chat_tool imaginary_number_tool{
|
||||||
})",
|
})",
|
||||||
};
|
};
|
||||||
|
|
||||||
|
static common_chat_tool nullable_string_tool{
|
||||||
|
/* .name = */ "set_nullable_str",
|
||||||
|
/* .description = */ "Set a nullable string value",
|
||||||
|
/* .parameters = */ R"({
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"name": {
|
||||||
|
"type": ["string", "null"],
|
||||||
|
"description": "A nullable string"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"required": ["name"]
|
||||||
|
})",
|
||||||
|
};
|
||||||
|
|
||||||
|
static common_chat_tool nullable_string_null_first_tool{
|
||||||
|
/* .name = */ "set_nullable_str_nf",
|
||||||
|
/* .description = */ "Set a nullable string value with null first in type array",
|
||||||
|
/* .parameters = */ R"({
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"name": {
|
||||||
|
"type": ["null", "string"],
|
||||||
|
"description": "A nullable string with null first"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"required": ["name"]
|
||||||
|
})",
|
||||||
|
};
|
||||||
|
|
||||||
|
static common_chat_tool nullable_int_tool{
|
||||||
|
/* .name = */ "set_nullable_int",
|
||||||
|
/* .description = */ "Set a nullable integer value",
|
||||||
|
/* .parameters = */ R"({
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"count": {
|
||||||
|
"type": ["integer", "null"],
|
||||||
|
"description": "A nullable integer"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"required": ["count"]
|
||||||
|
})",
|
||||||
|
};
|
||||||
|
|
||||||
|
static common_chat_tool enum_no_type_tool{
|
||||||
|
/* .name = */ "set_unit",
|
||||||
|
/* .description = */ "Set a temperature unit",
|
||||||
|
/* .parameters = */ R"({
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"unit": {
|
||||||
|
"enum": ["celsius", "fahrenheit"],
|
||||||
|
"description": "Temperature unit"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"required": ["unit"]
|
||||||
|
})",
|
||||||
|
};
|
||||||
|
|
||||||
static common_chat_tool string_param_tool{
|
static common_chat_tool string_param_tool{
|
||||||
/* .name = */ "string_param",
|
/* .name = */ "string_param",
|
||||||
/* .description = */ "Tool with string parameter for testing",
|
/* .description = */ "Tool with string parameter for testing",
|
||||||
|
|
@ -2200,6 +2260,7 @@ static void test_template_output_peg_parsers(bool detailed_debug) {
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
.run();
|
.run();
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
|
|
@ -2383,6 +2444,58 @@ static void test_template_output_peg_parsers(bool detailed_debug) {
|
||||||
})
|
})
|
||||||
.expect_reconstruction()
|
.expect_reconstruction()
|
||||||
.run();
|
.run();
|
||||||
|
|
||||||
|
// nullable string type ["string", "null"]
|
||||||
|
tst.test(
|
||||||
|
"<tool_call>\n"
|
||||||
|
"<function=set_nullable_str>\n"
|
||||||
|
"<parameter=name>\nhello world\n</parameter>\n"
|
||||||
|
"</function>\n"
|
||||||
|
"</tool_call>")
|
||||||
|
.tools({ nullable_string_tool })
|
||||||
|
.expect_tool_calls({
|
||||||
|
{ "set_nullable_str", R"({"name": "hello world"})", {} },
|
||||||
|
})
|
||||||
|
.run();
|
||||||
|
|
||||||
|
// nullable string with null first in type array ["null", "string"]
|
||||||
|
tst.test(
|
||||||
|
"<tool_call>\n"
|
||||||
|
"<function=set_nullable_str_nf>\n"
|
||||||
|
"<parameter=name>\nhello world\n</parameter>\n"
|
||||||
|
"</function>\n"
|
||||||
|
"</tool_call>")
|
||||||
|
.tools({ nullable_string_null_first_tool })
|
||||||
|
.expect_tool_calls({
|
||||||
|
{ "set_nullable_str_nf", R"({"name": "hello world"})", {} },
|
||||||
|
})
|
||||||
|
.run();
|
||||||
|
|
||||||
|
// nullable integer type ["integer", "null"] - should use JSON value path, not string
|
||||||
|
tst.test(
|
||||||
|
"<tool_call>\n"
|
||||||
|
"<function=set_nullable_int>\n"
|
||||||
|
"<parameter=count>\n42\n</parameter>\n"
|
||||||
|
"</function>\n"
|
||||||
|
"</tool_call>")
|
||||||
|
.tools({ nullable_int_tool })
|
||||||
|
.expect_tool_calls({
|
||||||
|
{ "set_nullable_int", R"({"count": 42})", {} },
|
||||||
|
})
|
||||||
|
.run();
|
||||||
|
|
||||||
|
// enum without explicit type key - should infer string from enum values
|
||||||
|
tst.test(
|
||||||
|
"<tool_call>\n"
|
||||||
|
"<function=set_unit>\n"
|
||||||
|
"<parameter=unit>\ncelsius\n</parameter>\n"
|
||||||
|
"</function>\n"
|
||||||
|
"</tool_call>")
|
||||||
|
.tools({ enum_no_type_tool })
|
||||||
|
.expect_tool_calls({
|
||||||
|
{ "set_unit", R"({"unit": "celsius"})", {} },
|
||||||
|
})
|
||||||
|
.run();
|
||||||
}
|
}
|
||||||
{
|
{
|
||||||
auto tst = peg_tester("models/templates/deepseek-ai-DeepSeek-V3.1.jinja", detailed_debug);
|
auto tst = peg_tester("models/templates/deepseek-ai-DeepSeek-V3.1.jinja", detailed_debug);
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue