From af5c13841fb9c2a708af505105cc611e0cc9db51 Mon Sep 17 00:00:00 2001 From: Samanvya Tripathi Date: Fri, 3 Apr 2026 11:51:23 -0400 Subject: [PATCH] common : fix tool call type detection for nullable and enum schemas (#21327) * common : fix tool call type detection for nullable and enum schemas * common, tests : fix grammar delegation for nullable/enum schemas and add tests Fix enum type inference to scan all enum values (not just index 0) so schemas like {"enum": [0, "celsius"]} correctly detect string type. Fix schema_delegates in peg-parser to handle nullable type arrays (["string", "null"]) and typeless enum schemas in raw mode, allowing the tagged parser to use raw text instead of JSON-formatted strings. Add test cases for Qwen3-Coder (TAG_WITH_TAGGED format): - nullable string ["string", "null"] - nullable string with null first ["null", "string"] - nullable integer ["integer", "null"] - enum without explicit type key --- common/chat-auto-parser-generator.cpp | 64 +++++++++++++-- common/peg-parser.cpp | 18 +++- tests/test-chat.cpp | 113 ++++++++++++++++++++++++++ 3 files changed, 185 insertions(+), 10 deletions(-) diff --git a/common/chat-auto-parser-generator.cpp b/common/chat-auto-parser-generator.cpp index 60b269c42d..efa251b99b 100644 --- a/common/chat-auto-parser-generator.cpp +++ b/common/chat-auto-parser-generator.cpp @@ -400,12 +400,34 @@ common_peg_parser analyze_tools::build_tool_parser_tag_tagged(parser_build_conte for (const auto & [param_name, param_schema] : properties.items()) { bool is_required = required.find(param_name) != required.end(); std::string type = "object"; - auto type_obj = param_schema.contains("type") ? param_schema.at("type") : json::object(); - if (type_obj.is_string()) { - type_obj.get_to(type); - } else if (type_obj.is_object()) { - if (type_obj.contains("type") && type_obj.at("type").is_string()) { - type_obj.at("type").get_to(type); + if (param_schema.contains("type")) { + const auto & type_obj = param_schema.at("type"); + if (type_obj.is_string()) { + type_obj.get_to(type); + } else if (type_obj.is_array()) { + // Handle nullable types like ["string", "null"] + for (const auto & t : type_obj) { + if (t.is_string() && t.get() != "null") { + type = t.get(); + break; + } + } + } else if (type_obj.is_object()) { + if (type_obj.contains("type") && type_obj.at("type").is_string()) { + type_obj.at("type").get_to(type); + } + } + } + // Infer string type from enum values when type is unspecified + if (type == "object" && param_schema.contains("enum")) { + const auto & enum_vals = param_schema.at("enum"); + if (enum_vals.is_array()) { + for (const auto & v : enum_vals) { + if (v.is_string()) { + type = "string"; + break; + } + } } } @@ -574,9 +596,33 @@ common_peg_parser analyze_tools::build_tool_parser_tag_gemma4_dict(parser_build_ std::vector arg_entries; for (const auto & [param_name, param_schema] : properties.items()) { - std::string type = "object"; - auto type_v = param_schema.contains("type") ? param_schema.at("type") : json::object(); - if (type_v.is_string()) type_v.get_to(type); + std::string type = "object"; + if (param_schema.contains("type")) { + const auto & type_v = param_schema.at("type"); + if (type_v.is_string()) { + type_v.get_to(type); + } else if (type_v.is_array()) { + // Handle nullable types like ["string", "null"] + for (const auto & t : type_v) { + if (t.is_string() && t.get() != "null") { + type = t.get(); + break; + } + } + } + } + // Infer string type from enum values when type is unspecified + if (type == "object" && param_schema.contains("enum")) { + const auto & enum_vals = param_schema.at("enum"); + if (enum_vals.is_array()) { + for (const auto & v : enum_vals) { + if (v.is_string()) { + type = "string"; + break; + } + } + } + } common_peg_parser value_parser = p.eps(); if (type == "string") { diff --git a/common/peg-parser.cpp b/common/peg-parser.cpp index 694f9b850a..86faacd61f 100644 --- a/common/peg-parser.cpp +++ b/common/peg-parser.cpp @@ -1561,7 +1561,23 @@ void common_peg_arena::build_grammar(const common_grammar_builder & builder, boo if (!s.schema) { return true; } - if (s.raw && s.schema->contains("type") && s.schema->at("type").is_string() && s.schema->at("type") == "string") { + if (s.raw && s.schema->contains("type")) { + const auto & type_val = s.schema->at("type"); + if (type_val.is_string() && type_val == "string") { + return true; + } + // Handle nullable types like ["string", "null"] - delegate when the + // non-null type is string, since the tagged format uses raw text + if (type_val.is_array()) { + for (const auto & t : type_val) { + if (t.is_string() && t.get() != "null") { + return t.get() == "string"; + } + } + } + } + // Delegate for enum schemas in raw mode - enum values are literal strings + if (s.raw && !s.schema->contains("type") && s.schema->contains("enum")) { return true; } return false; diff --git a/tests/test-chat.cpp b/tests/test-chat.cpp index 34d50124c4..8605a76784 100644 --- a/tests/test-chat.cpp +++ b/tests/test-chat.cpp @@ -657,6 +657,66 @@ static common_chat_tool imaginary_number_tool{ })", }; +static common_chat_tool nullable_string_tool{ + /* .name = */ "set_nullable_str", + /* .description = */ "Set a nullable string value", + /* .parameters = */ R"({ + "type": "object", + "properties": { + "name": { + "type": ["string", "null"], + "description": "A nullable string" + } + }, + "required": ["name"] + })", +}; + +static common_chat_tool nullable_string_null_first_tool{ + /* .name = */ "set_nullable_str_nf", + /* .description = */ "Set a nullable string value with null first in type array", + /* .parameters = */ R"({ + "type": "object", + "properties": { + "name": { + "type": ["null", "string"], + "description": "A nullable string with null first" + } + }, + "required": ["name"] + })", +}; + +static common_chat_tool nullable_int_tool{ + /* .name = */ "set_nullable_int", + /* .description = */ "Set a nullable integer value", + /* .parameters = */ R"({ + "type": "object", + "properties": { + "count": { + "type": ["integer", "null"], + "description": "A nullable integer" + } + }, + "required": ["count"] + })", +}; + +static common_chat_tool enum_no_type_tool{ + /* .name = */ "set_unit", + /* .description = */ "Set a temperature unit", + /* .parameters = */ R"({ + "type": "object", + "properties": { + "unit": { + "enum": ["celsius", "fahrenheit"], + "description": "Temperature unit" + } + }, + "required": ["unit"] + })", +}; + static common_chat_tool string_param_tool{ /* .name = */ "string_param", /* .description = */ "Tool with string parameter for testing", @@ -2200,6 +2260,7 @@ static void test_template_output_peg_parsers(bool detailed_debug) { } }) .run(); + } { @@ -2383,6 +2444,58 @@ static void test_template_output_peg_parsers(bool detailed_debug) { }) .expect_reconstruction() .run(); + + // nullable string type ["string", "null"] + tst.test( + "\n" + "\n" + "\nhello world\n\n" + "\n" + "") + .tools({ nullable_string_tool }) + .expect_tool_calls({ + { "set_nullable_str", R"({"name": "hello world"})", {} }, + }) + .run(); + + // nullable string with null first in type array ["null", "string"] + tst.test( + "\n" + "\n" + "\nhello world\n\n" + "\n" + "") + .tools({ nullable_string_null_first_tool }) + .expect_tool_calls({ + { "set_nullable_str_nf", R"({"name": "hello world"})", {} }, + }) + .run(); + + // nullable integer type ["integer", "null"] - should use JSON value path, not string + tst.test( + "\n" + "\n" + "\n42\n\n" + "\n" + "") + .tools({ nullable_int_tool }) + .expect_tool_calls({ + { "set_nullable_int", R"({"count": 42})", {} }, + }) + .run(); + + // enum without explicit type key - should infer string from enum values + tst.test( + "\n" + "\n" + "\ncelsius\n\n" + "\n" + "") + .tools({ enum_no_type_tool }) + .expect_tool_calls({ + { "set_unit", R"({"unit": "celsius"})", {} }, + }) + .run(); } { auto tst = peg_tester("models/templates/deepseek-ai-DeepSeek-V3.1.jinja", detailed_debug);