diff --git a/common/chat-auto-parser-generator.cpp b/common/chat-auto-parser-generator.cpp index bf44091d67..3f036bb5b2 100644 --- a/common/chat-auto-parser-generator.cpp +++ b/common/chat-auto-parser-generator.cpp @@ -65,7 +65,7 @@ common_chat_params peg_generator::generate_parser(const common_chat_template & data.grammar = build_grammar([&](const common_grammar_builder & builder) { foreach_function(inputs.tools, [&](const json & tool) { const auto & function = tool.at("function"); - auto schema = function.at("parameters"); + auto schema = function.contains("parameters") ? function.at("parameters") : json::object(); builder.resolve_refs(schema); }); parser.build_grammar(builder, data.grammar_lazy); @@ -221,7 +221,7 @@ common_peg_parser analyze_tools::build_tool_parser_tag_json(parser_build_context foreach_function(inputs.tools, [&](const json & tool) { const auto & func = tool.at("function"); std::string name = func.at("name"); - const auto & schema = func.at("parameters"); + const auto & schema = func.contains("parameters") ? func.at("parameters") : json::object(); // Build call_id parser based on position (if supported) common_peg_parser call_id_section = p.eps(); @@ -282,19 +282,11 @@ common_peg_parser analyze_tools::build_tool_parser_tag_tagged(parser_build_conte common_peg_parser tool_choice = p.choice(); foreach_function(inputs.tools, [&](const json & tool) { - const auto & func = tool.at("function"); - std::string name = func.at("name"); - const auto & params = func.at("parameters"); - - if (!params.contains("properties") || !params.at("properties").is_object()) { - return; - } - - const auto & properties = params.at("properties"); + const auto & func = tool.at("function"); + std::string name = func.at("name"); + const auto & params = func.contains("parameters") ? func.at("parameters") : json::object(); + const auto & properties = params.contains("properties") ? params.at("properties") : json::object(); std::set required; - if (params.contains("required") && params.at("required").is_array()) { - params.at("required").get_to(required); - } // Build parser for each argument, separating required and optional std::vector required_parsers; @@ -311,17 +303,18 @@ common_peg_parser analyze_tools::build_tool_parser_tag_tagged(parser_build_conte } } - auto arg = p.tool_arg( - p.tool_arg_open(arguments.name_prefix + p.tool_arg_name(p.literal(param_name)) + - arguments.name_suffix) + - arguments.value_prefix + - (type == "string" ? p.tool_arg_string_value(p.schema(p.until(arguments.value_suffix), - "tool-" + name + "-arg-" + param_name + "-schema", - param_schema, true)) : - p.tool_arg_json_value(p.schema( - p.json(), "tool-" + name + "-arg-" + param_name + "-schema", param_schema, false)) + - p.space()) + - p.tool_arg_close(p.literal(arguments.value_suffix))); + auto arg = + p.tool_arg(p.tool_arg_open(arguments.name_prefix + p.tool_arg_name(p.literal(param_name)) + + arguments.name_suffix) + + arguments.value_prefix + + (type == "string" ? + p.tool_arg_string_value(p.schema(p.until(arguments.value_suffix), + "tool-" + name + "-arg-" + param_name + "-schema", + param_schema, true)) : + p.tool_arg_json_value(p.schema( + p.json(), "tool-" + name + "-arg-" + param_name + "-schema", param_schema, false)) + + p.space()) + + p.tool_arg_close(p.literal(arguments.value_suffix))); auto named_arg = p.rule("tool-" + name + "-arg-" + param_name, arg); if (is_required) { diff --git a/models/templates/Qwen3.5-4B.jinja b/models/templates/Qwen3.5-4B.jinja new file mode 100644 index 0000000000..a585dec894 --- /dev/null +++ b/models/templates/Qwen3.5-4B.jinja @@ -0,0 +1,154 @@ +{%- set image_count = namespace(value=0) %} +{%- set video_count = namespace(value=0) %} +{%- macro render_content(content, do_vision_count, is_system_content=false) %} + {%- if content is string %} + {{- content }} + {%- elif content is iterable and content is not mapping %} + {%- for item in content %} + {%- if 'image' in item or 'image_url' in item or item.type == 'image' %} + {%- if is_system_content %} + {{- raise_exception('System message cannot contain images.') }} + {%- endif %} + {%- if do_vision_count %} + {%- set image_count.value = image_count.value + 1 %} + {%- endif %} + {%- if add_vision_id %} + {{- 'Picture ' ~ image_count.value ~ ': ' }} + {%- endif %} + {{- '<|vision_start|><|image_pad|><|vision_end|>' }} + {%- elif 'video' in item or item.type == 'video' %} + {%- if is_system_content %} + {{- raise_exception('System message cannot contain videos.') }} + {%- endif %} + {%- if do_vision_count %} + {%- set video_count.value = video_count.value + 1 %} + {%- endif %} + {%- if add_vision_id %} + {{- 'Video ' ~ video_count.value ~ ': ' }} + {%- endif %} + {{- '<|vision_start|><|video_pad|><|vision_end|>' }} + {%- elif 'text' in item %} + {{- item.text }} + {%- else %} + {{- raise_exception('Unexpected item type in content.') }} + {%- endif %} + {%- endfor %} + {%- elif content is none or content is undefined %} + {{- '' }} + {%- else %} + {{- raise_exception('Unexpected content type.') }} + {%- endif %} +{%- endmacro %} +{%- if not messages %} + {{- raise_exception('No messages provided.') }} +{%- endif %} +{%- if tools and tools is iterable and tools is not mapping %} + {{- '<|im_start|>system\n' }} + {{- "# Tools\n\nYou have access to the following functions:\n\n" }} + {%- for tool in tools %} + {{- "\n" }} + {{- tool | tojson }} + {%- endfor %} + {{- "\n" }} + {{- '\n\nIf you choose to call a function ONLY reply in the following format with NO suffix:\n\n\n\n\nvalue_1\n\n\nThis is the value for the second parameter\nthat can span\nmultiple lines\n\n\n\n\n\nReminder:\n- Function calls MUST follow the specified format: an inner block must be nested within XML tags\n- Required parameters MUST be specified\n- You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after\n- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls\n' }} + {%- if messages[0].role == 'system' %} + {%- set content = render_content(messages[0].content, false, true)|trim %} + {%- if content %} + {{- '\n\n' + content }} + {%- endif %} + {%- endif %} + {{- '<|im_end|>\n' }} +{%- else %} + {%- if messages[0].role == 'system' %} + {%- set content = render_content(messages[0].content, false, true)|trim %} + {{- '<|im_start|>system\n' + content + '<|im_end|>\n' }} + {%- endif %} +{%- endif %} +{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %} +{%- for message in messages[::-1] %} + {%- set index = (messages|length - 1) - loop.index0 %} + {%- if ns.multi_step_tool and message.role == "user" %} + {%- set content = render_content(message.content, false)|trim %} + {%- if not(content.startswith('') and content.endswith('')) %} + {%- set ns.multi_step_tool = false %} + {%- set ns.last_query_index = index %} + {%- endif %} + {%- endif %} +{%- endfor %} +{%- if ns.multi_step_tool %} + {{- raise_exception('No user query found in messages.') }} +{%- endif %} +{%- for message in messages %} + {%- set content = render_content(message.content, true)|trim %} + {%- if message.role == "system" %} + {%- if not loop.first %} + {{- raise_exception('System message must be at the beginning.') }} + {%- endif %} + {%- elif message.role == "user" %} + {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }} + {%- elif message.role == "assistant" %} + {%- set reasoning_content = '' %} + {%- if message.reasoning_content is string %} + {%- set reasoning_content = message.reasoning_content %} + {%- else %} + {%- if '' in content %} + {%- set reasoning_content = content.split('')[0].rstrip('\n').split('')[-1].lstrip('\n') %} + {%- set content = content.split('')[-1].lstrip('\n') %} + {%- endif %} + {%- endif %} + {%- set reasoning_content = reasoning_content|trim %} + {%- if loop.index0 > ns.last_query_index %} + {{- '<|im_start|>' + message.role + '\n\n' + reasoning_content + '\n\n\n' + content }} + {%- else %} + {{- '<|im_start|>' + message.role + '\n' + content }} + {%- endif %} + {%- if message.tool_calls and message.tool_calls is iterable and message.tool_calls is not mapping %} + {%- for tool_call in message.tool_calls %} + {%- if tool_call.function is defined %} + {%- set tool_call = tool_call.function %} + {%- endif %} + {%- if loop.first %} + {%- if content|trim %} + {{- '\n\n\n\n' }} + {%- else %} + {{- '\n\n' }} + {%- endif %} + {%- else %} + {{- '\n\n\n' }} + {%- endif %} + {%- if tool_call.arguments is defined %} + {%- for args_name, args_value in tool_call.arguments|items %} + {{- '\n' }} + {%- set args_value = args_value | tojson | safe if args_value is mapping or (args_value is sequence and args_value is not string) else args_value | string %} + {{- args_value }} + {{- '\n\n' }} + {%- endfor %} + {%- endif %} + {{- '\n' }} + {%- endfor %} + {%- endif %} + {{- '<|im_end|>\n' }} + {%- elif message.role == "tool" %} + {%- if loop.previtem and loop.previtem.role != "tool" %} + {{- '<|im_start|>user' }} + {%- endif %} + {{- '\n\n' }} + {{- content }} + {{- '\n' }} + {%- if not loop.last and loop.nextitem.role != "tool" %} + {{- '<|im_end|>\n' }} + {%- elif loop.last %} + {{- '<|im_end|>\n' }} + {%- endif %} + {%- else %} + {{- raise_exception('Unexpected message role.') }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|im_start|>assistant\n' }} + {%- if enable_thinking is defined and enable_thinking is false %} + {{- '\n\n\n\n' }} + {%- else %} + {{- '\n' }} + {%- endif %} +{%- endif %} \ No newline at end of file diff --git a/tests/test-chat.cpp b/tests/test-chat.cpp index de9104352c..6e11252e12 100644 --- a/tests/test-chat.cpp +++ b/tests/test-chat.cpp @@ -425,6 +425,7 @@ static common_chat_tool special_function_tool_with_optional_param{ "required": ["arg1"] })", }; + static common_chat_tool empty_args_tool{ /* .name = */ "empty_args", /* .description = */ "A tool that takes no arguments", @@ -433,6 +434,15 @@ static common_chat_tool empty_args_tool{ "properties": {} })", }; + +static common_chat_tool empty_args_tool_no_properties{ + /* .name = */ "empty_args_no_props", + /* .description = */ "A tool that takes no arguments and has no properties", + /* .parameters = */ R"({ + "type": "object" + })", +}; + static common_chat_tool python_tool{ /* .name = */ "python", /* .description = */ "an ipython interpreter", @@ -1410,6 +1420,176 @@ static void test_template_output_peg_parsers(bool detailed_debug) { } })"; + { + // Qwen3.5 (basically same as Nemotron, but keeping separate tests just in case) + auto tst = peg_tester("models/templates/Qwen3.5-4B.jinja", detailed_debug); + + tst.test("I'm\nthinkingHello, world!\nWhat's up?") + .reasoning_format(COMMON_REASONING_FORMAT_AUTO) + .enable_thinking(true) + .expect(message_assist_thoughts) + .run(); + + tst.test("I'm\nthinking\n\nHello, world!\nWhat's up?") + .enable_thinking(true) + .reasoning_format(COMMON_REASONING_FORMAT_NONE) + .expect_content("\nI'm\nthinking\n\nHello, world!\nWhat's up?") + .run(); + + tst.test("I'm\nthinking\n\nHello, world!\nWhat's up?") + .enable_thinking(true) + .reasoning_format(COMMON_REASONING_FORMAT_AUTO) + .expect(message_assist_thoughts) + .run(); + + tst.test( + "\n" + "\n" + "\n1\n\n" + "\n" + "") + .enable_thinking(false) + .reasoning_format(COMMON_REASONING_FORMAT_AUTO) + .tools({ special_function_tool }) + .expect(message_assist_call) + .run(); + + tst.test( + "I'm\nthinking\n\n" + "\n" + "\n" + "\n1\n\n" + "\n" + "") + .reasoning_format(COMMON_REASONING_FORMAT_AUTO) + .tools({ special_function_tool }) + .expect(message_assist_call_thoughts) + .run(); + + tst.test( + "\n" + "\n" + "\n1\n\n" + "\n" + "\n" + "\n" + "\n" + "\n1\n\n" + "\n2\n\n" + "\n" + "") + .enable_thinking(false) + .reasoning_format(COMMON_REASONING_FORMAT_AUTO) + .parallel_tool_calls(true) + .tools({ + special_function_tool, special_function_tool_with_optional_param + }) + .expect_tool_calls({ + { "special_function", R"({"arg1": 1})", {} }, + { "special_function_with_opt", R"({"arg1": 1, "arg2": 2})", {} }, + }) + .run(); + + tst.test( + "\n" + "\n" + "\n" + "def hello():\n" + " print(\"Hello, world!\")\n" + "\n" + "hello()\n" + "\n" + "\n" + "") + .enable_thinking(false) + .reasoning_format(COMMON_REASONING_FORMAT_AUTO) + .tools({ + python_tool + }) + .expect_tool_calls({ + { "python", "{\"code\": \"def hello():\\n print(\\\"Hello, world!\\\")\\n\\nhello()\"}", {} }, + }) + .run(); + + tst.test( + "I need to output the invoice details in JSON\n" + "\n" + R"({"amount": 123.45, "date": "2025-12-03"})") + .reasoning_format(COMMON_REASONING_FORMAT_AUTO) + .enable_thinking(true) + .json_schema(invoice_schema) + .expect_reasoning("I need to output the invoice details in JSON") + .expect_content(R"({"amount": 123.45, "date": "2025-12-03"})") + .run(); + + // tool call segment in reasoning + tst.test( + "Let's call a tool: \n" + "\n" + "\n" + "def hello():\n" + " print(\"Not the real call!\")\n" + "\n" + "hello()\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "def hello():\n" + " print(\"Hello, world!\")\n" + "\n" + "hello()\n" + "\n" + "\n" + "" + ) + .enable_thinking(true) + .reasoning_format(COMMON_REASONING_FORMAT_AUTO) + .tools({ + python_tool + }) + .expect_reasoning("Let's call a tool: \n" + "\n" + "\n" + "def hello():\n" + " print(\"Not the real call!\")\n" + "\n" + "hello()\n" + "\n" + "\n" + "") + .expect_tool_calls({ + { "python", "{\"code\": \"def hello():\\n print(\\\"Hello, world!\\\")\\n\\nhello()\"}", {} }, + }) + .run(); + + // No args tool + tst.test( + "\n" + "\n" + "\n" + "") + .enable_thinking(false) + .reasoning_format(COMMON_REASONING_FORMAT_AUTO) + .tools({ empty_args_tool }) + .expect(message_with_tool_calls("empty_args", "{}")) + .run(); + + // No args tool with no properties defined + tst.test( + "\n" + "\n" + "\n" + "") + .enable_thinking(false) + .reasoning_format(COMMON_REASONING_FORMAT_AUTO) + .tools({ empty_args_tool_no_properties }) + .expect(message_with_tool_calls("empty_args_no_props", "{}")) + .run(); + } + { // Ministral-3-14B-Reasoning-2512 auto tst = peg_tester("models/templates/mistralai-Ministral-3-14B-Reasoning-2512.jinja", detailed_debug);