diff --git a/common/chat.cpp b/common/chat.cpp index 8abb6ba5f1..d87cb682c5 100644 --- a/common/chat.cpp +++ b/common/chat.cpp @@ -3141,16 +3141,15 @@ static common_chat_params common_chat_templates_apply_jinja( // Qwen3-Coder XML format detection (must come before Hermes 2 Pro) // Detect via XML markers: , , and blocks. - // Also matches Step-3.5-Flash which uses the same output format. + // Also matches Step-3.5-Flash and Nemotron 3 Nano which use the same output format. if (src.find("") != std::string::npos && src.find("") != std::string::npos && - src.find("") != std::string::npos && - src.find("") != std::string::npos) { + // Models with support (Step-3.5-Flash, Nemotron 3 Nano) use the + // Nemotron v3 PEG parser for streaming and schema-aware parameter parsing. + // Qwen3-Coder has no in its template. + if (src.find("") != std::string::npos) { return common_chat_params_init_nemotron_v3(tmpl, params); } return common_chat_params_init_qwen3_coder_xml(tmpl, params); diff --git a/tests/test-chat.cpp b/tests/test-chat.cpp index 354b74c8ee..1bf6e4d9ec 100644 --- a/tests/test-chat.cpp +++ b/tests/test-chat.cpp @@ -3555,9 +3555,26 @@ Hey there!<|im_end|> } { - // Step-3.5-Flash template (uses same XML format as Qwen3-Coder but lacks and markers) + // Step-3.5-Flash template: uses same XML output format as Qwen3-Coder and Nemotron v3, + // but with support. Routes to the Nemotron v3 PEG parser for streaming and + // schema-aware parameter parsing. auto tmpls = read_templates("models/templates/stepfun-ai-Step-3.5-Flash.jinja"); - assert_equals(COMMON_CHAT_FORMAT_QWEN3_CODER_XML, common_chat_templates_apply(tmpls.get(), inputs_tools).format); + assert_equals(COMMON_CHAT_FORMAT_PEG_CONSTRUCTED, common_chat_templates_apply(tmpls.get(), inputs_tools).format); + + // Grammar and PEG parser should be generated with thinking_forced_open + { + common_chat_templates_inputs inputs; + inputs.messages = { message_user }; + inputs.tools = { special_function_tool }; + inputs.enable_thinking = true; + auto params = common_chat_templates_apply(tmpls.get(), inputs); + assert_equals(COMMON_CHAT_FORMAT_PEG_CONSTRUCTED, params.format); + assert_equals(true, params.thinking_forced_open); + assert_equals(false, params.grammar.empty()); + assert_equals(false, params.parser.empty()); + auto grammar = build_grammar(params.grammar); + GGML_ASSERT(grammar && "Failed to build Step-3.5-Flash grammar with thinking_forced_open"); + } } } @@ -3805,6 +3822,196 @@ static void test_template_output_peg_parsers() { }); } + { + // Step-3.5-Flash (uses Nemotron v3 PEG parser with thinking_forced_open) + // Unlike Nemotron, Step-3.5-Flash always emits regardless of enable_thinking, + // so all inputs must include a delimiter. + auto tmpls = read_templates("models/templates/stepfun-ai-Step-3.5-Flash.jinja"); + + // Test basic message with reasoning + test_peg_parser(tmpls.get(), [&](auto & t) { + t.input = "I'm\nthinking\n\nHello, world!\nWhat's up?"; + t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO; + + t.expect = message_assist_thoughts; + }); + + // Test basic message without thinking content + test_peg_parser(tmpls.get(), [&](auto & t) { + t.input = "\nHello, world!\nWhat's up?"; + t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO; + + t.expect = message_assist; + }); + + // Test tool call without thinking content + test_peg_parser(tmpls.get(), [&](auto & t) { + t.input = + "\n" + "\n" + "\n" + "\n" + "1\n" + "\n" + "\n" + ""; + t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO; + t.params.tools = {special_function_tool}; + + t.expect = message_assist_call; + }); + + // Test tool call with thinking + test_peg_parser(tmpls.get(), [&](auto & t) { + t.input = + "I'm\nthinking\n\n" + "\n" + "\n" + "\n" + "1\n" + "\n" + "\n" + ""; + t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO; + t.params.tools = {special_function_tool}; + + t.expect = message_assist_call_thoughts; + }); + + // Test parallel tool calls with thinking + test_peg_parser(tmpls.get(), [&](auto & t) { + t.input = + "I'm\nthinking\n\n" + "\n" + "\n" + "\n" + "1\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "1\n" + "\n" + "\n" + "2\n" + "\n" + "\n" + ""; + t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO; + t.params.parallel_tool_calls = true; + t.params.tools = {special_function_tool, special_function_tool_with_optional_param}; + + t.expect.reasoning_content = "I'm\nthinking"; + t.expect.tool_calls = {{ + /* .name = */ "special_function", + /* .arguments = */ R"({"arg1": 1})", + /* .id = */ {}, + }, { + /* .name = */ "special_function_with_opt", + /* .arguments = */ R"({"arg1": 1, "arg2": 2})", + /* .id = */ {}, + }}; + }); + + // Test parallel tool calls without thinking content + test_peg_parser(tmpls.get(), [&](auto & t) { + t.input = + "\n" + "\n" + "\n" + "\n" + "1\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "1\n" + "\n" + "\n" + "2\n" + "\n" + "\n" + ""; + t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO; + t.params.parallel_tool_calls = true; + t.params.tools = {special_function_tool, special_function_tool_with_optional_param}; + + t.expect.tool_calls = {{ + /* .name = */ "special_function", + /* .arguments = */ R"({"arg1": 1})", + /* .id = */ {}, + }, { + /* .name = */ "special_function_with_opt", + /* .arguments = */ R"({"arg1": 1, "arg2": 2})", + /* .id = */ {}, + }}; + }); + + // Test tool call with code string parameter + test_peg_parser(tmpls.get(), [&](auto & t) { + t.input = + "\n" + "\n" + "\n" + "\n" + "def hello():\n" + " print(\"Hello, world!\")\n" + "\n" + "hello()\n" + "\n" + "\n" + ""; + t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO; + t.params.tools = {python_tool}; + + t.expect.tool_calls = {{ + /* .name = */ "python", + /* .arguments = */ "{\"code\": \"def hello():\\n print(\\\"Hello, world!\\\")\\n\\nhello()\"}", + /* .id = */ {}, + }}; + }); + + // Test tool call with string parameter and no closing tag + test_peg_parser(tmpls.get(), [&](auto & t) { + t.input = + "\n" + "\n" + "\n" + "\n" + "def hello():\n" + " print(\"Hello, world!\")\n" + "\n" + "hello()\n" + "\n" + ""; + t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO; + t.params.tools = {python_tool}; + + t.expect.tool_calls = {{ + /* .name = */ "python", + /* .arguments = */ "{\"code\": \"def hello():\\n print(\\\"Hello, world!\\\")\\n\\nhello()\"}", + /* .id = */ {}, + }}; + }); + + // Test response format (JSON schema with thinking) + test_peg_parser(tmpls.get(), [&](auto & t) { + t.input = + "I need to output the invoice details in JSON\n" + "\n" + R"({"amount": 123.45, "date": "2025-12-03"})"; + t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO; + t.params.json_schema = invoice_schema; + + t.expect.reasoning_content = "I need to output the invoice details in JSON"; + t.expect.content = R"({"amount": 123.45, "date": "2025-12-03"})"; + }); + } + { // Solar-Open-100B auto tmpls = read_templates("models/templates/upstage-Solar-Open-100B.jinja");