diff --git a/common/chat.cpp b/common/chat.cpp index 47a34d5822..04fe8bc070 100644 --- a/common/chat.cpp +++ b/common/chat.cpp @@ -3129,15 +3129,15 @@ static common_chat_params common_chat_templates_apply_jinja( } // Qwen3-Coder XML format detection (must come before Hermes 2 Pro) - // Detect via explicit XML markers unique to Qwen3-Coder to avoid false positives in other templates. - // Require presence of , , and blocks. + // Detect via XML markers: , , and blocks. + // Also matches Step-3.5-Flash and Nemotron 3 Nano which use the same output format. if (src.find("") != std::string::npos && - src.find("") != std::string::npos && src.find("") != std::string::npos && src.find(" support (Step-3.5-Flash, Nemotron 3 Nano) use the + // Nemotron v3 PEG parser for streaming and schema-aware parameter parsing. + // Qwen3-Coder has no in its template. if (src.find("") != std::string::npos) { return common_chat_params_init_nemotron_v3(tmpl, params); } diff --git a/models/templates/stepfun-ai-Step-3.5-Flash.jinja b/models/templates/stepfun-ai-Step-3.5-Flash.jinja new file mode 100644 index 0000000000..c09ea497da --- /dev/null +++ b/models/templates/stepfun-ai-Step-3.5-Flash.jinja @@ -0,0 +1,80 @@ +{% macro render_content(content) %}{% if content is none %}{{- '' }}{% elif content is string %}{{- content }}{% elif content is mapping %}{{- content['value'] if 'value' in content else content['text'] }}{% elif content is iterable %}{% for item in content %}{% if item.type == 'text' %}{{- item['value'] if 'value' in item else item['text'] }}{% elif item.type == 'image' %}{% endif %}{% endfor %}{% endif %}{% endmacro %} +{{bos_token}}{%- if tools %} + {{- '<|im_start|>system\n' }} + {%- if messages[0].role == 'system' %} + {{- render_content(messages[0].content) + '\n\n' }} + {%- endif %} + {{- "# Tools\n\nYou have access to the following functions in JSONSchema format:\n\n" }} + {%- for tool in tools %} + {{- "\n" }} + {{- tool | tojson(ensure_ascii=False) }} + {%- endfor %} + {{- "\n\n\nIf you choose to call a function ONLY reply in the following format with NO suffix:\n\n\n\n\nvalue_1\n\n\nThis is the value for the second parameter\nthat can span\nmultiple lines\n\n\n\n\n\nReminder:\n- Function calls MUST follow the specified format: an inner \n...\n block must be nested within \n...\n XML tags\n- Required parameters MUST be specified\n<|im_end|>\n" }} +{%- else %} + {%- if messages[0].role == 'system' %} + {{- '<|im_start|>system\n' + render_content(messages[0].content) + '<|im_end|>\n' }} + {%- endif %} +{%- endif %} +{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %} +{%- for message in messages[::-1] %} + {%- set index = (messages|length - 1) - loop.index0 %} + {%- if ns.multi_step_tool and message.role == "user" and render_content(message.content) is string and not(render_content(message.content).startswith('') and render_content(message.content).endswith('')) %} + {%- set ns.multi_step_tool = false %} + {%- set ns.last_query_index = index %} + {%- endif %} +{%- endfor %} +{%- for message in messages %} + {%- set content = render_content(message.content) %} + {%- if (message.role == "user") or (message.role == "system" and not loop.first) %} + {%- set role_name = 'observation' if (message.role == "system" and not loop.first and message.name == 'observation') else message.role %} + {{- '<|im_start|>' + role_name + '\n' + content + '<|im_end|>' + '\n' }} + {%- elif message.role == "assistant" %} + {%- if message.reasoning_content is string %} + {%- set reasoning_content = render_content(message.reasoning_content) %} + {%- else %} + {%- if '' in content %} + {%- set reasoning_content = content.split('')[0].rstrip('\n').split('')[-1].lstrip('\n') %} + {%- set content = content.split('')[-1].lstrip('\n') %} + {%- else %} + {%- set reasoning_content = '' %} + {%- endif %} + {%- endif %} + {%- if loop.index0 > ns.last_query_index %} + {{- '<|im_start|>' + message.role + '\n\n' + reasoning_content + '\n\n' + content }} + {%- else %} + {{- '<|im_start|>' + message.role + '\n' + content }} + {%- endif %} + {%- if message.tool_calls %} + {%- for tool_call in message.tool_calls %} + {%- if tool_call.function is defined %} + {%- set tool_call = tool_call.function %} + {%- endif %} + {{- '\n\n' }} + {%- if tool_call.arguments is defined %} + {%- set arguments = tool_call.arguments %} + {%- for args_name, args_value in arguments|items %} + {{- '\n' }} + {%- set args_value = args_value | tojson(ensure_ascii=False) | safe if args_value is mapping or (args_value is sequence and args_value is not string) else args_value | string %} + {{- args_value }} + {{- '\n\n' }} + {%- endfor %} + {%- endif %} + {{- '\n' }} + {%- endfor %} + {%- endif %} + {{- '<|im_end|>\n' }} + {%- elif message.role == "tool" %} + {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %} + {{- '<|im_start|>tool_response\n' }} + {%- endif %} + {{- '' }} + {{- content }} + {{- '' }} + {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %} + {{- '<|im_end|>\n' }} + {%- endif %} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|im_start|>assistant\n\n' }} +{%- endif %} diff --git a/tests/test-chat.cpp b/tests/test-chat.cpp index 4378a8db71..1bef5b9f44 100644 --- a/tests/test-chat.cpp +++ b/tests/test-chat.cpp @@ -3553,6 +3553,28 @@ Hey there!<|im_end|> auto grammar = build_grammar(params.grammar); GGML_ASSERT(grammar && "Failed to build Qwen3-Coder grammar with union types"); } + + { + // Step-3.5-Flash template: uses same XML output format as Qwen3-Coder and Nemotron v3, + // but with support. Routes to the Nemotron v3 PEG parser for streaming and + // schema-aware parameter parsing. + auto tmpls = read_templates("models/templates/stepfun-ai-Step-3.5-Flash.jinja"); + assert_equals(COMMON_CHAT_FORMAT_PEG_CONSTRUCTED, common_chat_templates_apply(tmpls.get(), inputs_tools).format); + + // Grammar and PEG parser should be generated with thinking_forced_open + { + common_chat_templates_inputs inputs; + inputs.messages = { message_user }; + inputs.tools = { special_function_tool }; + auto params = common_chat_templates_apply(tmpls.get(), inputs); + assert_equals(COMMON_CHAT_FORMAT_PEG_CONSTRUCTED, params.format); + assert_equals(true, params.thinking_forced_open); + assert_equals(false, params.grammar.empty()); + assert_equals(false, params.parser.empty()); + auto grammar = build_grammar(params.grammar); + GGML_ASSERT(grammar && "Failed to build Step-3.5-Flash grammar"); + } + } } static void test_template_output_peg_parsers() { @@ -3799,6 +3821,196 @@ static void test_template_output_peg_parsers() { }); } + { + // Step-3.5-Flash (uses Nemotron v3 PEG parser with thinking_forced_open) + // Unlike Nemotron, Step-3.5-Flash always emits regardless of enable_thinking, + // so all inputs must include a delimiter. + auto tmpls = read_templates("models/templates/stepfun-ai-Step-3.5-Flash.jinja"); + + // Test basic message with reasoning + test_peg_parser(tmpls.get(), [&](auto & t) { + t.input = "I'm\nthinking\n\nHello, world!\nWhat's up?"; + t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO; + + t.expect = message_assist_thoughts; + }); + + // Test basic message without thinking content + test_peg_parser(tmpls.get(), [&](auto & t) { + t.input = "\nHello, world!\nWhat's up?"; + t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO; + + t.expect = message_assist; + }); + + // Test tool call without thinking content + test_peg_parser(tmpls.get(), [&](auto & t) { + t.input = + "\n" + "\n" + "\n" + "\n" + "1\n" + "\n" + "\n" + ""; + t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO; + t.params.tools = {special_function_tool}; + + t.expect = message_assist_call; + }); + + // Test tool call with thinking + test_peg_parser(tmpls.get(), [&](auto & t) { + t.input = + "I'm\nthinking\n\n" + "\n" + "\n" + "\n" + "1\n" + "\n" + "\n" + ""; + t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO; + t.params.tools = {special_function_tool}; + + t.expect = message_assist_call_thoughts; + }); + + // Test parallel tool calls with thinking + test_peg_parser(tmpls.get(), [&](auto & t) { + t.input = + "I'm\nthinking\n\n" + "\n" + "\n" + "\n" + "1\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "1\n" + "\n" + "\n" + "2\n" + "\n" + "\n" + ""; + t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO; + t.params.parallel_tool_calls = true; + t.params.tools = {special_function_tool, special_function_tool_with_optional_param}; + + t.expect.reasoning_content = "I'm\nthinking"; + t.expect.tool_calls = {{ + /* .name = */ "special_function", + /* .arguments = */ R"({"arg1": 1})", + /* .id = */ {}, + }, { + /* .name = */ "special_function_with_opt", + /* .arguments = */ R"({"arg1": 1, "arg2": 2})", + /* .id = */ {}, + }}; + }); + + // Test parallel tool calls without thinking content + test_peg_parser(tmpls.get(), [&](auto & t) { + t.input = + "\n" + "\n" + "\n" + "\n" + "1\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "1\n" + "\n" + "\n" + "2\n" + "\n" + "\n" + ""; + t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO; + t.params.parallel_tool_calls = true; + t.params.tools = {special_function_tool, special_function_tool_with_optional_param}; + + t.expect.tool_calls = {{ + /* .name = */ "special_function", + /* .arguments = */ R"({"arg1": 1})", + /* .id = */ {}, + }, { + /* .name = */ "special_function_with_opt", + /* .arguments = */ R"({"arg1": 1, "arg2": 2})", + /* .id = */ {}, + }}; + }); + + // Test tool call with code string parameter + test_peg_parser(tmpls.get(), [&](auto & t) { + t.input = + "\n" + "\n" + "\n" + "\n" + "def hello():\n" + " print(\"Hello, world!\")\n" + "\n" + "hello()\n" + "\n" + "\n" + ""; + t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO; + t.params.tools = {python_tool}; + + t.expect.tool_calls = {{ + /* .name = */ "python", + /* .arguments = */ "{\"code\": \"def hello():\\n print(\\\"Hello, world!\\\")\\n\\nhello()\"}", + /* .id = */ {}, + }}; + }); + + // Test tool call with string parameter and no closing tag + test_peg_parser(tmpls.get(), [&](auto & t) { + t.input = + "\n" + "\n" + "\n" + "\n" + "def hello():\n" + " print(\"Hello, world!\")\n" + "\n" + "hello()\n" + "\n" + ""; + t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO; + t.params.tools = {python_tool}; + + t.expect.tool_calls = {{ + /* .name = */ "python", + /* .arguments = */ "{\"code\": \"def hello():\\n print(\\\"Hello, world!\\\")\\n\\nhello()\"}", + /* .id = */ {}, + }}; + }); + + // Test response format (JSON schema with thinking) + test_peg_parser(tmpls.get(), [&](auto & t) { + t.input = + "I need to output the invoice details in JSON\n" + "\n" + R"({"amount": 123.45, "date": "2025-12-03"})"; + t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO; + t.params.json_schema = invoice_schema; + + t.expect.reasoning_content = "I need to output the invoice details in JSON"; + t.expect.content = R"({"amount": 123.45, "date": "2025-12-03"})"; + }); + } + { // Solar-Open-100B auto tmpls = read_templates("models/templates/upstage-Solar-Open-100B.jinja");