diff --git a/common/chat-parser-xml-toolcall.cpp b/common/chat-parser-xml-toolcall.cpp index a80900ff8d..56d8bb410a 100644 --- a/common/chat-parser-xml-toolcall.cpp +++ b/common/chat-parser-xml-toolcall.cpp @@ -279,6 +279,7 @@ void build_grammar_xml_tool_call(common_chat_params & data, const json & tools, auto call_end = builder.add_rule("root-call-end", form.last_tool_end ? gbnf_format_literal(*form.last_tool_end) : gbnf_format_literal(form.tool_end)); auto tool_call_multiple_with_end = builder.add_rule("root-tool-call-multiple-with-end", tool_call_once + " " + tool_call_more + "* " + call_end); builder.add_rule("root", + std::string(data.thinking_forced_open ? "( \"\" space )? " : "") + (form.scope_start.empty() ? "" : gbnf_format_literal(form.scope_start) + " ") + tool_call_multiple_with_end + "?" + (form.scope_end.empty() ? "" : " " + gbnf_format_literal(form.scope_end)) diff --git a/common/chat.cpp b/common/chat.cpp index 47a34d5822..8abb6ba5f1 100644 --- a/common/chat.cpp +++ b/common/chat.cpp @@ -1884,7 +1884,18 @@ static common_chat_params common_chat_params_init_qwen3_coder_xml(const common_c data.prompt = apply(tmpl, params); data.format = COMMON_CHAT_FORMAT_QWEN3_CODER_XML; + // Handle thinking tags (e.g. Step-3.5-Flash unconditionally emits ) + if (string_ends_with(data.prompt, "\n")) { + if (!params.enable_thinking) { + data.prompt += ""; + } else { + data.thinking_forced_open = true; + } + } + data.preserved_tokens = { + "", + "", "", "", ", , and blocks. + // Detect via XML markers: , , and blocks. + // Also matches Step-3.5-Flash which uses the same output format. 
if (src.find("") != std::string::npos && - src.find("") != std::string::npos && src.find("") != std::string::npos && src.find("") != std::string::npos) { + // Nemotron 3 Nano 30B A3B: also has bare and plural , + // which Step-3.5-Flash lacks despite also having + if (src.find("") != std::string::npos && + src.find("") != std::string::npos && + src.find("") != std::string::npos) { return common_chat_params_init_nemotron_v3(tmpl, params); } return common_chat_params_init_qwen3_coder_xml(tmpl, params); diff --git a/models/templates/stepfun-ai-Step-3.5-Flash.jinja b/models/templates/stepfun-ai-Step-3.5-Flash.jinja new file mode 100644 index 0000000000..c09ea497da --- /dev/null +++ b/models/templates/stepfun-ai-Step-3.5-Flash.jinja @@ -0,0 +1,80 @@ +{% macro render_content(content) %}{% if content is none %}{{- '' }}{% elif content is string %}{{- content }}{% elif content is mapping %}{{- content['value'] if 'value' in content else content['text'] }}{% elif content is iterable %}{% for item in content %}{% if item.type == 'text' %}{{- item['value'] if 'value' in item else item['text'] }}{% elif item.type == 'image' %}{% endif %}{% endfor %}{% endif %}{% endmacro %} +{{bos_token}}{%- if tools %} + {{- '<|im_start|>system\n' }} + {%- if messages[0].role == 'system' %} + {{- render_content(messages[0].content) + '\n\n' }} + {%- endif %} + {{- "# Tools\n\nYou have access to the following functions in JSONSchema format:\n\n" }} + {%- for tool in tools %} + {{- "\n" }} + {{- tool | tojson(ensure_ascii=False) }} + {%- endfor %} + {{- "\n\n\nIf you choose to call a function ONLY reply in the following format with NO suffix:\n\n\n\n\nvalue_1\n\n\nThis is the value for the second parameter\nthat can span\nmultiple lines\n\n\n\n\n\nReminder:\n- Function calls MUST follow the specified format: an inner \n...\n block must be nested within \n...\n XML tags\n- Required parameters MUST be specified\n<|im_end|>\n" }} +{%- else %} + {%- if messages[0].role == 'system' %} + {{- 
'<|im_start|>system\n' + render_content(messages[0].content) + '<|im_end|>\n' }} + {%- endif %} +{%- endif %} +{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %} +{%- for message in messages[::-1] %} + {%- set index = (messages|length - 1) - loop.index0 %} + {%- if ns.multi_step_tool and message.role == "user" and render_content(message.content) is string and not(render_content(message.content).startswith('<tool_response>') and render_content(message.content).endswith('</tool_response>')) %} + {%- set ns.multi_step_tool = false %} + {%- set ns.last_query_index = index %} + {%- endif %} +{%- endfor %} +{%- for message in messages %} + {%- set content = render_content(message.content) %} + {%- if (message.role == "user") or (message.role == "system" and not loop.first) %} + {%- set role_name = 'observation' if (message.role == "system" and not loop.first and message.name == 'observation') else message.role %} + {{- '<|im_start|>' + role_name + '\n' + content + '<|im_end|>' + '\n' }} + {%- elif message.role == "assistant" %} + {%- if message.reasoning_content is string %} + {%- set reasoning_content = render_content(message.reasoning_content) %} + {%- else %} + {%- if '</think>' in content %} + {%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %} + {%- set content = content.split('</think>')[-1].lstrip('\n') %} + {%- else %} + {%- set reasoning_content = '' %} + {%- endif %} + {%- endif %} + {%- if loop.index0 > ns.last_query_index %} + {{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content + '\n</think>\n' + content }} + {%- else %} + {{- '<|im_start|>' + message.role + '\n' + content }} + {%- endif %} + {%- if message.tool_calls %} + {%- for tool_call in message.tool_calls %} + {%- if tool_call.function is defined %} + {%- set tool_call = tool_call.function %} + {%- endif %} + {{- '<tool_call>\n<function=' + tool_call.name + '>\n' }} + {%- if tool_call.arguments is defined %} + {%- set arguments = tool_call.arguments %} + {%- for args_name, args_value in arguments|items %} + {{- 
'<parameter=' + args_name + '>\n' }} + {%- set args_value = args_value | tojson(ensure_ascii=False) | safe if args_value is mapping or (args_value is sequence and args_value is not string) else args_value | string %} + {{- args_value }} + {{- '\n</parameter>\n' }} + {%- endfor %} + {%- endif %} + {{- '</function>\n</tool_call>' }} + {%- endfor %} + {%- endif %} + {{- '<|im_end|>\n' }} + {%- elif message.role == "tool" %} + {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %} + {{- '<|im_start|>tool_response\n' }} + {%- endif %} + {{- '<tool_response>' }} + {{- content }} + {{- '</tool_response>' }} + {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %} + {{- '<|im_end|>\n' }} + {%- endif %} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|im_start|>assistant\n<think>\n' }} +{%- endif %} diff --git a/tests/test-chat.cpp b/tests/test-chat.cpp index 4378a8db71..354b74c8ee 100644 --- a/tests/test-chat.cpp +++ b/tests/test-chat.cpp @@ -3553,6 +3553,12 @@ Hey there!<|im_end|> auto grammar = build_grammar(params.grammar); GGML_ASSERT(grammar && "Failed to build Qwen3-Coder grammar with union types"); } + + { + // Step-3.5-Flash template (uses same XML format as Qwen3-Coder but lacks <function> and <parameters> markers) + auto tmpls = read_templates("models/templates/stepfun-ai-Step-3.5-Flash.jinja"); + assert_equals(COMMON_CHAT_FORMAT_QWEN3_CODER_XML, common_chat_templates_apply(tmpls.get(), inputs_tools).format); + } } static void test_template_output_peg_parsers() {