common : fix Step-3.5-Flash format detection and thinking support
Step-3.5-Flash uses the same XML-style tool call format as Qwen3-Coder (<tool_call><function=...><parameter=...>) but its Jinja template lacks the bare <function> and plural <parameters> markers that the detection logic previously required. This caused it to fall through to Hermes 2 Pro, which doesn't call func_args_not_string(), so arguments stayed as JSON strings and templates using arguments|items crashed. Additionally, the Qwen3-Coder-XML format handler had no thinking support. Models like Step-3.5-Flash that unconditionally emit <think> in their generation prompt need the same thinking_forced_open handling that Nemotron v3 and Hermes 2 Pro already have, otherwise reasoning_content is never separated from content in API responses. Changes: - Relax Qwen3-Coder XML detection to only require the 3 shared markers - Tighten Nemotron v3 branch to also require bare <function> and plural <parameters>, preventing Step-3.5-Flash from being misrouted via <think> - Add thinking_forced_open support to Qwen3-Coder-XML init function - Add <think>/</think> to preserved tokens - Fix build_grammar_xml_tool_call to handle thinking_forced_open in the grammar root rule, allowing </think> before tool calls - Add Step-3.5-Flash chat template and format detection test Builds on: https://github.com/ggml-org/llama.cpp/pull/19283
This commit is contained in:
parent
684b36101c
commit
db38820013
|
|
@ -279,6 +279,7 @@ void build_grammar_xml_tool_call(common_chat_params & data, const json & tools,
|
|||
auto call_end = builder.add_rule("root-call-end", form.last_tool_end ? gbnf_format_literal(*form.last_tool_end) : gbnf_format_literal(form.tool_end));
|
||||
auto tool_call_multiple_with_end = builder.add_rule("root-tool-call-multiple-with-end", tool_call_once + " " + tool_call_more + "* " + call_end);
|
||||
builder.add_rule("root",
|
||||
std::string(data.thinking_forced_open ? "( \"</think>\" space )? " : "") +
|
||||
(form.scope_start.empty() ? "" : gbnf_format_literal(form.scope_start) + " ") +
|
||||
tool_call_multiple_with_end + "?" +
|
||||
(form.scope_end.empty() ? "" : " " + gbnf_format_literal(form.scope_end))
|
||||
|
|
|
|||
|
|
@ -1884,7 +1884,18 @@ static common_chat_params common_chat_params_init_qwen3_coder_xml(const common_c
|
|||
data.prompt = apply(tmpl, params);
|
||||
data.format = COMMON_CHAT_FORMAT_QWEN3_CODER_XML;
|
||||
|
||||
// Handle thinking tags (e.g. Step-3.5-Flash unconditionally emits <think>)
|
||||
if (string_ends_with(data.prompt, "<think>\n")) {
|
||||
if (!params.enable_thinking) {
|
||||
data.prompt += "</think>";
|
||||
} else {
|
||||
data.thinking_forced_open = true;
|
||||
}
|
||||
}
|
||||
|
||||
data.preserved_tokens = {
|
||||
"<think>",
|
||||
"</think>",
|
||||
"<tool_call>",
|
||||
"</tool_call>",
|
||||
"<function=",
|
||||
|
|
@ -3129,16 +3140,17 @@ static common_chat_params common_chat_templates_apply_jinja(
|
|||
}
|
||||
|
||||
// Qwen3-Coder XML format detection (must come before Hermes 2 Pro)
|
||||
// Detect via explicit XML markers unique to Qwen3-Coder to avoid false positives in other templates.
|
||||
// Require presence of <tool_call>, <function=...>, and <parameter=...> blocks.
|
||||
// Detect via XML markers: <tool_call>, <function=...>, and <parameter=...> blocks.
|
||||
// Also matches Step-3.5-Flash which uses the same output format.
|
||||
if (src.find("<tool_call>") != std::string::npos &&
|
||||
src.find("<function>") != std::string::npos &&
|
||||
src.find("<function=") != std::string::npos &&
|
||||
src.find("<parameters>") != std::string::npos &&
|
||||
src.find("<parameter=") != std::string::npos) {
|
||||
workaround::func_args_not_string(params.messages);
|
||||
// Nemotron 3 Nano 30B A3B
|
||||
if (src.find("<think>") != std::string::npos) {
|
||||
// Nemotron 3 Nano 30B A3B: also has bare <function> and plural <parameters>,
|
||||
// which Step-3.5-Flash lacks despite also having <think>
|
||||
if (src.find("<think>") != std::string::npos &&
|
||||
src.find("<function>") != std::string::npos &&
|
||||
src.find("<parameters>") != std::string::npos) {
|
||||
return common_chat_params_init_nemotron_v3(tmpl, params);
|
||||
}
|
||||
return common_chat_params_init_qwen3_coder_xml(tmpl, params);
|
||||
|
|
|
|||
|
|
@ -0,0 +1,80 @@
|
|||
{% macro render_content(content) %}{% if content is none %}{{- '' }}{% elif content is string %}{{- content }}{% elif content is mapping %}{{- content['value'] if 'value' in content else content['text'] }}{% elif content is iterable %}{% for item in content %}{% if item.type == 'text' %}{{- item['value'] if 'value' in item else item['text'] }}{% elif item.type == 'image' %}<im_patch>{% endif %}{% endfor %}{% endif %}{% endmacro %}
|
||||
{{bos_token}}{%- if tools %}
|
||||
{{- '<|im_start|>system\n' }}
|
||||
{%- if messages[0].role == 'system' %}
|
||||
{{- render_content(messages[0].content) + '\n\n' }}
|
||||
{%- endif %}
|
||||
{{- "# Tools\n\nYou have access to the following functions in JSONSchema format:\n\n<tools>" }}
|
||||
{%- for tool in tools %}
|
||||
{{- "\n" }}
|
||||
{{- tool | tojson(ensure_ascii=False) }}
|
||||
{%- endfor %}
|
||||
{{- "\n</tools>\n\nIf you choose to call a function ONLY reply in the following format with NO suffix:\n\n<tool_call>\n<function=example_function_name>\n<parameter=example_parameter_1>\nvalue_1\n</parameter>\n<parameter=example_parameter_2>\nThis is the value for the second parameter\nthat can span\nmultiple lines\n</parameter>\n</function>\n</tool_call>\n\n<IMPORTANT>\nReminder:\n- Function calls MUST follow the specified format: an inner <function=...>\n...\n</function> block must be nested within <tool_call>\n...\n</tool_call> XML tags\n- Required parameters MUST be specified\n</IMPORTANT><|im_end|>\n" }}
|
||||
{%- else %}
|
||||
{%- if messages[0].role == 'system' %}
|
||||
{{- '<|im_start|>system\n' + render_content(messages[0].content) + '<|im_end|>\n' }}
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
|
||||
{%- for message in messages[::-1] %}
|
||||
{%- set index = (messages|length - 1) - loop.index0 %}
|
||||
{%- if ns.multi_step_tool and message.role == "user" and render_content(message.content) is string and not(render_content(message.content).startswith('<tool_response>') and render_content(message.content).endswith('</tool_response>')) %}
|
||||
{%- set ns.multi_step_tool = false %}
|
||||
{%- set ns.last_query_index = index %}
|
||||
{%- endif %}
|
||||
{%- endfor %}
|
||||
{%- for message in messages %}
|
||||
{%- set content = render_content(message.content) %}
|
||||
{%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
|
||||
{%- set role_name = 'observation' if (message.role == "system" and not loop.first and message.name == 'observation') else message.role %}
|
||||
{{- '<|im_start|>' + role_name + '\n' + content + '<|im_end|>' + '\n' }}
|
||||
{%- elif message.role == "assistant" %}
|
||||
{%- if message.reasoning_content is string %}
|
||||
{%- set reasoning_content = render_content(message.reasoning_content) %}
|
||||
{%- else %}
|
||||
{%- if '</think>' in content %}
|
||||
{%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
|
||||
{%- set content = content.split('</think>')[-1].lstrip('\n') %}
|
||||
{%- else %}
|
||||
{%- set reasoning_content = '' %}
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
{%- if loop.index0 > ns.last_query_index %}
|
||||
{{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content + '\n</think>\n' + content }}
|
||||
{%- else %}
|
||||
{{- '<|im_start|>' + message.role + '\n' + content }}
|
||||
{%- endif %}
|
||||
{%- if message.tool_calls %}
|
||||
{%- for tool_call in message.tool_calls %}
|
||||
{%- if tool_call.function is defined %}
|
||||
{%- set tool_call = tool_call.function %}
|
||||
{%- endif %}
|
||||
{{- '<tool_call>\n<function=' + tool_call.name + '>\n' }}
|
||||
{%- if tool_call.arguments is defined %}
|
||||
{%- set arguments = tool_call.arguments %}
|
||||
{%- for args_name, args_value in arguments|items %}
|
||||
{{- '<parameter=' + args_name + '>\n' }}
|
||||
{%- set args_value = args_value | tojson(ensure_ascii=False) | safe if args_value is mapping or (args_value is sequence and args_value is not string) else args_value | string %}
|
||||
{{- args_value }}
|
||||
{{- '\n</parameter>\n' }}
|
||||
{%- endfor %}
|
||||
{%- endif %}
|
||||
{{- '</function>\n</tool_call>' }}
|
||||
{%- endfor %}
|
||||
{%- endif %}
|
||||
{{- '<|im_end|>\n' }}
|
||||
{%- elif message.role == "tool" %}
|
||||
{%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
|
||||
{{- '<|im_start|>tool_response\n' }}
|
||||
{%- endif %}
|
||||
{{- '<tool_response>' }}
|
||||
{{- content }}
|
||||
{{- '</tool_response>' }}
|
||||
{%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
|
||||
{{- '<|im_end|>\n' }}
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
{%- endfor %}
|
||||
{%- if add_generation_prompt %}
|
||||
{{- '<|im_start|>assistant\n<think>\n' }}
|
||||
{%- endif %}
|
||||
|
|
@ -3553,6 +3553,12 @@ Hey there!<|im_end|>
|
|||
auto grammar = build_grammar(params.grammar);
|
||||
GGML_ASSERT(grammar && "Failed to build Qwen3-Coder grammar with union types");
|
||||
}
|
||||
|
||||
{
|
||||
// Step-3.5-Flash template (uses same XML format as Qwen3-Coder but lacks <function> and <parameters> markers)
|
||||
auto tmpls = read_templates("models/templates/stepfun-ai-Step-3.5-Flash.jinja");
|
||||
assert_equals(COMMON_CHAT_FORMAT_QWEN3_CODER_XML, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
|
||||
}
|
||||
}
|
||||
|
||||
static void test_template_output_peg_parsers() {
|
||||
|
|
|
|||
Loading…
Reference in New Issue