From db38820013c30ea785d975ee99606e8dfcb0ab02 Mon Sep 17 00:00:00 2001
From: Jesse Posner <jesse.posner@gmail.com>
Date: Sat, 14 Feb 2026 23:29:39 -0800
Subject: [PATCH 1/3] common : fix Step-3.5-Flash format detection and thinking
 support

Step-3.5-Flash uses the same XML-style tool call format as Qwen3-Coder
(<tool_call><function=...><parameter=...>) but its Jinja template lacks
the bare <function> and plural <parameters> markers that the detection
logic previously required. This caused it to fall through to Hermes 2
Pro, which doesn't call func_args_not_string(), so arguments stayed as
JSON strings and templates using arguments|items crashed.

Additionally, the Qwen3-Coder-XML format handler had no thinking support.
Models like Step-3.5-Flash that unconditionally emit <think> in their
generation prompt need the same thinking_forced_open handling that
Nemotron v3 and Hermes 2 Pro already have, otherwise reasoning_content
is never separated from content in API responses.

Changes:
- Relax Qwen3-Coder XML detection to only require the 3 shared markers
  (<tool_call>, <function=, <parameter=)
- Tighten Nemotron v3 branch to also require bare <function> and plural
  <parameters>, preventing Step-3.5-Flash from being misrouted via <think>
- Add thinking_forced_open support to Qwen3-Coder-XML init function
- Add <think>/</think> to preserved tokens
- Fix build_grammar_xml_tool_call to handle thinking_forced_open in the
  grammar root rule, allowing </think> before tool calls
- Add Step-3.5-Flash chat template and format detection test

Builds on: https://github.com/ggml-org/llama.cpp/pull/19283
---
 common/chat-parser-xml-toolcall.cpp           |  1 +
 common/chat.cpp                               | 24 ++++--
 .../templates/stepfun-ai-Step-3.5-Flash.jinja | 80 +++++++++++++++++++
 tests/test-chat.cpp                           |  6 ++
 4 files changed, 105 insertions(+), 6 deletions(-)
 create mode 100644 models/templates/stepfun-ai-Step-3.5-Flash.jinja
diff --git a/common/chat-parser-xml-toolcall.cpp b/common/chat-parser-xml-toolcall.cpp
index a80900ff8d..56d8bb410a 100644
--- a/common/chat-parser-xml-toolcall.cpp
+++ b/common/chat-parser-xml-toolcall.cpp
@@ -279,6 +279,7 @@ void build_grammar_xml_tool_call(common_chat_params & data, const json & tools,
             auto call_end = builder.add_rule("root-call-end", form.last_tool_end ? gbnf_format_literal(*form.last_tool_end) : gbnf_format_literal(form.tool_end));
             auto tool_call_multiple_with_end = builder.add_rule("root-tool-call-multiple-with-end", tool_call_once + " " + tool_call_more + "* " + call_end);
             builder.add_rule("root",
+                std::string(data.thinking_forced_open ? "( \"</think>\" space )? " : "") +
                 (form.scope_start.empty() ? "" : gbnf_format_literal(form.scope_start) + " ") +
                 tool_call_multiple_with_end  + "?" +
                 (form.scope_end.empty() ? "" : " " + gbnf_format_literal(form.scope_end))
diff --git a/common/chat.cpp b/common/chat.cpp
index 47a34d5822..8abb6ba5f1 100644
--- a/common/chat.cpp
+++ b/common/chat.cpp
@@ -1884,7 +1884,18 @@ static common_chat_params common_chat_params_init_qwen3_coder_xml(const common_c
     data.prompt = apply(tmpl, params);
     data.format = COMMON_CHAT_FORMAT_QWEN3_CODER_XML;
 
+    // Handle thinking tags (e.g. Step-3.5-Flash unconditionally emits <think>)
+    if (string_ends_with(data.prompt, "<think>\n")) {
+        if (!params.enable_thinking) {
+            data.prompt += "</think>";
+        } else {
+            data.thinking_forced_open = true;
+        }
+    }
+
     data.preserved_tokens = {
+        "<think>",
+        "</think>",
         "<tool_call>",
         "</tool_call>",
         "<function=",
@@ -3129,16 +3140,17 @@ static common_chat_params common_chat_templates_apply_jinja(
     }
 
     // Qwen3-Coder XML format detection (must come before Hermes 2 Pro)
-    // Detect via explicit XML markers unique to Qwen3-Coder to avoid false positives in other templates.
-    // Require presence of <tool_call>, <function=...>, and <parameter=...> blocks.
+    // Detect via XML markers: <tool_call>, <function=...>, and <parameter=...> blocks.
+    // Also matches Step-3.5-Flash which uses the same output format.
     if (src.find("<tool_call>") != std::string::npos &&
-        src.find("<function>") != std::string::npos &&
         src.find("<function=") != std::string::npos &&
-        src.find("<parameters>") != std::string::npos &&
         src.find("<parameter=") != std::string::npos) {
         workaround::func_args_not_string(params.messages);
-        // Nemotron 3 Nano 30B A3B
-        if (src.find("<think>") != std::string::npos) {
+        // Nemotron 3 Nano 30B A3B: also has bare <function> and plural <parameters>,
+        // which Step-3.5-Flash lacks despite also having <think>
+        if (src.find("<think>") != std::string::npos &&
+            src.find("<function>") != std::string::npos &&
+            src.find("<parameters>") != std::string::npos) {
             return common_chat_params_init_nemotron_v3(tmpl, params);
         }
         return common_chat_params_init_qwen3_coder_xml(tmpl, params);
diff --git a/models/templates/stepfun-ai-Step-3.5-Flash.jinja b/models/templates/stepfun-ai-Step-3.5-Flash.jinja
new file mode 100644
index 0000000000..c09ea497da
--- /dev/null
+++ b/models/templates/stepfun-ai-Step-3.5-Flash.jinja
@@ -0,0 +1,80 @@
+{% macro render_content(content) %}{% if content is none %}{{- '' }}{% elif content is string %}{{- content }}{% elif content is mapping %}{{- content['value'] if 'value' in content else content['text'] }}{% elif content is iterable %}{% for item in content %}{% if item.type == 'text' %}{{- item['value'] if 'value' in item else item['text'] }}{% elif item.type == 'image' %}<im_patch>{% endif %}{% endfor %}{% endif %}{% endmacro %}
+{{bos_token}}{%- if tools %}
+    {{- '<|im_start|>system\n' }}
+    {%- if messages[0].role == 'system' %}
+        {{- render_content(messages[0].content) + '\n\n' }}
+    {%- endif %}
+    {{- "# Tools\n\nYou have access to the following functions in JSONSchema format:\n\n<tools>" }}
+    {%- for tool in tools %}
+        {{- "\n" }}
+        {{- tool | tojson(ensure_ascii=False) }}
+    {%- endfor %}
+    {{- "\n</tools>\n\nIf you choose to call a function ONLY reply in the following format with NO suffix:\n\n<tool_call>\n<function=example_function_name>\n<parameter=example_parameter_1>\nvalue_1\n</parameter>\n<parameter=example_parameter_2>\nThis is the value for the second parameter\nthat can span\nmultiple lines\n</parameter>\n</function>\n</tool_call>\n\n<IMPORTANT>\nReminder:\n- Function calls MUST follow the specified format: an inner <function=...>\n...\n</function> block must be nested within <tool_call>\n...\n</tool_call> XML tags\n- Required parameters MUST be specified\n</IMPORTANT><|im_end|>\n" }}
+{%- else %}
+    {%- if messages[0].role == 'system' %}
+        {{- '<|im_start|>system\n' + render_content(messages[0].content) + '<|im_end|>\n' }}
+    {%- endif %}
+{%- endif %}
+{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
+{%- for message in messages[::-1] %}
+    {%- set index = (messages|length - 1) - loop.index0 %}
+    {%- if ns.multi_step_tool and message.role == "user" and render_content(message.content) is string and not(render_content(message.content).startswith('<tool_response>') and render_content(message.content).endswith('</tool_response>')) %}
+        {%- set ns.multi_step_tool = false %}
+        {%- set ns.last_query_index = index %}
+    {%- endif %}
+{%- endfor %}
+{%- for message in messages %}
+    {%- set content = render_content(message.content) %}
+    {%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
+        {%- set role_name = 'observation' if (message.role == "system" and not loop.first and message.name == 'observation') else message.role %}
+        {{- '<|im_start|>' + role_name + '\n' + content + '<|im_end|>' + '\n' }}
+    {%- elif message.role == "assistant" %}
+        {%- if message.reasoning_content is string %}
+            {%- set reasoning_content = render_content(message.reasoning_content) %}
+        {%- else %}
+            {%- if '</think>' in content %}
+                {%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
+                {%- set content = content.split('</think>')[-1].lstrip('\n') %}
+            {%- else %}
+                {%- set reasoning_content = '' %}
+            {%- endif %}
+        {%- endif %}
+        {%- if loop.index0 > ns.last_query_index %}
+            {{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content + '\n</think>\n' + content }}
+        {%- else %}
+            {{- '<|im_start|>' + message.role + '\n' + content }}
+        {%- endif %}
+        {%- if message.tool_calls %}
+            {%- for tool_call in message.tool_calls %}
+                {%- if tool_call.function is defined %}
+                    {%- set tool_call = tool_call.function %}
+                {%- endif %}
+                {{- '<tool_call>\n<function=' + tool_call.name + '>\n' }}
+                {%- if tool_call.arguments is defined %}
+                    {%- set arguments = tool_call.arguments %}
+                    {%- for args_name, args_value in arguments|items %}
+                        {{- '<parameter=' + args_name + '>\n' }}
+                        {%- set args_value = args_value | tojson(ensure_ascii=False) | safe if args_value is mapping or (args_value is sequence and args_value is not string) else args_value | string %}
+                        {{- args_value }}
+                        {{- '\n</parameter>\n' }}
+                    {%- endfor %}
+                {%- endif %}
+                {{- '</function>\n</tool_call>' }}
+            {%- endfor %}
+        {%- endif %}
+        {{- '<|im_end|>\n' }}
+    {%- elif message.role == "tool" %}
+        {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
+            {{- '<|im_start|>tool_response\n' }}
+        {%- endif %}
+        {{- '<tool_response>' }}
+        {{- content }}
+        {{- '</tool_response>' }}
+        {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
+            {{- '<|im_end|>\n' }}
+        {%- endif %}
+    {%- endif %}
+{%- endfor %}
+{%- if add_generation_prompt %}
+    {{- '<|im_start|>assistant\n<think>\n' }}
+{%- endif %}
diff --git a/tests/test-chat.cpp b/tests/test-chat.cpp
index 4378a8db71..354b74c8ee 100644
--- a/tests/test-chat.cpp
+++ b/tests/test-chat.cpp
@@ -3553,6 +3553,12 @@ Hey there!<|im_end|>
         auto grammar = build_grammar(params.grammar);
         GGML_ASSERT(grammar && "Failed to build Qwen3-Coder grammar with union types");
     }
+
+    {
+        // Step-3.5-Flash template (uses same XML format as Qwen3-Coder but lacks <function> and <parameters> markers)
+        auto tmpls = read_templates("models/templates/stepfun-ai-Step-3.5-Flash.jinja");
+        assert_equals(COMMON_CHAT_FORMAT_QWEN3_CODER_XML, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
+    }
 }
 
 static void test_template_output_peg_parsers() {

From ac0f256df0791e5f14cd79b64b8e2ae1ad8b44f8 Mon Sep 17 00:00:00 2001
From: Jesse Posner <jesse.posner@gmail.com>
Date: Sun, 15 Feb 2026 09:24:15 -0800
Subject: [PATCH 2/3] chat : route Step-3.5-Flash to Nemotron v3 PEG parser,
 add tests

Step-3.5-Flash uses the same XML tool call format as Qwen3-Coder and
Nemotron 3 Nano (<tool_call>/<function=...>/<parameter=...>) but with
unconditional <think> output. Route it to the Nemotron v3 PEG parser
for streaming and schema-aware parameter parsing.

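Detection: templates with <think> + XML tool tags use Nemotron v3 PEG
parser; templates without <think> (Qwen3-Coder) use GBNF grammar. This
drops the extra bare <function> / plural <parameters> requirement that
the previous commit added to the Nemotron v3 branch.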

Tests cover: basic messages, tool calls with/without thinking content,
parallel tool calls, code string parameters, optional </parameter>
closing tags, and JSON schema response format.
---
 common/chat.cpp     |  11 ++-
 tests/test-chat.cpp | 211 +++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 214 insertions(+), 8 deletions(-)

diff --git a/common/chat.cpp b/common/chat.cpp
index 8abb6ba5f1..d87cb682c5 100644
--- a/common/chat.cpp
+++ b/common/chat.cpp
@@ -3141,16 +3141,15 @@ static common_chat_params common_chat_templates_apply_jinja(
 
     // Qwen3-Coder XML format detection (must come before Hermes 2 Pro)
     // Detect via XML markers: <tool_call>, <function=...>, and <parameter=...> blocks.
-    // Also matches Step-3.5-Flash which uses the same output format.
+    // Also matches Step-3.5-Flash and Nemotron 3 Nano which use the same output format.
     if (src.find("<tool_call>") != std::string::npos &&
         src.find("<function=") != std::string::npos &&
         src.find("<parameter=") != std::string::npos) {
         workaround::func_args_not_string(params.messages);
-        // Nemotron 3 Nano 30B A3B: also has bare <function> and plural <parameters>,
-        // which Step-3.5-Flash lacks despite also having <think>
-        if (src.find("<think>") != std::string::npos &&
-            src.find("<function>") != std::string::npos &&
-            src.find("<parameters>") != std::string::npos) {
+        // Models with <think> support (Step-3.5-Flash, Nemotron 3 Nano) use the
+        // Nemotron v3 PEG parser for streaming and schema-aware parameter parsing.
+        // Qwen3-Coder has no <think> in its template.
+        if (src.find("<think>") != std::string::npos) {
             return common_chat_params_init_nemotron_v3(tmpl, params);
         }
         return common_chat_params_init_qwen3_coder_xml(tmpl, params);
diff --git a/tests/test-chat.cpp b/tests/test-chat.cpp
index 354b74c8ee..1bf6e4d9ec 100644
--- a/tests/test-chat.cpp
+++ b/tests/test-chat.cpp
@@ -3555,9 +3555,26 @@ Hey there!<|im_end|>
     }
 
     {
-        // Step-3.5-Flash template (uses same XML format as Qwen3-Coder but lacks <function> and <parameters> markers)
+        // Step-3.5-Flash template: uses same XML output format as Qwen3-Coder and Nemotron v3,
+        // but with <think> support. Routes to the Nemotron v3 PEG parser for streaming and
+        // schema-aware parameter parsing.
         auto tmpls = read_templates("models/templates/stepfun-ai-Step-3.5-Flash.jinja");
-        assert_equals(COMMON_CHAT_FORMAT_QWEN3_CODER_XML, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
+        assert_equals(COMMON_CHAT_FORMAT_PEG_CONSTRUCTED, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
+
+        // Grammar and PEG parser should be generated with thinking_forced_open
+        {
+            common_chat_templates_inputs inputs;
+            inputs.messages = { message_user };
+            inputs.tools = { special_function_tool };
+            inputs.enable_thinking = true;
+            auto params = common_chat_templates_apply(tmpls.get(), inputs);
+            assert_equals(COMMON_CHAT_FORMAT_PEG_CONSTRUCTED, params.format);
+            assert_equals(true, params.thinking_forced_open);
+            assert_equals(false, params.grammar.empty());
+            assert_equals(false, params.parser.empty());
+            auto grammar = build_grammar(params.grammar);
+            GGML_ASSERT(grammar && "Failed to build Step-3.5-Flash grammar with thinking_forced_open");
+        }
     }
 }
 
@@ -3805,6 +3822,196 @@ static void test_template_output_peg_parsers() {
         });
     }
 
+    {
+        // Step-3.5-Flash (uses Nemotron v3 PEG parser with thinking_forced_open)
+        // Unlike Nemotron, Step-3.5-Flash always emits <think> regardless of enable_thinking,
+        // so all inputs must include a </think> delimiter.
+        auto tmpls = read_templates("models/templates/stepfun-ai-Step-3.5-Flash.jinja");
+
+        // Test basic message with reasoning
+        test_peg_parser(tmpls.get(), [&](auto & t) {
+            t.input = "I'm\nthinking\n</think>\nHello, world!\nWhat's up?";
+            t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
+
+            t.expect = message_assist_thoughts;
+        });
+
+        // Test basic message without thinking content
+        test_peg_parser(tmpls.get(), [&](auto & t) {
+            t.input = "</think>\nHello, world!\nWhat's up?";
+            t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
+
+            t.expect = message_assist;
+        });
+
+        // Test tool call without thinking content
+        test_peg_parser(tmpls.get(), [&](auto & t) {
+            t.input =
+                "</think>\n"
+                "<tool_call>\n"
+                "<function=special_function>\n"
+                "<parameter=arg1>\n"
+                "1\n"
+                "</parameter>\n"
+                "</function>\n"
+                "</tool_call>";
+            t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
+            t.params.tools = {special_function_tool};
+
+            t.expect = message_assist_call;
+        });
+
+        // Test tool call with thinking
+        test_peg_parser(tmpls.get(), [&](auto & t) {
+            t.input =
+                "I'm\nthinking\n</think>\n"
+                "<tool_call>\n"
+                "<function=special_function>\n"
+                "<parameter=arg1>\n"
+                "1\n"
+                "</parameter>\n"
+                "</function>\n"
+                "</tool_call>";
+            t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
+            t.params.tools = {special_function_tool};
+
+            t.expect = message_assist_call_thoughts;
+        });
+
+        // Test parallel tool calls with thinking
+        test_peg_parser(tmpls.get(), [&](auto & t) {
+            t.input =
+                "I'm\nthinking\n</think>\n"
+                "<tool_call>\n"
+                "<function=special_function>\n"
+                "<parameter=arg1>\n"
+                "1\n"
+                "</parameter>\n"
+                "</function>\n"
+                "</tool_call>\n"
+                "<tool_call>\n"
+                "<function=special_function_with_opt>\n"
+                "<parameter=arg1>\n"
+                "1\n"
+                "</parameter>\n"
+                "<parameter=arg2>\n"
+                "2\n"
+                "</parameter>\n"
+                "</function>\n"
+                "</tool_call>";
+            t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
+            t.params.parallel_tool_calls = true;
+            t.params.tools = {special_function_tool, special_function_tool_with_optional_param};
+
+            t.expect.reasoning_content = "I'm\nthinking";
+            t.expect.tool_calls = {{
+                /* .name = */      "special_function",
+                /* .arguments = */ R"({"arg1": 1})",
+                /* .id = */        {},
+            }, {
+                /* .name = */      "special_function_with_opt",
+                /* .arguments = */ R"({"arg1": 1, "arg2": 2})",
+                /* .id = */        {},
+            }};
+        });
+
+        // Test parallel tool calls without thinking content
+        test_peg_parser(tmpls.get(), [&](auto & t) {
+            t.input =
+                "</think>\n"
+                "<tool_call>\n"
+                "<function=special_function>\n"
+                "<parameter=arg1>\n"
+                "1\n"
+                "</parameter>\n"
+                "</function>\n"
+                "</tool_call>\n"
+                "<tool_call>\n"
+                "<function=special_function_with_opt>\n"
+                "<parameter=arg1>\n"
+                "1\n"
+                "</parameter>\n"
+                "<parameter=arg2>\n"
+                "2\n"
+                "</parameter>\n"
+                "</function>\n"
+                "</tool_call>";
+            t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
+            t.params.parallel_tool_calls = true;
+            t.params.tools = {special_function_tool, special_function_tool_with_optional_param};
+
+            t.expect.tool_calls = {{
+                /* .name = */      "special_function",
+                /* .arguments = */ R"({"arg1": 1})",
+                /* .id = */        {},
+            }, {
+                /* .name = */      "special_function_with_opt",
+                /* .arguments = */ R"({"arg1": 1, "arg2": 2})",
+                /* .id = */        {},
+            }};
+        });
+
+        // Test tool call with code string parameter
+        test_peg_parser(tmpls.get(), [&](auto & t) {
+            t.input =
+                "</think>\n"
+                "<tool_call>\n"
+                "<function=python>\n"
+                "<parameter=code>\n"
+                "def hello():\n"
+                "    print(\"Hello, world!\")\n"
+                "\n"
+                "hello()\n"
+                "</parameter>\n"
+                "</function>\n"
+                "</tool_call>";
+            t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
+            t.params.tools = {python_tool};
+
+            t.expect.tool_calls = {{
+                /* .name = */      "python",
+                /* .arguments = */ "{\"code\": \"def hello():\\n    print(\\\"Hello, world!\\\")\\n\\nhello()\"}",
+                /* .id = */        {},
+            }};
+        });
+
+        // Test tool call with string parameter and no closing </parameter> tag
+        test_peg_parser(tmpls.get(), [&](auto & t) {
+            t.input =
+                "</think>\n"
+                "<tool_call>\n"
+                "<function=python>\n"
+                "<parameter=code>\n"
+                "def hello():\n"
+                "    print(\"Hello, world!\")\n"
+                "\n"
+                "hello()\n"
+                "</function>\n"
+                "</tool_call>";
+            t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
+            t.params.tools = {python_tool};
+
+            t.expect.tool_calls = {{
+                /* .name = */      "python",
+                /* .arguments = */ "{\"code\": \"def hello():\\n    print(\\\"Hello, world!\\\")\\n\\nhello()\"}",
+                /* .id = */        {},
+            }};
+        });
+
+        // Test response format (JSON schema with thinking)
+        test_peg_parser(tmpls.get(), [&](auto & t) {
+            t.input =
+              "I need to output the invoice details in JSON\n"
+              "</think>\n"
+              R"({"amount": 123.45, "date": "2025-12-03"})";
+            t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
+            t.params.json_schema = invoice_schema;
+
+            t.expect.reasoning_content = "I need to output the invoice details in JSON";
+            t.expect.content = R"({"amount": 123.45, "date": "2025-12-03"})";
+        });
+    }
+
     {
         // Solar-Open-100B
         auto tmpls = read_templates("models/templates/upstage-Solar-Open-100B.jinja");

From bdc1dda64f80393af9255128dcdf70ea08e3e046 Mon Sep 17 00:00:00 2001
From: Jesse Posner <jesse.posner@gmail.com>
Date: Sun, 15 Feb 2026 22:25:12 -0800
Subject: [PATCH 3/3] chat : remove dead thinking code from qwen3_coder_xml

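Remove thinking handling code that became unreachable after routing
Step-3.5-Flash to the Nemotron v3 PEG parser. Format detection now sends
every template that has the XML tool-call markers plus <think> to the
Nemotron v3 handler, and Qwen3-Coder has no <think> in its template, so
the thinking_forced_open logic, the <think>/</think> preserved tokens,
and the "</think>" grammar prefix were dead paths.

Also drop the explicit enable_thinking = true from the Step-3.5-Flash
format-detection test and shorten its assertion message.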
---
 common/chat-parser-xml-toolcall.cpp |  1 -
 common/chat.cpp                     | 11 -----------
 tests/test-chat.cpp                 |  3 +--
 3 files changed, 1 insertion(+), 14 deletions(-)

diff --git a/common/chat-parser-xml-toolcall.cpp b/common/chat-parser-xml-toolcall.cpp
index 56d8bb410a..a80900ff8d 100644
--- a/common/chat-parser-xml-toolcall.cpp
+++ b/common/chat-parser-xml-toolcall.cpp
@@ -279,7 +279,6 @@ void build_grammar_xml_tool_call(common_chat_params & data, const json & tools,
             auto call_end = builder.add_rule("root-call-end", form.last_tool_end ? gbnf_format_literal(*form.last_tool_end) : gbnf_format_literal(form.tool_end));
             auto tool_call_multiple_with_end = builder.add_rule("root-tool-call-multiple-with-end", tool_call_once + " " + tool_call_more + "* " + call_end);
             builder.add_rule("root",
-                std::string(data.thinking_forced_open ? "( \"</think>\" space )? " : "") +
                 (form.scope_start.empty() ? "" : gbnf_format_literal(form.scope_start) + " ") +
                 tool_call_multiple_with_end  + "?" +
                 (form.scope_end.empty() ? "" : " " + gbnf_format_literal(form.scope_end))
diff --git a/common/chat.cpp b/common/chat.cpp
index d87cb682c5..04fe8bc070 100644
--- a/common/chat.cpp
+++ b/common/chat.cpp
@@ -1884,18 +1884,7 @@ static common_chat_params common_chat_params_init_qwen3_coder_xml(const common_c
     data.prompt = apply(tmpl, params);
     data.format = COMMON_CHAT_FORMAT_QWEN3_CODER_XML;
 
-    // Handle thinking tags (e.g. Step-3.5-Flash unconditionally emits <think>)
-    if (string_ends_with(data.prompt, "<think>\n")) {
-        if (!params.enable_thinking) {
-            data.prompt += "</think>";
-        } else {
-            data.thinking_forced_open = true;
-        }
-    }
-
     data.preserved_tokens = {
-        "<think>",
-        "</think>",
         "<tool_call>",
         "</tool_call>",
         "<function=",
diff --git a/tests/test-chat.cpp b/tests/test-chat.cpp
index 1bf6e4d9ec..1bef5b9f44 100644
--- a/tests/test-chat.cpp
+++ b/tests/test-chat.cpp
@@ -3566,14 +3566,13 @@ Hey there!<|im_end|>
             common_chat_templates_inputs inputs;
             inputs.messages = { message_user };
             inputs.tools = { special_function_tool };
-            inputs.enable_thinking = true;
             auto params = common_chat_templates_apply(tmpls.get(), inputs);
             assert_equals(COMMON_CHAT_FORMAT_PEG_CONSTRUCTED, params.format);
             assert_equals(true, params.thinking_forced_open);
             assert_equals(false, params.grammar.empty());
             assert_equals(false, params.parser.empty());
             auto grammar = build_grammar(params.grammar);
-            GGML_ASSERT(grammar && "Failed to build Step-3.5-Flash grammar with thinking_forced_open");
+            GGML_ASSERT(grammar && "Failed to build Step-3.5-Flash grammar");
         }
     }
 }