From c6c4f7c41f2ebb8db91db3193244be7cecb1515e Mon Sep 17 00:00:00 2001
From: ExtReMLapin <3909752+ExtReMLapin@users.noreply.github.com>
Date: Mon, 11 Aug 2025 18:21:59 +0200
Subject: [PATCH 01/15] Update chat.cpp to support (at least) qwen3 +
 tool_choice = required

---
 common/chat.cpp | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)
diff --git a/common/chat.cpp b/common/chat.cpp
index b1a1218ca2..7ac3686e12 100644
--- a/common/chat.cpp
+++ b/common/chat.cpp
@@ -1586,8 +1586,9 @@ static common_chat_params common_chat_params_init_hermes_2_pro(const common_chat
     }
 
     if (!inputs.tools.is_null()) {
+        auto supports_thinking = tmpl.source().find("<think>") != std::string::npos && data.thinking_forced_open == false;
         // (content)?(<tool_call>{"name": "foo", "arguments": {"a": 1}}</tool_call>)*
-        data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
+        data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED || supports_thinking;
         data.grammar = build_grammar([&](const common_grammar_builder & builder) {
             std::vector<std::string> tool_rules;
             std::vector<std::string> tool_call_alts;
@@ -1639,9 +1640,19 @@ static common_chat_params common_chat_params_init_hermes_2_pro(const common_chat
             tool_call_alts.push_back(
                 "( \"```\\n\" | \"```json\\n\" | \"```xml\\n\" ) space " + wrappable_tool_call + " space \"```\" space ");
             auto tool_call = builder.add_rule("tool_call", string_join(tool_call_alts, " | "));
-            builder.add_rule("root",
+            if (supports_thinking)
+            {
+                builder.add_rule("thinking", "\"<think>\" [^\\x00]* \"</think>\" space");
+                builder.add_rule("root", 
+                "(thinking)? space " + 
+                (inputs.parallel_tool_calls ? "(" + tool_call + ")+" : tool_call));
+            }
+            else
+            {
+                builder.add_rule("root",
                 std::string(data.thinking_forced_open ? "( \"</think>\" space )? " : "") +
                 (inputs.parallel_tool_calls ? "(" + tool_call + ")+" : tool_call));
+            }
             // Trigger on some common known "good bad" outputs (only from the start and with a json that's about a specific argument name to avoid false positives)
             data.grammar_triggers.push_back({
                 COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,

From 42937a53226167e0f0d5f112d01685a9e1f44caf Mon Sep 17 00:00:00 2001
From: ExtReMLapin <3909752+ExtReMLapin@users.noreply.github.com>
Date: Mon, 11 Aug 2025 23:21:48 +0200
Subject: [PATCH 02/15] refactored changes to follow string tern op

---
 common/chat.cpp | 14 ++++----------
 1 file changed, 4 insertions(+), 10 deletions(-)

diff --git a/common/chat.cpp b/common/chat.cpp
index 7ac3686e12..932231facd 100644
--- a/common/chat.cpp
+++ b/common/chat.cpp
@@ -1640,19 +1640,13 @@ static common_chat_params common_chat_params_init_hermes_2_pro(const common_chat
             tool_call_alts.push_back(
                 "( \"```\\n\" | \"```json\\n\" | \"```xml\\n\" ) space " + wrappable_tool_call + " space \"```\" space ");
             auto tool_call = builder.add_rule("tool_call", string_join(tool_call_alts, " | "));
-            if (supports_thinking)
-            {
+            if (supports_thinking) {
                 builder.add_rule("thinking", "\"<think>\" [^\\x00]* \"</think>\" space");
-                builder.add_rule("root", 
-                "(thinking)? space " + 
-                (inputs.parallel_tool_calls ? "(" + tool_call + ")+" : tool_call));
             }
-            else
-            {
-                builder.add_rule("root",
-                std::string(data.thinking_forced_open ? "( \"</think>\" space )? " : "") +
+            builder.add_rule("root",
+                std::string(supports_thinking ? "(thinking)? space " : 
+                            data.thinking_forced_open ? "( \"</think>\" space )? " : "") +
                 (inputs.parallel_tool_calls ? "(" + tool_call + ")+" : tool_call));
-            }
             // Trigger on some common known "good bad" outputs (only from the start and with a json that's about a specific argument name to avoid false positives)
             data.grammar_triggers.push_back({
                 COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,

From 5796938c03bb35144f7f0cde1474f0295a3bb14b Mon Sep 17 00:00:00 2001
From: ExtReMLapin <3909752+ExtReMLapin@users.noreply.github.com>
Date: Tue, 12 Aug 2025 06:42:04 +0200
Subject: [PATCH 03/15] fixing editorconfig-checker CI (tailing whitespace)

---
 common/chat.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/common/chat.cpp b/common/chat.cpp
index 932231facd..429a636685 100644
--- a/common/chat.cpp
+++ b/common/chat.cpp
@@ -1644,7 +1644,7 @@ static common_chat_params common_chat_params_init_hermes_2_pro(const common_chat
                 builder.add_rule("thinking", "\"<think>\" [^\\x00]* \"</think>\" space");
             }
             builder.add_rule("root",
-                std::string(supports_thinking ? "(thinking)? space " : 
+                std::string(supports_thinking ? "(thinking)? space " :
                             data.thinking_forced_open ? "( \"</think>\" space )? " : "") +
                 (inputs.parallel_tool_calls ? "(" + tool_call + ")+" : tool_call));
             // Trigger on some common known "good bad" outputs (only from the start and with a json that's about a specific argument name to avoid false positives)

From 79e4a7bdbcf5212dc50bf48c40084394f2225194 Mon Sep 17 00:00:00 2001
From: Pierre F <no@p.e>
Date: Mon, 25 Aug 2025 18:53:46 +0200
Subject: [PATCH 04/15] hermes 2 pro tool calling, better support for thinking
 (thinking tag already opened grammar)

---
 common/chat.cpp | 32 ++++++++++++++++++++++++--------
 1 file changed, 24 insertions(+), 8 deletions(-)

diff --git a/common/chat.cpp b/common/chat.cpp
index fa04736437..c230eecaa4 100644
--- a/common/chat.cpp
+++ b/common/chat.cpp
@@ -1767,9 +1767,11 @@ static common_chat_params common_chat_params_init_hermes_2_pro(const common_chat
     }
 
     if (!inputs.tools.is_null()) {
-        auto supports_thinking = tmpl.source().find("<think>") != std::string::npos && data.thinking_forced_open == false;
+        auto supports_thinking = tmpl.source().find("<think>") != std::string::npos;
+        // you should not be able to call enable_thinking if <think> is not supported
+        GGML_ASSERT(!extra_context["enable_thinking"] || extra_context["enable_thinking"] == supports_thinking);
         // (content)?(<tool_call>{"name": "foo", "arguments": {"a": 1}}</tool_call>)*
-        data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED || supports_thinking;
+        data.grammar_lazy = true;
         data.grammar = build_grammar([&](const common_grammar_builder & builder) {
             std::vector<std::string> tool_rules;
             std::vector<std::string> tool_call_alts;
@@ -1821,13 +1823,27 @@ static common_chat_params common_chat_params_init_hermes_2_pro(const common_chat
             tool_call_alts.push_back(
                 "( \"```\\n\" | \"```json\\n\" | \"```xml\\n\" ) space " + wrappable_tool_call + " space \"```\" space ");
             auto tool_call = builder.add_rule("tool_call", string_join(tool_call_alts, " | "));
-            if (supports_thinking) {
-                builder.add_rule("thinking", "\"<think>\" [^\\x00]* \"</think>\" space");
+
+            builder.add_rule("thinking_start", "\"<think>\"");
+            builder.add_rule("thinking_content", "[^\\x00]*");
+            builder.add_rule("thinking_end", "\"</think>\" space");
+
+            //thinking grammar logic depending on if thinking_forced_open was to true (so already opened (and maybe closed)) and if thinking is even allowed
+            std::string thinking_grammar_logic = ""; // thinking tag was closed or not supported/wanted
+            if (extra_context["enable_thinking"]) {
+                if (data.thinking_forced_open) {
+                    //thinking tag was already opened by used so we don't need to add it again
+                    thinking_grammar_logic = "thinking_content thinking_end ";
+                }
+                else
+                {
+                    thinking_grammar_logic = "thinking_start thinking_content thinking_end ";
+                }
             }
-            builder.add_rule("root",
-                std::string(supports_thinking ? "(thinking)? space " :
-                            data.thinking_forced_open ? "( \"</think>\" space )? " : "") +
-                (inputs.parallel_tool_calls ? "(" + tool_call + ")+" : tool_call));
+
+
+            builder.add_rule("root", thinking_grammar_logic + (inputs.parallel_tool_calls ? "(" + tool_call + ")+" : tool_call));
+
             // Trigger on some common known "good bad" outputs (only from the start and with a json that's about a specific argument name to avoid false positives)
             data.grammar_triggers.push_back({
                 COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,

From dbae9214b9cd15e1097168eda1be3f1115dd3f40 Mon Sep 17 00:00:00 2001
From: Pierre F <no@p.e>
Date: Mon, 25 Aug 2025 19:45:19 +0200
Subject: [PATCH 05/15] qwen hermes tool calling : fixed grammar rules names

---
 common/chat.cpp | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/common/chat.cpp b/common/chat.cpp
index c230eecaa4..67db927a5c 100644
--- a/common/chat.cpp
+++ b/common/chat.cpp
@@ -1824,20 +1824,20 @@ static common_chat_params common_chat_params_init_hermes_2_pro(const common_chat
                 "( \"```\\n\" | \"```json\\n\" | \"```xml\\n\" ) space " + wrappable_tool_call + " space \"```\" space ");
             auto tool_call = builder.add_rule("tool_call", string_join(tool_call_alts, " | "));
 
-            builder.add_rule("thinking_start", "\"<think>\"");
-            builder.add_rule("thinking_content", "[^\\x00]*");
-            builder.add_rule("thinking_end", "\"</think>\" space");
+            builder.add_rule("thinking-start", "\"<think>\"");
+            builder.add_rule("thinking-content", "[^\\x00]*");
+            builder.add_rule("thinking-end", "\"</think>\" space");
 
             //thinking grammar logic depending on if thinking_forced_open was to true (so already opened (and maybe closed)) and if thinking is even allowed
             std::string thinking_grammar_logic = ""; // thinking tag was closed or not supported/wanted
             if (extra_context["enable_thinking"]) {
                 if (data.thinking_forced_open) {
                     //thinking tag was already opened by used so we don't need to add it again
-                    thinking_grammar_logic = "thinking_content thinking_end ";
+                    thinking_grammar_logic = "thinking-content thinking-end ";
                 }
                 else
                 {
-                    thinking_grammar_logic = "thinking_start thinking_content thinking_end ";
+                    thinking_grammar_logic = "thinking-start thinking-content thinking-end ";
                 }
             }
 

From 86493dd92b7d7ab2e2312df08b937ecbf0470ef7 Mon Sep 17 00:00:00 2001
From: Pierre F <no@p.e>
Date: Mon, 25 Aug 2025 20:13:45 +0200
Subject: [PATCH 06/15] fixed really weird grammar crash `Unexpected empty
 grammar stack after accepting piece:`

---
 common/chat.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/common/chat.cpp b/common/chat.cpp
index 67db927a5c..52743a1d32 100644
--- a/common/chat.cpp
+++ b/common/chat.cpp
@@ -1837,7 +1837,7 @@ static common_chat_params common_chat_params_init_hermes_2_pro(const common_chat
                 }
                 else
                 {
-                    thinking_grammar_logic = "thinking-start thinking-content thinking-end ";
+                    thinking_grammar_logic = "(thinking-start thinking-content thinking-end)? ";
                 }
             }
 

From bb5e352ad91cdb868df831513442607193d6e6a6 Mon Sep 17 00:00:00 2001
From: Pierre F <no@p.e>
Date: Mon, 25 Aug 2025 20:26:38 +0200
Subject: [PATCH 07/15] also apply the hotcrashfix here, just in case

---
 common/chat.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/common/chat.cpp b/common/chat.cpp
index 52743a1d32..23a446da91 100644
--- a/common/chat.cpp
+++ b/common/chat.cpp
@@ -1833,7 +1833,7 @@ static common_chat_params common_chat_params_init_hermes_2_pro(const common_chat
             if (extra_context["enable_thinking"]) {
                 if (data.thinking_forced_open) {
                     //thinking tag was already opened by used so we don't need to add it again
-                    thinking_grammar_logic = "thinking-content thinking-end ";
+                    thinking_grammar_logic = "(thinking-content thinking-end)? ";
                 }
                 else
                 {

From 6d5f561896d5d75647503ddbbd1e9d2bae3cab24 Mon Sep 17 00:00:00 2001
From: Pierre F <no@p.e>
Date: Tue, 26 Aug 2025 12:30:57 +0200
Subject: [PATCH 08/15] reverted changes done to grammar_lazy for hermes 2

---
 common/chat.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/common/chat.cpp b/common/chat.cpp
index 23a446da91..5f75192814 100644
--- a/common/chat.cpp
+++ b/common/chat.cpp
@@ -1771,7 +1771,7 @@ static common_chat_params common_chat_params_init_hermes_2_pro(const common_chat
         // you should not be able to call enable_thinking if <think> is not supported
         GGML_ASSERT(!extra_context["enable_thinking"] || extra_context["enable_thinking"] == supports_thinking);
         // (content)?(<tool_call>{"name": "foo", "arguments": {"a": 1}}</tool_call>)*
-        data.grammar_lazy = true;
+        data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
         data.grammar = build_grammar([&](const common_grammar_builder & builder) {
             std::vector<std::string> tool_rules;
             std::vector<std::string> tool_call_alts;

From 352274e116de585e5fd657a15ae06c31694655de Mon Sep 17 00:00:00 2001
From: Pierre F <no@p.e>
Date: Tue, 26 Aug 2025 12:39:50 +0200
Subject: [PATCH 09/15] if there is enable_thinking enabled but hermes model
 doesn't support it, just disable it, don't GGML_ABORT

---
 common/chat.cpp | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/common/chat.cpp b/common/chat.cpp
index 5f75192814..6e087625a5 100644
--- a/common/chat.cpp
+++ b/common/chat.cpp
@@ -1758,6 +1758,13 @@ static common_chat_params common_chat_params_init_hermes_2_pro(const common_chat
 
     data.prompt = apply(tmpl, inputs, /* messages_override =*/ std::nullopt, /* tools_override= */ std::nullopt, extra_context);
     data.format = COMMON_CHAT_FORMAT_HERMES_2_PRO;
+    auto supports_thinking = tmpl.source().find("<think>") != std::string::npos;
+
+    // you should not be able to call enable_thinking if <think> is not supported
+    if (!supports_thinking && extra_context["enable_thinking"]) {
+        extra_context["enable_thinking"] = false;
+    }
+
     if (string_ends_with(data.prompt, "<think>\n")) {
         if (!extra_context["enable_thinking"]) {
             data.prompt += "</think>";
@@ -1767,9 +1774,6 @@ static common_chat_params common_chat_params_init_hermes_2_pro(const common_chat
     }
 
     if (!inputs.tools.is_null()) {
-        auto supports_thinking = tmpl.source().find("<think>") != std::string::npos;
-        // you should not be able to call enable_thinking if <think> is not supported
-        GGML_ASSERT(!extra_context["enable_thinking"] || extra_context["enable_thinking"] == supports_thinking);
         // (content)?(<tool_call>{"name": "foo", "arguments": {"a": 1}}</tool_call>)*
         data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
         data.grammar = build_grammar([&](const common_grammar_builder & builder) {

From 0e558302a2daf5ecd671c14e84a52be1b89490e7 Mon Sep 17 00:00:00 2001
From: CNE FICHEPOIL Pierre <pierre-1.fichepoil@gendarmerie.interieur.gouv.fr>
Date: Tue, 26 Aug 2025 14:48:26 +0200
Subject: [PATCH 10/15] fix thinking-content eating closing think tag | ref
 #8953

---
 common/chat.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/common/chat.cpp b/common/chat.cpp
index 6e087625a5..154b2a352d 100644
--- a/common/chat.cpp
+++ b/common/chat.cpp
@@ -1829,7 +1829,7 @@ static common_chat_params common_chat_params_init_hermes_2_pro(const common_chat
             auto tool_call = builder.add_rule("tool_call", string_join(tool_call_alts, " | "));
 
             builder.add_rule("thinking-start", "\"<think>\"");
-            builder.add_rule("thinking-content", "[^\\x00]*");
+            builder.add_rule("thinking-content", "( [^<] | \"<\" [^/] | \"</\" [^t] | \"</t\" [^h] | \"</th\" [^i] | \"</thi\" [^n] | \"</thin\" [^k] | \"</think\" [^>] )*");
             builder.add_rule("thinking-end", "\"</think>\" space");
 
             //thinking grammar logic depending on if thinking_forced_open was to true (so already opened (and maybe closed)) and if thinking is even allowed

From e62cd709e793a7acfbc5f9508960db6f0929a21f Mon Sep 17 00:00:00 2001
From: CNE FICHEPOIL Pierre <pierre-1.fichepoil@gendarmerie.interieur.gouv.fr>
Date: Tue, 26 Aug 2025 15:01:36 +0200
Subject: [PATCH 11/15] removed `?` from grammar as it doesn't crash on linux,
 probably worth it's own issue

---
 common/chat.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/common/chat.cpp b/common/chat.cpp
index 154b2a352d..ff4ca39ad7 100644
--- a/common/chat.cpp
+++ b/common/chat.cpp
@@ -1837,11 +1837,11 @@ static common_chat_params common_chat_params_init_hermes_2_pro(const common_chat
             if (extra_context["enable_thinking"]) {
                 if (data.thinking_forced_open) {
                     //thinking tag was already opened by used so we don't need to add it again
-                    thinking_grammar_logic = "(thinking-content thinking-end)? ";
+                    thinking_grammar_logic = "(thinking-content thinking-end) ";
                 }
                 else
                 {
-                    thinking_grammar_logic = "(thinking-start thinking-content thinking-end)? ";
+                    thinking_grammar_logic = "(thinking-start thinking-content thinking-end) ";
                 }
             }
 

From 310701ba1bebc21384383558e0e410be5dbc7736 Mon Sep 17 00:00:00 2001
From: Pierre F <no@p.e>
Date: Thu, 28 Aug 2025 22:23:18 +0200
Subject: [PATCH 12/15] fixed crash with "auto" mode, trigger was missing

---
 common/chat.cpp | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/common/chat.cpp b/common/chat.cpp
index ff4ca39ad7..89fbf479b8 100644
--- a/common/chat.cpp
+++ b/common/chat.cpp
@@ -1835,6 +1835,10 @@ static common_chat_params common_chat_params_init_hermes_2_pro(const common_chat
             //thinking grammar logic depending on if thinking_forced_open was to true (so already opened (and maybe closed)) and if thinking is even allowed
             std::string thinking_grammar_logic = ""; // thinking tag was closed or not supported/wanted
             if (extra_context["enable_thinking"]) {
+                data.grammar_triggers.push_back({
+                    COMMON_GRAMMAR_TRIGGER_TYPE_WORD,
+                    data.thinking_forced_open ? "</think>" : "<think>"
+                });
                 if (data.thinking_forced_open) {
                     //thinking tag was already opened by used so we don't need to add it again
                     thinking_grammar_logic = "(thinking-content thinking-end) ";

From 73010a1fc6ab401e435598db05025a090a97c2ba Mon Sep 17 00:00:00 2001
From: ExtReMLapin <3909752+ExtReMLapin@users.noreply.github.com>
Date: Tue, 4 Nov 2025 14:15:41 +0100
Subject: [PATCH 13/15] Applied @ochafik 's suggested code after testing
 locally, no regression

Co-authored-by: Olivier Chafik <ochafik@anthropic.com>
---
 common/chat.cpp | 28 ++++++++++++----------------
 1 file changed, 12 insertions(+), 16 deletions(-)

diff --git a/common/chat.cpp b/common/chat.cpp
index 0887593744..93eadfdb19 100644
--- a/common/chat.cpp
+++ b/common/chat.cpp
@@ -2339,30 +2339,26 @@ static common_chat_params common_chat_params_init_hermes_2_pro(const common_chat
                 "( \"```\\n\" | \"```json\\n\" | \"```xml\\n\" ) space " + wrappable_tool_call + " space \"```\" space ");
             auto tool_call = builder.add_rule("tool_call", string_join(tool_call_alts, " | "));
 
-            builder.add_rule("thinking-start", "\"<think>\"");
-            builder.add_rule("thinking-content", "( [^<] | \"<\" [^/] | \"</\" [^t] | \"</t\" [^h] | \"</th\" [^i] | \"</thi\" [^n] | \"</thin\" [^k] | \"</think\" [^>] )*");
-            builder.add_rule("thinking-end", "\"</think>\" space");
-
-            //thinking grammar logic depending on if thinking_forced_open was to true (so already opened (and maybe closed)) and if thinking is even allowed
-            std::string thinking_grammar_logic = ""; // thinking tag was closed or not supported/wanted
+            // thinking grammar logic depending on if thinking_forced_open was to true (so already opened (and maybe closed)) and if thinking is even allowed
             if (extra_context["enable_thinking"]) {
                 data.grammar_triggers.push_back({
                     COMMON_GRAMMAR_TRIGGER_TYPE_WORD,
                     data.thinking_forced_open ? "</think>" : "<think>"
                 });
-                if (data.thinking_forced_open) {
-                    //thinking tag was already opened by used so we don't need to add it again
-                    thinking_grammar_logic = "(thinking-content thinking-end) ";
-                }
-                else
-                {
-                    thinking_grammar_logic = "(thinking-start thinking-content thinking-end) ";
+                std::string prelude = "";
+                if (!data.thinking_forced_open) {
+                    prelude = builder.add_rule("think-start", "\"<think>\"");
                 }
+                prelude += " ";
+                prelude += builder.add_rule("think-content", "( [^<] | \"<\" [^/] | \"</\" [^t] | \"</t\" [^h] | \"</th\" [^i] | \"</thi\" [^n] | \"</thin\" [^k] | \"</think\" [^>] )*");
+                prelude += " ";
+                prelude += builder.add_rule("think-end", "\"</think>\" space");
+                prelude += " ";
+                builder.add_rule("root", prelude + "(" + tool_call + ")" + (inputs.parallel_tool_calls ? "*" : "?"));
+            } else {
+                builder.add_rule("root", inputs.parallel_tool_calls ? "(" + tool_call + ")+" : tool_call);
             }
 
-
-            builder.add_rule("root", thinking_grammar_logic + (inputs.parallel_tool_calls ? "(" + tool_call + ")+" : tool_call));
-
             // Trigger on some common known "good bad" outputs (only from the start and with a json that's about a specific argument name to avoid false positives)
             data.grammar_triggers.push_back({
                 COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,

From 6441ad48c667e7b948d6b30fa2484e7a4823c528 Mon Sep 17 00:00:00 2001
From: CNE FICHEPOIL Pierre <pierre-1.fichepoil@gendarmerie.interieur.gouv.fr>
Date: Tue, 4 Nov 2025 15:23:58 +0100
Subject: [PATCH 14/15] updated qwen3 0.6B chat template with official one

---
 models/templates/Qwen-Qwen3-0.6B.jinja | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/models/templates/Qwen-Qwen3-0.6B.jinja b/models/templates/Qwen-Qwen3-0.6B.jinja
index 699ff8df40..01be9b307d 100644
--- a/models/templates/Qwen-Qwen3-0.6B.jinja
+++ b/models/templates/Qwen-Qwen3-0.6B.jinja
@@ -17,23 +17,27 @@
 {%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
 {%- for message in messages[::-1] %}
     {%- set index = (messages|length - 1) - loop.index0 %}
-    {%- if ns.multi_step_tool and message.role == "user" and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}
+    {%- if ns.multi_step_tool and message.role == "user" and message.content is string and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}
         {%- set ns.multi_step_tool = false %}
         {%- set ns.last_query_index = index %}
     {%- endif %}
 {%- endfor %}
 {%- for message in messages %}
-    {%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
-        {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }}
-    {%- elif message.role == "assistant" %}
+    {%- if message.content is string %}
         {%- set content = message.content %}
+    {%- else %}
+        {%- set content = '' %}
+    {%- endif %}
+    {%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
+        {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
+    {%- elif message.role == "assistant" %}
         {%- set reasoning_content = '' %}
-        {%- if message.reasoning_content is defined and message.reasoning_content is not none %}
+        {%- if message.reasoning_content is string %}
             {%- set reasoning_content = message.reasoning_content %}
         {%- else %}
-            {%- if '</think>' in message.content %}
-                {%- set content = message.content.split('</think>')[-1].lstrip('\n') %}
-                {%- set reasoning_content = message.content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
+            {%- if '</think>' in content %}
+                {%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
+                {%- set content = content.split('</think>')[-1].lstrip('\n') %}
             {%- endif %}
         {%- endif %}
         {%- if loop.index0 > ns.last_query_index %}
@@ -70,7 +74,7 @@
             {{- '<|im_start|>user' }}
         {%- endif %}
         {{- '\n<tool_response>\n' }}
-        {{- message.content }}
+        {{- content }}
         {{- '\n</tool_response>' }}
         {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
             {{- '<|im_end|>\n' }}

From cc18ecc5b7047a7442fb1cde92d7764cf5a082ec Mon Sep 17 00:00:00 2001
From: CNE FICHEPOIL Pierre <pierre-1.fichepoil@gendarmerie.interieur.gouv.fr>
Date: Tue, 4 Nov 2025 15:58:30 +0100
Subject: [PATCH 15/15] added tests

---
 tests/test-chat.cpp                       | 62 +++++++++++++++++++++++
 tools/server/tests/unit/test_tool_call.py | 59 +++++++++++++++++++++
 2 files changed, 121 insertions(+)

diff --git a/tests/test-chat.cpp b/tests/test-chat.cpp
index 4a8ba849b3..619bc1f106 100644
--- a/tests/test-chat.cpp
+++ b/tests/test-chat.cpp
@@ -1111,6 +1111,68 @@ static void test_template_output_parsers() {
                     /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
                 }));
     }
+    {
+        auto tmpls = read_templates("models/templates/Qwen-Qwen3-0.6B.jinja");
+        std::vector<std::string> end_tokens{ "<|im_end|>" };
+
+        assert_equals(COMMON_CHAT_FORMAT_HERMES_2_PRO, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
+        assert_equals(COMMON_CHAT_FORMAT_HERMES_2_PRO, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
+
+        // Test that enable_thinking=false adds empty think tags
+        {
+            common_chat_templates_inputs inputs_no_thinking;
+            inputs_no_thinking.messages = {message_user};
+            inputs_no_thinking.tools = tools;
+            inputs_no_thinking.tool_choice = COMMON_CHAT_TOOL_CHOICE_REQUIRED;
+            inputs_no_thinking.enable_thinking = false;
+            
+            auto params = common_chat_templates_apply(tmpls.get(), inputs_no_thinking);
+            assert_equals(COMMON_CHAT_FORMAT_HERMES_2_PRO, params.format);
+            // Verify the prompt contains empty think tags when thinking is disabled
+            assert_equals(true, params.prompt.find("<think>\n\n</think>") != std::string::npos);
+        }
+
+        // Test that grammar allows thinking with REQUIRED tool choice
+        {
+            common_chat_templates_inputs inputs_with_thinking;
+            inputs_with_thinking.messages = {message_user};
+            inputs_with_thinking.tools = tools;
+            inputs_with_thinking.tool_choice = COMMON_CHAT_TOOL_CHOICE_REQUIRED;
+            inputs_with_thinking.enable_thinking = true;
+            
+            auto params = common_chat_templates_apply(tmpls.get(), inputs_with_thinking);
+            assert_equals(COMMON_CHAT_FORMAT_HERMES_2_PRO, params.format);
+            
+            // The key fix: grammar should contain the thinking pattern even with REQUIRED
+            assert_equals(false, params.grammar.empty());
+            assert_equals(true, params.grammar.find("</think>") != std::string::npos);
+            
+            // Grammar should allow thinking before tool calls
+            assert_equals(true, params.grammar.find("think-") != std::string::npos || 
+                            params.grammar.find("<think>") != std::string::npos);
+        }
+
+        // Test parsing: tool call with thinking works correctly
+        assert_msg_equals(message_assist_call_thoughts,
+            common_chat_parse(
+                "<think>I'm\nthinking</think>\n"
+                "<tool_call>{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}</tool_call>",
+                /* is_partial= */ false,
+                {
+                    /* .format = */ COMMON_CHAT_FORMAT_HERMES_2_PRO,
+                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
+                }));
+
+        // Test that reasoning + tool calls work in template generation
+        test_templates(tmpls.get(), end_tokens, message_assist_call_thoughts, tools,
+                    "",  // Don't check exact delta, just verify it parses correctly
+                    /* expect_grammar_triggered= */ true,
+                    /* test_grammar_if_triggered= */ true,
+                    COMMON_REASONING_FORMAT_DEEPSEEK);
+
+        // Verify enable_thinking support
+        assert_equals(true, common_chat_templates_support_enable_thinking(tmpls.get()));
+    }
     {
         auto tmpls = read_templates("models/templates/meta-llama-Llama-3.1-8B-Instruct.jinja");
         std::vector<std::string>   end_tokens{ "<|eom_id|>", "<|eot_id|>" };
diff --git a/tools/server/tests/unit/test_tool_call.py b/tools/server/tests/unit/test_tool_call.py
index b8f0f10863..b191840cd2 100755
--- a/tools/server/tests/unit/test_tool_call.py
+++ b/tools/server/tests/unit/test_tool_call.py
@@ -623,3 +623,62 @@ def do_test_hello_world(server: ServerProcess, **kwargs):
     code = actual_arguments["code"]
     assert isinstance(code, str), f"Expected code to be a string, got {type(code)}: {json.dumps(code)}"
     assert re.match(r'''print\(("[Hh]ello,? [Ww]orld!?"|'[Hh]ello,? [Ww]orld!?')\)''', re.sub(r'#.*\n?', '', code)), f'Expected hello world, got {code}'
+
+
+
+@pytest.mark.slow
+@pytest.mark.parametrize("stream", [CompletionMode.NORMAL, CompletionMode.STREAMED])
+@pytest.mark.parametrize("tool,hf_repo,template_override,reasoning_format", [
+    (PYTHON_TOOL, "unsloth/Qwen3-0.6B-GGUF:Q4_K_M", None, 'deepseek'),
+    (TEST_TOOL,   "unsloth/Qwen3-0.6B-GGUF:Q4_K_M", None, 'deepseek'),
+])
+def test_required_tool_with_reasoning(tool: dict, hf_repo: str, template_override: str | Tuple[str, str | None] | None, reasoning_format: Literal['deepseek', 'none'], stream: CompletionMode):
+    global server
+    n_predict = 512
+    
+    # Set the reasoning format
+    server.reasoning_format = reasoning_format
+    
+    server.jinja = True
+    server.n_ctx = 8192
+    server.n_predict = n_predict
+    server.model_hf_repo = hf_repo
+    server.model_hf_file = None
+    
+        
+    server.start(timeout_seconds=TIMEOUT_START_SLOW)
+
+    # Make the request with "tool_choice": "required"
+    body = server.make_any_request("POST", "/v1/chat/completions", data={
+        "max_tokens": n_predict,
+        "messages": [
+            {"role": "system", "content": "You are a coding assistant."},
+            {"role": "user", "content": "Write an example"}, # This prompt will force the tool use
+        ],
+        "tool_choice": "required",
+        "tools": [tool],
+        "parallel_tool_calls": False,
+        "stream": stream == CompletionMode.STREAMED,
+        "temperature": 0.0,
+        "top_k": 1,
+        "top_p": 1.0,
+    }, timeout=TIMEOUT_HTTP_REQUEST)
+    
+    choice = body["choices"][0]
+
+
+    reasoning_content:str = choice["message"].get("reasoning_content")
+    assert reasoning_content is not None, 'Expected reasoning content, but got None'
+    assert len(reasoning_content.strip()) > 3, 'Reasoning content is too small to be credible'
+
+    tool_calls = choice["message"].get("tool_calls")
+    assert tool_calls and len(tool_calls) == 1, f'Expected 1 tool call in {choice["message"]}'
+    tool_call = tool_calls[0]
+    expected_function_name = "python" if tool["type"] == "code_interpreter" else tool["function"]["name"]
+    assert expected_function_name == tool_call["function"]["name"]
+    
+    actual_arguments = json.loads(tool_call["function"]["arguments"])
+    if tool is PYTHON_TOOL:
+         assert "code" in actual_arguments, f"tool arguments: {json.dumps(actual_arguments)}, expected: 'code'"
+    elif tool is TEST_TOOL:
+         assert "success" in actual_arguments, f"tool arguments: {json.dumps(actual_arguments)}, expected: 'success'"
\ No newline at end of file