From c6c4f7c41f2ebb8db91db3193244be7cecb1515e Mon Sep 17 00:00:00 2001 From: ExtReMLapin <3909752+ExtReMLapin@users.noreply.github.com> Date: Mon, 11 Aug 2025 18:21:59 +0200 Subject: [PATCH 01/15] Update chat.cpp to support (at least) qwen3 + tool_choice = required --- common/chat.cpp | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/common/chat.cpp b/common/chat.cpp index b1a1218ca2..7ac3686e12 100644 --- a/common/chat.cpp +++ b/common/chat.cpp @@ -1586,8 +1586,9 @@ static common_chat_params common_chat_params_init_hermes_2_pro(const common_chat } if (!inputs.tools.is_null()) { + auto supports_thinking = tmpl.source().find("") != std::string::npos && data.thinking_forced_open == false; // (content)?({"name": "foo", "arguments": {"a": 1}})* - data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED; + data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED || supports_thinking; data.grammar = build_grammar([&](const common_grammar_builder & builder) { std::vector tool_rules; std::vector tool_call_alts; @@ -1639,9 +1640,19 @@ static common_chat_params common_chat_params_init_hermes_2_pro(const common_chat tool_call_alts.push_back( "( \"```\\n\" | \"```json\\n\" | \"```xml\\n\" ) space " + wrappable_tool_call + " space \"```\" space "); auto tool_call = builder.add_rule("tool_call", string_join(tool_call_alts, " | ")); - builder.add_rule("root", + if (supports_thinking) + { + builder.add_rule("thinking", "\"\" [^\\x00]* \"\" space"); + builder.add_rule("root", + "(thinking)? space " + + (inputs.parallel_tool_calls ? "(" + tool_call + ")+" : tool_call)); + } + else + { + builder.add_rule("root", std::string(data.thinking_forced_open ? "( \"\" space )? " : "") + (inputs.parallel_tool_calls ? "(" + tool_call + ")+" : tool_call)); + } // Trigger on some common known "good bad" outputs (only from the start and with a json that's about a specific argument name to avoid false positives) data.grammar_triggers.push_back({ COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL, From 42937a53226167e0f0d5f112d01685a9e1f44caf Mon Sep 17 00:00:00 2001 From: ExtReMLapin <3909752+ExtReMLapin@users.noreply.github.com> Date: Mon, 11 Aug 2025 23:21:48 +0200 Subject: [PATCH 02/15] refactored changes to follow string tern op --- common/chat.cpp | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/common/chat.cpp b/common/chat.cpp index 7ac3686e12..932231facd 100644 --- a/common/chat.cpp +++ b/common/chat.cpp @@ -1640,19 +1640,13 @@ static common_chat_params common_chat_params_init_hermes_2_pro(const common_chat tool_call_alts.push_back( "( \"```\\n\" | \"```json\\n\" | \"```xml\\n\" ) space " + wrappable_tool_call + " space \"```\" space "); auto tool_call = builder.add_rule("tool_call", string_join(tool_call_alts, " | ")); - if (supports_thinking) - { + if (supports_thinking) { builder.add_rule("thinking", "\"\" [^\\x00]* \"\" space"); - builder.add_rule("root", - "(thinking)? space " + - (inputs.parallel_tool_calls ? "(" + tool_call + ")+" : tool_call)); } - else - { - builder.add_rule("root", - std::string(data.thinking_forced_open ? "( \"\" space )? " : "") + + builder.add_rule("root", + std::string(supports_thinking ? "(thinking)? space " : + data.thinking_forced_open ? "( \"\" space )? " : "") + (inputs.parallel_tool_calls ? "(" + tool_call + ")+" : tool_call)); - } // Trigger on some common known "good bad" outputs (only from the start and with a json that's about a specific argument name to avoid false positives) data.grammar_triggers.push_back({ COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL, From 5796938c03bb35144f7f0cde1474f0295a3bb14b Mon Sep 17 00:00:00 2001 From: ExtReMLapin <3909752+ExtReMLapin@users.noreply.github.com> Date: Tue, 12 Aug 2025 06:42:04 +0200 Subject: [PATCH 03/15] fixing editorconfig-checker CI (tailing whitespace) --- common/chat.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/chat.cpp b/common/chat.cpp index 932231facd..429a636685 100644 --- a/common/chat.cpp +++ b/common/chat.cpp @@ -1644,7 +1644,7 @@ static common_chat_params common_chat_params_init_hermes_2_pro(const common_chat builder.add_rule("thinking", "\"\" [^\\x00]* \"\" space"); } builder.add_rule("root", - std::string(supports_thinking ? "(thinking)? space " : + std::string(supports_thinking ? "(thinking)? space " : data.thinking_forced_open ? "( \"\" space )? " : "") + (inputs.parallel_tool_calls ? "(" + tool_call + ")+" : tool_call)); // Trigger on some common known "good bad" outputs (only from the start and with a json that's about a specific argument name to avoid false positives) From 79e4a7bdbcf5212dc50bf48c40084394f2225194 Mon Sep 17 00:00:00 2001 From: Pierre F Date: Mon, 25 Aug 2025 18:53:46 +0200 Subject: [PATCH 04/15] hermes 2 pro tool calling, better support for thinking (thinking tag already opened grammar) --- common/chat.cpp | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/common/chat.cpp b/common/chat.cpp index fa04736437..c230eecaa4 100644 --- a/common/chat.cpp +++ b/common/chat.cpp @@ -1767,9 +1767,11 @@ static common_chat_params common_chat_params_init_hermes_2_pro(const common_chat } if (!inputs.tools.is_null()) { - auto supports_thinking = tmpl.source().find("") != std::string::npos && data.thinking_forced_open == false; + auto supports_thinking = tmpl.source().find("") != std::string::npos; + // you should not be able to call enable_thinking if is not supported + GGML_ASSERT(!extra_context["enable_thinking"] || extra_context["enable_thinking"] == supports_thinking); // (content)?({"name": "foo", "arguments": {"a": 1}})* - data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED || supports_thinking; + data.grammar_lazy = true; data.grammar = build_grammar([&](const common_grammar_builder & builder) { std::vector tool_rules; std::vector tool_call_alts; @@ -1821,13 +1823,27 @@ static common_chat_params common_chat_params_init_hermes_2_pro(const common_chat tool_call_alts.push_back( "( \"```\\n\" | \"```json\\n\" | \"```xml\\n\" ) space " + wrappable_tool_call + " space \"```\" space "); auto tool_call = builder.add_rule("tool_call", string_join(tool_call_alts, " | ")); - if (supports_thinking) { - builder.add_rule("thinking", "\"\" [^\\x00]* \"\" space"); + + builder.add_rule("thinking_start", "\"\""); + builder.add_rule("thinking_content", "[^\\x00]*"); + builder.add_rule("thinking_end", "\"\" space"); + + //thinking grammar logic depending on if thinking_forced_open was to true (so already opened (and maybe closed)) and if thinking is even allowed + std::string thinking_grammar_logic = ""; // thinking tag was closed or not supported/wanted + if (extra_context["enable_thinking"]) { + if (data.thinking_forced_open) { + //thinking tag was already opened by used so we don't need to add it again + thinking_grammar_logic = "thinking_content thinking_end "; + } + else + { + thinking_grammar_logic = "thinking_start thinking_content thinking_end "; + } } - builder.add_rule("root", - std::string(supports_thinking ? "(thinking)? space " : - data.thinking_forced_open ? "( \"\" space )? " : "") + - (inputs.parallel_tool_calls ? "(" + tool_call + ")+" : tool_call)); + + + builder.add_rule("root", thinking_grammar_logic + (inputs.parallel_tool_calls ? "(" + tool_call + ")+" : tool_call)); + // Trigger on some common known "good bad" outputs (only from the start and with a json that's about a specific argument name to avoid false positives) data.grammar_triggers.push_back({ COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL, From dbae9214b9cd15e1097168eda1be3f1115dd3f40 Mon Sep 17 00:00:00 2001 From: Pierre F Date: Mon, 25 Aug 2025 19:45:19 +0200 Subject: [PATCH 05/15] qwen hermes tool calling : fixed grammar rules names --- common/chat.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/common/chat.cpp b/common/chat.cpp index c230eecaa4..67db927a5c 100644 --- a/common/chat.cpp +++ b/common/chat.cpp @@ -1824,20 +1824,20 @@ static common_chat_params common_chat_params_init_hermes_2_pro(const common_chat "( \"```\\n\" | \"```json\\n\" | \"```xml\\n\" ) space " + wrappable_tool_call + " space \"```\" space "); auto tool_call = builder.add_rule("tool_call", string_join(tool_call_alts, " | ")); - builder.add_rule("thinking_start", "\"\""); - builder.add_rule("thinking_content", "[^\\x00]*"); - builder.add_rule("thinking_end", "\"\" space"); + builder.add_rule("thinking-start", "\"\""); + builder.add_rule("thinking-content", "[^\\x00]*"); + builder.add_rule("thinking-end", "\"\" space"); //thinking grammar logic depending on if thinking_forced_open was to true (so already opened (and maybe closed)) and if thinking is even allowed std::string thinking_grammar_logic = ""; // thinking tag was closed or not supported/wanted if (extra_context["enable_thinking"]) { if (data.thinking_forced_open) { //thinking tag was already opened by used so we don't need to add it again - thinking_grammar_logic = "thinking_content thinking_end "; + thinking_grammar_logic = "thinking-content thinking-end "; } else { - thinking_grammar_logic = "thinking_start thinking_content thinking_end "; + thinking_grammar_logic = "thinking-start thinking-content thinking-end "; } } From 86493dd92b7d7ab2e2312df08b937ecbf0470ef7 Mon Sep 17 00:00:00 2001 From: Pierre F Date: Mon, 25 Aug 2025 20:13:45 +0200 Subject: [PATCH 06/15] fixed really weird grammar crash `Unexpected empty grammar stack after accepting piece:` --- common/chat.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/chat.cpp b/common/chat.cpp index 67db927a5c..52743a1d32 100644 --- a/common/chat.cpp +++ b/common/chat.cpp @@ -1837,7 +1837,7 @@ static common_chat_params common_chat_params_init_hermes_2_pro(const common_chat } else { - thinking_grammar_logic = "thinking-start thinking-content thinking-end "; + thinking_grammar_logic = "(thinking-start thinking-content thinking-end)? "; } } From bb5e352ad91cdb868df831513442607193d6e6a6 Mon Sep 17 00:00:00 2001 From: Pierre F Date: Mon, 25 Aug 2025 20:26:38 +0200 Subject: [PATCH 07/15] also apply the hotcrashfix here, just in case --- common/chat.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/chat.cpp b/common/chat.cpp index 52743a1d32..23a446da91 100644 --- a/common/chat.cpp +++ b/common/chat.cpp @@ -1833,7 +1833,7 @@ static common_chat_params common_chat_params_init_hermes_2_pro(const common_chat if (extra_context["enable_thinking"]) { if (data.thinking_forced_open) { //thinking tag was already opened by used so we don't need to add it again - thinking_grammar_logic = "thinking-content thinking-end "; + thinking_grammar_logic = "(thinking-content thinking-end)? "; } else { From 6d5f561896d5d75647503ddbbd1e9d2bae3cab24 Mon Sep 17 00:00:00 2001 From: Pierre F Date: Tue, 26 Aug 2025 12:30:57 +0200 Subject: [PATCH 08/15] reverted changes done to grammar_lazy for hermes 2 --- common/chat.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/chat.cpp b/common/chat.cpp index 23a446da91..5f75192814 100644 --- a/common/chat.cpp +++ b/common/chat.cpp @@ -1771,7 +1771,7 @@ static common_chat_params common_chat_params_init_hermes_2_pro(const common_chat // you should not be able to call enable_thinking if is not supported GGML_ASSERT(!extra_context["enable_thinking"] || extra_context["enable_thinking"] == supports_thinking); // (content)?({"name": "foo", "arguments": {"a": 1}})* - data.grammar_lazy = true; + data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED; data.grammar = build_grammar([&](const common_grammar_builder & builder) { std::vector tool_rules; std::vector tool_call_alts; From 352274e116de585e5fd657a15ae06c31694655de Mon Sep 17 00:00:00 2001 From: Pierre F Date: Tue, 26 Aug 2025 12:39:50 +0200 Subject: [PATCH 09/15] if there is enable_thinking enabled but hermes model doesn't support it, just disable it, don't GGML_ABORT --- common/chat.cpp | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/common/chat.cpp b/common/chat.cpp index 5f75192814..6e087625a5 100644 --- a/common/chat.cpp +++ b/common/chat.cpp @@ -1758,6 +1758,13 @@ static common_chat_params common_chat_params_init_hermes_2_pro(const common_chat data.prompt = apply(tmpl, inputs, /* messages_override =*/ std::nullopt, /* tools_override= */ std::nullopt, extra_context); data.format = COMMON_CHAT_FORMAT_HERMES_2_PRO; + auto supports_thinking = tmpl.source().find("") != std::string::npos; + + // you should not be able to call enable_thinking if is not supported + if (!supports_thinking && extra_context["enable_thinking"]) { + extra_context["enable_thinking"] = false; + } + if (string_ends_with(data.prompt, "\n")) { if (!extra_context["enable_thinking"]) { data.prompt += ""; @@ -1767,9 +1774,6 @@ static common_chat_params common_chat_params_init_hermes_2_pro(const common_chat } if (!inputs.tools.is_null()) { - auto supports_thinking = tmpl.source().find("") != std::string::npos; - // you should not be able to call enable_thinking if is not supported - GGML_ASSERT(!extra_context["enable_thinking"] || extra_context["enable_thinking"] == supports_thinking); // (content)?({"name": "foo", "arguments": {"a": 1}})* data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED; data.grammar = build_grammar([&](const common_grammar_builder & builder) { From 0e558302a2daf5ecd671c14e84a52be1b89490e7 Mon Sep 17 00:00:00 2001 From: CNE FICHEPOIL Pierre Date: Tue, 26 Aug 2025 14:48:26 +0200 Subject: [PATCH 10/15] fix thinking-content eating closing think tag | ref #8953 --- common/chat.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/chat.cpp b/common/chat.cpp index 6e087625a5..154b2a352d 100644 --- a/common/chat.cpp +++ b/common/chat.cpp @@ -1829,7 +1829,7 @@ static common_chat_params common_chat_params_init_hermes_2_pro(const common_chat auto tool_call = builder.add_rule("tool_call", string_join(tool_call_alts, " | ")); builder.add_rule("thinking-start", "\"\""); - builder.add_rule("thinking-content", "[^\\x00]*"); + builder.add_rule("thinking-content", "( [^<] | \"<\" [^/] | \"] )*"); builder.add_rule("thinking-end", "\"\" space"); //thinking grammar logic depending on if thinking_forced_open was to true (so already opened (and maybe closed)) and if thinking is even allowed From e62cd709e793a7acfbc5f9508960db6f0929a21f Mon Sep 17 00:00:00 2001 From: CNE FICHEPOIL Pierre Date: Tue, 26 Aug 2025 15:01:36 +0200 Subject: [PATCH 11/15] removed `?` from grammar as it doesn't crash on linux, probably worth it's own issue --- common/chat.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/common/chat.cpp b/common/chat.cpp index 154b2a352d..ff4ca39ad7 100644 --- a/common/chat.cpp +++ b/common/chat.cpp @@ -1837,11 +1837,11 @@ static common_chat_params common_chat_params_init_hermes_2_pro(const common_chat if (extra_context["enable_thinking"]) { if (data.thinking_forced_open) { //thinking tag was already opened by used so we don't need to add it again - thinking_grammar_logic = "(thinking-content thinking-end)? "; + thinking_grammar_logic = "(thinking-content thinking-end) "; } else { - thinking_grammar_logic = "(thinking-start thinking-content thinking-end)? "; + thinking_grammar_logic = "(thinking-start thinking-content thinking-end) "; } } From 310701ba1bebc21384383558e0e410be5dbc7736 Mon Sep 17 00:00:00 2001 From: Pierre F Date: Thu, 28 Aug 2025 22:23:18 +0200 Subject: [PATCH 12/15] fixed crash with "auto" mode, trigger was missing --- common/chat.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/common/chat.cpp b/common/chat.cpp index ff4ca39ad7..89fbf479b8 100644 --- a/common/chat.cpp +++ b/common/chat.cpp @@ -1835,6 +1835,10 @@ static common_chat_params common_chat_params_init_hermes_2_pro(const common_chat //thinking grammar logic depending on if thinking_forced_open was to true (so already opened (and maybe closed)) and if thinking is even allowed std::string thinking_grammar_logic = ""; // thinking tag was closed or not supported/wanted if (extra_context["enable_thinking"]) { + data.grammar_triggers.push_back({ + COMMON_GRAMMAR_TRIGGER_TYPE_WORD, + data.thinking_forced_open ? "" : "" + }); if (data.thinking_forced_open) { //thinking tag was already opened by used so we don't need to add it again thinking_grammar_logic = "(thinking-content thinking-end) "; From 73010a1fc6ab401e435598db05025a090a97c2ba Mon Sep 17 00:00:00 2001 From: ExtReMLapin <3909752+ExtReMLapin@users.noreply.github.com> Date: Tue, 4 Nov 2025 14:15:41 +0100 Subject: [PATCH 13/15] Applied @ochafik 's suggested code after testing locally, no regression Co-authored-by: Olivier Chafik --- common/chat.cpp | 28 ++++++++++++---------------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/common/chat.cpp b/common/chat.cpp index 0887593744..93eadfdb19 100644 --- a/common/chat.cpp +++ b/common/chat.cpp @@ -2339,30 +2339,26 @@ static common_chat_params common_chat_params_init_hermes_2_pro(const common_chat "( \"```\\n\" | \"```json\\n\" | \"```xml\\n\" ) space " + wrappable_tool_call + " space \"```\" space "); auto tool_call = builder.add_rule("tool_call", string_join(tool_call_alts, " | ")); - builder.add_rule("thinking-start", "\"\""); - builder.add_rule("thinking-content", "( [^<] | \"<\" [^/] | \"] )*"); - builder.add_rule("thinking-end", "\"\" space"); - - //thinking grammar logic depending on if thinking_forced_open was to true (so already opened (and maybe closed)) and if thinking is even allowed - std::string thinking_grammar_logic = ""; // thinking tag was closed or not supported/wanted + // thinking grammar logic depending on if thinking_forced_open was to true (so already opened (and maybe closed)) and if thinking is even allowed if (extra_context["enable_thinking"]) { data.grammar_triggers.push_back({ COMMON_GRAMMAR_TRIGGER_TYPE_WORD, data.thinking_forced_open ? "" : "" }); - if (data.thinking_forced_open) { - //thinking tag was already opened by used so we don't need to add it again - thinking_grammar_logic = "(thinking-content thinking-end) "; - } - else - { - thinking_grammar_logic = "(thinking-start thinking-content thinking-end) "; + std::string prelude = ""; + if (!data.thinking_forced_open) { + prelude = builder.add_rule("think-start", "\"\""); } + prelude += " "; + prelude += builder.add_rule("think-content", "( [^<] | \"<\" [^/] | \"] )*"); + prelude += " "; + prelude += builder.add_rule("think-end", "\"\" space"); + prelude += " "; + builder.add_rule("root", prelude + "(" + tool_call + ")" + (inputs.parallel_tool_calls ? "*" : "?")); + } else { + builder.add_rule("root", inputs.parallel_tool_calls ? "(" + tool_call + ")+" : tool_call); } - - builder.add_rule("root", thinking_grammar_logic + (inputs.parallel_tool_calls ? "(" + tool_call + ")+" : tool_call)); - // Trigger on some common known "good bad" outputs (only from the start and with a json that's about a specific argument name to avoid false positives) data.grammar_triggers.push_back({ COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL, From 6441ad48c667e7b948d6b30fa2484e7a4823c528 Mon Sep 17 00:00:00 2001 From: CNE FICHEPOIL Pierre Date: Tue, 4 Nov 2025 15:23:58 +0100 Subject: [PATCH 14/15] updated qwen3 0.6B chat template with official one --- models/templates/Qwen-Qwen3-0.6B.jinja | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/models/templates/Qwen-Qwen3-0.6B.jinja b/models/templates/Qwen-Qwen3-0.6B.jinja index 699ff8df40..01be9b307d 100644 --- a/models/templates/Qwen-Qwen3-0.6B.jinja +++ b/models/templates/Qwen-Qwen3-0.6B.jinja @@ -17,23 +17,27 @@ {%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %} {%- for message in messages[::-1] %} {%- set index = (messages|length - 1) - loop.index0 %} - {%- if ns.multi_step_tool and message.role == "user" and not(message.content.startswith('') and message.content.endswith('')) %} + {%- if ns.multi_step_tool and message.role == "user" and message.content is string and not(message.content.startswith('') and message.content.endswith('')) %} {%- set ns.multi_step_tool = false %} {%- set ns.last_query_index = index %} {%- endif %} {%- endfor %} {%- for message in messages %} - {%- if (message.role == "user") or (message.role == "system" and not loop.first) %} - {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }} - {%- elif message.role == "assistant" %} + {%- if message.content is string %} {%- set content = message.content %} + {%- else %} + {%- set content = '' %} + {%- endif %} + {%- if (message.role == "user") or (message.role == "system" and not loop.first) %} + {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }} + {%- elif message.role == "assistant" %} {%- set reasoning_content = '' %} - {%- if message.reasoning_content is defined and message.reasoning_content is not none %} + {%- if message.reasoning_content is string %} {%- set reasoning_content = message.reasoning_content %} {%- else %} - {%- if '' in message.content %} - {%- set content = message.content.split('')[-1].lstrip('\n') %} - {%- set reasoning_content = message.content.split('')[0].rstrip('\n').split('')[-1].lstrip('\n') %} + {%- if '' in content %} + {%- set reasoning_content = content.split('')[0].rstrip('\n').split('')[-1].lstrip('\n') %} + {%- set content = content.split('')[-1].lstrip('\n') %} {%- endif %} {%- endif %} {%- if loop.index0 > ns.last_query_index %} @@ -70,7 +74,7 @@ {{- '<|im_start|>user' }} {%- endif %} {{- '\n\n' }} - {{- message.content }} + {{- content }} {{- '\n' }} {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %} {{- '<|im_end|>\n' }} From cc18ecc5b7047a7442fb1cde92d7764cf5a082ec Mon Sep 17 00:00:00 2001 From: CNE FICHEPOIL Pierre Date: Tue, 4 Nov 2025 15:58:30 +0100 Subject: [PATCH 15/15] added tests --- tests/test-chat.cpp | 62 +++++++++++++++++++++++ tools/server/tests/unit/test_tool_call.py | 59 +++++++++++++++++++++ 2 files changed, 121 insertions(+) diff --git a/tests/test-chat.cpp b/tests/test-chat.cpp index 4a8ba849b3..619bc1f106 100644 --- a/tests/test-chat.cpp +++ b/tests/test-chat.cpp @@ -1111,6 +1111,68 @@ static void test_template_output_parsers() { /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, })); } + { + auto tmpls = read_templates("models/templates/Qwen-Qwen3-0.6B.jinja"); + std::vector end_tokens{ "<|im_end|>" }; + + assert_equals(COMMON_CHAT_FORMAT_HERMES_2_PRO, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format); + assert_equals(COMMON_CHAT_FORMAT_HERMES_2_PRO, common_chat_templates_apply(tmpls.get(), inputs_tools).format); + + // Test that enable_thinking=false adds empty think tags + { + common_chat_templates_inputs inputs_no_thinking; + inputs_no_thinking.messages = {message_user}; + inputs_no_thinking.tools = tools; + inputs_no_thinking.tool_choice = COMMON_CHAT_TOOL_CHOICE_REQUIRED; + inputs_no_thinking.enable_thinking = false; + + auto params = common_chat_templates_apply(tmpls.get(), inputs_no_thinking); + assert_equals(COMMON_CHAT_FORMAT_HERMES_2_PRO, params.format); + // Verify the prompt contains empty think tags when thinking is disabled + assert_equals(true, params.prompt.find("\n\n") != std::string::npos); + } + + // Test that grammar allows thinking with REQUIRED tool choice + { + common_chat_templates_inputs inputs_with_thinking; + inputs_with_thinking.messages = {message_user}; + inputs_with_thinking.tools = tools; + inputs_with_thinking.tool_choice = COMMON_CHAT_TOOL_CHOICE_REQUIRED; + inputs_with_thinking.enable_thinking = true; + + auto params = common_chat_templates_apply(tmpls.get(), inputs_with_thinking); + assert_equals(COMMON_CHAT_FORMAT_HERMES_2_PRO, params.format); + + // The key fix: grammar should contain the thinking pattern even with REQUIRED + assert_equals(false, params.grammar.empty()); + assert_equals(true, params.grammar.find("") != std::string::npos); + + // Grammar should allow thinking before tool calls + assert_equals(true, params.grammar.find("think-") != std::string::npos || + params.grammar.find("") != std::string::npos); + } + + // Test parsing: tool call with thinking works correctly + assert_msg_equals(message_assist_call_thoughts, + common_chat_parse( + "I'm\nthinking\n" + "{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}", + /* is_partial= */ false, + { + /* .format = */ COMMON_CHAT_FORMAT_HERMES_2_PRO, + /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, + })); + + // Test that reasoning + tool calls work in template generation + test_templates(tmpls.get(), end_tokens, message_assist_call_thoughts, tools, + "", // Don't check exact delta, just verify it parses correctly + /* expect_grammar_triggered= */ true, + /* test_grammar_if_triggered= */ true, + COMMON_REASONING_FORMAT_DEEPSEEK); + + // Verify enable_thinking support + assert_equals(true, common_chat_templates_support_enable_thinking(tmpls.get())); + } { auto tmpls = read_templates("models/templates/meta-llama-Llama-3.1-8B-Instruct.jinja"); std::vector end_tokens{ "<|eom_id|>", "<|eot_id|>" }; diff --git a/tools/server/tests/unit/test_tool_call.py b/tools/server/tests/unit/test_tool_call.py index b8f0f10863..b191840cd2 100755 --- a/tools/server/tests/unit/test_tool_call.py +++ b/tools/server/tests/unit/test_tool_call.py @@ -623,3 +623,62 @@ def do_test_hello_world(server: ServerProcess, **kwargs): code = actual_arguments["code"] assert isinstance(code, str), f"Expected code to be a string, got {type(code)}: {json.dumps(code)}" assert re.match(r'''print\(("[Hh]ello,? [Ww]orld!?"|'[Hh]ello,? [Ww]orld!?')\)''', re.sub(r'#.*\n?', '', code)), f'Expected hello world, got {code}' + + + +@pytest.mark.slow +@pytest.mark.parametrize("stream", [CompletionMode.NORMAL, CompletionMode.STREAMED]) +@pytest.mark.parametrize("tool,hf_repo,template_override,reasoning_format", [ + (PYTHON_TOOL, "unsloth/Qwen3-0.6B-GGUF:Q4_K_M", None, 'deepseek'), + (TEST_TOOL, "unsloth/Qwen3-0.6B-GGUF:Q4_K_M", None, 'deepseek'), +]) +def test_required_tool_with_reasoning(tool: dict, hf_repo: str, template_override: str | Tuple[str, str | None] | None, reasoning_format: Literal['deepseek', 'none'], stream: CompletionMode): + global server + n_predict = 512 + + # Set the reasoning format + server.reasoning_format = reasoning_format + + server.jinja = True + server.n_ctx = 8192 + server.n_predict = n_predict + server.model_hf_repo = hf_repo + server.model_hf_file = None + + + server.start(timeout_seconds=TIMEOUT_START_SLOW) + + # Make the request with "tool_choice": "required" + body = server.make_any_request("POST", "/v1/chat/completions", data={ + "max_tokens": n_predict, + "messages": [ + {"role": "system", "content": "You are a coding assistant."}, + {"role": "user", "content": "Write an example"}, # This prompt will force the tool use + ], + "tool_choice": "required", + "tools": [tool], + "parallel_tool_calls": False, + "stream": stream == CompletionMode.STREAMED, + "temperature": 0.0, + "top_k": 1, + "top_p": 1.0, + }, timeout=TIMEOUT_HTTP_REQUEST) + + choice = body["choices"][0] + + + reasoning_content:str = choice["message"].get("reasoning_content") + assert reasoning_content is not None, 'Expected reasoning content, but got None' + assert len(reasoning_content.strip()) > 3, 'Reasoning content is too small to be credible' + + tool_calls = choice["message"].get("tool_calls") + assert tool_calls and len(tool_calls) == 1, f'Expected 1 tool call in {choice["message"]}' + tool_call = tool_calls[0] + expected_function_name = "python" if tool["type"] == "code_interpreter" else tool["function"]["name"] + assert expected_function_name == tool_call["function"]["name"] + + actual_arguments = json.loads(tool_call["function"]["arguments"]) + if tool is PYTHON_TOOL: + assert "code" in actual_arguments, f"tool arguments: {json.dumps(actual_arguments)}, expected: 'code'" + elif tool is TEST_TOOL: + assert "success" in actual_arguments, f"tool arguments: {json.dumps(actual_arguments)}, expected: 'success'" \ No newline at end of file