From cc18ecc5b7047a7442fb1cde92d7764cf5a082ec Mon Sep 17 00:00:00 2001
From: CNE FICHEPOIL Pierre
Date: Tue, 4 Nov 2025 15:58:30 +0100
Subject: [PATCH] tests: add Qwen3 thinking + required tool_choice coverage

---
 tests/test-chat.cpp                       | 62 +++++++++++++++++++++++
 tools/server/tests/unit/test_tool_call.py | 59 +++++++++++++++++++++
 2 files changed, 121 insertions(+)

diff --git a/tests/test-chat.cpp b/tests/test-chat.cpp
index 4a8ba849b3..619bc1f106 100644
--- a/tests/test-chat.cpp
+++ b/tests/test-chat.cpp
@@ -1111,6 +1111,68 @@ static void test_template_output_parsers() {
                     /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
                 }));
     }
+    {
+        auto tmpls = read_templates("models/templates/Qwen-Qwen3-0.6B.jinja");
+        std::vector<std::string> end_tokens{ "<|im_end|>" };
+
+        assert_equals(COMMON_CHAT_FORMAT_HERMES_2_PRO, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
+        assert_equals(COMMON_CHAT_FORMAT_HERMES_2_PRO, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
+
+        // Test that enable_thinking=false adds empty think tags
+        {
+            common_chat_templates_inputs inputs_no_thinking;
+            inputs_no_thinking.messages = {message_user};
+            inputs_no_thinking.tools = tools;
+            inputs_no_thinking.tool_choice = COMMON_CHAT_TOOL_CHOICE_REQUIRED;
+            inputs_no_thinking.enable_thinking = false;
+
+            auto params = common_chat_templates_apply(tmpls.get(), inputs_no_thinking);
+            assert_equals(COMMON_CHAT_FORMAT_HERMES_2_PRO, params.format);
+            // Verify the prompt contains empty think tags when thinking is disabled
+            assert_equals(true, params.prompt.find("<think>\n\n</think>") != std::string::npos);
+        }
+
+        // Test that the grammar allows thinking with a REQUIRED tool choice
+        {
+            common_chat_templates_inputs inputs_with_thinking;
+            inputs_with_thinking.messages = {message_user};
+            inputs_with_thinking.tools = tools;
+            inputs_with_thinking.tool_choice = COMMON_CHAT_TOOL_CHOICE_REQUIRED;
+            inputs_with_thinking.enable_thinking = true;
+
+            auto params = common_chat_templates_apply(tmpls.get(), inputs_with_thinking);
+            assert_equals(COMMON_CHAT_FORMAT_HERMES_2_PRO, params.format);
+
+            // The key fix: the grammar should contain the thinking pattern even with REQUIRED
+            assert_equals(false, params.grammar.empty());
+            assert_equals(true, params.grammar.find("<think>") != std::string::npos);
+
+            // The grammar should allow thinking before tool calls
+            assert_equals(true, params.grammar.find("think-") != std::string::npos ||
+                                params.grammar.find("</think>") != std::string::npos);
+        }
+
+        // Test parsing: a tool call preceded by thinking is parsed correctly
+        assert_msg_equals(message_assist_call_thoughts,
+            common_chat_parse(
+                "<think>I'm\nthinking</think>\n"
+                "<tool_call>{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}</tool_call>",
+                /* is_partial= */ false,
+                {
+                    /* .format = */ COMMON_CHAT_FORMAT_HERMES_2_PRO,
+                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
+                }));
+
+        // Test that reasoning + tool calls work in template generation
+        test_templates(tmpls.get(), end_tokens, message_assist_call_thoughts, tools,
+                       "", // Don't check the exact delta, just verify that it parses correctly
+                       /* expect_grammar_triggered= */ true,
+                       /* test_grammar_if_triggered= */ true,
+                       COMMON_REASONING_FORMAT_DEEPSEEK);
+
+        // Verify enable_thinking support
+        assert_equals(true, common_chat_templates_support_enable_thinking(tmpls.get()));
+    }
     {
         auto tmpls = read_templates("models/templates/meta-llama-Llama-3.1-8B-Instruct.jinja");
         std::vector<std::string> end_tokens{ "<|eom_id|>", "<|eot_id|>" };
diff --git a/tools/server/tests/unit/test_tool_call.py b/tools/server/tests/unit/test_tool_call.py
index b8f0f10863..b191840cd2 100755
--- a/tools/server/tests/unit/test_tool_call.py
+++ b/tools/server/tests/unit/test_tool_call.py
@@ -623,3 +623,62 @@ def do_test_hello_world(server: ServerProcess, **kwargs):
     code = actual_arguments["code"]
     assert isinstance(code, str), f"Expected code to be a string, got {type(code)}: {json.dumps(code)}"
     assert re.match(r'''print\(("[Hh]ello,? [Ww]orld!?"|'[Hh]ello,? [Ww]orld!?')\)''', re.sub(r'#.*\n?', '', code)), f'Expected hello world, got {code}'
+
+
+
+@pytest.mark.slow
+@pytest.mark.parametrize("stream", [CompletionMode.NORMAL, CompletionMode.STREAMED])
+@pytest.mark.parametrize("tool,hf_repo,template_override,reasoning_format", [
+    (PYTHON_TOOL, "unsloth/Qwen3-0.6B-GGUF:Q4_K_M", None, 'deepseek'),
+    (TEST_TOOL, "unsloth/Qwen3-0.6B-GGUF:Q4_K_M", None, 'deepseek'),
+])
+def test_required_tool_with_reasoning(tool: dict, hf_repo: str, template_override: str | Tuple[str, str | None] | None, reasoning_format: Literal['deepseek', 'none'], stream: CompletionMode):
+    global server
+    n_predict = 512
+
+    # Set the reasoning format
+    server.reasoning_format = reasoning_format
+
+    server.jinja = True
+    server.n_ctx = 8192
+    server.n_predict = n_predict
+    server.model_hf_repo = hf_repo
+    server.model_hf_file = None
+
+
+    server.start(timeout_seconds=TIMEOUT_START_SLOW)
+
+    # Make the request with "tool_choice": "required"
+    body = server.make_any_request("POST", "/v1/chat/completions", data={
+        "max_tokens": n_predict,
+        "messages": [
+            {"role": "system", "content": "You are a coding assistant."},
+            {"role": "user", "content": "Write an example"},  # This prompt forces tool use
+        ],
+        "tool_choice": "required",
+        "tools": [tool],
+        "parallel_tool_calls": False,
+        "stream": stream == CompletionMode.STREAMED,
+        "temperature": 0.0,
+        "top_k": 1,
+        "top_p": 1.0,
+    }, timeout=TIMEOUT_HTTP_REQUEST)
+
+    choice = body["choices"][0]
+
+
+    reasoning_content = choice["message"].get("reasoning_content")
+    assert reasoning_content is not None, 'Expected reasoning content, but got None'
+    assert len(reasoning_content.strip()) > 3, 'Reasoning content is too short to be credible'
+
+    tool_calls = choice["message"].get("tool_calls")
+    assert tool_calls and len(tool_calls) == 1, f'Expected 1 tool call in {choice["message"]}'
+    tool_call = tool_calls[0]
+    expected_function_name = "python" if tool["type"] == "code_interpreter" else tool["function"]["name"]
+    assert expected_function_name == tool_call["function"]["name"]
+
+    actual_arguments = json.loads(tool_call["function"]["arguments"])
+    if tool is PYTHON_TOOL:
+        assert "code" in actual_arguments, f"tool arguments: {json.dumps(actual_arguments)}, expected: 'code'"
+    elif tool is TEST_TOOL:
+        assert "success" in actual_arguments, f"tool arguments: {json.dumps(actual_arguments)}, expected: 'success'"
\ No newline at end of file
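
Note for reviewers (not part of the diff above): the snippet below is a minimal manual sketch of what the new server test exercises, assuming a llama-server instance is already running locally with --jinja, --reasoning-format deepseek and a Qwen3 GGUF loaded. The localhost:8080 endpoint, the use of the requests library, and the simplified tool schema (a stand-in for the suite's TEST_TOOL) are illustrative assumptions, not part of the test suite.

# Manual repro sketch for the tool_choice=required + reasoning case.
# Assumes llama-server is already running on localhost:8080 with --jinja
# and --reasoning-format deepseek (endpoint and schema are assumptions).
import json

import requests

# Illustrative stand-in for the suite's TEST_TOOL schema.
TOOL = {
    "type": "function",
    "function": {
        "name": "test",
        "description": "A test tool",
        "parameters": {
            "type": "object",
            "properties": {"success": {"type": "boolean"}},
            "required": ["success"],
        },
    },
}

resp = requests.post(
    "http://localhost:8080/v1/chat/completions",  # assumed local endpoint
    json={
        "max_tokens": 512,
        "messages": [
            {"role": "system", "content": "You are a coding assistant."},
            {"role": "user", "content": "Write an example"},
        ],
        "tool_choice": "required",  # the case exercised by the new test
        "tools": [TOOL],
        "parallel_tool_calls": False,
        "temperature": 0.0,
    },
    timeout=60,
)
resp.raise_for_status()
msg = resp.json()["choices"][0]["message"]

# With reasoning_format=deepseek the reasoning is returned separately, and
# tool_choice=required must still yield exactly one well-formed tool call.
print("reasoning:", msg.get("reasoning_content"))
print("tool call:", json.dumps(msg.get("tool_calls"), indent=2))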