diff --git a/tools/server/tests/unit/test_template.py b/tools/server/tests/unit/test_template.py index e5185fcbfa..43a356020d 100644 --- a/tools/server/tests/unit/test_template.py +++ b/tools/server/tests/unit/test_template.py @@ -11,6 +11,7 @@ sys.path.insert(0, str(path)) import datetime from utils import * +from typing import Literal server: ServerProcess @@ -23,24 +24,24 @@ def create_server(): @pytest.mark.parametrize("tools", [None, [], [TEST_TOOL]]) -@pytest.mark.parametrize("template_name,reasoning_budget,expected_end", [ - ("deepseek-ai-DeepSeek-R1-Distill-Qwen-32B", None, "\n"), - ("deepseek-ai-DeepSeek-R1-Distill-Qwen-32B", -1, "\n"), - ("deepseek-ai-DeepSeek-R1-Distill-Qwen-32B", 0, "\n"), +@pytest.mark.parametrize("template_name,reasoning,expected_end", [ + ("deepseek-ai-DeepSeek-R1-Distill-Qwen-32B", "on", "\n"), + ("deepseek-ai-DeepSeek-R1-Distill-Qwen-32B","auto", "\n"), + ("deepseek-ai-DeepSeek-R1-Distill-Qwen-32B", "off", "\n"), - ("Qwen-Qwen3-0.6B", -1, "<|im_start|>assistant\n"), - ("Qwen-Qwen3-0.6B", 0, "<|im_start|>assistant\n\n\n\n\n"), + ("Qwen-Qwen3-0.6B","auto", "<|im_start|>assistant\n"), + ("Qwen-Qwen3-0.6B", "off", "<|im_start|>assistant\n\n\n\n\n"), - ("Qwen-QwQ-32B", -1, "<|im_start|>assistant\n\n"), - ("Qwen-QwQ-32B", 0, "<|im_start|>assistant\n\n"), + ("Qwen-QwQ-32B","auto", "<|im_start|>assistant\n\n"), + ("Qwen-QwQ-32B", "off", "<|im_start|>assistant\n\n"), - ("CohereForAI-c4ai-command-r7b-12-2024-tool_use", -1, "<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>"), - ("CohereForAI-c4ai-command-r7b-12-2024-tool_use", 0, "<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|><|START_THINKING|><|END_THINKING|>"), + ("CohereForAI-c4ai-command-r7b-12-2024-tool_use","auto", "<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>"), + ("CohereForAI-c4ai-command-r7b-12-2024-tool_use", "off", "<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|><|START_THINKING|><|END_THINKING|>"), ]) -def test_reasoning_budget(template_name: str, reasoning_budget: int | None, expected_end: str, tools: list[dict]): +def test_reasoning(template_name: str, reasoning: Literal['on', 'off', 'auto'] | None, expected_end: str, tools: list[dict]): global server server.jinja = True - server.reasoning_budget = reasoning_budget + server.reasoning = reasoning server.chat_template_file = f'../../../models/templates/{template_name}.jinja' server.start() diff --git a/tools/server/tests/utils.py b/tools/server/tests/utils.py index db357d876b..c6fe11261f 100644 --- a/tools/server/tests/utils.py +++ b/tools/server/tests/utils.py @@ -95,7 +95,7 @@ class ServerProcess: no_webui: bool | None = None jinja: bool | None = None reasoning_format: Literal['deepseek', 'none', 'nothink'] | None = None - reasoning_budget: int | None = None + reasoning: Literal['on', 'off', 'auto'] | None = None chat_template: str | None = None chat_template_file: str | None = None server_path: str | None = None @@ -225,8 +225,8 @@ class ServerProcess: server_args.append("--no-jinja") if self.reasoning_format is not None: server_args.extend(("--reasoning-format", self.reasoning_format)) - if self.reasoning_budget is not None: - server_args.extend(("--reasoning-budget", self.reasoning_budget)) + if self.reasoning is not None: + server_args.extend(("--reasoning", self.reasoning)) if self.chat_template: server_args.extend(["--chat-template", self.chat_template]) if self.chat_template_file: