diff --git a/tools/server/server-common.cpp b/tools/server/server-common.cpp index ff3c6d3c2b..13ea8c690f 100644 --- a/tools/server/server-common.cpp +++ b/tools/server/server-common.cpp @@ -1463,6 +1463,7 @@ json convert_anthropic_to_oai(const json & body) { json tool_calls = json::array(); json converted_content = json::array(); json tool_results = json::array(); + std::string reasoning_content; bool has_tool_calls = false; for (const auto & block : content) { @@ -1470,6 +1471,8 @@ json convert_anthropic_to_oai(const json & body) { if (type == "text") { converted_content.push_back(block); + } else if (type == "thinking") { + reasoning_content += json_value(block, "thinking", std::string()); } else if (type == "image") { json source = json_value(block, "source", json::object()); std::string source_type = json_value(source, "type", std::string()); @@ -1528,16 +1531,19 @@ json convert_anthropic_to_oai(const json & body) { } } - if (!converted_content.empty() || has_tool_calls) { + if (!converted_content.empty() || has_tool_calls || !reasoning_content.empty()) { json new_msg = {{"role", role}}; if (!converted_content.empty()) { new_msg["content"] = converted_content; - } else if (has_tool_calls) { + } else if (has_tool_calls || !reasoning_content.empty()) { new_msg["content"] = ""; } if (!tool_calls.empty()) { new_msg["tool_calls"] = tool_calls; } + if (!reasoning_content.empty()) { + new_msg["reasoning_content"] = reasoning_content; + } oai_messages.push_back(new_msg); } diff --git a/tools/server/tests/unit/test_compat_anthropic.py b/tools/server/tests/unit/test_compat_anthropic.py index e16e0235c6..93ff03be6b 100644 --- a/tools/server/tests/unit/test_compat_anthropic.py +++ b/tools/server/tests/unit/test_compat_anthropic.py @@ -809,6 +809,139 @@ def test_anthropic_vs_openai_different_response_format(): # Extended thinking tests with reasoning models +# The next two tests cover the input path (conversation history): +# Client sends thinking blocks -> 
convert_anthropic_to_oai -> reasoning_content -> template + +def test_anthropic_thinking_history_in_count_tokens(): + """Test that interleaved thinking blocks in conversation history are not dropped during conversion.""" + global server + server.jinja = True + server.chat_template_file = '../../../models/templates/Qwen-Qwen3-0.6B.jinja' + server.start() + + tool = { + "name": "list_files", + "description": "List files", + "input_schema": { + "type": "object", + "properties": {"path": {"type": "string"}}, + "required": ["path"] + } + } + + messages_without_thinking = [ + {"role": "user", "content": "Fix the bug"}, + { + "role": "assistant", + "content": [ + {"type": "tool_use", "id": "call_1", "name": "list_files", "input": {"path": "."}} + ] + }, + { + "role": "user", + "content": [ + {"type": "tool_result", "tool_use_id": "call_1", "content": "main.py"} + ] + }, + ] + + messages_with_thinking = [ + {"role": "user", "content": "Fix the bug"}, + { + "role": "assistant", + "content": [ + {"type": "thinking", "thinking": "I should check the project structure first to understand the codebase layout."}, + {"type": "tool_use", "id": "call_1", "name": "list_files", "input": {"path": "."}} + ] + }, + { + "role": "user", + "content": [ + {"type": "tool_result", "tool_use_id": "call_1", "content": "main.py"} + ] + }, + ] + + res_without = server.make_request("POST", "/v1/messages/count_tokens", data={ + "model": "test", + "messages": messages_without_thinking, + "tools": [tool], + }) + assert res_without.status_code == 200, f"Expected 200: {res_without.body}" + + res_with = server.make_request("POST", "/v1/messages/count_tokens", data={ + "model": "test", + "messages": messages_with_thinking, + "tools": [tool], + }) + assert res_with.status_code == 200, f"Expected 200: {res_with.body}" + + # Thinking blocks should increase the token count + assert res_with.body["input_tokens"] > res_without.body["input_tokens"], \ + f"Expected more tokens with thinking 
({res_with.body['input_tokens']}) than without ({res_without.body['input_tokens']})" + + +def test_anthropic_thinking_history_in_template(): + """Test that reasoning_content from converted interleaved thinking blocks renders in the prompt.""" + global server + server.jinja = True + server.chat_template_file = '../../../models/templates/Qwen-Qwen3-0.6B.jinja' + server.start() + + reasoning_1 = "I should check the project structure first." + reasoning_2 = "Now I need to read the main file." + + res = server.make_request("POST", "/apply-template", data={ + "messages": [ + {"role": "user", "content": "Fix the bug in main.py"}, + { + "role": "assistant", + "content": "", + "reasoning_content": reasoning_1, + "tool_calls": [{ + "id": "call_1", + "type": "function", + "function": {"name": "list_files", "arguments": "{\"path\": \".\"}"} + }] + }, + {"role": "tool", "tool_call_id": "call_1", "content": "main.py\nutils.py"}, + { + "role": "assistant", + "content": "", + "reasoning_content": reasoning_2, + "tool_calls": [{ + "id": "call_2", + "type": "function", + "function": {"name": "read_file", "arguments": "{\"path\": \"main.py\"}"} + }] + }, + {"role": "tool", "tool_call_id": "call_2", "content": "print('hello')"}, + ], + "tools": [{ + "type": "function", + "function": { + "name": "list_files", + "description": "List files", + "parameters": {"type": "object", "properties": {"path": {"type": "string"}}, "required": ["path"]} + } + }, { + "type": "function", + "function": { + "name": "read_file", + "description": "Read a file", + "parameters": {"type": "object", "properties": {"path": {"type": "string"}}, "required": ["path"]} + } + }], + }) + assert res.status_code == 200, f"Expected 200, got {res.status_code}: {res.body}" + prompt = res.body["prompt"] + + # Both reasoning_content values should be rendered in <think> tags + assert reasoning_1 in prompt, f"Expected first reasoning text in prompt: {prompt}" + assert reasoning_2 in prompt, f"Expected second reasoning text in prompt: 
{prompt}" + assert prompt.count("<think>") >= 2, f"Expected at least 2 <think> blocks in prompt: {prompt}" + + + @pytest.mark.slow + @pytest.mark.parametrize("stream", [False, True]) + def test_anthropic_thinking_with_reasoning_model(stream):