server : preserve anthropic thinking blocks in conversion (#20120)
* server : preserve anthropic thinking blocks in conversion (#20090)
* server : add tests for anthropic thinking block conversion
---------
Co-authored-by: root <root@llamacpp.home>
This commit is contained in:
parent
ba2fd11cdf
commit
e68f2fb894
|
|
@ -1463,6 +1463,7 @@ json convert_anthropic_to_oai(const json & body) {
|
|||
json tool_calls = json::array();
|
||||
json converted_content = json::array();
|
||||
json tool_results = json::array();
|
||||
std::string reasoning_content;
|
||||
bool has_tool_calls = false;
|
||||
|
||||
for (const auto & block : content) {
|
||||
|
|
@ -1470,6 +1471,8 @@ json convert_anthropic_to_oai(const json & body) {
|
|||
|
||||
if (type == "text") {
|
||||
converted_content.push_back(block);
|
||||
} else if (type == "thinking") {
|
||||
reasoning_content += json_value(block, "thinking", std::string());
|
||||
} else if (type == "image") {
|
||||
json source = json_value(block, "source", json::object());
|
||||
std::string source_type = json_value(source, "type", std::string());
|
||||
|
|
@ -1528,16 +1531,19 @@ json convert_anthropic_to_oai(const json & body) {
|
|||
}
|
||||
}
|
||||
|
||||
if (!converted_content.empty() || has_tool_calls) {
|
||||
if (!converted_content.empty() || has_tool_calls || !reasoning_content.empty()) {
|
||||
json new_msg = {{"role", role}};
|
||||
if (!converted_content.empty()) {
|
||||
new_msg["content"] = converted_content;
|
||||
} else if (has_tool_calls) {
|
||||
} else if (has_tool_calls || !reasoning_content.empty()) {
|
||||
new_msg["content"] = "";
|
||||
}
|
||||
if (!tool_calls.empty()) {
|
||||
new_msg["tool_calls"] = tool_calls;
|
||||
}
|
||||
if (!reasoning_content.empty()) {
|
||||
new_msg["reasoning_content"] = reasoning_content;
|
||||
}
|
||||
oai_messages.push_back(new_msg);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -809,6 +809,139 @@ def test_anthropic_vs_openai_different_response_format():
|
|||
|
||||
# Extended thinking tests with reasoning models
|
||||
|
||||
# The next two tests cover the input path (conversation history):
|
||||
# Client sends thinking blocks -> convert_anthropic_to_oai -> reasoning_content -> template
|
||||
|
||||
def test_anthropic_thinking_history_in_count_tokens():
    """Check that thinking blocks in assistant history contribute to the token count.

    Two conversations are counted via /v1/messages/count_tokens. They are
    identical except that one assistant turn carries a leading "thinking"
    block. If the thinking block is kept, the second conversation must report
    strictly more input tokens than the first.
    """
    global server
    server.jinja = True
    server.chat_template_file = '../../../models/templates/Qwen-Qwen3-0.6B.jinja'
    server.start()

    list_files_tool = {
        "name": "list_files",
        "description": "List files",
        "input_schema": {
            "type": "object",
            "properties": {"path": {"type": "string"}},
            "required": ["path"]
        }
    }

    # Turns shared by both conversations.
    opening_turn = {"role": "user", "content": "Fix the bug"}
    tool_use_block = {"type": "tool_use", "id": "call_1", "name": "list_files", "input": {"path": "."}}
    tool_result_turn = {
        "role": "user",
        "content": [
            {"type": "tool_result", "tool_use_id": "call_1", "content": "main.py"}
        ]
    }
    thinking_block = {
        "type": "thinking",
        "thinking": "I should check the project structure first to understand the codebase layout."
    }

    def count_input_tokens(assistant_blocks):
        # Count tokens for the conversation whose assistant turn holds the given blocks.
        response = server.make_request("POST", "/v1/messages/count_tokens", data={
            "model": "test",
            "messages": [
                opening_turn,
                {"role": "assistant", "content": assistant_blocks},
                tool_result_turn,
            ],
            "tools": [list_files_tool],
        })
        assert response.status_code == 200, f"Expected 200: {response.body}"
        return response.body["input_tokens"]

    tokens_without = count_input_tokens([tool_use_block])
    tokens_with = count_input_tokens([thinking_block, tool_use_block])

    # Thinking blocks should increase the token count
    assert tokens_with > tokens_without, \
        f"Expected more tokens with thinking ({tokens_with}) than without ({tokens_without})"
|
||||
|
||||
|
||||
def test_anthropic_thinking_history_in_template():
    """Check that reasoning_content on assistant history messages is rendered in the prompt.

    Sends an OpenAI-style conversation with two assistant tool-call turns,
    each carrying its own reasoning_content, to /apply-template. Both
    reasoning strings must appear in the returned prompt, which must contain
    at least two <think> openings.
    """
    global server
    server.jinja = True
    server.chat_template_file = '../../../models/templates/Qwen-Qwen3-0.6B.jinja'
    server.start()

    first_thought = "I should check the project structure first."
    second_thought = "Now I need to read the main file."

    def assistant_tool_call(reasoning, call_id, function_name, arguments):
        # Assistant turn with empty content, reasoning text and a single tool call.
        return {
            "role": "assistant",
            "content": "",
            "reasoning_content": reasoning,
            "tool_calls": [{
                "id": call_id,
                "type": "function",
                "function": {"name": function_name, "arguments": arguments}
            }]
        }

    def path_function_tool(function_name, description):
        # Function tool definition taking one required string parameter "path".
        return {
            "type": "function",
            "function": {
                "name": function_name,
                "description": description,
                "parameters": {"type": "object", "properties": {"path": {"type": "string"}}, "required": ["path"]}
            }
        }

    conversation = [
        {"role": "user", "content": "Fix the bug in main.py"},
        assistant_tool_call(first_thought, "call_1", "list_files", "{\"path\": \".\"}"),
        {"role": "tool", "tool_call_id": "call_1", "content": "main.py\nutils.py"},
        assistant_tool_call(second_thought, "call_2", "read_file", "{\"path\": \"main.py\"}"),
        {"role": "tool", "tool_call_id": "call_2", "content": "print('hello')"},
    ]
    tool_definitions = [
        path_function_tool("list_files", "List files"),
        path_function_tool("read_file", "Read a file"),
    ]

    res = server.make_request("POST", "/apply-template", data={
        "messages": conversation,
        "tools": tool_definitions,
    })
    assert res.status_code == 200, f"Expected 200, got {res.status_code}: {res.body}"
    prompt = res.body["prompt"]

    # Both reasoning_content values should be rendered in <think> tags
    assert first_thought in prompt, f"Expected first reasoning text in prompt: {prompt}"
    assert second_thought in prompt, f"Expected second reasoning text in prompt: {prompt}"
    assert prompt.count("<think>") >= 2, f"Expected at least 2 <think> blocks in prompt: {prompt}"
|
||||
|
||||
|
||||
@pytest.mark.slow
|
||||
@pytest.mark.parametrize("stream", [False, True])
|
||||
def test_anthropic_thinking_with_reasoning_model(stream):
|
||||
|
|
|
|||
Loading…
Reference in New Issue