server : support reasoning parameters from OAI, OpenRouter and Claude APIs
This commit is contained in:
parent
57819b8d4b
commit
9f8d6fef78
|
|
@ -1026,9 +1026,48 @@ json oaicompat_chat_params_parse(
|
||||||
llama_params["parse_tool_calls"] = true;
|
llama_params["parse_tool_calls"] = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
// merge the template args provided from command line with the args provided in the user request
|
|
||||||
auto chat_template_kwargs_object = json_value(body, "chat_template_kwargs", json::object());
|
|
||||||
inputs.chat_template_kwargs = opt.chat_template_kwargs;
|
inputs.chat_template_kwargs = opt.chat_template_kwargs;
|
||||||
|
|
||||||
|
// parse the oai compatible "reasoning_effort" or openrouter compatible "reasoning.effort"
|
||||||
|
// to override "chat_template_kwargs.enable_thinking"
|
||||||
|
auto reasoning_effort = json_value(
|
||||||
|
body,
|
||||||
|
"reasoning_effort",
|
||||||
|
json_value(
|
||||||
|
json_value(body, "reasoning", json::object()),
|
||||||
|
"effort",
|
||||||
|
std::string("")
|
||||||
|
)
|
||||||
|
);
|
||||||
|
if (reasoning_effort == "none") {
|
||||||
|
inputs.chat_template_kwargs["enable_thinking"] = "false";
|
||||||
|
} else if (reasoning_effort != "") {
|
||||||
|
inputs.chat_template_kwargs["enable_thinking"] = "true";
|
||||||
|
}
|
||||||
|
// override "chat_template_kwargs.reasoning_effort"
|
||||||
|
if (reasoning_effort != "") {
|
||||||
|
inputs.chat_template_kwargs["reasoning_effort"] = json(reasoning_effort).dump();
|
||||||
|
}
|
||||||
|
|
||||||
|
// parse the claude compatible "thinking.type" arg to override "chat_template_kwargs.enable_thinking"
|
||||||
|
auto claude_thinking = json_value(body, "thinking", json::object());
|
||||||
|
auto claude_thinking_type = json_value(claude_thinking, "type", std::string(""));
|
||||||
|
if (claude_thinking_type == "disabled") {
|
||||||
|
inputs.chat_template_kwargs["enable_thinking"] = "false";
|
||||||
|
} else if (claude_thinking_type != "") {
|
||||||
|
inputs.chat_template_kwargs["enable_thinking"] = "true";
|
||||||
|
}
|
||||||
|
|
||||||
|
// parse the claude compatible "output_config.effort" arg to override "chat_template_kwargs.reasoning_effort"
|
||||||
|
auto claude_output_config = json_value(body, "output_config", json::object());
|
||||||
|
auto claude_output_config_effort = json_value(body, "effort", std::string(""));
|
||||||
|
if (claude_output_config_effort != "") {
|
||||||
|
inputs.chat_template_kwargs["reasoning_effort"] = json(claude_output_config_effort).dump();
|
||||||
|
}
|
||||||
|
|
||||||
|
// merge the template args provided from command line with the args provided in the user request
|
||||||
|
// this takes precedence over vendor compatibility args
|
||||||
|
auto chat_template_kwargs_object = json_value(body, "chat_template_kwargs", json::object());
|
||||||
for (const auto & item : chat_template_kwargs_object.items()) {
|
for (const auto & item : chat_template_kwargs_object.items()) {
|
||||||
inputs.chat_template_kwargs[item.key()] = item.value().dump();
|
inputs.chat_template_kwargs[item.key()] = item.value().dump();
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue