diff --git a/tools/server/server-common.cpp b/tools/server/server-common.cpp index bd203228cc..b47d3e3cb8 100644 --- a/tools/server/server-common.cpp +++ b/tools/server/server-common.cpp @@ -1026,9 +1026,48 @@ json oaicompat_chat_params_parse( llama_params["parse_tool_calls"] = true; } - // merge the template args provided from command line with the args provided in the user request - auto chat_template_kwargs_object = json_value(body, "chat_template_kwargs", json::object()); inputs.chat_template_kwargs = opt.chat_template_kwargs; + + // parse the oai compatible "reasoning_effort" or openrouter compatible "reasoning.effort" + // to override "chat_template_kwargs.enable_thinking" + auto reasoning_effort = json_value( + body, + "reasoning_effort", + json_value( + json_value(body, "reasoning", json::object()), + "effort", + std::string("") + ) + ); + if (reasoning_effort == "none") { + inputs.chat_template_kwargs["enable_thinking"] = "false"; + } else if (reasoning_effort != "") { + inputs.chat_template_kwargs["enable_thinking"] = "true"; + } + // override "chat_template_kwargs.reasoning_effort" + if (reasoning_effort != "") { + inputs.chat_template_kwargs["reasoning_effort"] = json(reasoning_effort).dump(); + } + + // parse the claude compatible "thinking.type" arg to override "chat_template_kwargs.enable_thinking" + auto claude_thinking = json_value(body, "thinking", json::object()); + auto claude_thinking_type = json_value(claude_thinking, "type", std::string("")); + if (claude_thinking_type == "disabled") { + inputs.chat_template_kwargs["enable_thinking"] = "false"; + } else if (claude_thinking_type != "") { + inputs.chat_template_kwargs["enable_thinking"] = "true"; + } + + // parse the claude compatible "output_config.effort" arg to override "chat_template_kwargs.reasoning_effort" + auto claude_output_config = json_value(body, "output_config", json::object()); + auto claude_output_config_effort = json_value(body, "effort", std::string("")); + if (claude_output_config_effort != "") { + inputs.chat_template_kwargs["reasoning_effort"] = json(claude_output_config_effort).dump(); + } + + // merge the template args provided from command line with the args provided in the user request + // this takes precedence over vendor compatibility args + auto chat_template_kwargs_object = json_value(body, "chat_template_kwargs", json::object()); for (const auto & item : chat_template_kwargs_object.items()) { inputs.chat_template_kwargs[item.key()] = item.value().dump(); }