From c830f99cfa79d7e627e48de32280838f97b41115 Mon Sep 17 00:00:00 2001 From: Radoslav Gerganov Date: Tue, 24 Feb 2026 10:30:00 +0200 Subject: [PATCH] server : support max_completion_tokens request property (#19831) "max_tokens" is deprecated in favor of "max_completion_tokens" which sets the upper bound for reasoning+output tokens. Closes: #13700 --- tools/server/server-task.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/server/server-task.cpp b/tools/server/server-task.cpp index a137427c69..739e30a704 100644 --- a/tools/server/server-task.cpp +++ b/tools/server/server-task.cpp @@ -204,7 +204,8 @@ task_params server_task::params_from_json_cmpl( params.cache_prompt = json_value(data, "cache_prompt", defaults.cache_prompt); params.return_tokens = json_value(data, "return_tokens", false); params.return_progress = json_value(data, "return_progress", false); - params.n_predict = json_value(data, "n_predict", json_value(data, "max_tokens", defaults.n_predict)); + auto max_tokens = json_value(data, "max_tokens", defaults.n_predict); + params.n_predict = json_value(data, "n_predict", json_value(data, "max_completion_tokens", max_tokens)); params.n_indent = json_value(data, "n_indent", defaults.n_indent); params.n_keep = json_value(data, "n_keep", defaults.n_keep); params.n_discard = json_value(data, "n_discard", defaults.n_discard);