From cc0a04343e56f2decdf2a6df977ff9c9edc85ced Mon Sep 17 00:00:00 2001 From: Aman Gupta Date: Fri, 19 Dec 2025 19:10:00 +0800 Subject: [PATCH] server: friendlier error msg when ctx < input (#18174) * llama-server: friendlier error msg when ctx < input This PR adds formatted strings to the server's send_error function * llama-server: use string_format inline * fix test --- tools/server/server-context.cpp | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/tools/server/server-context.cpp b/tools/server/server-context.cpp index def57d0252..9228fba9f5 100644 --- a/tools/server/server-context.cpp +++ b/tools/server/server-context.cpp @@ -1974,19 +1974,33 @@ struct server_context_impl { if (!slot.can_split()) { if (slot.task->n_tokens() > n_ubatch) { - send_error(slot, "input is too large to process. increase the physical batch size", ERROR_TYPE_SERVER); + send_error(slot, + string_format( + "input (%d tokens) is too large to process. increase the physical batch " + "size (current batch size: %d)", + slot.task->n_tokens(), n_ubatch), + ERROR_TYPE_SERVER); slot.release(); continue; } if (slot.task->n_tokens() > slot.n_ctx) { - send_error(slot, "input is larger than the max context size. skipping", ERROR_TYPE_EXCEED_CONTEXT_SIZE); + send_error( + slot, + string_format( + "input (%d tokens) is larger than the max context size (%d tokens). skipping", + slot.task->n_tokens(), slot.n_ctx), + ERROR_TYPE_EXCEED_CONTEXT_SIZE); slot.release(); continue; } } else { if (slot.task->n_tokens() >= slot.n_ctx) { - send_error(slot, "the request exceeds the available context size, try increasing it", ERROR_TYPE_EXCEED_CONTEXT_SIZE); + send_error(slot, + string_format("request (%d tokens) exceeds the available context size (%d " + "tokens), try increasing it", + slot.task->n_tokens(), slot.n_ctx), + ERROR_TYPE_EXCEED_CONTEXT_SIZE); slot.release(); continue; }