From 107d5999520dd02195ebe05278752db9fd33c865 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov <ggerganov@gmail.com>
Date: Mon, 9 Mar 2026 10:33:12 +0200
Subject: [PATCH] server : add kill switch when server is stuck (#20277)

---
 tools/server/server-context.cpp | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/tools/server/server-context.cpp b/tools/server/server-context.cpp
index 3790308d0e..b67190a469 100644
--- a/tools/server/server-context.cpp
+++ b/tools/server/server-context.cpp
@@ -562,7 +562,7 @@ private:
 
     llama_model_ptr model_dft;
 
-    bool add_bos_token  = true;
+    bool add_bos_token = true;
 
     int32_t n_ctx; // total context for all clients / slots
 
@@ -570,6 +570,7 @@ private:
     std::vector<server_slot> slots;
 
    int slots_debug = 0;
+    int n_empty_consecutive = 0;
 
     std::unique_ptr<server_prompt_cache> prompt_cache;
 
@@ -2628,6 +2629,12 @@ private:
 
         if (batch.n_tokens == 0) {
             SRV_WRN("%s", "no tokens to decode\n");
+
+            if (++n_empty_consecutive > 3) {
+                GGML_ABORT("fatal error - please provide logs and repro in %s\n", "https://github.com/ggml-org/llama.cpp/pull/20277");
+            }
+        } else {
+            n_empty_consecutive = 0;
         }
 
         int32_t i_next = 0;