server : add kill switch when server is stuck (#20277)

2026-03-09 10:33:12 +02:00 · 2026-03-09 10:33:12 +02:00 · 107d599952
parent e8bbc736cb
commit 107d599952
1 changed file with 8 additions and 1 deletion
--- a/tools/server/server-context.cpp
+++ b/tools/server/server-context.cpp
@@ -562,7 +562,7 @@ private:
    llama_model_ptr model_dft;
-    bool add_bos_token  = true;
+    bool add_bos_token = true;
    int32_t n_ctx; // total context for all clients / slots
@@ -570,6 +570,7 @@ private:
    std::vector<server_slot> slots;
    int slots_debug = 0;
    int n_empty_consequtive = 0;
    std::unique_ptr<server_prompt_cache> prompt_cache;
@@ -2628,6 +2629,12 @@ private:
        if (batch.n_tokens == 0) {
            SRV_WRN("%s", "no tokens to decode\n");
            if (++n_empty_consequtive > 3) {
                GGML_ABORT("fatal error - please provide logs and repro in %s\n", "https://github.com/ggml-org/llama.cpp/pull/20277");
            }
        } else {
            n_empty_consequtive = 0;
        }
        int32_t i_next = 0;