server : add kill switch when server is stuck (#20277)
This commit is contained in:
parent
e8bbc736cb
commit
107d599952
|
|
@ -562,7 +562,7 @@ private:
|
||||||
|
|
||||||
llama_model_ptr model_dft;
|
llama_model_ptr model_dft;
|
||||||
|
|
||||||
bool add_bos_token = true;
|
bool add_bos_token = true;
|
||||||
|
|
||||||
int32_t n_ctx; // total context for all clients / slots
|
int32_t n_ctx; // total context for all clients / slots
|
||||||
|
|
||||||
|
|
@ -570,6 +570,7 @@ private:
|
||||||
std::vector<server_slot> slots;
|
std::vector<server_slot> slots;
|
||||||
|
|
||||||
int slots_debug = 0;
|
int slots_debug = 0;
|
||||||
|
int n_empty_consequtive = 0;
|
||||||
|
|
||||||
std::unique_ptr<server_prompt_cache> prompt_cache;
|
std::unique_ptr<server_prompt_cache> prompt_cache;
|
||||||
|
|
||||||
|
|
@ -2628,6 +2629,12 @@ private:
|
||||||
|
|
||||||
if (batch.n_tokens == 0) {
|
if (batch.n_tokens == 0) {
|
||||||
SRV_WRN("%s", "no tokens to decode\n");
|
SRV_WRN("%s", "no tokens to decode\n");
|
||||||
|
|
||||||
|
if (++n_empty_consequtive > 3) {
|
||||||
|
GGML_ABORT("fatal error - please provide logs and repro in %s\n", "https://github.com/ggml-org/llama.cpp/pull/20277");
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
n_empty_consequtive = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
int32_t i_next = 0;
|
int32_t i_next = 0;
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue