From d9e65c3baf1a079d6762949ea9db958fa25c36b8 Mon Sep 17 00:00:00 2001 From: Michel Belleau Date: Thu, 25 Dec 2025 17:50:57 -0500 Subject: [PATCH] Add warning when LRU eviction cannot succeed due to all models being pinned When the models_max limit is reached but all active models are pinned, log a warning message to clarify that automatic unload cannot succeed. Also document the `pin` preset option. --- tools/server/README.md | 1 + tools/server/server-models.cpp | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/tools/server/README.md b/tools/server/README.md index 7d2f6f798e..43e7b066da 100644 --- a/tools/server/README.md +++ b/tools/server/README.md @@ -1487,6 +1487,7 @@ The precedence rule for preset options is as follows: We also offer additional options that are exclusive to presets (these aren't treated as command-line arguments): - `load-on-startup` (boolean): Controls whether the model loads automatically when the server starts - `stop-timeout` (int, seconds): After requested unload, wait for this many seconds before forcing termination (default: 10) +- `pin` (boolean): Prevents the router from automatically unloading this model when the `models_max` limit is reached; the model remains loaded until explicitly unloaded or the server restarts. ### Routing requests diff --git a/tools/server/server-models.cpp b/tools/server/server-models.cpp index 0647ad2e2c..af526dc73f 100644 --- a/tools/server/server-models.cpp +++ b/tools/server/server-models.cpp @@ -391,6 +391,8 @@ void server_models::unload_lru() { for (const auto & m : mapping) { if (m.second.meta.is_active()) { count_active++; + // If all active models are pinned, this condition never holds and no LRU eviction will occur. + // The server will keep all pinned models in memory, potentially exceeding models_max. 
if (!m.second.meta.pinned && m.second.meta.last_used < lru_last_used) { lru_model_name = m.first; lru_last_used = m.second.meta.last_used; @@ -408,6 +410,8 @@ void server_models::unload_lru() { return mapping[lru_model_name].meta.status == SERVER_MODEL_STATUS_UNLOADED; }); } + } else if (count_active >= (size_t)base_params.models_max) { + SRV_WRN("models_max limit reached, but no unpinned models available for LRU eviction - automatic unload cannot succeed\n"); } }