diff --git a/tools/server/server-models.cpp b/tools/server/server-models.cpp index af526dc73f..6b41d5f246 100644 --- a/tools/server/server-models.cpp +++ b/tools/server/server-models.cpp @@ -392,7 +392,7 @@ void server_models::unload_lru() { if (m.second.meta.is_active()) { count_active++; // If all active models are pinned, this condition never holds and no LRU eviction will occur. - // The server will keep all pinned models in memory, potentially exceeding models_max. + // We throw an error instead of allowing the server to exceed models_max. if (!m.second.meta.pinned && m.second.meta.last_used < lru_last_used) { lru_model_name = m.first; lru_last_used = m.second.meta.last_used; @@ -411,7 +411,10 @@ void server_models::unload_lru() { }); } } else if (count_active >= (size_t)base_params.models_max) { - SRV_WRN("models_max limit reached, but no unpinned models available for LRU eviction - automatic unload cannot succeed\n"); + throw std::runtime_error(string_format( + "models_max limit (%d) reached, but no unpinned models available for LRU eviction - cannot load more models", + base_params.models_max + )); } }