From be9837b66e9db1eb1f1d3868fbc3eca266a2ad9b Mon Sep 17 00:00:00 2001
From: Mikhail Shevtsov <mikhail.shevtsov@wiregate.io>
Date: Tue, 24 Feb 2026 17:14:08 +0100
Subject: [PATCH] server : add default-model preset and fallback logic

---
 common/arg.cpp                 |  6 ++++++
 common/arg.h                   |  1 +
 tools/server/README.md         |  2 ++
 tools/server/server-models.cpp | 35 ++++++++++++++++++++++++++++++++++
 tools/server/server-models.h   |  4 ++++
 5 files changed, 48 insertions(+)

diff --git a/common/arg.cpp b/common/arg.cpp
index 41da8563d6..00e8d69816 100644
--- a/common/arg.cpp
+++ b/common/arg.cpp
@@ -3853,6 +3853,12 @@ void common_params_add_preset_options(std::vector<common_arg> & args) {
         [](common_params &, const std::string &) { /* unused */ }
     ).set_env(COMMON_ARG_PRESET_LOAD_ON_STARTUP).set_preset_only());
 
+    args.push_back(common_arg(
+        {"default-model"}, "NAME",
+        "in server router mode, this model will be used if model not found",
+        [](common_params &, const std::string &) { /* unused */ }
+    ).set_env(COMMON_ARG_PRESET_DEFAULT_MODEL).set_preset_only());
+
     args.push_back(common_arg(
         {"stop-timeout"}, "SECONDS",
         "in server router mode, force-kill model instance after this many seconds of graceful shutdown",
diff --git a/common/arg.h b/common/arg.h
index 55782a158d..a6b627e609 100644
--- a/common/arg.h
+++ b/common/arg.h
@@ -11,6 +11,7 @@
 // pseudo-env variable to identify preset-only arguments
 #define COMMON_ARG_PRESET_LOAD_ON_STARTUP "__PRESET_LOAD_ON_STARTUP"
 #define COMMON_ARG_PRESET_STOP_TIMEOUT    "__PRESET_STOP_TIMEOUT"
+#define COMMON_ARG_PRESET_DEFAULT_MODEL   "__PRESET_DEFAULT_MODEL"
 
 //
 // CLI argument parsing
diff --git a/tools/server/README.md b/tools/server/README.md
index da16ddc756..68f56bd3fb 100644
--- a/tools/server/README.md
+++ b/tools/server/README.md
@@ -1552,6 +1552,8 @@ The precedence rule for preset options is as follows:
 
 We also offer additional options that are exclusive to presets (these aren't treated as command-line arguments):
 - `load-on-startup` (boolean): Controls whether the model loads automatically when the server starts
+- `default-model` (boolean): Marks this model as the fallback used when a request specifies no model or the requested model is not found.
+   When several models set `default-model`, only the first one found is used; the others are ignored with a warning.
 - `stop-timeout` (int, seconds): After requested unload, wait for this many seconds before forcing termination (default: 10)
 
 ### Routing requests
diff --git a/tools/server/server-models.cpp b/tools/server/server-models.cpp
index c13d48a363..61e962255c 100644
--- a/tools/server/server-models.cpp
+++ b/tools/server/server-models.cpp
@@ -306,6 +306,30 @@ void server_models::load_models() {
         add_model(std::move(meta));
     }
 
+    // determine default model if any
+    bool first_found = false;
+    std::string first_default;
+    for (const auto & [name, inst] : mapping) {
+        std::string val;
+        if (!inst.meta.preset.get_option(COMMON_ARG_PRESET_DEFAULT_MODEL, val)) {
+            continue;
+        }
+        if (!first_found) {
+            default_model_name = name;
+            SRV_INF("Default preset model: %s\n", name.c_str());
+            first_default = name;
+            first_found = true;
+        } else {
+            SRV_WRN(
+                "Multiple default models detected: '%s' and '%s'; "
+                "using '%s' as default\n",
+                name.c_str(),
+                first_default.c_str(),
+                first_default.c_str()
+            );
+        }
+    }
+
     // log available models
     {
         std::unordered_set<std::string> custom_names;
@@ -374,6 +398,15 @@ void server_models::load_models() {
     }
 }
 
+std::string server_models::resolve_model_name(const std::string & requested) {
+    // Keep a known, non-empty name; also keep it when no default is configured.
+    const bool is_known = !requested.empty() && has_model(requested);
+    if (is_known || default_model_name.empty()) {
+        return requested;
+    }
+    return default_model_name; // empty or unknown request: use the default
+}
+
 void server_models::update_meta(const std::string & name, const server_model_meta & meta) {
     std::lock_guard<std::mutex> lk(mutex);
     auto it = mapping.find(name);
@@ -904,6 +937,7 @@ void server_models_routes::init_routes() {
     this->proxy_get = [this](const server_http_req & req) {
         std::string method = "GET";
         std::string name = req.get_param("model");
+        name = models.resolve_model_name(name);
         bool autoload = is_autoload(params, req);
         auto error_res = std::make_unique<server_http_res>();
         if (!router_validate_model(name, models, autoload, error_res)) {
@@ -916,6 +950,7 @@ void server_models_routes::init_routes() {
         std::string method = "POST";
         json body = json::parse(req.body);
         std::string name = json_value(body, "model", std::string());
+        name = models.resolve_model_name(name);
         bool autoload = is_autoload(params, req);
         auto error_res = std::make_unique<server_http_res>();
         if (!router_validate_model(name, models, autoload, error_res)) {
diff --git a/tools/server/server-models.h b/tools/server/server-models.h
index 2b392f299a..77e9c1d45f 100644
--- a/tools/server/server-models.h
+++ b/tools/server/server-models.h
@@ -97,6 +97,7 @@ private:
     std::string bin_path;
     std::vector<std::string> base_env;
     common_preset base_preset; // base preset from llama-server CLI args
+    std::string default_model_name;
 
     void update_meta(const std::string & name, const server_model_meta & meta);
 
@@ -143,6 +144,9 @@ public:
     // notify the router server that a model instance is ready
     // return the monitoring thread (to be joined by the caller)
     static std::thread setup_child_server(const std::function<void(int)> & shutdown_handler);
+
+    // Resolve model name: fallback to default if requested name is empty or not found
+    std::string resolve_model_name(const std::string & requested);
 };
 
 struct server_models_routes {