From 724155883523152c1398ba3d7d67ab4ccc23cc05 Mon Sep 17 00:00:00 2001 From: Xuan Son Nguyen Date: Fri, 21 Nov 2025 23:06:09 +0100 Subject: [PATCH] better --models-dir --- common/common.cpp | 14 ++++++-- common/common.h | 3 +- common/download.cpp | 2 +- tools/server/README.md | 28 ++++++++++++---- tools/server/server-models.cpp | 58 ++++++++++++++++++++++------------ 5 files changed, 73 insertions(+), 32 deletions(-) diff --git a/common/common.cpp b/common/common.cpp index f3cc55247e..be31c66de1 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -911,7 +911,7 @@ std::string fs_get_cache_file(const std::string & filename) { return cache_directory + filename; } -std::vector fs_list_files(const std::string & path) { +std::vector fs_list(const std::string & path, bool include_directories) { std::vector files; if (path.empty()) return files; @@ -926,14 +926,22 @@ std::vector fs_list_files(const std::string & path) { const auto & p = entry.path(); if (std::filesystem::is_regular_file(p)) { common_file_info info; - info.path = p.string(); - info.name = p.filename().string(); + info.path = p.string(); + info.name = p.filename().string(); + info.is_dir = false; try { info.size = static_cast(std::filesystem::file_size(p)); } catch (const std::filesystem::filesystem_error &) { info.size = 0; } files.push_back(std::move(info)); + } else if (include_directories && std::filesystem::is_directory(p)) { + common_file_info info; + info.path = p.string(); + info.name = p.filename().string(); + info.size = 0; // Directories have no size + info.is_dir = true; + files.push_back(std::move(info)); } } catch (const std::filesystem::filesystem_error &) { // skip entries we cannot inspect diff --git a/common/common.h b/common/common.h index 197af5e6f2..20ba209ce4 100644 --- a/common/common.h +++ b/common/common.h @@ -625,8 +625,9 @@ struct common_file_info { std::string path; std::string name; size_t size = 0; // in bytes + bool is_dir = false; }; -std::vector fs_list_files(const 
std::string & path); +std::vector fs_list(const std::string & path, bool include_directories); // // Model utils diff --git a/common/download.cpp b/common/download.cpp index eeb32b6a86..1a3bc9216f 100644 --- a/common/download.cpp +++ b/common/download.cpp @@ -1047,7 +1047,7 @@ std::string common_docker_resolve_model(const std::string &) { std::vector common_list_cached_models() { std::vector models; const std::string cache_dir = fs_get_cache_directory(); - const std::vector files = fs_list_files(cache_dir); + const std::vector files = fs_list(cache_dir, false); for (const auto & file : files) { if (string_starts_with(file.name, "manifest=") && string_ends_with(file.name, ".json")) { common_cached_model_info model_info; diff --git a/tools/server/README.md b/tools/server/README.md index 54c1062c9b..9d0ece82be 100644 --- a/tools/server/README.md +++ b/tools/server/README.md @@ -1364,18 +1364,32 @@ llama-server -hf /: *The server must be restarted after adding a new model.* -Alternatively, you can point the router to a local directory containing your GGUF files using `--models-dir`. Files prefixed with `mmproj-` will automatically be treated as multimodal projection files **for the model with the matching base name**: +Alternatively, you can point the router to a local directory containing your GGUF files using `--models-dir`. Example command: ```sh -llama-3.2-1b-Q4_K_M.gguf -gemma-3-4b-it-Q8_0.gguf -mmproj-gemma-3-4b-it-Q8_0.gguf # must be "mmproj-" + text model filename +llama-server --models-dir ./models_directory ``` -Example: +If the model contains multiple GGUF (for multimodal or multi-shard), files should be put into a subdirectory. 
The directory structure should look like this: ```sh -llama-server --models-dir ./path/to/models +models_directory + │ + │ # single file + ├─ llama-3.2-1b-Q4_K_M.gguf + ├─ Qwen3-8B-Q4_K_M.gguf + │ + │ # multimodal + ├─ gemma-3-4b-it-Q8_0 + │ ├─ gemma-3-4b-it-Q8_0.gguf + │ └─ mmproj-F16.gguf # file name must start with "mmproj" + │ + │ # multi-shard + ├─ Kimi-K2-Thinking-UD-IQ1_S + │ ├─ Kimi-K2-Thinking-UD-IQ1_S-00001-of-00006.gguf + │ ├─ Kimi-K2-Thinking-UD-IQ1_S-00002-of-00006.gguf + │ ├─ ... + │ └─ Kimi-K2-Thinking-UD-IQ1_S-00006-of-00006.gguf ``` You may also specify default arguments that will be passed to every loaded model instance: @@ -1384,6 +1398,8 @@ You may also specify default arguments that will be passed to every loaded model llama-server -ctx 8192 -n 1024 -np 2 ``` +Note: model instances inherit both command line arguments and environment variables from the router server. + ### Routing requests Requests are routed according to the requested model name. diff --git a/tools/server/server-models.cpp b/tools/server/server-models.cpp index 1142cff217..346b07c795 100644 --- a/tools/server/server-models.cpp +++ b/tools/server/server-models.cpp @@ -80,32 +80,48 @@ static std::vector list_local_models(const std::string & dir) { if (!std::filesystem::exists(dir) || !std::filesystem::is_directory(dir)) { throw std::runtime_error(string_format("error: '%s' does not exist or is not a directory\n", dir.c_str())); } - auto files = fs_list_files(dir); - std::unordered_set files_model; - std::unordered_set files_mmproj; - for (const auto & file : files) { - // TODO: also handle multiple shards - if (string_ends_with(file.name, ".gguf")) { - if (string_starts_with(file.name, "mmproj-")) { - files_mmproj.insert(file.name); - } else { - files_model.insert(file.name); + + std::vector models; + auto scan_subdir = [&models](const std::string & subdir_path, const std::string name) { + auto files = fs_list(subdir_path, false); + common_file_info model_file; + common_file_info
first_shard_file; + common_file_info mmproj_file; + for (const auto & file : files) { + if (string_ends_with(file.name, ".gguf")) { + if (file.name.find("mmproj") != std::string::npos) { + mmproj_file = file; + } else if (file.name.find("-00001-of-") != std::string::npos) { + first_shard_file = file; + } else { + model_file = file; + } } } - } - std::vector models; - for (const auto & model_file : files_model) { - bool has_mmproj = false; - std::string mmproj_file = "mmproj-" + model_file; - if (files_mmproj.find(mmproj_file) != files_mmproj.end()) { - has_mmproj = true; - } + // single file model local_model model{ - /* name */ model_file, - /* path */ dir + DIRECTORY_SEPARATOR + model_file, - /* path_mmproj */ has_mmproj ? (dir + DIRECTORY_SEPARATOR + mmproj_file) : "" + /* name */ name, + /* path */ first_shard_file.path.empty() ? model_file.path : first_shard_file.path, + /* path_mmproj */ mmproj_file.path // can be empty }; models.push_back(model); + }; + + auto files = fs_list(dir, true); + for (const auto & file : files) { + if (file.is_dir) { + scan_subdir(file.path, file.name); + } else if (string_ends_with(file.name, ".gguf")) { + // single file model + std::string name = file.name; + string_replace_all(name, ".gguf", ""); + local_model model{ + /* name */ name, + /* path */ file.path, + /* path_mmproj */ "" + }; + models.push_back(model); + } } return models; }