better --models-dir

Xuan Son Nguyen 2025-11-21 23:06:09 +01:00
parent 7cd929076d
commit 7241558835
5 changed files with 73 additions and 32 deletions

View File

@@ -911,7 +911,7 @@ std::string fs_get_cache_file(const std::string & filename) {
return cache_directory + filename;
}
std::vector<common_file_info> fs_list_files(const std::string & path) {
std::vector<common_file_info> fs_list(const std::string & path, bool include_directories) {
std::vector<common_file_info> files;
if (path.empty()) return files;
@@ -926,14 +926,22 @@ std::vector<common_file_info> fs_list_files(const std::string & path) {
const auto & p = entry.path();
if (std::filesystem::is_regular_file(p)) {
common_file_info info;
info.path = p.string();
info.name = p.filename().string();
info.path = p.string();
info.name = p.filename().string();
info.is_dir = false;
try {
info.size = static_cast<size_t>(std::filesystem::file_size(p));
} catch (const std::filesystem::filesystem_error &) {
info.size = 0;
}
files.push_back(std::move(info));
} else if (include_directories && std::filesystem::is_directory(p)) {
common_file_info info;
info.path = p.string();
info.name = p.filename().string();
info.size = 0; // Directories have no size
info.is_dir = true;
files.push_back(std::move(info));
}
} catch (const std::filesystem::filesystem_error &) {
// skip entries we cannot inspect
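
A minimal usage sketch of the new signature (assuming the `fs_list`/`common_file_info` declarations from `common.h` in this commit; the directory path is only an example):

```cpp
// Sketch: enumerate a directory the same way the router scans --models-dir.
// Assumes common.h from this commit; "./models_directory" is an example path.
#include "common.h"

#include <cstdio>

int main() {
    const auto entries = fs_list("./models_directory", /*include_directories=*/true);
    for (const auto & e : entries) {
        std::printf("%s%s\t%zu bytes\n", e.name.c_str(), e.is_dir ? "/" : "", e.size);
    }
    return 0;
}
```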

View File

@@ -625,8 +625,9 @@ struct common_file_info {
std::string path;
std::string name;
size_t size = 0; // in bytes
bool is_dir = false;
};
std::vector<common_file_info> fs_list_files(const std::string & path);
std::vector<common_file_info> fs_list(const std::string & path, bool include_directories);
//
// Model utils

View File

@@ -1047,7 +1047,7 @@ std::string common_docker_resolve_model(const std::string &) {
std::vector<common_cached_model_info> common_list_cached_models() {
std::vector<common_cached_model_info> models;
const std::string cache_dir = fs_get_cache_directory();
const std::vector<common_file_info> files = fs_list_files(cache_dir);
const std::vector<common_file_info> files = fs_list(cache_dir, false);
for (const auto & file : files) {
if (string_starts_with(file.name, "manifest=") && string_ends_with(file.name, ".json")) {
common_cached_model_info model_info;
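
For reference, a standalone sketch of the `manifest=*.json` filter applied above, using plain `std::string` operations in place of the repo's `string_starts_with`/`string_ends_with` helpers:

```cpp
#include <string>

// Illustrative only: matches cache entries whose names look like "manifest=<...>.json".
static bool is_manifest_file(const std::string & name) {
    const std::string prefix = "manifest=";
    const std::string suffix = ".json";
    return name.size() >= prefix.size() + suffix.size() &&
           name.compare(0, prefix.size(), prefix) == 0 &&
           name.compare(name.size() - suffix.size(), suffix.size(), suffix) == 0;
}
```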

View File

@@ -1364,18 +1364,32 @@ llama-server -hf <user>/<model>:<tag>
*The server must be restarted after adding a new model.*
Alternatively, you can point the router to a local directory containing your GGUF files using `--models-dir`. Files prefixed with `mmproj-` will automatically be treated as multimodal projection files **for the model with the matching base name**:
Alternatively, you can point the router to a local directory containing your GGUF files using `--models-dir`. Example command:
```sh
llama-3.2-1b-Q4_K_M.gguf
gemma-3-4b-it-Q8_0.gguf
mmproj-gemma-3-4b-it-Q8_0.gguf # must be "mmproj-" + text model filename
llama-server --models-dir ./models_directory
```
Example:
If the model consists of multiple GGUF files (for example multimodal or multi-shard models), the files should be placed in a subdirectory. The directory structure should look like this:
```sh
llama-server --models-dir ./path/to/models
models_directory
│ # single file
├─ llama-3.2-1b-Q4_K_M.gguf
├─ Qwen3-8B-Q4_K_M.gguf
│ # multimodal
├─ gemma-3-4b-it-Q8_0
│ ├─ gemma-3-4b-it-Q8_0.gguf
│ └─ mmproj-F16.gguf # file name must start with "mmproj"
│ # multi-shard
├─ Kimi-K2-Thinking-UD-IQ1_S
│ ├─ Kimi-K2-Thinking-UD-IQ1_S-00001-of-00006.gguf
│ ├─ Kimi-K2-Thinking-UD-IQ1_S-00002-of-00006.gguf
│ ├─ ...
│ └─ Kimi-K2-Thinking-UD-IQ1_S-00006-of-00006.gguf
```
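
A rough sketch of the naming rule this layout implies (subdirectory name for multi-file models, filename without the `.gguf` suffix for single-file models); the helper below is illustrative and not part of the server:

```cpp
#include <string>

// Illustrative only: derive the routed model name from a --models-dir entry.
static std::string derive_model_name(const std::string & entry_name, bool is_dir) {
    if (is_dir) {
        return entry_name; // multi-file model: the subdirectory name is the model name
    }
    std::string name = entry_name; // single-file model: strip the ".gguf" extension
    const std::string ext = ".gguf";
    if (name.size() > ext.size() && name.compare(name.size() - ext.size(), ext.size(), ext) == 0) {
        name.erase(name.size() - ext.size());
    }
    return name;
}
// "Qwen3-8B-Q4_K_M.gguf"           -> "Qwen3-8B-Q4_K_M"
// "gemma-3-4b-it-Q8_0" (directory) -> "gemma-3-4b-it-Q8_0"
```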
You may also specify default arguments that will be passed to every loaded model instance:
@@ -1384,6 +1398,8 @@ You may also specify default arguments that will be passed to every loaded model
llama-server -c 8192 -n 1024 -np 2
```
Note: model instances inherit both command line arguments and environment variables from the router server.
### Routing requests
Requests are routed according to the requested model name.

View File

@@ -80,32 +80,48 @@ static std::vector<local_model> list_local_models(const std::string & dir) {
if (!std::filesystem::exists(dir) || !std::filesystem::is_directory(dir)) {
throw std::runtime_error(string_format("error: '%s' does not exist or is not a directory\n", dir.c_str()));
}
auto files = fs_list_files(dir);
std::unordered_set<std::string> files_model;
std::unordered_set<std::string> files_mmproj;
for (const auto & file : files) {
// TODO: also handle multiple shards
if (string_ends_with(file.name, ".gguf")) {
if (string_starts_with(file.name, "mmproj-")) {
files_mmproj.insert(file.name);
} else {
files_model.insert(file.name);
std::vector<local_model> models;
auto scan_subdir = [&models](const std::string & subdir_path, const std::string name) {
auto files = fs_list(subdir_path, false);
common_file_info model_file;
common_file_info first_shard_file;
common_file_info mmproj_file;
for (const auto & file : files) {
if (string_ends_with(file.name, ".gguf")) {
if (file.name.find("mmproj") != std::string::npos) {
mmproj_file = file;
} else if (file.name.find("-00001-of-") != std::string::npos) {
first_shard_file = file;
} else {
model_file = file;
}
}
}
}
std::vector<local_model> models;
for (const auto & model_file : files_model) {
bool has_mmproj = false;
std::string mmproj_file = "mmproj-" + model_file;
if (files_mmproj.find(mmproj_file) != files_mmproj.end()) {
has_mmproj = true;
}
// single file model
local_model model{
/* name */ model_file,
/* path */ dir + DIRECTORY_SEPARATOR + model_file,
/* path_mmproj */ has_mmproj ? (dir + DIRECTORY_SEPARATOR + mmproj_file) : ""
/* name */ name,
/* path */ first_shard_file.path.empty() ? model_file.path : first_shard_file.path,
/* path_mmproj */ mmproj_file.path // can be empty
};
models.push_back(model);
};
auto files = fs_list(dir, true);
for (const auto & file : files) {
if (file.is_dir) {
scan_subdir(file.path, file.name);
} else if (string_ends_with(file.name, ".gguf")) {
// single file model
std::string name = file.name;
string_replace_all(name, ".gguf", "");
local_model model{
/* name */ name,
/* path */ file.path,
/* path_mmproj */ ""
};
models.push_back(model);
}
}
return models;
}
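
For clarity, a standalone sketch of the per-subdirectory classification above (hypothetical helper, not part of the commit): `mmproj` anywhere in the name marks the projector, `-00001-of-` marks the first shard of a multi-shard model, and anything else is treated as the main GGUF:

```cpp
#include <cstdio>
#include <string>

enum class gguf_kind { model, first_shard, mmproj };

// Hypothetical helper mirroring the checks inside scan_subdir.
static gguf_kind classify_gguf(const std::string & name) {
    if (name.find("mmproj") != std::string::npos)     return gguf_kind::mmproj;
    if (name.find("-00001-of-") != std::string::npos) return gguf_kind::first_shard;
    return gguf_kind::model;
}

int main() {
    const char * names[] = {
        "gemma-3-4b-it-Q8_0.gguf",                        // -> model
        "mmproj-F16.gguf",                                // -> mmproj
        "Kimi-K2-Thinking-UD-IQ1_S-00001-of-00006.gguf",  // -> first_shard
    };
    for (const char * n : names) {
        std::printf("%-48s kind=%d\n", n, static_cast<int>(classify_gguf(n)));
    }
    return 0;
}
```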