From 724155883523152c1398ba3d7d67ab4ccc23cc05 Mon Sep 17 00:00:00 2001 From: Xuan Son Nguyen Date: Fri, 21 Nov 2025 23:06:09 +0100 Subject: [PATCH] better --models-dir --- common/common.cpp | 14 ++++++-- common/common.h | 3 +- common/download.cpp | 2 +- tools/server/README.md | 28 ++++++++++++---- tools/server/server-models.cpp | 58 ++++++++++++++++++++++------------ 5 files changed, 73 insertions(+), 32 deletions(-) diff --git a/common/common.cpp b/common/common.cpp index f3cc55247e..be31c66de1 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -911,7 +911,7 @@ std::string fs_get_cache_file(const std::string & filename) { return cache_directory + filename; } -std::vector fs_list_files(const std::string & path) { +std::vector fs_list(const std::string & path, bool include_directories) { std::vector files; if (path.empty()) return files; @@ -926,14 +926,22 @@ std::vector fs_list_files(const std::string & path) { const auto & p = entry.path(); if (std::filesystem::is_regular_file(p)) { common_file_info info; - info.path = p.string(); - info.name = p.filename().string(); + info.path = p.string(); + info.name = p.filename().string(); + info.is_dir = false; try { info.size = static_cast(std::filesystem::file_size(p)); } catch (const std::filesystem::filesystem_error &) { info.size = 0; } files.push_back(std::move(info)); + } else if (include_directories && std::filesystem::is_directory(p)) { + common_file_info info; + info.path = p.string(); + info.name = p.filename().string(); + info.size = 0; // Directories have no size + info.is_dir = true; + files.push_back(std::move(info)); } } catch (const std::filesystem::filesystem_error &) { // skip entries we cannot inspect diff --git a/common/common.h b/common/common.h index 197af5e6f2..20ba209ce4 100644 --- a/common/common.h +++ b/common/common.h @@ -625,8 +625,9 @@ struct common_file_info { std::string path; std::string name; size_t size = 0; // in bytes + bool is_dir = false; }; -std::vector fs_list_files(const 
std::string & path); +std::vector fs_list(const std::string & path, bool include_directories); // // Model utils diff --git a/common/download.cpp b/common/download.cpp index eeb32b6a86..1a3bc9216f 100644 --- a/common/download.cpp +++ b/common/download.cpp @@ -1047,7 +1047,7 @@ std::string common_docker_resolve_model(const std::string &) { std::vector common_list_cached_models() { std::vector models; const std::string cache_dir = fs_get_cache_directory(); - const std::vector files = fs_list_files(cache_dir); + const std::vector files = fs_list(cache_dir, false); for (const auto & file : files) { if (string_starts_with(file.name, "manifest=") && string_ends_with(file.name, ".json")) { common_cached_model_info model_info; diff --git a/tools/server/README.md b/tools/server/README.md index 54c1062c9b..9d0ece82be 100644 --- a/tools/server/README.md +++ b/tools/server/README.md @@ -1364,18 +1364,32 @@ llama-server -hf /: *The server must be restarted after adding a new model.* -Alternatively, you can point the router to a local directory containing your GGUF files using `--models-dir`. Files prefixed with `mmproj-` will automatically be treated as multimodal projection files **for the model with the matching base name**: +Alternatively, you can point the router to a local directory containing your GGUF files using `--models-dir`. Example command: ```sh -llama-3.2-1b-Q4_K_M.gguf -gemma-3-4b-it-Q8_0.gguf -mmproj-gemma-3-4b-it-Q8_0.gguf # must be "mmproj-" + text model filename +llama-server --models-dir ./models_directory ``` -Example: +If the model contains multiple GGUF (for multimodal or multi-shard), files should be put into a subdirectory. 
The directory structure should look like this: ```sh -llama-server --models-dir ./path/to/models +models_directory + │ + │ # single file + ├─ llama-3.2-1b-Q4_K_M.gguf + ├─ Qwen3-8B-Q4_K_M.gguf + │ + │ # multimodal + ├─ gemma-3-4b-it-Q8_0 + │ ├─ gemma-3-4b-it-Q8_0.gguf + │ └─ mmproj-F16.gguf # file name must start with "mmproj" + │ + │ # multi-shard + ├─ Kimi-K2-Thinking-UD-IQ1_S + │ ├─ Kimi-K2-Thinking-UD-IQ1_S-00001-of-00006.gguf + │ ├─ Kimi-K2-Thinking-UD-IQ1_S-00002-of-00006.gguf + │ ├─ ... + │ └─ Kimi-K2-Thinking-UD-IQ1_S-00006-of-00006.gguf ``` You may also specify default arguments that will be passed to every loaded model instance: @@ -1384,6 +1398,8 @@ You may also specify default arguments that will be passed to every loaded model llama-server -ctx 8192 -n 1024 -np 2 ``` +Note: model instances inherit both command line arguments and environment variables from the router server. + ### Routing requests Requests are routed according to the requested model name. diff --git a/tools/server/server-models.cpp b/tools/server/server-models.cpp index 1142cff217..346b07c795 100644 --- a/tools/server/server-models.cpp +++ b/tools/server/server-models.cpp @@ -80,32 +80,48 @@ static std::vector list_local_models(const std::string & dir) { if (!std::filesystem::exists(dir) || !std::filesystem::is_directory(dir)) { throw std::runtime_error(string_format("error: '%s' does not exist or is not a directory\n", dir.c_str())); } - auto files = fs_list_files(dir); - std::unordered_set files_model; - std::unordered_set files_mmproj; - for (const auto & file : files) { - // TODO: also handle multiple shards - if (string_ends_with(file.name, ".gguf")) { - if (string_starts_with(file.name, "mmproj-")) { - files_mmproj.insert(file.name); - } else { - files_model.insert(file.name); + + std::vector models; + auto scan_subdir = [&models](const std::string & subdir_path, const std::string name) { + auto files = fs_list(subdir_path, false); + common_file_info model_file; + common_file_info
first_shard_file; + common_file_info mmproj_file; + for (const auto & file : files) { + if (string_ends_with(file.name, ".gguf")) { + if (file.name.find("mmproj") != std::string::npos) { + mmproj_file = file; + } else if (file.name.find("-00001-of-") != std::string::npos) { + first_shard_file = file; + } else { + model_file = file; + } } } - } - std::vector models; - for (const auto & model_file : files_model) { - bool has_mmproj = false; - std::string mmproj_file = "mmproj-" + model_file; - if (files_mmproj.find(mmproj_file) != files_mmproj.end()) { - has_mmproj = true; - } + // single file model local_model model{ - /* name */ model_file, - /* path */ dir + DIRECTORY_SEPARATOR + model_file, - /* path_mmproj */ has_mmproj ? (dir + DIRECTORY_SEPARATOR + mmproj_file) : "" + /* name */ name, + /* path */ first_shard_file.path.empty() ? model_file.path : first_shard_file.path, + /* path_mmproj */ mmproj_file.path // can be empty }; models.push_back(model); + }; + + auto files = fs_list(dir, true); + for (const auto & file : files) { + if (file.is_dir) { + scan_subdir(file.path, file.name); + } else if (string_ends_with(file.name, ".gguf")) { + // single file model + std::string name = file.name; + string_replace_all(name, ".gguf", ""); + local_model model{ + /* name */ name, + /* path */ file.path, + /* path_mmproj */ "" + }; + models.push_back(model); + } } return models; }