better --models-dir

Xuan Son Nguyen 2025-11-21 23:06:09 +01:00
parent 7cd929076d
commit 7241558835
5 changed files with 73 additions and 32 deletions

View File

@@ -911,7 +911,7 @@ std::string fs_get_cache_file(const std::string & filename) {
return cache_directory + filename;
}
std::vector<common_file_info> fs_list_files(const std::string & path) {
std::vector<common_file_info> fs_list(const std::string & path, bool include_directories) {
std::vector<common_file_info> files;
if (path.empty()) return files;
@@ -926,14 +926,22 @@ std::vector<common_file_info> fs_list_files(const std::string & path) {
const auto & p = entry.path();
if (std::filesystem::is_regular_file(p)) {
common_file_info info;
info.path = p.string();
info.name = p.filename().string();
info.path = p.string();
info.name = p.filename().string();
info.is_dir = false;
try {
info.size = static_cast<size_t>(std::filesystem::file_size(p));
} catch (const std::filesystem::filesystem_error &) {
info.size = 0;
}
files.push_back(std::move(info));
} else if (include_directories && std::filesystem::is_directory(p)) {
common_file_info info;
info.path = p.string();
info.name = p.filename().string();
info.size = 0; // Directories have no size
info.is_dir = true;
files.push_back(std::move(info));
}
} catch (const std::filesystem::filesystem_error &) {
// skip entries we cannot inspect
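
A minimal usage sketch of the new signature (assuming the `fs_list`/`common_file_info` declarations from `common.h` in this commit; the directory path is only an example):

```cpp
// Sketch: enumerate a directory the same way the router scans --models-dir.
// Assumes common.h from this commit; "./models_directory" is an example path.
#include "common.h"

#include <cstdio>

int main() {
    const auto entries = fs_list("./models_directory", /*include_directories=*/true);
    for (const auto & e : entries) {
        std::printf("%s%s\t%zu bytes\n", e.name.c_str(), e.is_dir ? "/" : "", e.size);
    }
    return 0;
}
```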

View File

@@ -625,8 +625,9 @@ struct common_file_info {
std::string path;
std::string name;
size_t size = 0; // in bytes
bool is_dir = false;
};
std::vector<common_file_info> fs_list_files(const std::string & path);
std::vector<common_file_info> fs_list(const std::string & path, bool include_directories);
//
// Model utils

View File

@@ -1047,7 +1047,7 @@ std::string common_docker_resolve_model(const std::string &) {
std::vector<common_cached_model_info> common_list_cached_models() {
std::vector<common_cached_model_info> models;
const std::string cache_dir = fs_get_cache_directory();
const std::vector<common_file_info> files = fs_list_files(cache_dir);
const std::vector<common_file_info> files = fs_list(cache_dir, false);
for (const auto & file : files) {
if (string_starts_with(file.name, "manifest=") && string_ends_with(file.name, ".json")) {
common_cached_model_info model_info;
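
For reference, a standalone sketch of the `manifest=*.json` filter applied above, using plain `std::string` operations in place of the repo's `string_starts_with`/`string_ends_with` helpers:

```cpp
#include <string>

// Illustrative only: matches cache entries whose names look like "manifest=<...>.json".
static bool is_manifest_file(const std::string & name) {
    const std::string prefix = "manifest=";
    const std::string suffix = ".json";
    return name.size() >= prefix.size() + suffix.size() &&
           name.compare(0, prefix.size(), prefix) == 0 &&
           name.compare(name.size() - suffix.size(), suffix.size(), suffix) == 0;
}
```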

View File

@@ -1364,18 +1364,32 @@ llama-server -hf <user>/<model>:<tag>
*The server must be restarted after adding a new model.*
Alternatively, you can point the router to a local directory containing your GGUF files using `--models-dir`. Files prefixed with `mmproj-` will automatically be treated as multimodal projection files **for the model with the matching base name**:
Alternatively, you can point the router to a local directory containing your GGUF files using `--models-dir`. Example command:
```sh
llama-3.2-1b-Q4_K_M.gguf
gemma-3-4b-it-Q8_0.gguf
mmproj-gemma-3-4b-it-Q8_0.gguf # must be "mmproj-" + text model filename
llama-server --models-dir ./models_directory
```
Example:
If the model consists of multiple GGUF files (for example multimodal or multi-shard models), the files should be placed in a subdirectory. The directory structure should look like this:
```sh
llama-server --models-dir ./path/to/models
models_directory
│ # single file
├─ llama-3.2-1b-Q4_K_M.gguf
├─ Qwen3-8B-Q4_K_M.gguf
│ # multimodal
├─ gemma-3-4b-it-Q8_0
│ ├─ gemma-3-4b-it-Q8_0.gguf
│ └─ mmproj-F16.gguf # file name must start with "mmproj"
│ # multi-shard
├─ Kimi-K2-Thinking-UD-IQ1_S
│ ├─ Kimi-K2-Thinking-UD-IQ1_S-00001-of-00006.gguf
│ ├─ Kimi-K2-Thinking-UD-IQ1_S-00002-of-00006.gguf
│ ├─ ...
│ └─ Kimi-K2-Thinking-UD-IQ1_S-00006-of-00006.gguf
```
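
A rough sketch of the naming rule this layout implies (subdirectory name for multi-file models, filename without the `.gguf` suffix for single-file models); the helper below is illustrative and not part of the server:

```cpp
#include <string>

// Illustrative only: derive the routed model name from a --models-dir entry.
static std::string derive_model_name(const std::string & entry_name, bool is_dir) {
    if (is_dir) {
        return entry_name; // multi-file model: the subdirectory name is the model name
    }
    std::string name = entry_name; // single-file model: strip the ".gguf" extension
    const std::string ext = ".gguf";
    if (name.size() > ext.size() && name.compare(name.size() - ext.size(), ext.size(), ext) == 0) {
        name.erase(name.size() - ext.size());
    }
    return name;
}
// "Qwen3-8B-Q4_K_M.gguf"           -> "Qwen3-8B-Q4_K_M"
// "gemma-3-4b-it-Q8_0" (directory) -> "gemma-3-4b-it-Q8_0"
```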
You may also specify default arguments that will be passed to every loaded model instance:
@@ -1384,6 +1398,8 @@ You may also specify default arguments that will be passed to every loaded model
llama-server -c 8192 -n 1024 -np 2
```
Note: model instances inherit both command line arguments and environment variables from the router server.
### Routing requests
Requests are routed according to the requested model name.

View File

@@ -80,32 +80,48 @@ static std::vector<local_model> list_local_models(const std::string & dir) {
if (!std::filesystem::exists(dir) || !std::filesystem::is_directory(dir)) {
throw std::runtime_error(string_format("error: '%s' does not exist or is not a directory\n", dir.c_str()));
}
auto files = fs_list_files(dir);
std::unordered_set<std::string> files_model;
std::unordered_set<std::string> files_mmproj;
for (const auto & file : files) {
// TODO: also handle multiple shards
if (string_ends_with(file.name, ".gguf")) {
if (string_starts_with(file.name, "mmproj-")) {
files_mmproj.insert(file.name);
} else {
files_model.insert(file.name);
std::vector<local_model> models;
auto scan_subdir = [&models](const std::string & subdir_path, const std::string name) {
auto files = fs_list(subdir_path, false);
common_file_info model_file;
common_file_info first_shard_file;
common_file_info mmproj_file;
for (const auto & file : files) {
if (string_ends_with(file.name, ".gguf")) {
if (file.name.find("mmproj") != std::string::npos) {
mmproj_file = file;
} else if (file.name.find("-00001-of-") != std::string::npos) {
first_shard_file = file;
} else {
model_file = file;
}
}
}
}
std::vector<local_model> models;
for (const auto & model_file : files_model) {
bool has_mmproj = false;
std::string mmproj_file = "mmproj-" + model_file;
if (files_mmproj.find(mmproj_file) != files_mmproj.end()) {
has_mmproj = true;
}
// single file model
local_model model{
/* name */ model_file,
/* path */ dir + DIRECTORY_SEPARATOR + model_file,
/* path_mmproj */ has_mmproj ? (dir + DIRECTORY_SEPARATOR + mmproj_file) : ""
/* name */ name,
/* path */ first_shard_file.path.empty() ? model_file.path : first_shard_file.path,
/* path_mmproj */ mmproj_file.path // can be empty
};
models.push_back(model);
};
auto files = fs_list(dir, true);
for (const auto & file : files) {
if (file.is_dir) {
scan_subdir(file.path, file.name);
} else if (string_ends_with(file.name, ".gguf")) {
// single file model
std::string name = file.name;
string_replace_all(name, ".gguf", "");
local_model model{
/* name */ name,
/* path */ file.path,
/* path_mmproj */ ""
};
models.push_back(model);
}
}
return models;
}
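
For clarity, a standalone sketch of the per-subdirectory classification above (hypothetical helper, not part of the commit): `mmproj` anywhere in the name marks the projector, `-00001-of-` marks the first shard of a multi-shard model, and anything else is treated as the main GGUF:

```cpp
#include <cstdio>
#include <string>

enum class gguf_kind { model, first_shard, mmproj };

// Hypothetical helper mirroring the checks inside scan_subdir.
static gguf_kind classify_gguf(const std::string & name) {
    if (name.find("mmproj") != std::string::npos)     return gguf_kind::mmproj;
    if (name.find("-00001-of-") != std::string::npos) return gguf_kind::first_shard;
    return gguf_kind::model;
}

int main() {
    const char * names[] = {
        "gemma-3-4b-it-Q8_0.gguf",                        // -> model
        "mmproj-F16.gguf",                                // -> mmproj
        "Kimi-K2-Thinking-UD-IQ1_S-00001-of-00006.gguf",  // -> first_shard
    };
    for (const char * n : names) {
        std::printf("%-48s kind=%d\n", n, static_cast<int>(classify_gguf(n)));
    }
    return 0;
}
```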