better --models-dir
This commit is contained in:
parent
7cd929076d
commit
7241558835
|
|
@ -911,7 +911,7 @@ std::string fs_get_cache_file(const std::string & filename) {
|
|||
return cache_directory + filename;
|
||||
}
|
||||
|
||||
std::vector<common_file_info> fs_list_files(const std::string & path) {
|
||||
std::vector<common_file_info> fs_list(const std::string & path, bool include_directories) {
|
||||
std::vector<common_file_info> files;
|
||||
if (path.empty()) return files;
|
||||
|
||||
|
|
@ -926,14 +926,22 @@ std::vector<common_file_info> fs_list_files(const std::string & path) {
|
|||
const auto & p = entry.path();
|
||||
if (std::filesystem::is_regular_file(p)) {
|
||||
common_file_info info;
|
||||
info.path = p.string();
|
||||
info.name = p.filename().string();
|
||||
info.path = p.string();
|
||||
info.name = p.filename().string();
|
||||
info.is_dir = false;
|
||||
try {
|
||||
info.size = static_cast<size_t>(std::filesystem::file_size(p));
|
||||
} catch (const std::filesystem::filesystem_error &) {
|
||||
info.size = 0;
|
||||
}
|
||||
files.push_back(std::move(info));
|
||||
} else if (include_directories && std::filesystem::is_directory(p)) {
|
||||
common_file_info info;
|
||||
info.path = p.string();
|
||||
info.name = p.filename().string();
|
||||
info.size = 0; // Directories have no size
|
||||
info.is_dir = true;
|
||||
files.push_back(std::move(info));
|
||||
}
|
||||
} catch (const std::filesystem::filesystem_error &) {
|
||||
// skip entries we cannot inspect
|
||||
|
|
|
|||
|
|
@ -625,8 +625,9 @@ struct common_file_info {
|
|||
std::string path;
|
||||
std::string name;
|
||||
size_t size = 0; // in bytes
|
||||
bool is_dir = false;
|
||||
};
|
||||
std::vector<common_file_info> fs_list_files(const std::string & path);
|
||||
std::vector<common_file_info> fs_list(const std::string & path, bool include_directories);
|
||||
|
||||
//
|
||||
// Model utils
|
||||
|
|
|
|||
|
|
@ -1047,7 +1047,7 @@ std::string common_docker_resolve_model(const std::string &) {
|
|||
std::vector<common_cached_model_info> common_list_cached_models() {
|
||||
std::vector<common_cached_model_info> models;
|
||||
const std::string cache_dir = fs_get_cache_directory();
|
||||
const std::vector<common_file_info> files = fs_list_files(cache_dir);
|
||||
const std::vector<common_file_info> files = fs_list(cache_dir, false);
|
||||
for (const auto & file : files) {
|
||||
if (string_starts_with(file.name, "manifest=") && string_ends_with(file.name, ".json")) {
|
||||
common_cached_model_info model_info;
|
||||
|
|
|
|||
|
|
@ -1364,18 +1364,32 @@ llama-server -hf <user>/<model>:<tag>
|
|||
|
||||
*The server must be restarted after adding a new model.*
|
||||
|
||||
Alternatively, you can point the router to a local directory containing your GGUF files using `--models-dir`. Files prefixed with `mmproj-` will automatically be treated as multimodal projection files **for the model with the matching base name**:
|
||||
Alternatively, you can point the router to a local directory containing your GGUF files using `--models-dir`. Example command:
|
||||
|
||||
```sh
|
||||
llama-3.2-1b-Q4_K_M.gguf
|
||||
gemma-3-4b-it-Q8_0.gguf
|
||||
mmproj-gemma-3-4b-it-Q8_0.gguf # must be "mmproj-" + text model filename
|
||||
llama-server --models-dir ./models_directory
|
||||
```
|
||||
|
||||
Example:
|
||||
If a model consists of multiple GGUF files (for example, a multimodal model with a separate projector file, or a multi-shard model), put those files together in a subdirectory. The directory structure should look like this:
|
||||
|
||||
```sh
|
||||
llama-server --models-dir ./path/to/models
|
||||
models_directory
|
||||
│
|
||||
│ # single file
|
||||
├─ llama-3.2-1b-Q4_K_M.gguf
|
||||
├─ Qwen3-8B-Q4_K_M.gguf
|
||||
│
|
||||
│ # multimodal
|
||||
├─ gemma-3-4b-it-Q8_0
|
||||
│ ├─ gemma-3-4b-it-Q8_0.gguf
|
||||
│ └─ mmproj-F16.gguf # file name must start with "mmproj"
|
||||
│
|
||||
│ # multi-shard
|
||||
├─ Kimi-K2-Thinking-UD-IQ1_S
|
||||
│ ├─ Kimi-K2-Thinking-UD-IQ1_S-00001-of-00006.gguf
|
||||
│ ├─ Kimi-K2-Thinking-UD-IQ1_S-00002-of-00006.gguf
|
||||
│ ├─ ...
|
||||
│ └─ Kimi-K2-Thinking-UD-IQ1_S-00006-of-00006.gguf
|
||||
```
|
||||
|
||||
You may also specify default arguments that will be passed to every loaded model instance:
|
||||
|
|
@ -1384,6 +1398,8 @@ You may also specify default arguments that will be passed to every loaded model
|
|||
llama-server -ctx 8192 -n 1024 -np 2
|
||||
```
|
||||
|
||||
Note: model instances inherit both command line arguments and environment variables from the router server.
|
||||
|
||||
### Routing requests
|
||||
|
||||
Requests are routed according to the requested model name.
|
||||
|
|
|
|||
|
|
@ -80,32 +80,48 @@ static std::vector<local_model> list_local_models(const std::string & dir) {
|
|||
if (!std::filesystem::exists(dir) || !std::filesystem::is_directory(dir)) {
|
||||
throw std::runtime_error(string_format("error: '%s' does not exist or is not a directory\n", dir.c_str()));
|
||||
}
|
||||
auto files = fs_list_files(dir);
|
||||
std::unordered_set<std::string> files_model;
|
||||
std::unordered_set<std::string> files_mmproj;
|
||||
for (const auto & file : files) {
|
||||
// TODO: also handle multiple shards
|
||||
if (string_ends_with(file.name, ".gguf")) {
|
||||
if (string_starts_with(file.name, "mmproj-")) {
|
||||
files_mmproj.insert(file.name);
|
||||
} else {
|
||||
files_model.insert(file.name);
|
||||
|
||||
std::vector<local_model> models;
|
||||
auto scan_subdir = [&models](const std::string & subdir_path, const std::string name) {
|
||||
auto files = fs_list(subdir_path, false);
|
||||
common_file_info model_file;
|
||||
common_file_info first_shard_file;
|
||||
common_file_info mmproj_file;
|
||||
for (const auto & file : files) {
|
||||
if (string_ends_with(file.name, ".gguf")) {
|
||||
if (file.name.find("mmproj") != std::string::npos) {
|
||||
mmproj_file = file;
|
||||
} else if (file.name.find("-00001-of-") != std::string::npos) {
|
||||
first_shard_file = file;
|
||||
} else {
|
||||
model_file = file;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
std::vector<local_model> models;
|
||||
for (const auto & model_file : files_model) {
|
||||
bool has_mmproj = false;
|
||||
std::string mmproj_file = "mmproj-" + model_file;
|
||||
if (files_mmproj.find(mmproj_file) != files_mmproj.end()) {
|
||||
has_mmproj = true;
|
||||
}
|
||||
// single file model
|
||||
local_model model{
|
||||
/* name */ model_file,
|
||||
/* path */ dir + DIRECTORY_SEPARATOR + model_file,
|
||||
/* path_mmproj */ has_mmproj ? (dir + DIRECTORY_SEPARATOR + mmproj_file) : ""
|
||||
/* name */ name,
|
||||
/* path */ first_shard_file.path.empty() ? model_file.path : first_shard_file.path,
|
||||
/* path_mmproj */ mmproj_file.path // can be empty
|
||||
};
|
||||
models.push_back(model);
|
||||
};
|
||||
|
||||
auto files = fs_list(dir, true);
|
||||
for (const auto & file : files) {
|
||||
if (file.is_dir) {
|
||||
scan_subdir(file.path, file.name);
|
||||
} else if (string_ends_with(file.name, ".gguf")) {
|
||||
// single file model
|
||||
std::string name = file.name;
|
||||
string_replace_all(name, ".gguf", "");
|
||||
local_model model{
|
||||
/* name */ name,
|
||||
/* path */ file.path,
|
||||
/* path_mmproj */ ""
|
||||
};
|
||||
models.push_back(model);
|
||||
}
|
||||
}
|
||||
return models;
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue