better --models-dir
This commit is contained in:
parent
7cd929076d
commit
7241558835
|
|
@ -911,7 +911,7 @@ std::string fs_get_cache_file(const std::string & filename) {
|
||||||
return cache_directory + filename;
|
return cache_directory + filename;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<common_file_info> fs_list_files(const std::string & path) {
|
std::vector<common_file_info> fs_list(const std::string & path, bool include_directories) {
|
||||||
std::vector<common_file_info> files;
|
std::vector<common_file_info> files;
|
||||||
if (path.empty()) return files;
|
if (path.empty()) return files;
|
||||||
|
|
||||||
|
|
@ -926,14 +926,22 @@ std::vector<common_file_info> fs_list_files(const std::string & path) {
|
||||||
const auto & p = entry.path();
|
const auto & p = entry.path();
|
||||||
if (std::filesystem::is_regular_file(p)) {
|
if (std::filesystem::is_regular_file(p)) {
|
||||||
common_file_info info;
|
common_file_info info;
|
||||||
info.path = p.string();
|
info.path = p.string();
|
||||||
info.name = p.filename().string();
|
info.name = p.filename().string();
|
||||||
|
info.is_dir = false;
|
||||||
try {
|
try {
|
||||||
info.size = static_cast<size_t>(std::filesystem::file_size(p));
|
info.size = static_cast<size_t>(std::filesystem::file_size(p));
|
||||||
} catch (const std::filesystem::filesystem_error &) {
|
} catch (const std::filesystem::filesystem_error &) {
|
||||||
info.size = 0;
|
info.size = 0;
|
||||||
}
|
}
|
||||||
files.push_back(std::move(info));
|
files.push_back(std::move(info));
|
||||||
|
} else if (include_directories && std::filesystem::is_directory(p)) {
|
||||||
|
common_file_info info;
|
||||||
|
info.path = p.string();
|
||||||
|
info.name = p.filename().string();
|
||||||
|
info.size = 0; // Directories have no size
|
||||||
|
info.is_dir = true;
|
||||||
|
files.push_back(std::move(info));
|
||||||
}
|
}
|
||||||
} catch (const std::filesystem::filesystem_error &) {
|
} catch (const std::filesystem::filesystem_error &) {
|
||||||
// skip entries we cannot inspect
|
// skip entries we cannot inspect
|
||||||
|
|
|
||||||
|
|
@ -625,8 +625,9 @@ struct common_file_info {
|
||||||
std::string path;
|
std::string path;
|
||||||
std::string name;
|
std::string name;
|
||||||
size_t size = 0; // in bytes
|
size_t size = 0; // in bytes
|
||||||
|
bool is_dir = false;
|
||||||
};
|
};
|
||||||
std::vector<common_file_info> fs_list_files(const std::string & path);
|
std::vector<common_file_info> fs_list(const std::string & path, bool include_directories);
|
||||||
|
|
||||||
//
|
//
|
||||||
// Model utils
|
// Model utils
|
||||||
|
|
|
||||||
|
|
@ -1047,7 +1047,7 @@ std::string common_docker_resolve_model(const std::string &) {
|
||||||
std::vector<common_cached_model_info> common_list_cached_models() {
|
std::vector<common_cached_model_info> common_list_cached_models() {
|
||||||
std::vector<common_cached_model_info> models;
|
std::vector<common_cached_model_info> models;
|
||||||
const std::string cache_dir = fs_get_cache_directory();
|
const std::string cache_dir = fs_get_cache_directory();
|
||||||
const std::vector<common_file_info> files = fs_list_files(cache_dir);
|
const std::vector<common_file_info> files = fs_list(cache_dir, false);
|
||||||
for (const auto & file : files) {
|
for (const auto & file : files) {
|
||||||
if (string_starts_with(file.name, "manifest=") && string_ends_with(file.name, ".json")) {
|
if (string_starts_with(file.name, "manifest=") && string_ends_with(file.name, ".json")) {
|
||||||
common_cached_model_info model_info;
|
common_cached_model_info model_info;
|
||||||
|
|
|
||||||
|
|
@ -1364,18 +1364,32 @@ llama-server -hf <user>/<model>:<tag>
|
||||||
|
|
||||||
*The server must be restarted after adding a new model.*
|
*The server must be restarted after adding a new model.*
|
||||||
|
|
||||||
Alternatively, you can point the router to a local directory containing your GGUF files using `--models-dir`. Files prefixed with `mmproj-` will automatically be treated as multimodal projection files **for the model with the matching base name**:
|
Alternatively, you can point the router to a local directory containing your GGUF files using `--models-dir`. Example command:
|
||||||
|
|
||||||
```sh
|
```sh
|
||||||
llama-3.2-1b-Q4_K_M.gguf
|
llama-server --models-dir ./models_directory
|
||||||
gemma-3-4b-it-Q8_0.gguf
|
|
||||||
mmproj-gemma-3-4b-it-Q8_0.gguf # must be "mmproj-" + text model filename
|
|
||||||
```
|
```
|
||||||
|
|
||||||
Example:
|
If a model consists of multiple GGUF files (multimodal or multi-shard), the files should be placed in a subdirectory. The directory structure should look like this:
|
||||||
|
|
||||||
```sh
|
```sh
|
||||||
llama-server --models-dir ./path/to/models
|
models_directory
|
||||||
|
│
|
||||||
|
│ # single file
|
||||||
|
├─ llama-3.2-1b-Q4_K_M.gguf
|
||||||
|
├─ Qwen3-8B-Q4_K_M.gguf
|
||||||
|
│
|
||||||
|
│ # multimodal
|
||||||
|
├─ gemma-3-4b-it-Q8_0
|
||||||
|
│ ├─ gemma-3-4b-it-Q8_0.gguf
|
||||||
|
│ └─ mmproj-F16.gguf # file name must start with "mmproj"
|
||||||
|
│
|
||||||
|
│ # multi-shard
|
||||||
|
├─ Kimi-K2-Thinking-UD-IQ1_S
|
||||||
|
│ ├─ Kimi-K2-Thinking-UD-IQ1_S-00001-of-00006.gguf
|
||||||
|
│ ├─ Kimi-K2-Thinking-UD-IQ1_S-00002-of-00006.gguf
|
||||||
|
│ ├─ ...
|
||||||
|
│ └─ Kimi-K2-Thinking-UD-IQ1_S-00006-of-00006.gguf
|
||||||
```
|
```
|
||||||
|
|
||||||
You may also specify default arguments that will be passed to every loaded model instance:
|
You may also specify default arguments that will be passed to every loaded model instance:
|
||||||
|
|
@ -1384,6 +1398,8 @@ You may also specify default arguments that will be passed to every loaded model
|
||||||
llama-server -ctx 8192 -n 1024 -np 2
|
llama-server -ctx 8192 -n 1024 -np 2
|
||||||
```
|
```
|
||||||
|
|
||||||
|
Note: model instances inherit both command line arguments and environment variables from the router server.
|
||||||
|
|
||||||
### Routing requests
|
### Routing requests
|
||||||
|
|
||||||
Requests are routed according to the requested model name.
|
Requests are routed according to the requested model name.
|
||||||
|
|
|
||||||
|
|
@ -80,32 +80,48 @@ static std::vector<local_model> list_local_models(const std::string & dir) {
|
||||||
if (!std::filesystem::exists(dir) || !std::filesystem::is_directory(dir)) {
|
if (!std::filesystem::exists(dir) || !std::filesystem::is_directory(dir)) {
|
||||||
throw std::runtime_error(string_format("error: '%s' does not exist or is not a directory\n", dir.c_str()));
|
throw std::runtime_error(string_format("error: '%s' does not exist or is not a directory\n", dir.c_str()));
|
||||||
}
|
}
|
||||||
auto files = fs_list_files(dir);
|
|
||||||
std::unordered_set<std::string> files_model;
|
std::vector<local_model> models;
|
||||||
std::unordered_set<std::string> files_mmproj;
|
auto scan_subdir = [&models](const std::string & subdir_path, const std::string name) {
|
||||||
for (const auto & file : files) {
|
auto files = fs_list(subdir_path, false);
|
||||||
// TODO: also handle multiple shards
|
common_file_info model_file;
|
||||||
if (string_ends_with(file.name, ".gguf")) {
|
common_file_info first_shard_file;
|
||||||
if (string_starts_with(file.name, "mmproj-")) {
|
common_file_info mmproj_file;
|
||||||
files_mmproj.insert(file.name);
|
for (const auto & file : files) {
|
||||||
} else {
|
if (string_ends_with(file.name, ".gguf")) {
|
||||||
files_model.insert(file.name);
|
if (file.name.find("mmproj") != std::string::npos) {
|
||||||
|
mmproj_file = file;
|
||||||
|
} else if (file.name.find("-00001-of-") != std::string::npos) {
|
||||||
|
first_shard_file = file;
|
||||||
|
} else {
|
||||||
|
model_file = file;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
// single file model
|
||||||
std::vector<local_model> models;
|
|
||||||
for (const auto & model_file : files_model) {
|
|
||||||
bool has_mmproj = false;
|
|
||||||
std::string mmproj_file = "mmproj-" + model_file;
|
|
||||||
if (files_mmproj.find(mmproj_file) != files_mmproj.end()) {
|
|
||||||
has_mmproj = true;
|
|
||||||
}
|
|
||||||
local_model model{
|
local_model model{
|
||||||
/* name */ model_file,
|
/* name */ name,
|
||||||
/* path */ dir + DIRECTORY_SEPARATOR + model_file,
|
/* path */ first_shard_file.path.empty() ? model_file.path : first_shard_file.path,
|
||||||
/* path_mmproj */ has_mmproj ? (dir + DIRECTORY_SEPARATOR + mmproj_file) : ""
|
/* path_mmproj */ mmproj_file.path // can be empty
|
||||||
};
|
};
|
||||||
models.push_back(model);
|
models.push_back(model);
|
||||||
|
};
|
||||||
|
|
||||||
|
auto files = fs_list(dir, true);
|
||||||
|
for (const auto & file : files) {
|
||||||
|
if (file.is_dir) {
|
||||||
|
scan_subdir(file.path, file.name);
|
||||||
|
} else if (string_ends_with(file.name, ".gguf")) {
|
||||||
|
// single file model
|
||||||
|
std::string name = file.name;
|
||||||
|
string_replace_all(name, ".gguf", "");
|
||||||
|
local_model model{
|
||||||
|
/* name */ name,
|
||||||
|
/* path */ file.path,
|
||||||
|
/* path_mmproj */ ""
|
||||||
|
};
|
||||||
|
models.push_back(model);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return models;
|
return models;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue