add --models-dir param
This commit is contained in:
parent
a2e912cf35
commit
032b9ff4a9
|
|
@ -2474,6 +2474,20 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
|
|||
}
|
||||
}
|
||||
).set_examples({LLAMA_EXAMPLE_SERVER}));
|
||||
add_opt(common_arg(
|
||||
{"--models-dir"}, "PATH",
|
||||
"directory containing models for the router server (default: disabled)",
|
||||
[](common_params & params, const std::string & value) {
|
||||
params.models_dir = value;
|
||||
}
|
||||
).set_examples({LLAMA_EXAMPLE_SERVER}));
|
||||
add_opt(common_arg(
|
||||
{"--max-models"}, "N",
|
||||
string_format("for router server, maximum number of models to load simultaneously (default: %d, 0 = unlimited)", params.max_models),
|
||||
[](common_params & params, int value) {
|
||||
params.max_models = value;
|
||||
}
|
||||
).set_examples({LLAMA_EXAMPLE_SERVER}));
|
||||
add_opt(common_arg(
|
||||
{"--jinja"},
|
||||
"use jinja template for chat (default: disabled)",
|
||||
|
|
|
|||
|
|
@ -460,6 +460,10 @@ struct common_params {
|
|||
bool endpoint_props = false; // only control POST requests, not GET
|
||||
bool endpoint_metrics = false;
|
||||
|
||||
// router server configs
|
||||
std::string models_dir = ""; // directory containing models for the router server
|
||||
int max_models = 4; // maximum number of models to load simultaneously
|
||||
|
||||
bool log_json = false;
|
||||
|
||||
std::string slot_save_path;
|
||||
|
|
|
|||
|
|
@ -12,6 +12,7 @@
|
|||
#include <cstring>
|
||||
#include <atomic>
|
||||
#include <chrono>
|
||||
#include <unordered_set>
|
||||
|
||||
#ifdef _WIN32
|
||||
#include <winsock2.h>
|
||||
|
|
@ -69,6 +70,46 @@ static std::filesystem::path get_server_exec_path() {
|
|||
#endif
|
||||
}
|
||||
|
||||
// A model file discovered on disk via --models-dir, plus its optional
// multimodal projector file if one sits beside it.
struct local_model {
    std::string name;        // bare .gguf file name, used as the model's identifier
    std::string path;        // full path to the model file
    std::string path_mmproj; // full path to the matching "mmproj-<name>" file, or "" if none
};
|
||||
|
||||
static std::vector<local_model> list_local_models(const std::string & dir) {
|
||||
if (!std::filesystem::exists(dir) || !std::filesystem::is_directory(dir)) {
|
||||
throw std::runtime_error(string_format("error: '%s' does not exist or is not a directory\n", dir.c_str()));
|
||||
}
|
||||
auto files = fs_list_files(dir);
|
||||
std::unordered_set<std::string> files_model;
|
||||
std::unordered_set<std::string> files_mmproj;
|
||||
for (const auto & file : files) {
|
||||
// TODO: also handle multiple shards
|
||||
if (string_ends_with(file.name, ".gguf")) {
|
||||
if (string_starts_with(file.name, "mmproj-")) {
|
||||
files_mmproj.insert(file.name);
|
||||
} else {
|
||||
files_model.insert(file.name);
|
||||
}
|
||||
}
|
||||
}
|
||||
std::vector<local_model> models;
|
||||
for (const auto & model_file : files_model) {
|
||||
bool has_mmproj = false;
|
||||
std::string mmproj_file = "mmproj-" + model_file;
|
||||
if (files_mmproj.find(mmproj_file) != files_mmproj.end()) {
|
||||
has_mmproj = true;
|
||||
}
|
||||
local_model model{
|
||||
/* name */ model_file,
|
||||
/* path */ dir + DIRECTORY_SEPARATOR + model_file,
|
||||
/* path_mmproj */ has_mmproj ? (dir + DIRECTORY_SEPARATOR + mmproj_file) : ""
|
||||
};
|
||||
models.push_back(model);
|
||||
}
|
||||
return models;
|
||||
}
|
||||
|
||||
//
|
||||
// server_models
|
||||
//
|
||||
|
|
@ -85,12 +126,13 @@ server_models::server_models(
|
|||
base_env.push_back(std::string(*env));
|
||||
}
|
||||
// TODO: allow refreshing cached model list
|
||||
// add cached models
|
||||
auto cached_models = common_list_cached_models();
|
||||
for (const auto & model : cached_models) {
|
||||
server_model_meta meta{
|
||||
/* name */ model.to_string(),
|
||||
/* path */ model.manifest_path,
|
||||
/* path_mmproj */ "",
|
||||
/* path_mmproj */ "", // auto-detected when loading
|
||||
/* in_cache */ true,
|
||||
/* port */ 0,
|
||||
/* status */ SERVER_MODEL_STATUS_UNLOADED
|
||||
|
|
@ -101,6 +143,29 @@ server_models::server_models(
|
|||
/* meta */ meta
|
||||
};
|
||||
}
|
||||
// add local models specified via --models-dir
|
||||
if (!params.models_dir.empty()) {
|
||||
auto local_models = list_local_models(params.models_dir);
|
||||
for (const auto & model : local_models) {
|
||||
if (mapping.find(model.name) != mapping.end()) {
|
||||
// already exists in cached models, skip
|
||||
continue;
|
||||
}
|
||||
server_model_meta meta{
|
||||
/* name */ model.name,
|
||||
/* path */ model.path,
|
||||
/* path_mmproj */ model.path_mmproj,
|
||||
/* in_cache */ false,
|
||||
/* port */ 0,
|
||||
/* status */ SERVER_MODEL_STATUS_UNLOADED
|
||||
};
|
||||
mapping[meta.name] = instance_t{
|
||||
/* subproc */ std::make_shared<subprocess_s>(),
|
||||
/* th */ std::thread(),
|
||||
/* meta */ meta
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void server_models::update_meta(const std::string & name, const server_model_meta & meta) {
|
||||
|
|
|
|||
Loading…
Reference in New Issue