presets: refactor, allow cascade presets from different sources, add global section (#18169)
* presets: refactor, allow cascade presets from different sources * update docs * fix neg arg handling * fix empty mmproj * also filter out server-controlled args before to_ini() * skip loading custom_models if not specified * fix unset_reserved_args * fix crash on windows
This commit is contained in:
parent
acb73d8340
commit
98c1c7a7bf
|
|
@ -772,6 +772,11 @@ bool common_params_to_map(int argc, char ** argv, llama_example ex, std::map<com
|
|||
}
|
||||
auto opt = *arg_to_options[arg];
|
||||
std::string val;
|
||||
if (opt.value_hint == nullptr && opt.value_hint_2 == nullptr) {
|
||||
// bool arg (need to reverse the meaning for negative args)
|
||||
bool is_neg = std::find(opt.args_neg.begin(), opt.args_neg.end(), arg) != opt.args_neg.end();
|
||||
val = is_neg ? "0" : "1";
|
||||
}
|
||||
if (opt.value_hint != nullptr) {
|
||||
// arg with single value
|
||||
check_arg(i);
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@
|
|||
#include "preset.h"
|
||||
#include "peg-parser.h"
|
||||
#include "log.h"
|
||||
#include "download.h"
|
||||
|
||||
#include <fstream>
|
||||
#include <sstream>
|
||||
|
|
@ -15,9 +16,13 @@ static std::string rm_leading_dashes(const std::string & str) {
|
|||
return str.substr(pos);
|
||||
}
|
||||
|
||||
std::vector<std::string> common_preset::to_args() const {
|
||||
std::vector<std::string> common_preset::to_args(const std::string & bin_path) const {
|
||||
std::vector<std::string> args;
|
||||
|
||||
if (!bin_path.empty()) {
|
||||
args.push_back(bin_path);
|
||||
}
|
||||
|
||||
for (const auto & [opt, value] : options) {
|
||||
args.push_back(opt.args.back()); // use the last arg as the main arg
|
||||
if (opt.value_hint == nullptr && opt.value_hint_2 == nullptr) {
|
||||
|
|
@ -63,6 +68,52 @@ std::string common_preset::to_ini() const {
|
|||
return ss.str();
|
||||
}
|
||||
|
||||
void common_preset::set_option(const common_preset_context & ctx, const std::string & env, const std::string & value) {
|
||||
// try if option exists, update it
|
||||
for (auto & [opt, val] : options) {
|
||||
if (opt.env && env == opt.env) {
|
||||
val = value;
|
||||
return;
|
||||
}
|
||||
}
|
||||
// if option does not exist, we need to add it
|
||||
if (ctx.key_to_opt.find(env) == ctx.key_to_opt.end()) {
|
||||
throw std::runtime_error(string_format(
|
||||
"%s: option with env '%s' not found in ctx_params",
|
||||
__func__, env.c_str()
|
||||
));
|
||||
}
|
||||
options[ctx.key_to_opt.at(env)] = value;
|
||||
}
|
||||
|
||||
// Remove the option whose environment-variable name equals `env`.
// Only the first match is removed; nothing happens when there is no match.
void common_preset::unset_option(const std::string & env) {
    for (auto it = options.begin(); it != options.end(); ++it) {
        if (it->first.env == nullptr) {
            continue; // options without an env name can never match
        }
        if (env == it->first.env) {
            options.erase(it);
            return;
        }
    }
}
|
||||
|
||||
bool common_preset::get_option(const std::string & env, std::string & value) const {
|
||||
for (const auto & [opt, val] : options) {
|
||||
if (opt.env && env == opt.env) {
|
||||
value = val;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
void common_preset::merge(const common_preset & other) {
|
||||
for (const auto & [opt, val] : other.options) {
|
||||
options[opt] = val; // overwrite existing options
|
||||
}
|
||||
}
|
||||
|
||||
static std::map<std::string, std::map<std::string, std::string>> parse_ini_from_file(const std::string & path) {
|
||||
std::map<std::string, std::map<std::string, std::string>> parsed;
|
||||
|
||||
|
|
@ -172,9 +223,12 @@ static std::string parse_bool_arg(const common_arg & arg, const std::string & ke
|
|||
return value;
|
||||
}
|
||||
|
||||
common_presets common_presets_load(const std::string & path, common_params_context & ctx_params) {
|
||||
// Build a preset context for the given example: the argument parser context is
// initialised from default_params, then its options are indexed by key through
// get_map_key_opt().
common_preset_context::common_preset_context(llama_example ex)
    : ctx_params(common_params_parser_init(default_params, ex)),
      key_to_opt(get_map_key_opt(ctx_params)) {
}
|
||||
|
||||
common_presets common_preset_context::load_from_ini(const std::string & path, common_preset & global) const {
|
||||
common_presets out;
|
||||
auto key_to_opt = get_map_key_opt(ctx_params);
|
||||
auto ini_data = parse_ini_from_file(path);
|
||||
|
||||
for (auto section : ini_data) {
|
||||
|
|
@ -188,7 +242,7 @@ common_presets common_presets_load(const std::string & path, common_params_conte
|
|||
for (const auto & [key, value] : section.second) {
|
||||
LOG_DBG("option: %s = %s\n", key.c_str(), value.c_str());
|
||||
if (key_to_opt.find(key) != key_to_opt.end()) {
|
||||
auto & opt = key_to_opt[key];
|
||||
const auto & opt = key_to_opt.at(key);
|
||||
if (is_bool_arg(opt)) {
|
||||
preset.options[opt] = parse_bool_arg(opt, key, value);
|
||||
} else {
|
||||
|
|
@ -199,8 +253,137 @@ common_presets common_presets_load(const std::string & path, common_params_conte
|
|||
// TODO: maybe warn about unknown key?
|
||||
}
|
||||
}
|
||||
|
||||
if (preset.name == "*") {
|
||||
// handle global preset
|
||||
global = preset;
|
||||
} else {
|
||||
out[preset.name] = preset;
|
||||
}
|
||||
}
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
// Create one preset per entry returned by common_list_cached_models().
// Each preset is named after the model's string form and carries that same
// string as its LLAMA_ARG_HF_REPO option.
common_presets common_preset_context::load_from_cache() const {
    common_presets result;

    for (const auto & cached : common_list_cached_models()) {
        const std::string model_id = cached.to_string();

        common_preset entry;
        entry.name = model_id;
        entry.set_option(*this, "LLAMA_ARG_HF_REPO", model_id);

        result[entry.name] = entry;
    }

    return result;
}
|
||||
|
||||
// A model discovered on disk while scanning a models directory.
// Field order matters: the struct is aggregate-initialised positionally.
struct local_model {
    std::string name;        // sub-directory name, or file name without the .gguf extension
    std::string path;        // model file to load (first shard for multi-part models)
    std::string path_mmproj; // multimodal projector file; empty when none was found
};
|
||||
|
||||
// Generate presets from the models found in `models_dir`.
//
// Two layouts are recognised among the entries of `models_dir`:
//   - a sub-directory: scanned for *.gguf files; the preset is named after the
//     sub-directory and may also carry an mmproj file
//   - a *.gguf file directly inside `models_dir`: the preset is named after the
//     file name with its ".gguf" extension removed
//
// Each preset gets LLAMA_ARG_MODEL, plus LLAMA_ARG_MMPROJ when an mmproj file
// was found. Throws std::runtime_error if `models_dir` does not exist or is not
// a directory.
common_presets common_preset_context::load_from_models_dir(const std::string & models_dir) const {
    if (!std::filesystem::exists(models_dir) || !std::filesystem::is_directory(models_dir)) {
        throw std::runtime_error(string_format("error: '%s' does not exist or is not a directory\n", models_dir.c_str()));
    }

    const std::string gguf_ext = ".gguf";

    std::vector<local_model> models;
    auto scan_subdir = [&models, &gguf_ext](const std::string & subdir_path, const std::string & name) {
        auto files = fs_list(subdir_path, false);
        common_file_info model_file;
        common_file_info first_shard_file;
        common_file_info mmproj_file;
        for (const auto & file : files) {
            if (string_ends_with(file.name, gguf_ext)) {
                if (file.name.find("mmproj") != std::string::npos) {
                    mmproj_file = file;
                } else if (file.name.find("-00001-of-") != std::string::npos) {
                    first_shard_file = file;
                } else {
                    model_file = file;
                }
            }
        }
        // a sharded model is loaded through its first shard; otherwise fall back
        // to the plain model file found in the sub-directory
        local_model model{
            /* name        */ name,
            /* path        */ first_shard_file.path.empty() ? model_file.path : first_shard_file.path,
            /* path_mmproj */ mmproj_file.path // can be empty
        };
        // sub-directories without any usable model file are ignored
        if (!model.path.empty()) {
            models.push_back(model);
        }
    };

    auto files = fs_list(models_dir, true);
    for (const auto & file : files) {
        if (file.is_dir) {
            scan_subdir(file.path, file.name);
        } else if (string_ends_with(file.name, gguf_ext)) {
            // single file model: strip only the trailing extension, so that a
            // ".gguf" appearing elsewhere in the file name is preserved
            std::string name = file.name;
            name.erase(name.size() - gguf_ext.size());
            local_model model{
                /* name        */ name,
                /* path        */ file.path,
                /* path_mmproj */ ""
            };
            models.push_back(model);
        }
    }

    // convert local models to presets
    common_presets out;
    for (const auto & model : models) {
        common_preset preset;
        preset.name = model.name;
        preset.set_option(*this, "LLAMA_ARG_MODEL", model.path);
        if (!model.path_mmproj.empty()) {
            preset.set_option(*this, "LLAMA_ARG_MMPROJ", model.path_mmproj);
        }
        out[preset.name] = preset;
    }

    return out;
}
|
||||
|
||||
// Build a single preset, named COMMON_PRESET_DEFAULT_NAME, from CLI arguments.
// Throws std::runtime_error when common_params_to_map() reports failure.
common_preset common_preset_context::load_from_args(int argc, char ** argv) const {
    common_preset result;
    result.name = COMMON_PRESET_DEFAULT_NAME;

    if (!common_params_to_map(argc, argv, ctx_params.ex, result.options)) {
        throw std::runtime_error("failed to parse CLI arguments into preset");
    }

    return result;
}
|
||||
|
||||
// Layer `added` on top of `base`, matching presets by name.
// A preset present on both sides is merged (options from `added` win);
// a preset only present in `added` is copied over unchanged.
common_presets common_preset_context::cascade(const common_presets & base, const common_presets & added) const {
    common_presets result(base);

    for (auto it = added.begin(); it != added.end(); ++it) {
        auto existing = result.find(it->first);
        if (existing == result.end()) {
            // unknown name: take the added preset as-is
            result[it->first] = it->second;
        } else {
            // known name: overlay the added options
            existing->second.merge(it->second);
        }
    }

    return result;
}
|
||||
|
||||
// Apply every preset in `presets` on top of a copy of `base`.
// The result has one entry per input preset, keeping its name; options defined
// in the preset override those inherited from `base`.
common_presets common_preset_context::cascade(const common_preset & base, const common_presets & presets) const {
    common_presets result;

    for (auto it = presets.begin(); it != presets.end(); ++it) {
        common_preset layered(base);
        layered.name = it->first;
        layered.merge(it->second);
        result[it->first] = std::move(layered);
    }

    return result;
}
|
||||
|
|
|
|||
|
|
@ -13,20 +13,62 @@
|
|||
|
||||
constexpr const char * COMMON_PRESET_DEFAULT_NAME = "default";
|
||||
|
||||
struct common_preset_context;
|
||||
|
||||
struct common_preset {
|
||||
std::string name;
|
||||
// TODO: support repeated args in the future
|
||||
|
||||
// options are stored as common_arg to string mapping, representing CLI arg and its value
|
||||
std::map<common_arg, std::string> options;
|
||||
|
||||
// convert preset to CLI argument list
|
||||
std::vector<std::string> to_args() const;
|
||||
std::vector<std::string> to_args(const std::string & bin_path = "") const;
|
||||
|
||||
// convert preset to INI format string
|
||||
std::string to_ini() const;
|
||||
|
||||
// TODO: maybe implement to_env() if needed
|
||||
|
||||
// modify preset options where argument is identified by its env variable
|
||||
void set_option(const common_preset_context & ctx, const std::string & env, const std::string & value);
|
||||
|
||||
// unset option by its env variable
|
||||
void unset_option(const std::string & env);
|
||||
|
||||
// get option value by its env variable, return false if not found
|
||||
bool get_option(const std::string & env, std::string & value) const;
|
||||
|
||||
// merge another preset into this one, overwriting existing options
|
||||
void merge(const common_preset & other);
|
||||
};
|
||||
|
||||
// interface for multiple presets in one file
|
||||
using common_presets = std::map<std::string, common_preset>;
|
||||
common_presets common_presets_load(const std::string & path, common_params_context & ctx_params);
|
||||
|
||||
// context for loading and editing presets
|
||||
struct common_preset_context {
|
||||
common_params default_params; // unused for now
|
||||
common_params_context ctx_params;
|
||||
std::map<std::string, common_arg> key_to_opt;
|
||||
common_preset_context(llama_example ex);
|
||||
|
||||
// load presets from INI file
|
||||
common_presets load_from_ini(const std::string & path, common_preset & global) const;
|
||||
|
||||
// generate presets from cached models
|
||||
common_presets load_from_cache() const;
|
||||
|
||||
// generate presets from local models directory
|
||||
// for the directory structure, see "Using multiple models" in server/README.md
|
||||
common_presets load_from_models_dir(const std::string & models_dir) const;
|
||||
|
||||
// generate one preset from CLI arguments
|
||||
common_preset load_from_args(int argc, char ** argv) const;
|
||||
|
||||
// cascade multiple presets if exist on both: base < added
|
||||
// if preset does not exist in base, it will be added without modification
|
||||
common_presets cascade(const common_presets & base, const common_presets & added) const;
|
||||
|
||||
// apply presets over a base preset (same idea as CSS cascading)
|
||||
common_presets cascade(const common_preset & base, const common_presets & presets) const;
|
||||
};
|
||||
|
|
|
|||
|
|
@ -1443,6 +1443,12 @@ Example:
|
|||
```ini
|
||||
version = 1
|
||||
|
||||
; (Optional) This section provides global settings shared across all presets.
|
||||
; If the same key is defined in a specific preset, it will override the value in this global section.
|
||||
[*]
|
||||
c = 8192
|
||||
n-gpu-layers = 8
|
||||
|
||||
; If the key corresponds to an existing model on the server,
|
||||
; this will be used as the default config for that model
|
||||
[ggml-org/MY-MODEL-GGUF:Q8_0]
|
||||
|
|
@ -1462,12 +1468,17 @@ model-draft = ./my-models/draft.gguf
|
|||
model-draft = /Users/abc/my-models/draft.gguf
|
||||
|
||||
; If the key does NOT correspond to an existing model,
|
||||
; you need to specify at least the model path
|
||||
; you need to specify at least the model path or HF repo
|
||||
[custom_model]
|
||||
model = /Users/abc/my-awesome-model-Q4_K_M.gguf
|
||||
```
|
||||
|
||||
Note: some arguments are controlled by router (e.g., host, port, API key, HF repo, model alias). They will be removed or overwritten upload loading.
|
||||
Note: some arguments are controlled by router (e.g., host, port, API key, HF repo, model alias). They will be removed or overwritten upon loading.
|
||||
|
||||
The precedence rule for preset options is as follows:
|
||||
1. **Command-line arguments** passed to `llama-server` (highest priority)
|
||||
2. **Model-specific options** defined in the preset file (e.g. `[ggml-org/MY-MODEL...]`)
|
||||
3. **Global options** defined in the preset file (`[*]`)
|
||||
|
||||
### Routing requests
|
||||
|
||||
|
|
|
|||
|
|
@ -82,154 +82,30 @@ static std::filesystem::path get_server_exec_path() {
|
|||
#endif
|
||||
}
|
||||
|
||||
// A model found on disk by the models-directory scan or declared with a custom
// path in a preset. Field order matters: the struct is aggregate-initialised
// positionally.
struct local_model {
    std::string name;        // identifier under which the model is exposed
    std::string path;        // model file to load (first shard for multi-part models)
    std::string path_mmproj; // multimodal projector file; empty when there is none
};
|
||||
|
||||
static std::vector<local_model> list_local_models(const std::string & dir) {
|
||||
if (!std::filesystem::exists(dir) || !std::filesystem::is_directory(dir)) {
|
||||
throw std::runtime_error(string_format("error: '%s' does not exist or is not a directory\n", dir.c_str()));
|
||||
}
|
||||
|
||||
std::vector<local_model> models;
|
||||
auto scan_subdir = [&models](const std::string & subdir_path, const std::string & name) {
|
||||
auto files = fs_list(subdir_path, false);
|
||||
common_file_info model_file;
|
||||
common_file_info first_shard_file;
|
||||
common_file_info mmproj_file;
|
||||
for (const auto & file : files) {
|
||||
if (string_ends_with(file.name, ".gguf")) {
|
||||
if (file.name.find("mmproj") != std::string::npos) {
|
||||
mmproj_file = file;
|
||||
} else if (file.name.find("-00001-of-") != std::string::npos) {
|
||||
first_shard_file = file;
|
||||
} else {
|
||||
model_file = file;
|
||||
}
|
||||
}
|
||||
}
|
||||
// single file model
|
||||
local_model model{
|
||||
/* name */ name,
|
||||
/* path */ first_shard_file.path.empty() ? model_file.path : first_shard_file.path,
|
||||
/* path_mmproj */ mmproj_file.path // can be empty
|
||||
};
|
||||
if (!model.path.empty()) {
|
||||
models.push_back(model);
|
||||
}
|
||||
};
|
||||
|
||||
auto files = fs_list(dir, true);
|
||||
for (const auto & file : files) {
|
||||
if (file.is_dir) {
|
||||
scan_subdir(file.path, file.name);
|
||||
} else if (string_ends_with(file.name, ".gguf")) {
|
||||
// single file model
|
||||
std::string name = file.name;
|
||||
string_replace_all(name, ".gguf", "");
|
||||
local_model model{
|
||||
/* name */ name,
|
||||
/* path */ file.path,
|
||||
/* path_mmproj */ ""
|
||||
};
|
||||
models.push_back(model);
|
||||
}
|
||||
}
|
||||
return models;
|
||||
}
|
||||
|
||||
//
|
||||
// server_presets
|
||||
//
|
||||
|
||||
|
||||
server_presets::server_presets(int argc, char ** argv, common_params & base_params, const std::string & presets_path)
|
||||
: ctx_params(common_params_parser_init(base_params, LLAMA_EXAMPLE_SERVER)) {
|
||||
if (!presets_path.empty()) {
|
||||
presets = common_presets_load(presets_path, ctx_params);
|
||||
SRV_INF("Loaded %zu presets from %s\n", presets.size(), presets_path.c_str());
|
||||
}
|
||||
|
||||
// populate reserved args (will be appended by the router)
|
||||
for (auto & opt : ctx_params.options) {
|
||||
if (opt.env == nullptr) {
|
||||
continue;
|
||||
}
|
||||
std::string env = opt.env;
|
||||
if (env == "LLAMA_ARG_PORT" ||
|
||||
env == "LLAMA_ARG_HOST" ||
|
||||
env == "LLAMA_ARG_ALIAS" ||
|
||||
env == "LLAMA_ARG_API_KEY" ||
|
||||
env == "LLAMA_ARG_MODELS_DIR" ||
|
||||
env == "LLAMA_ARG_MODELS_MAX" ||
|
||||
env == "LLAMA_ARG_MODELS_PRESET" ||
|
||||
env == "LLAMA_ARG_MODEL" ||
|
||||
env == "LLAMA_ARG_MMPROJ" ||
|
||||
env == "LLAMA_ARG_HF_REPO" ||
|
||||
env == "LLAMA_ARG_NO_MODELS_AUTOLOAD" ||
|
||||
env == "LLAMA_ARG_SSL_KEY_FILE" ||
|
||||
env == "LLAMA_ARG_SSL_CERT_FILE") {
|
||||
control_args[env] = opt;
|
||||
}
|
||||
}
|
||||
|
||||
// read base args from router's argv
|
||||
common_params_to_map(argc, argv, LLAMA_EXAMPLE_SERVER, base_args);
|
||||
|
||||
// remove any router-controlled args from base_args
|
||||
for (const auto & cargs : control_args) {
|
||||
auto it = base_args.find(cargs.second);
|
||||
if (it != base_args.end()) {
|
||||
base_args.erase(it);
|
||||
}
|
||||
// Strip the options listed below from `preset`.
// When `unset_model_args` is true, the options that select the model itself
// (model path, mmproj path, HF repo) are stripped as well.
static void unset_reserved_args(common_preset & preset, bool unset_model_args) {
    const char * const always_removed[] = {
        "LLAMA_ARG_SSL_KEY_FILE",
        "LLAMA_ARG_SSL_CERT_FILE",
        "LLAMA_API_KEY",
        "LLAMA_ARG_MODELS_DIR",
        "LLAMA_ARG_MODELS_MAX",
        "LLAMA_ARG_MODELS_PRESET",
        "LLAMA_ARG_MODELS_AUTOLOAD",
    };
    for (const char * env : always_removed) {
        preset.unset_option(env);
    }

    if (!unset_model_args) {
        return;
    }

    const char * const model_selectors[] = {
        "LLAMA_ARG_MODEL",
        "LLAMA_ARG_MMPROJ",
        "LLAMA_ARG_HF_REPO",
    };
    for (const char * env : model_selectors) {
        preset.unset_option(env);
    }
}
|
||||
|
||||
// Return a copy of the preset registered under `name`,
// or a default-constructed preset when no such entry exists.
common_preset server_presets::get_preset(const std::string & name) {
    const auto found = presets.find(name);
    return found == presets.end() ? common_preset() : found->second;
}
|
||||
|
||||
void server_presets::render_args(server_model_meta & meta) {
|
||||
common_preset preset = meta.preset; // copy
|
||||
// merging 3 kinds of args:
|
||||
// 1. model-specific args (from preset)
|
||||
// force removing control args if any
|
||||
for (auto & cargs : control_args) {
|
||||
if (preset.options.find(cargs.second) != preset.options.end()) {
|
||||
SRV_WRN("Preset '%s' contains reserved arg '%s', removing it\n", preset.name.c_str(), cargs.second.args[0]);
|
||||
preset.options.erase(cargs.second);
|
||||
}
|
||||
}
|
||||
// 2. base args (from router)
|
||||
// inherit from base args
|
||||
for (const auto & [arg, value] : base_args) {
|
||||
preset.options[arg] = value;
|
||||
}
|
||||
// 3. control args (from router)
|
||||
// set control values
|
||||
preset.options[control_args["LLAMA_ARG_HOST"]] = CHILD_ADDR;
|
||||
preset.options[control_args["LLAMA_ARG_PORT"]] = std::to_string(meta.port);
|
||||
preset.options[control_args["LLAMA_ARG_ALIAS"]] = meta.name;
|
||||
if (meta.in_cache) {
|
||||
preset.options[control_args["LLAMA_ARG_HF_REPO"]] = meta.name;
|
||||
} else {
|
||||
preset.options[control_args["LLAMA_ARG_MODEL"]] = meta.path;
|
||||
if (!meta.path_mmproj.empty()) {
|
||||
preset.options[control_args["LLAMA_ARG_MMPROJ"]] = meta.path_mmproj;
|
||||
}
|
||||
}
|
||||
// disable SSL for child processes (HTTPS already handled by router)
|
||||
preset.options[control_args["LLAMA_ARG_SSL_KEY_FILE"]] = "";
|
||||
preset.options[control_args["LLAMA_ARG_SSL_CERT_FILE"]] = "";
|
||||
meta.args = preset.to_args();
|
||||
// add back the binary path at the front
|
||||
meta.args.insert(meta.args.begin(), get_server_exec_path().string());
|
||||
// Refresh `args` from the current preset: reserved options are stripped
// (model-selecting options are kept), host/port/alias are forced to the values
// of this instance, and the preset is rendered with `bin_path` passed to to_args().
void server_model_meta::update_args(common_preset_context & ctx_preset, std::string bin_path) {
    // drop reserved options, keep the ones that select the model
    unset_reserved_args(preset, false);

    // values owned by this instance always win
    const std::string port_str = std::to_string(port);
    preset.set_option(ctx_preset, "LLAMA_ARG_HOST",  CHILD_ADDR);
    preset.set_option(ctx_preset, "LLAMA_ARG_PORT",  port_str);
    preset.set_option(ctx_preset, "LLAMA_ARG_ALIAS", name);

    // TODO: maybe validate preset before rendering ?

    args = preset.to_args(bin_path);
}
|
||||
|
||||
//
|
||||
|
|
@ -240,20 +116,22 @@ server_models::server_models(
|
|||
const common_params & params,
|
||||
int argc,
|
||||
char ** argv,
|
||||
char ** envp) : base_params(params), presets(argc, argv, base_params, params.models_preset) {
|
||||
for (int i = 0; i < argc; i++) {
|
||||
base_args.push_back(std::string(argv[i]));
|
||||
}
|
||||
char ** envp)
|
||||
: ctx_preset(LLAMA_EXAMPLE_SERVER),
|
||||
base_params(params),
|
||||
base_preset(ctx_preset.load_from_args(argc, argv)) {
|
||||
for (char ** env = envp; *env != nullptr; env++) {
|
||||
base_env.push_back(std::string(*env));
|
||||
}
|
||||
GGML_ASSERT(!base_args.empty());
|
||||
// clean up base preset
|
||||
unset_reserved_args(base_preset, true);
|
||||
// set binary path
|
||||
try {
|
||||
base_args[0] = get_server_exec_path().string();
|
||||
bin_path = get_server_exec_path().string();
|
||||
} catch (const std::exception & e) {
|
||||
bin_path = argv[0];
|
||||
LOG_WRN("failed to get server executable path: %s\n", e.what());
|
||||
LOG_WRN("using original argv[0] as fallback: %s\n", base_args[0].c_str());
|
||||
LOG_WRN("using original argv[0] as fallback: %s\n", argv[0]);
|
||||
}
|
||||
load_models();
|
||||
}
|
||||
|
|
@ -262,7 +140,7 @@ void server_models::add_model(server_model_meta && meta) {
|
|||
if (mapping.find(meta.name) != mapping.end()) {
|
||||
throw std::runtime_error(string_format("model '%s' appears multiple times", meta.name.c_str()));
|
||||
}
|
||||
presets.render_args(meta); // populate meta.args
|
||||
meta.update_args(ctx_preset, bin_path); // render args
|
||||
std::string name = meta.name;
|
||||
mapping[name] = instance_t{
|
||||
/* subproc */ std::make_shared<subprocess_s>(),
|
||||
|
|
@ -271,86 +149,62 @@ void server_models::add_model(server_model_meta && meta) {
|
|||
};
|
||||
}
|
||||
|
||||
// Collect the presets that specify their own model path.
// For every preset, the LLAMA_ARG_MODEL and LLAMA_ARG_MMPROJ options are moved
// out of the preset (they are erased from preset.options) into a local_model
// entry; entries without a name or without a model path are dropped.
static std::vector<local_model> list_custom_path_models(server_presets & presets) {
    std::vector<local_model> result;

    for (auto & entry : presets.presets) {
        common_preset & preset = entry.second;

        local_model model;
        model.name = entry.first;

        for (auto it = preset.options.begin(); it != preset.options.end(); ) {
            const std::string env(it->first.env ? it->first.env : "");
            const bool is_model  = env == "LLAMA_ARG_MODEL";
            const bool is_mmproj = env == "LLAMA_ARG_MMPROJ";

            if (is_model) {
                model.path = it->second;
            }
            if (is_mmproj) {
                model.path_mmproj = it->second;
            }

            if (is_model || is_mmproj) {
                // the path now lives in `model`; remove it from the preset
                it = preset.options.erase(it);
            } else {
                ++it;
            }
        }

        if (!model.name.empty() && !model.path.empty()) {
            result.push_back(model);
        }
    }

    return result;
}
|
||||
|
||||
// TODO: allow refreshing cached model list
|
||||
void server_models::load_models() {
|
||||
// loading models from 3 sources:
|
||||
// 1. cached models
|
||||
auto cached_models = common_list_cached_models();
|
||||
for (const auto & model : cached_models) {
|
||||
server_model_meta meta{
|
||||
/* preset */ presets.get_preset(model.to_string()),
|
||||
/* name */ model.to_string(),
|
||||
/* path */ model.manifest_path,
|
||||
/* path_mmproj */ "", // auto-detected when loading
|
||||
/* in_cache */ true,
|
||||
/* port */ 0,
|
||||
/* status */ SERVER_MODEL_STATUS_UNLOADED,
|
||||
/* last_used */ 0,
|
||||
/* args */ std::vector<std::string>(),
|
||||
/* exit_code */ 0
|
||||
};
|
||||
add_model(std::move(meta));
|
||||
}
|
||||
// 2. local models specificed via --models-dir
|
||||
common_presets cached_models = ctx_preset.load_from_cache();
|
||||
SRV_INF("Loaded %zu cached model presets\n", cached_models.size());
|
||||
// 2. local models from --models-dir
|
||||
common_presets local_models;
|
||||
if (!base_params.models_dir.empty()) {
|
||||
auto local_models = list_local_models(base_params.models_dir);
|
||||
for (const auto & model : local_models) {
|
||||
if (mapping.find(model.name) != mapping.end()) {
|
||||
// already exists in cached models, skip
|
||||
continue;
|
||||
local_models = ctx_preset.load_from_models_dir(base_params.models_dir);
|
||||
SRV_INF("Loaded %zu local model presets from %s\n", local_models.size(), base_params.models_dir.c_str());
|
||||
}
|
||||
// 3. custom-path models from presets
|
||||
common_preset global = {};
|
||||
common_presets custom_presets = {};
|
||||
if (!base_params.models_preset.empty()) {
|
||||
custom_presets = ctx_preset.load_from_ini(base_params.models_preset, global);
|
||||
SRV_INF("Loaded %zu custom model presets from %s\n", custom_presets.size(), base_params.models_preset.c_str());
|
||||
}
|
||||
|
||||
// cascade, apply global preset first
|
||||
cached_models = ctx_preset.cascade(global, cached_models);
|
||||
local_models = ctx_preset.cascade(global, local_models);
|
||||
custom_presets = ctx_preset.cascade(global, custom_presets);
|
||||
|
||||
// note: if a model exists in both cached and local, local takes precedence
|
||||
common_presets final_presets;
|
||||
for (const auto & [name, preset] : cached_models) {
|
||||
final_presets[name] = preset;
|
||||
}
|
||||
for (const auto & [name, preset] : local_models) {
|
||||
final_presets[name] = preset;
|
||||
}
|
||||
|
||||
// process custom presets from INI
|
||||
for (const auto & [name, custom] : custom_presets) {
|
||||
if (final_presets.find(name) != final_presets.end()) {
|
||||
// apply custom config if exists
|
||||
common_preset & target = final_presets[name];
|
||||
target.merge(custom);
|
||||
} else {
|
||||
// otherwise add directly
|
||||
final_presets[name] = custom;
|
||||
}
|
||||
}
|
||||
|
||||
// server base preset from CLI args take highest precedence
|
||||
for (auto & [name, preset] : final_presets) {
|
||||
preset.merge(base_preset);
|
||||
}
|
||||
|
||||
// convert presets to server_model_meta and add to mapping
|
||||
for (const auto & preset : final_presets) {
|
||||
server_model_meta meta{
|
||||
/* preset */ presets.get_preset(model.name),
|
||||
/* name */ model.name,
|
||||
/* path */ model.path,
|
||||
/* path_mmproj */ model.path_mmproj,
|
||||
/* in_cache */ false,
|
||||
/* port */ 0,
|
||||
/* status */ SERVER_MODEL_STATUS_UNLOADED,
|
||||
/* last_used */ 0,
|
||||
/* args */ std::vector<std::string>(),
|
||||
/* exit_code */ 0
|
||||
};
|
||||
add_model(std::move(meta));
|
||||
}
|
||||
}
|
||||
// 3. custom-path models specified in presets
|
||||
auto custom_models = list_custom_path_models(presets);
|
||||
for (const auto & model : custom_models) {
|
||||
server_model_meta meta{
|
||||
/* preset */ presets.get_preset(model.name),
|
||||
/* name */ model.name,
|
||||
/* path */ model.path,
|
||||
/* path_mmproj */ model.path_mmproj,
|
||||
/* in_cache */ false,
|
||||
/* preset */ preset.second,
|
||||
/* name */ preset.first,
|
||||
/* port */ 0,
|
||||
/* status */ SERVER_MODEL_STATUS_UNLOADED,
|
||||
/* last_used */ 0,
|
||||
|
|
@ -359,10 +213,18 @@ void server_models::load_models() {
|
|||
};
|
||||
add_model(std::move(meta));
|
||||
}
|
||||
|
||||
// log available models
|
||||
{
|
||||
std::unordered_set<std::string> custom_names;
|
||||
for (const auto & [name, preset] : custom_presets) {
|
||||
custom_names.insert(name);
|
||||
}
|
||||
SRV_INF("Available models (%zu) (*: custom preset)\n", mapping.size());
|
||||
for (const auto & [name, inst] : mapping) {
|
||||
SRV_INF(" %c %s\n", inst.meta.preset.name.empty() ? ' ' : '*', name.c_str());
|
||||
bool has_custom = custom_names.find(name) != custom_names.end();
|
||||
SRV_INF(" %c %s\n", has_custom ? '*' : ' ', name.c_str());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -526,7 +388,7 @@ void server_models::load(const std::string & name) {
|
|||
{
|
||||
SRV_INF("spawning server instance with name=%s on port %d\n", inst.meta.name.c_str(), inst.meta.port);
|
||||
|
||||
presets.render_args(inst.meta); // update meta.args
|
||||
inst.meta.update_args(ctx_preset, bin_path); // render args
|
||||
|
||||
std::vector<std::string> child_args = inst.meta.args; // copy
|
||||
std::vector<std::string> child_env = base_env; // copy
|
||||
|
|
@ -877,7 +739,12 @@ void server_models_routes::init_routes() {
|
|||
{"args", meta.args},
|
||||
};
|
||||
if (!meta.preset.name.empty()) {
|
||||
status["preset"] = meta.preset.to_ini();
|
||||
common_preset preset_copy = meta.preset;
|
||||
unset_reserved_args(preset_copy, false);
|
||||
preset_copy.unset_option("LLAMA_ARG_HOST");
|
||||
preset_copy.unset_option("LLAMA_ARG_PORT");
|
||||
preset_copy.unset_option("LLAMA_ARG_ALIAS");
|
||||
status["preset"] = preset_copy.to_ini();
|
||||
}
|
||||
if (meta.is_failed()) {
|
||||
status["exit_code"] = meta.exit_code;
|
||||
|
|
@ -888,8 +755,6 @@ void server_models_routes::init_routes() {
|
|||
{"object", "model"}, // for OAI-compat
|
||||
{"owned_by", "llamacpp"}, // for OAI-compat
|
||||
{"created", t}, // for OAI-compat
|
||||
{"in_cache", meta.in_cache},
|
||||
{"path", meta.path},
|
||||
{"status", status},
|
||||
// TODO: add other fields, may require reading GGUF metadata
|
||||
});
|
||||
|
|
|
|||
|
|
@ -51,9 +51,6 @@ static std::string server_model_status_to_string(server_model_status status) {
|
|||
struct server_model_meta {
|
||||
common_preset preset;
|
||||
std::string name;
|
||||
std::string path;
|
||||
std::string path_mmproj; // only available if in_cache=false
|
||||
bool in_cache = false; // if true, use -hf; use -m otherwise
|
||||
int port = 0;
|
||||
server_model_status status = SERVER_MODEL_STATUS_UNLOADED;
|
||||
int64_t last_used = 0; // for LRU unloading
|
||||
|
|
@ -67,19 +64,8 @@ struct server_model_meta {
|
|||
bool is_failed() const {
|
||||
return status == SERVER_MODEL_STATUS_UNLOADED && exit_code != 0;
|
||||
}
|
||||
};
|
||||
|
||||
// the server_presets struct holds the presets read from presets.ini
|
||||
// as well as base args from the router server
|
||||
struct server_presets {
|
||||
common_presets presets;
|
||||
common_params_context ctx_params;
|
||||
std::map<common_arg, std::string> base_args;
|
||||
std::map<std::string, common_arg> control_args; // args reserved for server control
|
||||
|
||||
server_presets(int argc, char ** argv, common_params & base_params, const std::string & models_dir);
|
||||
common_preset get_preset(const std::string & name);
|
||||
void render_args(server_model_meta & meta);
|
||||
void update_args(common_preset_context & ctx_presets, std::string bin_path);
|
||||
};
|
||||
|
||||
struct subprocess_s;
|
||||
|
|
@ -97,11 +83,12 @@ private:
|
|||
std::condition_variable cv;
|
||||
std::map<std::string, instance_t> mapping;
|
||||
|
||||
common_params base_params;
|
||||
std::vector<std::string> base_args;
|
||||
std::vector<std::string> base_env;
|
||||
common_preset_context ctx_preset;
|
||||
|
||||
server_presets presets;
|
||||
common_params base_params;
|
||||
std::string bin_path;
|
||||
std::vector<std::string> base_env;
|
||||
common_preset base_preset; // base preset from llama-server CLI args
|
||||
|
||||
void update_meta(const std::string & name, const server_model_meta & meta);
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue