presets: refactor, allow cascade presets from different sources, add global section (#18169)

* presets: refactor, allow cascade presets from different sources

* update docs

* fix neg arg handling

* fix empty mmproj

* also filter out server-controlled args before to_ini()

* skip loading custom_models if not specified

* fix unset_reserved_args

* fix crash on windows
This commit is contained in:
Xuan-Son Nguyen 2025-12-19 12:08:20 +01:00 committed by GitHub
parent acb73d8340
commit 98c1c7a7bf
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 353 additions and 260 deletions

View File

@ -772,6 +772,11 @@ bool common_params_to_map(int argc, char ** argv, llama_example ex, std::map<com
}
auto opt = *arg_to_options[arg];
std::string val;
if (opt.value_hint == nullptr && opt.value_hint_2 == nullptr) {
// bool arg (need to reverse the meaning for negative args)
bool is_neg = std::find(opt.args_neg.begin(), opt.args_neg.end(), arg) != opt.args_neg.end();
val = is_neg ? "0" : "1";
}
if (opt.value_hint != nullptr) {
// arg with single value
check_arg(i);

View File

@ -2,6 +2,7 @@
#include "preset.h"
#include "peg-parser.h"
#include "log.h"
#include "download.h"
#include <fstream>
#include <sstream>
@ -15,9 +16,13 @@ static std::string rm_leading_dashes(const std::string & str) {
return str.substr(pos);
}
std::vector<std::string> common_preset::to_args() const {
std::vector<std::string> common_preset::to_args(const std::string & bin_path) const {
std::vector<std::string> args;
if (!bin_path.empty()) {
args.push_back(bin_path);
}
for (const auto & [opt, value] : options) {
args.push_back(opt.args.back()); // use the last arg as the main arg
if (opt.value_hint == nullptr && opt.value_hint_2 == nullptr) {
@ -63,6 +68,52 @@ std::string common_preset::to_ini() const {
return ss.str();
}
void common_preset::set_option(const common_preset_context & ctx, const std::string & env, const std::string & value) {
// try if option exists, update it
for (auto & [opt, val] : options) {
if (opt.env && env == opt.env) {
val = value;
return;
}
}
// if option does not exist, we need to add it
if (ctx.key_to_opt.find(env) == ctx.key_to_opt.end()) {
throw std::runtime_error(string_format(
"%s: option with env '%s' not found in ctx_params",
__func__, env.c_str()
));
}
options[ctx.key_to_opt.at(env)] = value;
}
void common_preset::unset_option(const std::string & env) {
for (auto it = options.begin(); it != options.end(); ) {
const common_arg & opt = it->first;
if (opt.env && env == opt.env) {
it = options.erase(it);
return;
} else {
++it;
}
}
}
bool common_preset::get_option(const std::string & env, std::string & value) const {
for (const auto & [opt, val] : options) {
if (opt.env && env == opt.env) {
value = val;
return true;
}
}
return false;
}
void common_preset::merge(const common_preset & other) {
for (const auto & [opt, val] : other.options) {
options[opt] = val; // overwrite existing options
}
}
static std::map<std::string, std::map<std::string, std::string>> parse_ini_from_file(const std::string & path) {
std::map<std::string, std::map<std::string, std::string>> parsed;
@ -172,9 +223,12 @@ static std::string parse_bool_arg(const common_arg & arg, const std::string & ke
return value;
}
common_presets common_presets_load(const std::string & path, common_params_context & ctx_params) {
common_preset_context::common_preset_context(llama_example ex)
: ctx_params(common_params_parser_init(default_params, ex)),
key_to_opt(get_map_key_opt(ctx_params)) {}
common_presets common_preset_context::load_from_ini(const std::string & path, common_preset & global) const {
common_presets out;
auto key_to_opt = get_map_key_opt(ctx_params);
auto ini_data = parse_ini_from_file(path);
for (auto section : ini_data) {
@ -188,7 +242,7 @@ common_presets common_presets_load(const std::string & path, common_params_conte
for (const auto & [key, value] : section.second) {
LOG_DBG("option: %s = %s\n", key.c_str(), value.c_str());
if (key_to_opt.find(key) != key_to_opt.end()) {
auto & opt = key_to_opt[key];
const auto & opt = key_to_opt.at(key);
if (is_bool_arg(opt)) {
preset.options[opt] = parse_bool_arg(opt, key, value);
} else {
@ -199,8 +253,137 @@ common_presets common_presets_load(const std::string & path, common_params_conte
// TODO: maybe warn about unknown key?
}
}
if (preset.name == "*") {
// handle global preset
global = preset;
} else {
out[preset.name] = preset;
}
}
return out;
}
common_presets common_preset_context::load_from_cache() const {
common_presets out;
auto cached_models = common_list_cached_models();
for (const auto & model : cached_models) {
common_preset preset;
preset.name = model.to_string();
preset.set_option(*this, "LLAMA_ARG_HF_REPO", model.to_string());
out[preset.name] = preset;
}
return out;
}
struct local_model {
std::string name;
std::string path;
std::string path_mmproj;
};
common_presets common_preset_context::load_from_models_dir(const std::string & models_dir) const {
if (!std::filesystem::exists(models_dir) || !std::filesystem::is_directory(models_dir)) {
throw std::runtime_error(string_format("error: '%s' does not exist or is not a directory\n", models_dir.c_str()));
}
std::vector<local_model> models;
auto scan_subdir = [&models](const std::string & subdir_path, const std::string & name) {
auto files = fs_list(subdir_path, false);
common_file_info model_file;
common_file_info first_shard_file;
common_file_info mmproj_file;
for (const auto & file : files) {
if (string_ends_with(file.name, ".gguf")) {
if (file.name.find("mmproj") != std::string::npos) {
mmproj_file = file;
} else if (file.name.find("-00001-of-") != std::string::npos) {
first_shard_file = file;
} else {
model_file = file;
}
}
}
// single file model
local_model model{
/* name */ name,
/* path */ first_shard_file.path.empty() ? model_file.path : first_shard_file.path,
/* path_mmproj */ mmproj_file.path // can be empty
};
if (!model.path.empty()) {
models.push_back(model);
}
};
auto files = fs_list(models_dir, true);
for (const auto & file : files) {
if (file.is_dir) {
scan_subdir(file.path, file.name);
} else if (string_ends_with(file.name, ".gguf")) {
// single file model
std::string name = file.name;
string_replace_all(name, ".gguf", "");
local_model model{
/* name */ name,
/* path */ file.path,
/* path_mmproj */ ""
};
models.push_back(model);
}
}
// convert local models to presets
common_presets out;
for (const auto & model : models) {
common_preset preset;
preset.name = model.name;
preset.set_option(*this, "LLAMA_ARG_MODEL", model.path);
if (!model.path_mmproj.empty()) {
preset.set_option(*this, "LLAMA_ARG_MMPROJ", model.path_mmproj);
}
out[preset.name] = preset;
}
return out;
}
common_preset common_preset_context::load_from_args(int argc, char ** argv) const {
common_preset preset;
preset.name = COMMON_PRESET_DEFAULT_NAME;
bool ok = common_params_to_map(argc, argv, ctx_params.ex, preset.options);
if (!ok) {
throw std::runtime_error("failed to parse CLI arguments into preset");
}
return preset;
}
common_presets common_preset_context::cascade(const common_presets & base, const common_presets & added) const {
common_presets out = base; // copy
for (const auto & [name, preset_added] : added) {
if (out.find(name) != out.end()) {
// if exists, merge
common_preset & target = out[name];
target.merge(preset_added);
} else {
// otherwise, add directly
out[name] = preset_added;
}
}
return out;
}
common_presets common_preset_context::cascade(const common_preset & base, const common_presets & presets) const {
common_presets out;
for (const auto & [name, preset] : presets) {
common_preset tmp = base; // copy
tmp.name = name;
tmp.merge(preset);
out[name] = std::move(tmp);
}
return out;
}

View File

@ -13,20 +13,62 @@
constexpr const char * COMMON_PRESET_DEFAULT_NAME = "default";
struct common_preset_context;
struct common_preset {
std::string name;
// TODO: support repeated args in the future
// options are stored as common_arg to string mapping, representing CLI arg and its value
std::map<common_arg, std::string> options;
// convert preset to CLI argument list
std::vector<std::string> to_args() const;
std::vector<std::string> to_args(const std::string & bin_path = "") const;
// convert preset to INI format string
std::string to_ini() const;
// TODO: maybe implement to_env() if needed
// modify preset options where argument is identified by its env variable
void set_option(const common_preset_context & ctx, const std::string & env, const std::string & value);
// unset option by its env variable
void unset_option(const std::string & env);
// get option value by its env variable, return false if not found
bool get_option(const std::string & env, std::string & value) const;
// merge another preset into this one, overwriting existing options
void merge(const common_preset & other);
};
// interface for multiple presets in one file
using common_presets = std::map<std::string, common_preset>;
common_presets common_presets_load(const std::string & path, common_params_context & ctx_params);
// context for loading and editing presets
struct common_preset_context {
common_params default_params; // unused for now
common_params_context ctx_params;
std::map<std::string, common_arg> key_to_opt;
common_preset_context(llama_example ex);
// load presets from INI file
common_presets load_from_ini(const std::string & path, common_preset & global) const;
// generate presets from cached models
common_presets load_from_cache() const;
// generate presets from local models directory
// for the directory structure, see "Using multiple models" in server/README.md
common_presets load_from_models_dir(const std::string & models_dir) const;
// generate one preset from CLI arguments
common_preset load_from_args(int argc, char ** argv) const;
// cascade multiple presets if exist on both: base < added
// if preset does not exist in base, it will be added without modification
common_presets cascade(const common_presets & base, const common_presets & added) const;
// apply presets over a base preset (same idea as CSS cascading)
common_presets cascade(const common_preset & base, const common_presets & presets) const;
};

View File

@ -1443,6 +1443,12 @@ Example:
```ini
version = 1
; (Optional) This section provides global settings shared across all presets.
; If the same key is defined in a specific preset, it will override the value in this global section.
[*]
c = 8192
n-gpu-layer = 8
; If the key corresponds to an existing model on the server,
; this will be used as the default config for that model
[ggml-org/MY-MODEL-GGUF:Q8_0]
@ -1462,12 +1468,17 @@ model-draft = ./my-models/draft.gguf
model-draft = /Users/abc/my-models/draft.gguf
; If the key does NOT correspond to an existing model,
; you need to specify at least the model path
; you need to specify at least the model path or HF repo
[custom_model]
model = /Users/abc/my-awesome-model-Q4_K_M.gguf
```
Note: some arguments are controlled by router (e.g., host, port, API key, HF repo, model alias). They will be removed or overwritten upload loading.
Note: some arguments are controlled by router (e.g., host, port, API key, HF repo, model alias). They will be removed or overwritten upon loading.
The precedence rule for preset options is as follows:
1. **Command-line arguments** passed to `llama-server` (highest priority)
2. **Model-specific options** defined in the preset file (e.g. `[ggml-org/MY-MODEL...]`)
3. **Global options** defined in the preset file (`[*]`)
### Routing requests

View File

@ -82,154 +82,30 @@ static std::filesystem::path get_server_exec_path() {
#endif
}
struct local_model {
std::string name;
std::string path;
std::string path_mmproj;
};
static std::vector<local_model> list_local_models(const std::string & dir) {
if (!std::filesystem::exists(dir) || !std::filesystem::is_directory(dir)) {
throw std::runtime_error(string_format("error: '%s' does not exist or is not a directory\n", dir.c_str()));
}
std::vector<local_model> models;
auto scan_subdir = [&models](const std::string & subdir_path, const std::string & name) {
auto files = fs_list(subdir_path, false);
common_file_info model_file;
common_file_info first_shard_file;
common_file_info mmproj_file;
for (const auto & file : files) {
if (string_ends_with(file.name, ".gguf")) {
if (file.name.find("mmproj") != std::string::npos) {
mmproj_file = file;
} else if (file.name.find("-00001-of-") != std::string::npos) {
first_shard_file = file;
} else {
model_file = file;
}
}
}
// single file model
local_model model{
/* name */ name,
/* path */ first_shard_file.path.empty() ? model_file.path : first_shard_file.path,
/* path_mmproj */ mmproj_file.path // can be empty
};
if (!model.path.empty()) {
models.push_back(model);
}
};
auto files = fs_list(dir, true);
for (const auto & file : files) {
if (file.is_dir) {
scan_subdir(file.path, file.name);
} else if (string_ends_with(file.name, ".gguf")) {
// single file model
std::string name = file.name;
string_replace_all(name, ".gguf", "");
local_model model{
/* name */ name,
/* path */ file.path,
/* path_mmproj */ ""
};
models.push_back(model);
}
}
return models;
}
//
// server_presets
//
server_presets::server_presets(int argc, char ** argv, common_params & base_params, const std::string & presets_path)
: ctx_params(common_params_parser_init(base_params, LLAMA_EXAMPLE_SERVER)) {
if (!presets_path.empty()) {
presets = common_presets_load(presets_path, ctx_params);
SRV_INF("Loaded %zu presets from %s\n", presets.size(), presets_path.c_str());
}
// populate reserved args (will be appended by the router)
for (auto & opt : ctx_params.options) {
if (opt.env == nullptr) {
continue;
}
std::string env = opt.env;
if (env == "LLAMA_ARG_PORT" ||
env == "LLAMA_ARG_HOST" ||
env == "LLAMA_ARG_ALIAS" ||
env == "LLAMA_ARG_API_KEY" ||
env == "LLAMA_ARG_MODELS_DIR" ||
env == "LLAMA_ARG_MODELS_MAX" ||
env == "LLAMA_ARG_MODELS_PRESET" ||
env == "LLAMA_ARG_MODEL" ||
env == "LLAMA_ARG_MMPROJ" ||
env == "LLAMA_ARG_HF_REPO" ||
env == "LLAMA_ARG_NO_MODELS_AUTOLOAD" ||
env == "LLAMA_ARG_SSL_KEY_FILE" ||
env == "LLAMA_ARG_SSL_CERT_FILE") {
control_args[env] = opt;
}
}
// read base args from router's argv
common_params_to_map(argc, argv, LLAMA_EXAMPLE_SERVER, base_args);
// remove any router-controlled args from base_args
for (const auto & cargs : control_args) {
auto it = base_args.find(cargs.second);
if (it != base_args.end()) {
base_args.erase(it);
}
static void unset_reserved_args(common_preset & preset, bool unset_model_args) {
preset.unset_option("LLAMA_ARG_SSL_KEY_FILE");
preset.unset_option("LLAMA_ARG_SSL_CERT_FILE");
preset.unset_option("LLAMA_API_KEY");
preset.unset_option("LLAMA_ARG_MODELS_DIR");
preset.unset_option("LLAMA_ARG_MODELS_MAX");
preset.unset_option("LLAMA_ARG_MODELS_PRESET");
preset.unset_option("LLAMA_ARG_MODELS_AUTOLOAD");
if (unset_model_args) {
preset.unset_option("LLAMA_ARG_MODEL");
preset.unset_option("LLAMA_ARG_MMPROJ");
preset.unset_option("LLAMA_ARG_HF_REPO");
}
}
common_preset server_presets::get_preset(const std::string & name) {
auto it = presets.find(name);
if (it != presets.end()) {
return it->second;
}
return common_preset();
}
void server_presets::render_args(server_model_meta & meta) {
common_preset preset = meta.preset; // copy
// merging 3 kinds of args:
// 1. model-specific args (from preset)
// force removing control args if any
for (auto & cargs : control_args) {
if (preset.options.find(cargs.second) != preset.options.end()) {
SRV_WRN("Preset '%s' contains reserved arg '%s', removing it\n", preset.name.c_str(), cargs.second.args[0]);
preset.options.erase(cargs.second);
}
}
// 2. base args (from router)
// inherit from base args
for (const auto & [arg, value] : base_args) {
preset.options[arg] = value;
}
// 3. control args (from router)
// set control values
preset.options[control_args["LLAMA_ARG_HOST"]] = CHILD_ADDR;
preset.options[control_args["LLAMA_ARG_PORT"]] = std::to_string(meta.port);
preset.options[control_args["LLAMA_ARG_ALIAS"]] = meta.name;
if (meta.in_cache) {
preset.options[control_args["LLAMA_ARG_HF_REPO"]] = meta.name;
} else {
preset.options[control_args["LLAMA_ARG_MODEL"]] = meta.path;
if (!meta.path_mmproj.empty()) {
preset.options[control_args["LLAMA_ARG_MMPROJ"]] = meta.path_mmproj;
}
}
// disable SSL for child processes (HTTPS already handled by router)
preset.options[control_args["LLAMA_ARG_SSL_KEY_FILE"]] = "";
preset.options[control_args["LLAMA_ARG_SSL_CERT_FILE"]] = "";
meta.args = preset.to_args();
// add back the binary path at the front
meta.args.insert(meta.args.begin(), get_server_exec_path().string());
void server_model_meta::update_args(common_preset_context & ctx_preset, std::string bin_path) {
// update params
unset_reserved_args(preset, false);
preset.set_option(ctx_preset, "LLAMA_ARG_HOST", CHILD_ADDR);
preset.set_option(ctx_preset, "LLAMA_ARG_PORT", std::to_string(port));
preset.set_option(ctx_preset, "LLAMA_ARG_ALIAS", name);
// TODO: maybe validate preset before rendering ?
// render args
args = preset.to_args(bin_path);
}
//
@ -240,20 +116,22 @@ server_models::server_models(
const common_params & params,
int argc,
char ** argv,
char ** envp) : base_params(params), presets(argc, argv, base_params, params.models_preset) {
for (int i = 0; i < argc; i++) {
base_args.push_back(std::string(argv[i]));
}
char ** envp)
: ctx_preset(LLAMA_EXAMPLE_SERVER),
base_params(params),
base_preset(ctx_preset.load_from_args(argc, argv)) {
for (char ** env = envp; *env != nullptr; env++) {
base_env.push_back(std::string(*env));
}
GGML_ASSERT(!base_args.empty());
// clean up base preset
unset_reserved_args(base_preset, true);
// set binary path
try {
base_args[0] = get_server_exec_path().string();
bin_path = get_server_exec_path().string();
} catch (const std::exception & e) {
bin_path = argv[0];
LOG_WRN("failed to get server executable path: %s\n", e.what());
LOG_WRN("using original argv[0] as fallback: %s\n", base_args[0].c_str());
LOG_WRN("using original argv[0] as fallback: %s\n", argv[0]);
}
load_models();
}
@ -262,7 +140,7 @@ void server_models::add_model(server_model_meta && meta) {
if (mapping.find(meta.name) != mapping.end()) {
throw std::runtime_error(string_format("model '%s' appears multiple times", meta.name.c_str()));
}
presets.render_args(meta); // populate meta.args
meta.update_args(ctx_preset, bin_path); // render args
std::string name = meta.name;
mapping[name] = instance_t{
/* subproc */ std::make_shared<subprocess_s>(),
@ -271,86 +149,62 @@ void server_models::add_model(server_model_meta && meta) {
};
}
static std::vector<local_model> list_custom_path_models(server_presets & presets) {
// detect any custom-path models in presets
std::vector<local_model> custom_models;
for (auto & [model_name, preset] : presets.presets) {
local_model model;
model.name = model_name;
std::vector<common_arg> to_erase;
for (auto & [arg, value] : preset.options) {
std::string env(arg.env ? arg.env : "");
if (env == "LLAMA_ARG_MODEL") {
model.path = value;
to_erase.push_back(arg);
}
if (env == "LLAMA_ARG_MMPROJ") {
model.path_mmproj = value;
to_erase.push_back(arg);
}
}
for (auto & arg : to_erase) {
preset.options.erase(arg);
}
if (!model.name.empty() && !model.path.empty()) {
custom_models.push_back(model);
}
}
return custom_models;
}
// TODO: allow refreshing cached model list
void server_models::load_models() {
// loading models from 3 sources:
// 1. cached models
auto cached_models = common_list_cached_models();
for (const auto & model : cached_models) {
server_model_meta meta{
/* preset */ presets.get_preset(model.to_string()),
/* name */ model.to_string(),
/* path */ model.manifest_path,
/* path_mmproj */ "", // auto-detected when loading
/* in_cache */ true,
/* port */ 0,
/* status */ SERVER_MODEL_STATUS_UNLOADED,
/* last_used */ 0,
/* args */ std::vector<std::string>(),
/* exit_code */ 0
};
add_model(std::move(meta));
}
// 2. local models specificed via --models-dir
common_presets cached_models = ctx_preset.load_from_cache();
SRV_INF("Loaded %zu cached model presets\n", cached_models.size());
// 2. local models from --models-dir
common_presets local_models;
if (!base_params.models_dir.empty()) {
auto local_models = list_local_models(base_params.models_dir);
for (const auto & model : local_models) {
if (mapping.find(model.name) != mapping.end()) {
// already exists in cached models, skip
continue;
local_models = ctx_preset.load_from_models_dir(base_params.models_dir);
SRV_INF("Loaded %zu local model presets from %s\n", local_models.size(), base_params.models_dir.c_str());
}
// 3. custom-path models from presets
common_preset global = {};
common_presets custom_presets = {};
if (!base_params.models_preset.empty()) {
custom_presets = ctx_preset.load_from_ini(base_params.models_preset, global);
SRV_INF("Loaded %zu custom model presets from %s\n", custom_presets.size(), base_params.models_preset.c_str());
}
// cascade, apply global preset first
cached_models = ctx_preset.cascade(global, cached_models);
local_models = ctx_preset.cascade(global, local_models);
custom_presets = ctx_preset.cascade(global, custom_presets);
// note: if a model exists in both cached and local, local takes precedence
common_presets final_presets;
for (const auto & [name, preset] : cached_models) {
final_presets[name] = preset;
}
for (const auto & [name, preset] : local_models) {
final_presets[name] = preset;
}
// process custom presets from INI
for (const auto & [name, custom] : custom_presets) {
if (final_presets.find(name) != final_presets.end()) {
// apply custom config if exists
common_preset & target = final_presets[name];
target.merge(custom);
} else {
// otherwise add directly
final_presets[name] = custom;
}
}
// server base preset from CLI args take highest precedence
for (auto & [name, preset] : final_presets) {
preset.merge(base_preset);
}
// convert presets to server_model_meta and add to mapping
for (const auto & preset : final_presets) {
server_model_meta meta{
/* preset */ presets.get_preset(model.name),
/* name */ model.name,
/* path */ model.path,
/* path_mmproj */ model.path_mmproj,
/* in_cache */ false,
/* port */ 0,
/* status */ SERVER_MODEL_STATUS_UNLOADED,
/* last_used */ 0,
/* args */ std::vector<std::string>(),
/* exit_code */ 0
};
add_model(std::move(meta));
}
}
// 3. custom-path models specified in presets
auto custom_models = list_custom_path_models(presets);
for (const auto & model : custom_models) {
server_model_meta meta{
/* preset */ presets.get_preset(model.name),
/* name */ model.name,
/* path */ model.path,
/* path_mmproj */ model.path_mmproj,
/* in_cache */ false,
/* preset */ preset.second,
/* name */ preset.first,
/* port */ 0,
/* status */ SERVER_MODEL_STATUS_UNLOADED,
/* last_used */ 0,
@ -359,10 +213,18 @@ void server_models::load_models() {
};
add_model(std::move(meta));
}
// log available models
{
std::unordered_set<std::string> custom_names;
for (const auto & [name, preset] : custom_presets) {
custom_names.insert(name);
}
SRV_INF("Available models (%zu) (*: custom preset)\n", mapping.size());
for (const auto & [name, inst] : mapping) {
SRV_INF(" %c %s\n", inst.meta.preset.name.empty() ? ' ' : '*', name.c_str());
bool has_custom = custom_names.find(name) != custom_names.end();
SRV_INF(" %c %s\n", has_custom ? '*' : ' ', name.c_str());
}
}
}
@ -526,7 +388,7 @@ void server_models::load(const std::string & name) {
{
SRV_INF("spawning server instance with name=%s on port %d\n", inst.meta.name.c_str(), inst.meta.port);
presets.render_args(inst.meta); // update meta.args
inst.meta.update_args(ctx_preset, bin_path); // render args
std::vector<std::string> child_args = inst.meta.args; // copy
std::vector<std::string> child_env = base_env; // copy
@ -877,7 +739,12 @@ void server_models_routes::init_routes() {
{"args", meta.args},
};
if (!meta.preset.name.empty()) {
status["preset"] = meta.preset.to_ini();
common_preset preset_copy = meta.preset;
unset_reserved_args(preset_copy, false);
preset_copy.unset_option("LLAMA_ARG_HOST");
preset_copy.unset_option("LLAMA_ARG_PORT");
preset_copy.unset_option("LLAMA_ARG_ALIAS");
status["preset"] = preset_copy.to_ini();
}
if (meta.is_failed()) {
status["exit_code"] = meta.exit_code;
@ -888,8 +755,6 @@ void server_models_routes::init_routes() {
{"object", "model"}, // for OAI-compat
{"owned_by", "llamacpp"}, // for OAI-compat
{"created", t}, // for OAI-compat
{"in_cache", meta.in_cache},
{"path", meta.path},
{"status", status},
// TODO: add other fields, may require reading GGUF metadata
});

View File

@ -51,9 +51,6 @@ static std::string server_model_status_to_string(server_model_status status) {
struct server_model_meta {
common_preset preset;
std::string name;
std::string path;
std::string path_mmproj; // only available if in_cache=false
bool in_cache = false; // if true, use -hf; use -m otherwise
int port = 0;
server_model_status status = SERVER_MODEL_STATUS_UNLOADED;
int64_t last_used = 0; // for LRU unloading
@ -67,19 +64,8 @@ struct server_model_meta {
bool is_failed() const {
return status == SERVER_MODEL_STATUS_UNLOADED && exit_code != 0;
}
};
// the server_presets struct holds the presets read from presets.ini
// as well as base args from the router server
struct server_presets {
common_presets presets;
common_params_context ctx_params;
std::map<common_arg, std::string> base_args;
std::map<std::string, common_arg> control_args; // args reserved for server control
server_presets(int argc, char ** argv, common_params & base_params, const std::string & models_dir);
common_preset get_preset(const std::string & name);
void render_args(server_model_meta & meta);
void update_args(common_preset_context & ctx_presets, std::string bin_path);
};
struct subprocess_s;
@ -97,11 +83,12 @@ private:
std::condition_variable cv;
std::map<std::string, instance_t> mapping;
common_params base_params;
std::vector<std::string> base_args;
std::vector<std::string> base_env;
common_preset_context ctx_preset;
server_presets presets;
common_params base_params;
std::string bin_path;
std::vector<std::string> base_env;
common_preset base_preset; // base preset from llama-server CLI args
void update_meta(const std::string & name, const server_model_meta & meta);