Merge be9837b66e into 9e2e2198b0
This commit is contained in:
commit
3e6e58c196
|
|
@ -3859,6 +3859,12 @@ void common_params_add_preset_options(std::vector<common_arg> & args) {
|
|||
[](common_params &, const std::string &) { /* unused */ }
|
||||
).set_env(COMMON_ARG_PRESET_LOAD_ON_STARTUP).set_preset_only());
|
||||
|
||||
args.push_back(common_arg(
|
||||
{"default-model"}, "NAME",
|
||||
"in server router mode, this model will be used if model not found",
|
||||
[](common_params &, const std::string &) { /* unused */ }
|
||||
).set_env(COMMON_ARG_PRESET_DEFAULT_MODEL).set_preset_only());
|
||||
|
||||
args.push_back(common_arg(
|
||||
{"stop-timeout"}, "SECONDS",
|
||||
"in server router mode, force-kill model instance after this many seconds of graceful shutdown",
|
||||
|
|
|
|||
|
|
@ -11,6 +11,7 @@
|
|||
// pseudo-env variables used to identify preset-only arguments
|
||||
#define COMMON_ARG_PRESET_LOAD_ON_STARTUP "__PRESET_LOAD_ON_STARTUP"
|
||||
#define COMMON_ARG_PRESET_STOP_TIMEOUT "__PRESET_STOP_TIMEOUT"
|
||||
#define COMMON_ARG_PRESET_DEFAULT_MODEL "__PRESET_DEFAULT_MODEL"
|
||||
|
||||
//
|
||||
// CLI argument parsing
|
||||
|
|
|
|||
|
|
@ -1552,6 +1552,8 @@ The precedence rule for preset options is as follows:
|
|||
|
||||
We also offer additional options that are exclusive to presets (these aren't treated as command-line arguments):
|
||||
- `load-on-startup` (boolean): Controls whether the model loads automatically when the server starts
|
||||
- `default-model` (boolean): Marks this model as the fallback used when a request does not specify a model or specifies a model that is not found.
|
||||
When multiple presets set `default-model`, only the first one found is used and a warning is logged.
|
||||
- `stop-timeout` (int, seconds): After requested unload, wait for this many seconds before forcing termination (default: 10)
|
||||
|
||||
### Routing requests
|
||||
|
|
|
|||
|
|
@ -306,6 +306,30 @@ void server_models::load_models() {
|
|||
add_model(std::move(meta));
|
||||
}
|
||||
|
||||
// determine default model if any
|
||||
bool first_found = false;
|
||||
std::string first_default;
|
||||
for (const auto & [name, inst] : mapping) {
|
||||
std::string val;
|
||||
if (!inst.meta.preset.get_option(COMMON_ARG_PRESET_DEFAULT_MODEL, val)) {
|
||||
continue;
|
||||
}
|
||||
if (!first_found) {
|
||||
default_model_name = name;
|
||||
SRV_INF("Default preset model: %s\n", name.c_str());
|
||||
first_default = name;
|
||||
first_found = true;
|
||||
} else {
|
||||
SRV_WRN(
|
||||
"Multiple default models detected: '%s' and '%s'; "
|
||||
"using '%s' as default\n",
|
||||
name.c_str(),
|
||||
first_default.c_str(),
|
||||
first_default.c_str()
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// log available models
|
||||
{
|
||||
std::unordered_set<std::string> custom_names;
|
||||
|
|
@ -374,6 +398,15 @@ void server_models::load_models() {
|
|||
}
|
||||
}
|
||||
|
||||
// Map the model name taken from a request to the name the router should use.
//
// Returns `requested` unchanged when it is non-empty and has_model(requested)
// is true. Otherwise returns default_model_name when one is set, or
// `requested` unchanged when no default is set, so the caller still sees the
// original (possibly empty or unknown) name.
//
// NOTE(review): default_model_name is read here without taking `mutex`;
// presumably it is only written in load_models() before requests are
// served - confirm.
std::string server_models::resolve_model_name(const std::string & requested) {
|
||||
// If a non-empty request matches a known model, use it as-is.
|
||||
if (!requested.empty() && has_model(requested)) {
|
||||
return requested;
|
||||
}
|
||||
// Otherwise fall back to the default model if one is set; with no default, hand back the request unchanged.
|
||||
return default_model_name.empty() ? requested : default_model_name;
|
||||
}
|
||||
|
||||
void server_models::update_meta(const std::string & name, const server_model_meta & meta) {
|
||||
std::lock_guard<std::mutex> lk(mutex);
|
||||
auto it = mapping.find(name);
|
||||
|
|
@ -904,6 +937,7 @@ void server_models_routes::init_routes() {
|
|||
this->proxy_get = [this](const server_http_req & req) {
|
||||
std::string method = "GET";
|
||||
std::string name = req.get_param("model");
|
||||
name = models.resolve_model_name(name);
|
||||
bool autoload = is_autoload(params, req);
|
||||
auto error_res = std::make_unique<server_http_res>();
|
||||
if (!router_validate_model(name, models, autoload, error_res)) {
|
||||
|
|
@ -916,6 +950,7 @@ void server_models_routes::init_routes() {
|
|||
std::string method = "POST";
|
||||
json body = json::parse(req.body);
|
||||
std::string name = json_value(body, "model", std::string());
|
||||
name = models.resolve_model_name(name);
|
||||
bool autoload = is_autoload(params, req);
|
||||
auto error_res = std::make_unique<server_http_res>();
|
||||
if (!router_validate_model(name, models, autoload, error_res)) {
|
||||
|
|
|
|||
|
|
@ -97,6 +97,7 @@ private:
|
|||
std::string bin_path;
|
||||
std::vector<std::string> base_env;
|
||||
common_preset base_preset; // base preset from llama-server CLI args
|
||||
std::string default_model_name;
|
||||
|
||||
void update_meta(const std::string & name, const server_model_meta & meta);
|
||||
|
||||
|
|
@ -143,6 +144,9 @@ public:
|
|||
// notify the router server that a model instance is ready
|
||||
// return the monitoring thread (to be joined by the caller)
|
||||
static std::thread setup_child_server(const std::function<void(int)> & shutdown_handler);
|
||||
|
||||
// Resolve model name: fall back to the default model if the requested name is empty or not found (returns the requested name unchanged when no default is set)
|
||||
std::string resolve_model_name(const std::string & requested);
|
||||
};
|
||||
|
||||
struct server_models_routes {
|
||||
|
|
|
|||
Loading…
Reference in New Issue