add --models-allow-extra-args for security

commit 6ed192b4dd
parent 5ef3f990b9
Author: Xuan Son Nguyen
Date:   2025-11-24 12:01:16 +01:00

5 changed files with 22 additions and 4 deletions

@@ -2488,6 +2488,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
             params.models_max = value;
         }
     ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_MODELS_MAX"));
+    add_opt(common_arg(
+        {"--models-allow-extra-args"},
+        string_format("for router server, allow extra arguments for models; important: some arguments can allow users to access the local file system, use with caution (default: %s)", params.models_allow_extra_args ? "enabled" : "disabled"),
+        [](common_params & params) {
+            params.models_allow_extra_args = true;
+        }
+    ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_MODELS_ALLOW_EXTRA_ARGS"));
     add_opt(common_arg(
         {"--no-models-autoload"},
         "disables automatic loading of models (default: enabled)",

@@ -462,6 +462,7 @@ struct common_params {
     std::string models_dir = ""; // directory containing models for the router server
     int models_max = 4; // maximum number of models to load simultaneously
     bool models_autoload = true; // automatically load models when requested via the router server
+    bool models_allow_extra_args = false; // allow passing extra arguments when loading models via the router server
     bool log_json = false;

@@ -197,6 +197,7 @@ The project is under active development, and we are [looking for feedback and co
 | `--slot-save-path PATH` | path to save slot kv cache (default: disabled) |
 | `--models-dir PATH` | directory containing models for the router server (default: disabled)<br/>(env: LLAMA_ARG_MODELS_DIR) |
 | `--models-max N` | for router server, maximum number of models to load simultaneously (default: 4, 0 = unlimited)<br/>(env: LLAMA_ARG_MODELS_MAX) |
+| `--models-allow-extra-args` | for router server, allow extra arguments for models; important: some arguments can allow users to access the local file system, use with caution (default: disabled)<br/>(env: LLAMA_ARG_MODELS_ALLOW_EXTRA_ARGS) |
 | `--no-models-autoload` | disables automatic loading of models (default: enabled)<br/>(env: LLAMA_ARG_NO_MODELS_AUTOLOAD) |
 | `--jinja` | use jinja template for chat (default: disabled)<br/>(env: LLAMA_ARG_JINJA) |
 | `--reasoning-format FORMAT` | controls whether thought tags are allowed and/or extracted from the response, and in which format they're returned; one of:<br/>- none: leaves thoughts unparsed in `message.content`<br/>- deepseek: puts thoughts in `message.reasoning_content`<br/>- deepseek-legacy: keeps `<think>` tags in `message.content` while also populating `message.reasoning_content`<br/>(default: auto)<br/>(env: LLAMA_ARG_THINK) |
@@ -1495,8 +1496,8 @@ The `status` object can be:
 Load a model
 Payload:
-- `model`: name of the model to be loaded
-- `extra_args`: (optional) an array of additional arguments to be passed to the model instance
+- `model`: name of the model to be loaded.
+- `extra_args`: (optional) an array of additional arguments to be passed to the model instance. Note: you must start the server with `--models-allow-extra-args` to enable this feature.
 ```json
 {

@@ -383,8 +383,10 @@ void server_models::load(const std::string & name, const std::vector<std::string
     child_args.push_back(std::to_string(inst.meta.port));
     // append extra args
-    for (const auto & arg : extra_args) {
-        child_args.push_back(arg);
+    if (base_params.models_allow_extra_args) {
+        for (const auto & arg : extra_args) {
+            child_args.push_back(arg);
+        }
     }
 }
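
Stripped of the server types, the change is a single gate: client-supplied arguments reach the child process argv only when the operator opted in; with the flag off they are silently dropped at this layer, while the HTTP handler below rejects them explicitly. A minimal self-contained sketch of the same pattern, with simplified names that are not the actual server API:

```cpp
#include <string>
#include <vector>

// Sketch of the gating above: extra arguments are appended to the
// child argv only when --models-allow-extra-args was passed.
static std::vector<std::string> build_child_args(
        bool allow_extra_args,
        std::vector<std::string> base_args,
        const std::vector<std::string> & extra_args) {
    if (allow_extra_args) {
        base_args.insert(base_args.end(), extra_args.begin(), extra_args.end());
    }
    return base_args; // with the flag off, extra_args never reach the child
}
```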

@@ -5165,6 +5165,10 @@ public:
     json body = json::parse(req.body);
     std::string name = json_value(body, "model", std::string());
     std::vector<std::string> extra_args = json_value(body, "extra_args", std::vector<std::string>());
+    if (!params.models_allow_extra_args && !extra_args.empty()) {
+        res->error(format_error_response("extra_args is not allowed", ERROR_TYPE_INVALID_REQUEST));
+        return res;
+    }
     auto model = models->get_meta(name);
     if (!model.has_value()) {
         res->error(format_error_response("model is not found", ERROR_TYPE_NOT_FOUND));
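
To make the new check concrete: against a server started without `--models-allow-extra-args`, a load request that carries `extra_args` is now rejected up front. The payload follows the README section above; the model name and arguments are hypothetical:

```json
{
  "model": "my-model",
  "extra_args": ["--ctx-size", "8192"]
}
```

The handler replies with an error built by `format_error_response`; assuming the server's usual error envelope, the response body would look roughly like:

```json
{
  "error": {
    "code": 400,
    "message": "extra_args is not allowed",
    "type": "invalid_request_error"
  }
}
```
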
@@ -5862,6 +5866,9 @@ int main(int argc, char ** argv, char ** envp) {
     LOG_INF("%s: router server is listening on %s\n", __func__, ctx_http.listening_address.c_str());
     LOG_INF("%s: NOTE: router mode is experimental\n", __func__);
     LOG_INF("%s: it is not recommended to use this mode in untrusted environments\n", __func__);
+    if (params.models_allow_extra_args) {
+        LOG_WRN("%s: extra_args is enabled; this may lead to security issues if the server is exposed to untrusted clients\n", __func__);
+    }
     ctx_http.is_ready.store(true);
     if (ctx_http.thread.joinable()) {
         ctx_http.thread.join(); // keep the main thread alive