From 6ed192b4dd2b42635008385ce35910c46bb37203 Mon Sep 17 00:00:00 2001 From: Xuan Son Nguyen Date: Mon, 24 Nov 2025 12:01:16 +0100 Subject: [PATCH] add --models-allow-extra-args for security --- common/arg.cpp | 7 +++++++ common/common.h | 1 + tools/server/README.md | 5 +++-- tools/server/server-models.cpp | 6 ++++-- tools/server/server.cpp | 7 +++++++ 5 files changed, 22 insertions(+), 4 deletions(-) diff --git a/common/arg.cpp b/common/arg.cpp index 062046c0d0..0da57ab1e6 100644 --- a/common/arg.cpp +++ b/common/arg.cpp @@ -2488,6 +2488,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex params.models_max = value; } ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_MODELS_MAX")); + add_opt(common_arg( + {"--models-allow-extra-args"}, + string_format("for router server, allow extra arguments for models; important: some arguments can allow users to access local file system, use with caution (default: %s)", params.models_allow_extra_args ? "enabled" : "disabled"), + [](common_params & params) { + params.models_allow_extra_args = true; + } + ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_MODELS_ALLOW_EXTRA_ARGS")); add_opt(common_arg( {"--no-models-autoload"}, "disables automatic loading of models (default: enabled)", diff --git a/common/common.h b/common/common.h index 4ac9700d7b..ff42cc6584 100644 --- a/common/common.h +++ b/common/common.h @@ -462,6 +462,7 @@ struct common_params { std::string models_dir = ""; // directory containing models for the router server int models_max = 4; // maximum number of models to load simultaneously bool models_autoload = true; // automatically load models when requested via the router server + bool models_allow_extra_args = false; // allow passing extra arguments when loading models via the router server bool log_json = false; diff --git a/tools/server/README.md b/tools/server/README.md index 24984d8696..6b911b635a 100644 --- a/tools/server/README.md +++ 
b/tools/server/README.md @@ -197,6 +197,7 @@ The project is under active development, and we are [looking for feedback and co | `--slot-save-path PATH` | path to save slot kv cache (default: disabled) | | `--models-dir PATH` | directory containing models for the router server (default: disabled)
(env: LLAMA_ARG_MODELS_DIR) | | `--models-max N` | for router server, maximum number of models to load simultaneously (default: 4, 0 = unlimited)
(env: LLAMA_ARG_MODELS_MAX) | +| `--models-allow-extra-args` | for router server, allow extra arguments for models; important: some arguments can allow users to access local file system, use with caution (default: disabled)
(env: LLAMA_ARG_MODELS_ALLOW_EXTRA_ARGS) | | `--no-models-autoload` | disables automatic loading of models (default: enabled)
(env: LLAMA_ARG_NO_MODELS_AUTOLOAD) | | `--jinja` | use jinja template for chat (default: disabled)
(env: LLAMA_ARG_JINJA) | | `--reasoning-format FORMAT` | controls whether thought tags are allowed and/or extracted from the response, and in which format they're returned; one of:
- none: leaves thoughts unparsed in `message.content`
- deepseek: puts thoughts in `message.reasoning_content`
- deepseek-legacy: keeps `<think>` tags in `message.content` while also populating `message.reasoning_content`
(default: auto)
(env: LLAMA_ARG_THINK) | @@ -1495,8 +1496,8 @@ The `status` object can be: Load a model Payload: -- `model`: name of the model to be loaded -- `extra_args`: (optional) an array of additional arguments to be passed to the model instance +- `model`: name of the model to be loaded. +- `extra_args`: (optional) an array of additional arguments to be passed to the model instance. Note: you must start the server with `--models-allow-extra-args` to enable this feature. ```json { diff --git a/tools/server/server-models.cpp b/tools/server/server-models.cpp index 6ab0a9c226..cb19d2c341 100644 --- a/tools/server/server-models.cpp +++ b/tools/server/server-models.cpp @@ -383,8 +383,10 @@ void server_models::load(const std::string & name, const std::vector extra_args = json_value(body, "extra_args", std::vector()); + if (!params.models_allow_extra_args && !extra_args.empty()) { + res->error(format_error_response("extra_args is not allowed", ERROR_TYPE_INVALID_REQUEST)); + return res; + } auto model = models->get_meta(name); if (!model.has_value()) { res->error(format_error_response("model is not found", ERROR_TYPE_NOT_FOUND)); @@ -5862,6 +5866,9 @@ int main(int argc, char ** argv, char ** envp) { LOG_INF("%s: router server is listening on %s\n", __func__, ctx_http.listening_address.c_str()); LOG_INF("%s: NOTE: router mode is experimental\n", __func__); LOG_INF("%s: it is not recommended to use this mode in untrusted environments\n", __func__); + if (params.models_allow_extra_args) { + LOG_WRN("%s: extra_args is enabled; this may lead to security issues if the server is exposed to untrusted clients\n", __func__); + } ctx_http.is_ready.store(true); if (ctx_http.thread.joinable()) { ctx_http.thread.join(); // keep the main thread alive