add --models-allow-extra-args for security

commit 6ed192b4dd
parent 5ef3f990b9
Author: Xuan Son Nguyen
Date:   2025-11-24 12:01:16 +01:00

5 changed files with 22 additions and 4 deletions

@@ -2488,6 +2488,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
             params.models_max = value;
         }
     ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_MODELS_MAX"));
+    add_opt(common_arg(
+        {"--models-allow-extra-args"},
+        string_format("for router server, allow extra arguments for models; important: some arguments can allow users to access the local file system, use with caution (default: %s)", params.models_allow_extra_args ? "enabled" : "disabled"),
+        [](common_params & params) {
+            params.models_allow_extra_args = true;
+        }
+    ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_MODELS_ALLOW_EXTRA_ARGS"));
     add_opt(common_arg(
         {"--no-models-autoload"},
         "disables automatic loading of models (default: enabled)",

@@ -462,6 +462,7 @@ struct common_params {
     std::string models_dir = ""; // directory containing models for the router server
     int models_max = 4; // maximum number of models to load simultaneously
     bool models_autoload = true; // automatically load models when requested via the router server
+    bool models_allow_extra_args = false; // allow passing extra arguments when loading models via the router server
     bool log_json = false;

@@ -197,6 +197,7 @@ The project is under active development, and we are [looking for feedback and co
 | `--slot-save-path PATH` | path to save slot kv cache (default: disabled) |
 | `--models-dir PATH` | directory containing models for the router server (default: disabled)<br/>(env: LLAMA_ARG_MODELS_DIR) |
 | `--models-max N` | for router server, maximum number of models to load simultaneously (default: 4, 0 = unlimited)<br/>(env: LLAMA_ARG_MODELS_MAX) |
+| `--models-allow-extra-args` | for router server, allow extra arguments for models; important: some arguments can allow users to access the local file system, use with caution (default: disabled)<br/>(env: LLAMA_ARG_MODELS_ALLOW_EXTRA_ARGS) |
 | `--no-models-autoload` | disables automatic loading of models (default: enabled)<br/>(env: LLAMA_ARG_NO_MODELS_AUTOLOAD) |
 | `--jinja` | use jinja template for chat (default: disabled)<br/>(env: LLAMA_ARG_JINJA) |
 | `--reasoning-format FORMAT` | controls whether thought tags are allowed and/or extracted from the response, and in which format they're returned; one of:<br/>- none: leaves thoughts unparsed in `message.content`<br/>- deepseek: puts thoughts in `message.reasoning_content`<br/>- deepseek-legacy: keeps `<think>` tags in `message.content` while also populating `message.reasoning_content`<br/>(default: auto)<br/>(env: LLAMA_ARG_THINK) |
@@ -1495,8 +1496,8 @@ The `status` object can be:
 Load a model
 Payload:
-- `model`: name of the model to be loaded
-- `extra_args`: (optional) an array of additional arguments to be passed to the model instance
+- `model`: name of the model to be loaded.
+- `extra_args`: (optional) an array of additional arguments to be passed to the model instance. Note: you must start the server with `--models-allow-extra-args` to enable this feature.
 ```json
 {

@@ -383,8 +383,10 @@ void server_models::load(const std::string & name, const std::vector<std::string
     child_args.push_back(std::to_string(inst.meta.port));
     // append extra args
-    for (const auto & arg : extra_args) {
-        child_args.push_back(arg);
+    if (base_params.models_allow_extra_args) {
+        for (const auto & arg : extra_args) {
+            child_args.push_back(arg);
+        }
     }
 }
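
Stripped of the server types, the change is a single gate: client-supplied arguments reach the child process argv only when the operator opted in; with the flag off they are silently dropped at this layer, while the HTTP handler below rejects them explicitly. A minimal self-contained sketch of the same pattern, with simplified names that are not the actual server API:

```cpp
#include <string>
#include <vector>

// Sketch of the gating above: extra arguments are appended to the
// child argv only when --models-allow-extra-args was passed.
static std::vector<std::string> build_child_args(
        bool allow_extra_args,
        std::vector<std::string> base_args,
        const std::vector<std::string> & extra_args) {
    if (allow_extra_args) {
        base_args.insert(base_args.end(), extra_args.begin(), extra_args.end());
    }
    return base_args; // with the flag off, extra_args never reach the child
}
```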

@@ -5165,6 +5165,10 @@ public:
     json body = json::parse(req.body);
     std::string name = json_value(body, "model", std::string());
     std::vector<std::string> extra_args = json_value(body, "extra_args", std::vector<std::string>());
+    if (!params.models_allow_extra_args && !extra_args.empty()) {
+        res->error(format_error_response("extra_args is not allowed", ERROR_TYPE_INVALID_REQUEST));
+        return res;
+    }
     auto model = models->get_meta(name);
     if (!model.has_value()) {
         res->error(format_error_response("model is not found", ERROR_TYPE_NOT_FOUND));
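
To make the new check concrete: against a server started without `--models-allow-extra-args`, a load request that carries `extra_args` is now rejected up front. The payload follows the README section above; the model name and arguments are hypothetical:

```json
{
  "model": "my-model",
  "extra_args": ["--ctx-size", "8192"]
}
```

The handler replies with an error built by `format_error_response`; assuming the server's usual error envelope, the response body would look roughly like:

```json
{
  "error": {
    "code": 400,
    "message": "extra_args is not allowed",
    "type": "invalid_request_error"
  }
}
```
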
@@ -5862,6 +5866,9 @@ int main(int argc, char ** argv, char ** envp) {
     LOG_INF("%s: router server is listening on %s\n", __func__, ctx_http.listening_address.c_str());
     LOG_INF("%s: NOTE: router mode is experimental\n", __func__);
     LOG_INF("%s: it is not recommended to use this mode in untrusted environments\n", __func__);
+    if (params.models_allow_extra_args) {
+        LOG_WRN("%s: extra_args is enabled; this may lead to security issues if the server is exposed to untrusted clients\n", __func__);
+    }
     ctx_http.is_ready.store(true);
     if (ctx_http.thread.joinable()) {
         ctx_http.thread.join(); // keep the main thread alive