diff --git a/common/arg.cpp b/common/arg.cpp index 0da57ab1e6..062046c0d0 100644 --- a/common/arg.cpp +++ b/common/arg.cpp @@ -2488,13 +2488,6 @@ common_params_context common_params_parser_init(common_params & params, llama_ex params.models_max = value; } ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_MODELS_MAX")); - add_opt(common_arg( - {"--models-allow-extra-args"}, - string_format("for router server, allow extra arguments for models; important: some arguments can allow users to access local file system, use with caution (default: %s)", params.models_allow_extra_args ? "enabled" : "disabled"), - [](common_params & params) { - params.models_allow_extra_args = true; - } - ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_MODELS_ALLOW_EXTRA_ARGS")); add_opt(common_arg( {"--no-models-autoload"}, "disables automatic loading of models (default: enabled)", diff --git a/common/common.h b/common/common.h index ff42cc6584..4ac9700d7b 100644 --- a/common/common.h +++ b/common/common.h @@ -462,7 +462,6 @@ struct common_params { std::string models_dir = ""; // directory containing models for the router server int models_max = 4; // maximum number of models to load simultaneously bool models_autoload = true; // automatically load models when requested via the router server - bool models_allow_extra_args = false; // allow passing extra arguments when loading models via the router server bool log_json = false; diff --git a/tools/server/server-models.cpp b/tools/server/server-models.cpp index 86d190236d..bf8e77854c 100644 --- a/tools/server/server-models.cpp +++ b/tools/server/server-models.cpp @@ -324,7 +324,7 @@ void server_models::unload_lru() { } } -void server_models::load(const std::string & name, const std::vector & extra_args, bool auto_load) { +void server_models::load(const std::string & name, bool auto_load) { if (!has_model(name)) { throw std::runtime_error("model name=" + name + " is not found"); } @@ -381,13 +381,6 @@ void server_models::load(const std::string & name, const std::vector child_env = base_env; // copy @@ -536,7 +529,7 @@ bool server_models::ensure_model_loaded(const std::string & name) { } if (meta->status == SERVER_MODEL_STATUS_UNLOADED) { SRV_INF("model name=%s is not loaded, loading...\n", name.c_str()); - load(name, {}, true); + load(name, true); } SRV_INF("waiting until model name=%s is fully loaded...\n", name.c_str()); diff --git a/tools/server/server-models.h b/tools/server/server-models.h index 68a8ac302e..dd8487ca21 100644 --- a/tools/server/server-models.h +++ b/tools/server/server-models.h @@ -103,7 +103,7 @@ public: std::vector get_all_meta(); // if auto_load is true, load the model with previous args if any - void load(const std::string & name, const std::vector & extra_args, bool auto_load); + void load(const std::string & name, bool auto_load); void unload(const std::string & name); void unload_all(); diff --git a/tools/server/server.cpp b/tools/server/server.cpp index 201101c8f8..5ebcb08af8 100644 --- a/tools/server/server.cpp +++ b/tools/server/server.cpp @@ -3311,11 +3311,6 @@ public: auto res = std::make_unique(ctx_server); json body = json::parse(req.body); std::string name = json_value(body, "model", std::string()); - std::vector extra_args = json_value(body, "extra_args", std::vector()); - if (!params.models_allow_extra_args && !extra_args.empty()) { - res->error(format_error_response("extra_args is not allowed", ERROR_TYPE_INVALID_REQUEST)); - return res; - } auto model = models->get_meta(name); if (!model.has_value()) { res->error(format_error_response("model is not found", ERROR_TYPE_NOT_FOUND)); @@ -3325,7 +3320,7 @@ public: res->error(format_error_response("model is already loaded", ERROR_TYPE_INVALID_REQUEST)); return res; } - models->load(name, extra_args, false); + models->load(name, false); res->ok({{"success", true}}); return res; }; @@ -4014,9 +4009,6 @@ int main(int argc, char ** argv, char ** envp) { LOG_INF("%s: router server is listening on %s\n", __func__, ctx_http.listening_address.c_str()); LOG_INF("%s: NOTE: router mode is experimental\n", __func__); LOG_INF("%s: it is not recommended to use this mode in untrusted environments\n", __func__); - if (params.models_allow_extra_args) { - LOG_WRN("%s: extra_args is enabled; this may lead to security issues if the server is exposed to untrusted clients\n", __func__); - } if (ctx_http.thread.joinable()) { ctx_http.thread.join(); // keep the main thread alive }