remove support for extra args
This commit is contained in:
parent
e514b86d2b
commit
e40f35fb61
|
|
@ -2488,13 +2488,6 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
|
||||||
params.models_max = value;
|
params.models_max = value;
|
||||||
}
|
}
|
||||||
).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_MODELS_MAX"));
|
).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_MODELS_MAX"));
|
||||||
add_opt(common_arg(
|
|
||||||
{"--models-allow-extra-args"},
|
|
||||||
string_format("for router server, allow extra arguments for models; important: some arguments can allow users to access local file system, use with caution (default: %s)", params.models_allow_extra_args ? "enabled" : "disabled"),
|
|
||||||
[](common_params & params) {
|
|
||||||
params.models_allow_extra_args = true;
|
|
||||||
}
|
|
||||||
).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_MODELS_ALLOW_EXTRA_ARGS"));
|
|
||||||
add_opt(common_arg(
|
add_opt(common_arg(
|
||||||
{"--no-models-autoload"},
|
{"--no-models-autoload"},
|
||||||
"disables automatic loading of models (default: enabled)",
|
"disables automatic loading of models (default: enabled)",
|
||||||
|
|
|
||||||
|
|
@ -462,7 +462,6 @@ struct common_params {
|
||||||
std::string models_dir = ""; // directory containing models for the router server
|
std::string models_dir = ""; // directory containing models for the router server
|
||||||
int models_max = 4; // maximum number of models to load simultaneously
|
int models_max = 4; // maximum number of models to load simultaneously
|
||||||
bool models_autoload = true; // automatically load models when requested via the router server
|
bool models_autoload = true; // automatically load models when requested via the router server
|
||||||
bool models_allow_extra_args = false; // allow passing extra arguments when loading models via the router server
|
|
||||||
|
|
||||||
bool log_json = false;
|
bool log_json = false;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -324,7 +324,7 @@ void server_models::unload_lru() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void server_models::load(const std::string & name, const std::vector<std::string> & extra_args, bool auto_load) {
|
void server_models::load(const std::string & name, bool auto_load) {
|
||||||
if (!has_model(name)) {
|
if (!has_model(name)) {
|
||||||
throw std::runtime_error("model name=" + name + " is not found");
|
throw std::runtime_error("model name=" + name + " is not found");
|
||||||
}
|
}
|
||||||
|
|
@ -381,13 +381,6 @@ void server_models::load(const std::string & name, const std::vector<std::string
|
||||||
child_args.push_back(inst.meta.name);
|
child_args.push_back(inst.meta.name);
|
||||||
child_args.push_back("--port");
|
child_args.push_back("--port");
|
||||||
child_args.push_back(std::to_string(inst.meta.port));
|
child_args.push_back(std::to_string(inst.meta.port));
|
||||||
|
|
||||||
// append extra args
|
|
||||||
if (base_params.models_allow_extra_args) {
|
|
||||||
for (const auto & arg : extra_args) {
|
|
||||||
child_args.push_back(arg);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<std::string> child_env = base_env; // copy
|
std::vector<std::string> child_env = base_env; // copy
|
||||||
|
|
@ -536,7 +529,7 @@ bool server_models::ensure_model_loaded(const std::string & name) {
|
||||||
}
|
}
|
||||||
if (meta->status == SERVER_MODEL_STATUS_UNLOADED) {
|
if (meta->status == SERVER_MODEL_STATUS_UNLOADED) {
|
||||||
SRV_INF("model name=%s is not loaded, loading...\n", name.c_str());
|
SRV_INF("model name=%s is not loaded, loading...\n", name.c_str());
|
||||||
load(name, {}, true);
|
load(name, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
SRV_INF("waiting until model name=%s is fully loaded...\n", name.c_str());
|
SRV_INF("waiting until model name=%s is fully loaded...\n", name.c_str());
|
||||||
|
|
|
||||||
|
|
@ -103,7 +103,7 @@ public:
|
||||||
std::vector<server_model_meta> get_all_meta();
|
std::vector<server_model_meta> get_all_meta();
|
||||||
|
|
||||||
// if auto_load is true, load the model with previous args if any
|
// if auto_load is true, load the model with previous args if any
|
||||||
void load(const std::string & name, const std::vector<std::string> & extra_args, bool auto_load);
|
void load(const std::string & name, bool auto_load);
|
||||||
void unload(const std::string & name);
|
void unload(const std::string & name);
|
||||||
void unload_all();
|
void unload_all();
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -3311,11 +3311,6 @@ public:
|
||||||
auto res = std::make_unique<server_res_generator>(ctx_server);
|
auto res = std::make_unique<server_res_generator>(ctx_server);
|
||||||
json body = json::parse(req.body);
|
json body = json::parse(req.body);
|
||||||
std::string name = json_value(body, "model", std::string());
|
std::string name = json_value(body, "model", std::string());
|
||||||
std::vector<std::string> extra_args = json_value(body, "extra_args", std::vector<std::string>());
|
|
||||||
if (!params.models_allow_extra_args && !extra_args.empty()) {
|
|
||||||
res->error(format_error_response("extra_args is not allowed", ERROR_TYPE_INVALID_REQUEST));
|
|
||||||
return res;
|
|
||||||
}
|
|
||||||
auto model = models->get_meta(name);
|
auto model = models->get_meta(name);
|
||||||
if (!model.has_value()) {
|
if (!model.has_value()) {
|
||||||
res->error(format_error_response("model is not found", ERROR_TYPE_NOT_FOUND));
|
res->error(format_error_response("model is not found", ERROR_TYPE_NOT_FOUND));
|
||||||
|
|
@ -3325,7 +3320,7 @@ public:
|
||||||
res->error(format_error_response("model is already loaded", ERROR_TYPE_INVALID_REQUEST));
|
res->error(format_error_response("model is already loaded", ERROR_TYPE_INVALID_REQUEST));
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
models->load(name, extra_args, false);
|
models->load(name, false);
|
||||||
res->ok({{"success", true}});
|
res->ok({{"success", true}});
|
||||||
return res;
|
return res;
|
||||||
};
|
};
|
||||||
|
|
@ -4014,9 +4009,6 @@ int main(int argc, char ** argv, char ** envp) {
|
||||||
LOG_INF("%s: router server is listening on %s\n", __func__, ctx_http.listening_address.c_str());
|
LOG_INF("%s: router server is listening on %s\n", __func__, ctx_http.listening_address.c_str());
|
||||||
LOG_INF("%s: NOTE: router mode is experimental\n", __func__);
|
LOG_INF("%s: NOTE: router mode is experimental\n", __func__);
|
||||||
LOG_INF("%s: it is not recommended to use this mode in untrusted environments\n", __func__);
|
LOG_INF("%s: it is not recommended to use this mode in untrusted environments\n", __func__);
|
||||||
if (params.models_allow_extra_args) {
|
|
||||||
LOG_WRN("%s: extra_args is enabled; this may lead to security issues if the server is exposed to untrusted clients\n", __func__);
|
|
||||||
}
|
|
||||||
if (ctx_http.thread.joinable()) {
|
if (ctx_http.thread.joinable()) {
|
||||||
ctx_http.thread.join(); // keep the main thread alive
|
ctx_http.thread.join(); // keep the main thread alive
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue