allow reusing the previous args when auto_load is set
This commit is contained in:
parent
f927e21ffc
commit
74685f4194
|
|
@ -322,7 +322,7 @@ void server_models::unload_lru() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void server_models::load(const std::string & name, const std::vector<std::string> & extra_args) {
|
void server_models::load(const std::string & name, const std::vector<std::string> & extra_args, bool auto_load) {
|
||||||
if (!has_model(name)) {
|
if (!has_model(name)) {
|
||||||
throw std::runtime_error("model name=" + name + " is not found");
|
throw std::runtime_error("model name=" + name + " is not found");
|
||||||
}
|
}
|
||||||
|
|
@ -352,26 +352,38 @@ void server_models::load(const std::string & name, const std::vector<std::string
|
||||||
std::string exec_path = get_server_exec_path().string();
|
std::string exec_path = get_server_exec_path().string();
|
||||||
SRV_INF("spawning server instance with name=%s on port %d\n", inst.meta.name.c_str(), inst.meta.port);
|
SRV_INF("spawning server instance with name=%s on port %d\n", inst.meta.name.c_str(), inst.meta.port);
|
||||||
|
|
||||||
std::vector<std::string> child_args = base_args; // copy
|
std::vector<std::string> child_args;
|
||||||
if (inst.meta.in_cache) {
|
if (auto_load && !meta.args.empty()) {
|
||||||
child_args.push_back("-hf");
|
child_args = meta.args; // reuse previous args
|
||||||
child_args.push_back(inst.meta.name);
|
// update port arg
|
||||||
} else {
|
for (size_t i = 0; i < child_args.size(); i++) {
|
||||||
child_args.push_back("-m");
|
if (child_args[i] == "--port" && i + 1 < child_args.size()) {
|
||||||
child_args.push_back(inst.meta.path);
|
child_args[i + 1] = std::to_string(inst.meta.port);
|
||||||
if (!inst.meta.path_mmproj.empty()) {
|
break;
|
||||||
child_args.push_back("--mmproj");
|
}
|
||||||
child_args.push_back(inst.meta.path_mmproj);
|
|
||||||
}
|
}
|
||||||
}
|
} else {
|
||||||
child_args.push_back("--alias");
|
child_args = base_args; // copy
|
||||||
child_args.push_back(inst.meta.name);
|
if (inst.meta.in_cache) {
|
||||||
child_args.push_back("--port");
|
child_args.push_back("-hf");
|
||||||
child_args.push_back(std::to_string(inst.meta.port));
|
child_args.push_back(inst.meta.name);
|
||||||
|
} else {
|
||||||
|
child_args.push_back("-m");
|
||||||
|
child_args.push_back(inst.meta.path);
|
||||||
|
if (!inst.meta.path_mmproj.empty()) {
|
||||||
|
child_args.push_back("--mmproj");
|
||||||
|
child_args.push_back(inst.meta.path_mmproj);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
child_args.push_back("--alias");
|
||||||
|
child_args.push_back(inst.meta.name);
|
||||||
|
child_args.push_back("--port");
|
||||||
|
child_args.push_back(std::to_string(inst.meta.port));
|
||||||
|
|
||||||
// append extra args
|
// append extra args
|
||||||
for (const auto & arg : extra_args) {
|
for (const auto & arg : extra_args) {
|
||||||
child_args.push_back(arg);
|
child_args.push_back(arg);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<std::string> child_env = base_env; // copy
|
std::vector<std::string> child_env = base_env; // copy
|
||||||
|
|
@ -502,7 +514,7 @@ bool server_models::ensure_model_loaded(const std::string & name) {
|
||||||
return false; // already loaded
|
return false; // already loaded
|
||||||
}
|
}
|
||||||
SRV_INF("model name=%s is not loaded, loading...\n", name.c_str());
|
SRV_INF("model name=%s is not loaded, loading...\n", name.c_str());
|
||||||
load(name, {});
|
load(name, {}, true);
|
||||||
wait_until_loaded(name);
|
wait_until_loaded(name);
|
||||||
{
|
{
|
||||||
// check final status
|
// check final status
|
||||||
|
|
|
||||||
|
|
@ -100,7 +100,8 @@ public:
|
||||||
// return a copy of all model metadata
|
// return a copy of all model metadata
|
||||||
std::vector<server_model_meta> get_all_meta();
|
std::vector<server_model_meta> get_all_meta();
|
||||||
|
|
||||||
void load(const std::string & name, const std::vector<std::string> & extra_args);
|
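// if auto_load is true and args from a previous load are available, reuse them
// (only the --port value is refreshed); otherwise build the args from scratch and append extra_args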
|
||||||
|
void load(const std::string & name, const std::vector<std::string> & extra_args, bool auto_load);
|
||||||
void unload(const std::string & name);
|
void unload(const std::string & name);
|
||||||
void unload_all();
|
void unload_all();
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -5168,7 +5168,7 @@ public:
|
||||||
res->error(format_error_response("model is already loaded", ERROR_TYPE_INVALID_REQUEST));
|
res->error(format_error_response("model is already loaded", ERROR_TYPE_INVALID_REQUEST));
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
models->load(name, extra_args);
|
models->load(name, extra_args, false);
|
||||||
res->ok({{"success", true}});
|
res->ok({{"success", true}});
|
||||||
return res;
|
return res;
|
||||||
};
|
};
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue