allow reusing the previous launch args when a model is auto-loaded

This commit is contained in:
Xuan Son Nguyen 2025-11-23 15:42:33 +01:00
parent f927e21ffc
commit 74685f4194
3 changed files with 35 additions and 22 deletions

View File

@ -322,7 +322,7 @@ void server_models::unload_lru() {
}
}
void server_models::load(const std::string & name, const std::vector<std::string> & extra_args) {
void server_models::load(const std::string & name, const std::vector<std::string> & extra_args, bool auto_load) {
if (!has_model(name)) {
throw std::runtime_error("model name=" + name + " is not found");
}
@ -352,26 +352,38 @@ void server_models::load(const std::string & name, const std::vector<std::string
std::string exec_path = get_server_exec_path().string();
SRV_INF("spawning server instance with name=%s on port %d\n", inst.meta.name.c_str(), inst.meta.port);
std::vector<std::string> child_args = base_args; // copy
if (inst.meta.in_cache) {
child_args.push_back("-hf");
child_args.push_back(inst.meta.name);
} else {
child_args.push_back("-m");
child_args.push_back(inst.meta.path);
if (!inst.meta.path_mmproj.empty()) {
child_args.push_back("--mmproj");
child_args.push_back(inst.meta.path_mmproj);
std::vector<std::string> child_args;
if (auto_load && !meta.args.empty()) {
child_args = meta.args; // reuse previous args
// update port arg
for (size_t i = 0; i < child_args.size(); i++) {
if (child_args[i] == "--port" && i + 1 < child_args.size()) {
child_args[i + 1] = std::to_string(inst.meta.port);
break;
}
}
}
child_args.push_back("--alias");
child_args.push_back(inst.meta.name);
child_args.push_back("--port");
child_args.push_back(std::to_string(inst.meta.port));
} else {
child_args = base_args; // copy
if (inst.meta.in_cache) {
child_args.push_back("-hf");
child_args.push_back(inst.meta.name);
} else {
child_args.push_back("-m");
child_args.push_back(inst.meta.path);
if (!inst.meta.path_mmproj.empty()) {
child_args.push_back("--mmproj");
child_args.push_back(inst.meta.path_mmproj);
}
}
child_args.push_back("--alias");
child_args.push_back(inst.meta.name);
child_args.push_back("--port");
child_args.push_back(std::to_string(inst.meta.port));
// append extra args
for (const auto & arg : extra_args) {
child_args.push_back(arg);
// append extra args
for (const auto & arg : extra_args) {
child_args.push_back(arg);
}
}
std::vector<std::string> child_env = base_env; // copy
@ -502,7 +514,7 @@ bool server_models::ensure_model_loaded(const std::string & name) {
return false; // already loaded
}
SRV_INF("model name=%s is not loaded, loading...\n", name.c_str());
load(name, {});
load(name, {}, true);
wait_until_loaded(name);
{
// check final status

View File

@ -100,7 +100,8 @@ public:
// return a copy of all model metadata
std::vector<server_model_meta> get_all_meta();
void load(const std::string & name, const std::vector<std::string> & extra_args);
// if auto_load is true and args from a previous load exist, reuse them
// (only the --port value is updated; extra_args is not appended)
void load(const std::string & name, const std::vector<std::string> & extra_args, bool auto_load);
void unload(const std::string & name);
void unload_all();

View File

@ -5168,7 +5168,7 @@ public:
res->error(format_error_response("model is already loaded", ERROR_TYPE_INVALID_REQUEST));
return res;
}
models->load(name, extra_args);
models->load(name, extra_args, false);
res->ok({{"success", true}});
return res;
};