Allow reusing a model's previous args when it is auto-loaded

This commit is contained in:
Xuan Son Nguyen 2025-11-23 15:42:33 +01:00
parent f927e21ffc
commit 74685f4194
3 changed files with 35 additions and 22 deletions

View File

@ -322,7 +322,7 @@ void server_models::unload_lru() {
} }
} }
void server_models::load(const std::string & name, const std::vector<std::string> & extra_args) { void server_models::load(const std::string & name, const std::vector<std::string> & extra_args, bool auto_load) {
if (!has_model(name)) { if (!has_model(name)) {
throw std::runtime_error("model name=" + name + " is not found"); throw std::runtime_error("model name=" + name + " is not found");
} }
@ -352,26 +352,38 @@ void server_models::load(const std::string & name, const std::vector<std::string
std::string exec_path = get_server_exec_path().string(); std::string exec_path = get_server_exec_path().string();
SRV_INF("spawning server instance with name=%s on port %d\n", inst.meta.name.c_str(), inst.meta.port); SRV_INF("spawning server instance with name=%s on port %d\n", inst.meta.name.c_str(), inst.meta.port);
std::vector<std::string> child_args = base_args; // copy std::vector<std::string> child_args;
if (inst.meta.in_cache) { if (auto_load && !meta.args.empty()) {
child_args.push_back("-hf"); child_args = meta.args; // reuse previous args
child_args.push_back(inst.meta.name); // update port arg
} else { for (size_t i = 0; i < child_args.size(); i++) {
child_args.push_back("-m"); if (child_args[i] == "--port" && i + 1 < child_args.size()) {
child_args.push_back(inst.meta.path); child_args[i + 1] = std::to_string(inst.meta.port);
if (!inst.meta.path_mmproj.empty()) { break;
child_args.push_back("--mmproj"); }
child_args.push_back(inst.meta.path_mmproj);
} }
} } else {
child_args.push_back("--alias"); child_args = base_args; // copy
child_args.push_back(inst.meta.name); if (inst.meta.in_cache) {
child_args.push_back("--port"); child_args.push_back("-hf");
child_args.push_back(std::to_string(inst.meta.port)); child_args.push_back(inst.meta.name);
} else {
child_args.push_back("-m");
child_args.push_back(inst.meta.path);
if (!inst.meta.path_mmproj.empty()) {
child_args.push_back("--mmproj");
child_args.push_back(inst.meta.path_mmproj);
}
}
child_args.push_back("--alias");
child_args.push_back(inst.meta.name);
child_args.push_back("--port");
child_args.push_back(std::to_string(inst.meta.port));
// append extra args // append extra args
for (const auto & arg : extra_args) { for (const auto & arg : extra_args) {
child_args.push_back(arg); child_args.push_back(arg);
}
} }
std::vector<std::string> child_env = base_env; // copy std::vector<std::string> child_env = base_env; // copy
@ -502,7 +514,7 @@ bool server_models::ensure_model_loaded(const std::string & name) {
return false; // already loaded return false; // already loaded
} }
SRV_INF("model name=%s is not loaded, loading...\n", name.c_str()); SRV_INF("model name=%s is not loaded, loading...\n", name.c_str());
load(name, {}); load(name, {}, true);
wait_until_loaded(name); wait_until_loaded(name);
{ {
// check final status // check final status

View File

@ -100,7 +100,8 @@ public:
// return a copy of all model metadata // return a copy of all model metadata
std::vector<server_model_meta> get_all_meta(); std::vector<server_model_meta> get_all_meta();
void load(const std::string & name, const std::vector<std::string> & extra_args); // if auto_load is true and the model has previous args, reuse them (only the --port value is updated; extra_args is not appended)
void load(const std::string & name, const std::vector<std::string> & extra_args, bool auto_load);
void unload(const std::string & name); void unload(const std::string & name);
void unload_all(); void unload_all();

View File

@ -5168,7 +5168,7 @@ public:
res->error(format_error_response("model is already loaded", ERROR_TYPE_INVALID_REQUEST)); res->error(format_error_response("model is already loaded", ERROR_TYPE_INVALID_REQUEST));
return res; return res;
} }
models->load(name, extra_args); models->load(name, extra_args, false);
res->ok({{"success", true}}); res->ok({{"success", true}});
return res; return res;
}; };