diff --git a/tools/server/server-models.cpp b/tools/server/server-models.cpp index 285e1e7f7c..cf81540f5a 100644 --- a/tools/server/server-models.cpp +++ b/tools/server/server-models.cpp @@ -322,7 +322,7 @@ void server_models::unload_lru() { } } -void server_models::load(const std::string & name, const std::vector & extra_args) { +void server_models::load(const std::string & name, const std::vector & extra_args, bool auto_load) { if (!has_model(name)) { throw std::runtime_error("model name=" + name + " is not found"); } @@ -352,26 +352,38 @@ void server_models::load(const std::string & name, const std::vector child_args = base_args; // copy - if (inst.meta.in_cache) { - child_args.push_back("-hf"); - child_args.push_back(inst.meta.name); - } else { - child_args.push_back("-m"); - child_args.push_back(inst.meta.path); - if (!inst.meta.path_mmproj.empty()) { - child_args.push_back("--mmproj"); - child_args.push_back(inst.meta.path_mmproj); + std::vector child_args; + if (auto_load && !meta.args.empty()) { + child_args = meta.args; // reuse previous args + // update port arg + for (size_t i = 0; i < child_args.size(); i++) { + if (child_args[i] == "--port" && i + 1 < child_args.size()) { + child_args[i + 1] = std::to_string(inst.meta.port); + break; + } } - } - child_args.push_back("--alias"); - child_args.push_back(inst.meta.name); - child_args.push_back("--port"); - child_args.push_back(std::to_string(inst.meta.port)); + } else { + child_args = base_args; // copy + if (inst.meta.in_cache) { + child_args.push_back("-hf"); + child_args.push_back(inst.meta.name); + } else { + child_args.push_back("-m"); + child_args.push_back(inst.meta.path); + if (!inst.meta.path_mmproj.empty()) { + child_args.push_back("--mmproj"); + child_args.push_back(inst.meta.path_mmproj); + } + } + child_args.push_back("--alias"); + child_args.push_back(inst.meta.name); + child_args.push_back("--port"); + child_args.push_back(std::to_string(inst.meta.port)); - // append extra args - for (const auto & arg : extra_args) { - child_args.push_back(arg); + // append extra args + for (const auto & arg : extra_args) { + child_args.push_back(arg); + } } std::vector child_env = base_env; // copy @@ -502,7 +514,7 @@ bool server_models::ensure_model_loaded(const std::string & name) { return false; // already loaded } SRV_INF("model name=%s is not loaded, loading...\n", name.c_str()); - load(name, {}); + load(name, {}, true); wait_until_loaded(name); { // check final status diff --git a/tools/server/server-models.h b/tools/server/server-models.h index e192d3dd6e..ed08c5023e 100644 --- a/tools/server/server-models.h +++ b/tools/server/server-models.h @@ -100,7 +100,8 @@ public: // return a copy of all model metadata std::vector get_all_meta(); - void load(const std::string & name, const std::vector & extra_args); + // if auto_load is true, load the model with previous args if any + void load(const std::string & name, const std::vector & extra_args, bool auto_load); void unload(const std::string & name); void unload_all(); diff --git a/tools/server/server.cpp b/tools/server/server.cpp index ab825e24ba..bf06cc5133 100644 --- a/tools/server/server.cpp +++ b/tools/server/server.cpp @@ -5168,7 +5168,7 @@ public: res->error(format_error_response("model is already loaded", ERROR_TYPE_INVALID_REQUEST)); return res; } - models->load(name, extra_args); + models->load(name, extra_args, false); res->ok({{"success", true}}); return res; };