decouple server_models from server_routes

parent c1dfccd078
commit a82dbbfb30
@@ -621,6 +621,201 @@ void server_models::setup_child_server(const common_params & base_params, int router_port, const std::string & name, std::function<void(int)> & shutdown_handler) {
 }
+
+//
+// server_models_routes
+//
+
+static void res_ok(std::unique_ptr<server_http_res> & res, const json & response_data) {
+    res->status = 200;
+    res->data = safe_json_to_str(response_data);
+}
+
+static void res_error(std::unique_ptr<server_http_res> & res, const json & error_data) {
+    res->status = json_value(error_data, "code", 500);
+    res->data = safe_json_to_str({{ "error", error_data }});
+}
+
+static bool router_validate_model(const std::string & name, server_models & models, bool models_autoload, std::unique_ptr<server_http_res> & res) {
+    if (name.empty()) {
+        res_error(res, format_error_response("model name is missing from the request", ERROR_TYPE_INVALID_REQUEST));
+        return false;
+    }
+    auto meta = models.get_meta(name);
+    if (!meta.has_value()) {
+        res_error(res, format_error_response("model not found", ERROR_TYPE_INVALID_REQUEST));
+        return false;
+    }
+    if (models_autoload) {
+        models.ensure_model_loaded(name);
+    } else {
+        if (meta->status != SERVER_MODEL_STATUS_LOADED) {
+            res_error(res, format_error_response("model is not loaded", ERROR_TYPE_INVALID_REQUEST));
+            return false;
+        }
+    }
+    return true;
+}
+
+void server_models_routes::init_routes() {
+    this->get_router_props = [this](const server_http_req & req) {
+        std::string name = req.get_param("model");
+        if (name.empty()) {
+            // main instance
+            auto res = std::make_unique<server_http_res>();
+            res_ok(res, {
+                // TODO: add support for this on web UI
+                {"role", "router"},
+                {"max_instances", 4}, // dummy value for testing
+                // this is a dummy response to make sure webui doesn't break
+                {"model_alias", "llama-server"},
+                {"model_path", "none"},
+                {"default_generation_settings", {
+                    {"params", json{}},
+                    {"n_ctx", 0},
+                }},
+            });
+            return res;
+        }
+        return proxy_get(req);
+    };
+
+    this->proxy_get = [this](const server_http_req & req) {
+        std::string method = "GET";
+        std::string name = req.get_param("model");
+        auto error_res = std::make_unique<server_http_res>();
+        if (!router_validate_model(name, models, params.models_autoload, error_res)) {
+            return error_res;
+        }
+        return models.proxy_request(req, method, name, false);
+    };
+
+    this->proxy_post = [this](const server_http_req & req) {
+        std::string method = "POST";
+        json body = json::parse(req.body);
+        std::string name = json_value(body, "model", std::string());
+        auto error_res = std::make_unique<server_http_res>();
+        if (!router_validate_model(name, models, params.models_autoload, error_res)) {
+            return error_res;
+        }
+        return models.proxy_request(req, method, name, true); // update last usage for POST requests only
+    };
+
+    this->get_router_models = [this](const server_http_req &) {
+        auto res = std::make_unique<server_http_res>();
+        json models_json = json::array();
+        auto all_models = models.get_all_meta();
+        std::time_t t = std::time(0);
+        for (const auto & meta : all_models) {
+            json status {
+                {"value", server_model_status_to_string(meta.status)},
+                {"args", meta.args},
+            };
+            if (meta.is_failed()) {
+                status["exit_code"] = meta.exit_code;
+                status["failed"] = true;
+            }
+            models_json.push_back(json {
+                {"id", meta.name},
+                {"object", "model"}, // for OAI-compat
+                {"owned_by", "llamacpp"}, // for OAI-compat
+                {"created", t}, // for OAI-compat
+                {"in_cache", meta.in_cache},
+                {"path", meta.path},
+                {"status", status},
+                // TODO: add other fields, may require reading GGUF metadata
+            });
+        }
+        res_ok(res, {
+            {"data", models_json},
+            {"object", "list"},
+        });
+        return res;
+    };
+
+    this->post_router_models_load = [this](const server_http_req & req) {
+        auto res = std::make_unique<server_http_res>();
+        json body = json::parse(req.body);
+        std::string name = json_value(body, "model", std::string());
+        auto model = models.get_meta(name);
+        if (!model.has_value()) {
+            res_error(res, format_error_response("model not found", ERROR_TYPE_NOT_FOUND));
+            return res;
+        }
+        if (model->status == SERVER_MODEL_STATUS_LOADED) {
+            res_error(res, format_error_response("model is already loaded", ERROR_TYPE_INVALID_REQUEST));
+            return res;
+        }
+        models.load(name, false);
+        res_ok(res, {{"success", true}});
+        return res;
+    };
+
+    // used by a child process to notify the router about a status change
+    // TODO @ngxson : maybe implement authentication for this endpoint in the future
+    this->post_router_models_status = [this](const server_http_req & req) {
+        auto res = std::make_unique<server_http_res>();
+        json body = json::parse(req.body);
+        std::string model = json_value(body, "model", std::string());
+        std::string value = json_value(body, "value", std::string());
+        models.update_status(model, server_model_status_from_string(value));
+        res_ok(res, {{"success", true}});
+        return res;
+    };
+
+    this->post_router_models_unload = [this](const server_http_req & req) {
+        auto res = std::make_unique<server_http_res>();
+        json body = json::parse(req.body);
+        std::string name = json_value(body, "model", std::string());
+        auto model = models.get_meta(name);
+        if (!model.has_value()) {
+            res_error(res, format_error_response("model not found", ERROR_TYPE_INVALID_REQUEST));
+            return res;
+        }
+        if (model->status != SERVER_MODEL_STATUS_LOADED) {
+            res_error(res, format_error_response("model is not loaded", ERROR_TYPE_INVALID_REQUEST));
+            return res;
+        }
+        models.unload(name);
+        res_ok(res, {{"success", true}});
+        return res;
+    };
+}
 
 //
 // server_http_proxy
 //
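Note: the handlers above call server_model_status_to_string() and server_model_status_from_string(), which this hunk references but does not define. A minimal sketch of what such helpers could look like, assuming a plain status enum; only SERVER_MODEL_STATUS_LOADED and is_failed() are visible in this diff, so the other state names and string values are hypothetical:

    #include <string>

    // Hypothetical enum: only SERVER_MODEL_STATUS_LOADED appears in this commit;
    // the UNLOADED and FAILED states are assumptions for illustration.
    enum server_model_status {
        SERVER_MODEL_STATUS_UNLOADED,
        SERVER_MODEL_STATUS_LOADED,
        SERVER_MODEL_STATUS_FAILED,
    };

    static std::string server_model_status_to_string(server_model_status status) {
        switch (status) {
            case SERVER_MODEL_STATUS_LOADED: return "loaded";
            case SERVER_MODEL_STATUS_FAILED: return "failed";   // hypothetical state
            default:                         return "unloaded"; // hypothetical fallback
        }
    }

    static server_model_status server_model_status_from_string(const std::string & value) {
        if (value == "loaded") return SERVER_MODEL_STATUS_LOADED;
        if (value == "failed") return SERVER_MODEL_STATUS_FAILED; // hypothetical state
        return SERVER_MODEL_STATUS_UNLOADED;                      // hypothetical fallback
    }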
@@ -125,6 +125,25 @@ public:
     static void setup_child_server(const common_params & base_params, int router_port, const std::string & name, std::function<void(int)> & shutdown_handler);
 };
+
+struct server_models_routes {
+    common_params params;
+    server_models models;
+    server_models_routes(const common_params & params, int argc, char ** argv, char ** envp)
+            : params(params), models(params, argc, argv, envp) {
+        init_routes();
+    }
+
+    void init_routes();
+    // handlers are stored as lambdas, so that they can capture `this` without `std::bind`
+    server_http_context::handler_t get_router_props;
+    server_http_context::handler_t proxy_get;
+    server_http_context::handler_t proxy_post;
+    server_http_context::handler_t get_router_models;
+    server_http_context::handler_t post_router_models_load;
+    server_http_context::handler_t post_router_models_status;
+    server_http_context::handler_t post_router_models_unload;
+};
 
 /**
  * A simple HTTP proxy that forwards requests to another server
  * and relays the responses back.
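Note: the declaration of server_http_context::handler_t is not part of this diff. Judging from the lambdas assigned in init_routes(), it is plausibly a std::function that consumes a request and returns an owned response; a sketch of the assumed shape:

    #include <functional>
    #include <memory>

    struct server_http_req;
    struct server_http_res;

    struct server_http_context {
        // Assumed shape, inferred from the lambdas in init_routes():
        // each handler takes a request and returns an owned response object.
        using handler_t = std::function<std::unique_ptr<server_http_res>(const server_http_req &)>;
        // ...
    };

This is why the struct can hold interchangeable handlers that the router wires into server_routes at startup, as the next hunk shows.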
@@ -108,36 +108,37 @@ int main(int argc, char ** argv, char ** envp) {
     server_routes routes(params, ctx_server, [&ctx_http]() { return ctx_http.is_ready.load(); });
 
     bool is_router_server = params.model.path.empty();
+    std::optional<server_models_routes> models_routes{};
     if (is_router_server) {
         // setup server instances manager
-        routes.models.reset(new server_models(params, argc, argv, envp));
+        models_routes.emplace(params, argc, argv, envp);
 
         // proxy handlers
         // note: routes.get_health stays the same
-        routes.get_metrics = routes.proxy_get;
-        routes.post_props = routes.proxy_post;
-        routes.get_api_show = routes.proxy_get;
-        routes.post_completions = routes.proxy_post;
-        routes.post_completions_oai = routes.proxy_post;
-        routes.post_chat_completions = routes.proxy_post;
-        routes.post_infill = routes.proxy_post;
-        routes.post_embeddings = routes.proxy_post;
-        routes.post_embeddings_oai = routes.proxy_post;
-        routes.post_rerank = routes.proxy_post;
-        routes.post_tokenize = routes.proxy_post;
-        routes.post_detokenize = routes.proxy_post;
-        routes.post_apply_template = routes.proxy_post;
-        routes.get_lora_adapters = routes.proxy_get;
-        routes.post_lora_adapters = routes.proxy_post;
-        routes.get_slots = routes.proxy_get;
-        routes.post_slots = routes.proxy_post;
+        routes.get_metrics = models_routes->proxy_get;
+        routes.post_props = models_routes->proxy_post;
+        routes.get_api_show = models_routes->proxy_get;
+        routes.post_completions = models_routes->proxy_post;
+        routes.post_completions_oai = models_routes->proxy_post;
+        routes.post_chat_completions = models_routes->proxy_post;
+        routes.post_infill = models_routes->proxy_post;
+        routes.post_embeddings = models_routes->proxy_post;
+        routes.post_embeddings_oai = models_routes->proxy_post;
+        routes.post_rerank = models_routes->proxy_post;
+        routes.post_tokenize = models_routes->proxy_post;
+        routes.post_detokenize = models_routes->proxy_post;
+        routes.post_apply_template = models_routes->proxy_post;
+        routes.get_lora_adapters = models_routes->proxy_get;
+        routes.post_lora_adapters = models_routes->proxy_post;
+        routes.get_slots = models_routes->proxy_get;
+        routes.post_slots = models_routes->proxy_post;
 
         // custom routes for router
-        routes.get_props = routes.get_router_props;
-        routes.get_models = routes.get_router_models;
-        ctx_http.post("/models/load", ex_wrapper(routes.post_router_models_load));
-        ctx_http.post("/models/unload", ex_wrapper(routes.post_router_models_unload));
-        ctx_http.post("/models/status", ex_wrapper(routes.post_router_models_status));
+        routes.get_props = models_routes->get_router_props;
+        routes.get_models = models_routes->get_router_models;
+        ctx_http.post("/models/load", ex_wrapper(models_routes->post_router_models_load));
+        ctx_http.post("/models/unload", ex_wrapper(models_routes->post_router_models_unload));
+        ctx_http.post("/models/status", ex_wrapper(models_routes->post_router_models_status));
     }
 
     ctx_http.get ("/health", ex_wrapper(routes.get_health)); // public endpoint (no API key check)
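Note: because server_models_routes has no default constructor (it needs params and the argc/argv/envp triple), the router path above holds it in a std::optional and constructs it in place with emplace() only once the process is known to be a router. A self-contained illustration of the same idiom, with hypothetical names:

    #include <cstdio>
    #include <optional>

    struct routes_t {
        explicit routes_t(int port) { std::printf("constructed for port %d\n", port); }
    };

    int main() {
        std::optional<routes_t> maybe_routes; // empty: nothing constructed yet
        bool is_router = true;                // decided at runtime
        if (is_router) {
            maybe_routes.emplace(8080);       // construct in place only when needed
        }
        if (maybe_routes.has_value()) {       // guard access, as the clean_up lambda below does
            // ... use *maybe_routes ...
        }
    }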
@@ -184,9 +185,11 @@ int main(int argc, char ** argv, char ** envp) {
     if (is_router_server) {
         LOG_INF("%s: starting router server, no model will be loaded in this process\n", __func__);
 
-        clean_up = [&routes]() {
+        clean_up = [&models_routes]() {
             SRV_INF("%s: cleaning up before exit...\n", __func__);
-            routes.models->unload_all();
+            if (models_routes.has_value()) {
+                models_routes->models.unload_all();
+            }
             llama_backend_free();
         };
 
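Note: all of the new router endpoints registered above take a JSON body. Using the same nlohmann-style json type the diff uses, the request payloads would look like this (the model name and status value are illustrative, not values taken from this commit):

    #include <nlohmann/json.hpp>
    using json = nlohmann::json;

    // Illustrative request bodies for the new endpoints:
    json load_body   = {{"model", "my-model"}};                      // POST /models/load and /models/unload
    json status_body = {{"model", "my-model"}, {"value", "loaded"}}; // POST /models/status, sent by a child server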