Merge 121e192865 into 58062860af
This commit is contained in:
commit
9a4e3e6f9d
|
|
@ -2668,6 +2668,12 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
|
|||
params.endpoint_slots = value;
|
||||
}
|
||||
).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_ENDPOINT_SLOTS"));
|
||||
add_opt(common_arg({ "--endpoint-exit" },
|
||||
string_format("enable POST /exit endpoint to shutdown the server (default: %s)",
|
||||
params.endpoint_exit ? "enabled" : "disabled"),
|
||||
[](common_params & params) { params.endpoint_exit = true; })
|
||||
.set_examples({ LLAMA_EXAMPLE_SERVER })
|
||||
.set_env("LLAMA_ARG_ENDPOINT_EXIT"));
|
||||
add_opt(common_arg(
|
||||
{"--slot-save-path"}, "PATH",
|
||||
"path to save slot kv cache (default: disabled)",
|
||||
|
|
|
|||
|
|
@ -489,6 +489,7 @@ struct common_params {
|
|||
bool endpoint_slots = true;
|
||||
bool endpoint_props = false; // only control POST requests, not GET
|
||||
bool endpoint_metrics = false;
|
||||
bool endpoint_exit = false;
|
||||
|
||||
// router server configs
|
||||
std::string models_dir = ""; // directory containing models for the router server
|
||||
|
|
|
|||
|
|
@ -3590,6 +3590,59 @@ void server_routes::init_routes() {
|
|||
res->ok(result->to_json());
|
||||
return res;
|
||||
};
|
||||
|
||||
// POST /exit: shut the server down on request.
//
// The request is honored only when all of the following hold:
//   - the endpoint is enabled (params.endpoint_exit),
//   - an on_shutdown callback was supplied to server_routes,
//   - the request body is JSON carrying {"confirm": "shutdown"}.
// In every other case an error response is returned and nothing is shut down.
this->post_exit = [this](const server_http_req & req) {
    auto res = std::make_unique<server_res_generator>(ctx_server);

    if (!params.endpoint_exit) {
        SRV_WRN("%s: exit endpoint called but exit endpoint is not enabled\n", __func__);
        res->error(format_error_response("Exit endpoint is disabled.", ERROR_TYPE_NOT_SUPPORTED));
        return res;
    }

    // on_shutdown defaults to an empty std::function. Without it this handler has no way
    // to stop the server, so refuse the request instead of reporting a shutdown that
    // would never happen.
    if (!this->on_shutdown) {
        SRV_WRN("%s: exit endpoint called but no shutdown callback is configured\n", __func__);
        res->error(format_error_response("Exit endpoint is enabled but no shutdown handler is configured.",
                                         ERROR_TYPE_NOT_SUPPORTED));
        return res;
    }

    // Check for confirmation token in request body
    try {
        const json        body    = json::parse(req.body);
        const std::string confirm = json_value(body, "confirm", std::string());

        if (confirm != "shutdown") {
            res->error(format_error_response("Missing or invalid confirmation. Send {\"confirm\": \"shutdown\"}",
                                             ERROR_TYPE_INVALID_REQUEST));
            return res;
        }
    } catch (const std::exception &) {
        // Any exception while reading the body (e.g. it is not valid JSON) is reported
        // as an invalid request.
        res->error(format_error_response("Invalid request body. Expected JSON with {\"confirm\": \"shutdown\"}",
                                         ERROR_TYPE_INVALID_REQUEST));
        return res;
    }

    SRV_INF("%s: exit endpoint called with valid confirmation token, initiating server shutdown...\n",
            __func__);

    res->ok({
        { "message", "Server shutdown initiated" },
        { "status",  "terminating"               }
    });

    // Run the callback from a detached thread after a short delay so that this handler can
    // return its response first. The delay is best effort: it is not synchronized with the
    // response actually being written to the client.
    // The callback is copied into the thread so the thread does not touch `this`.
    std::thread([shutdown_cb = this->on_shutdown]() {
        std::this_thread::sleep_for(std::chrono::milliseconds(100));
        SRV_INF("%s: executing on_shutdown callback...\n", __func__);
        try {
            shutdown_cb();
        } catch (const std::exception & e) {
            // Exceptions must not escape a thread entry point.
            SRV_ERR("%s: on_shutdown callback threw: %s\n", __func__, e.what());
        } catch (...) {
            SRV_ERR("%s: on_shutdown callback threw unknown exception\n", __func__);
        }
    }).detach();

    return res;
};
|
||||
}
|
||||
|
||||
std::unique_ptr<server_res_generator> server_routes::handle_slots_save(const server_http_req & req, int id_slot) {
|
||||
|
|
|
|||
|
|
@ -51,8 +51,8 @@ struct server_context {
|
|||
struct server_res_generator;
|
||||
|
||||
struct server_routes {
|
||||
server_routes(const common_params & params, server_context & ctx_server, std::function<bool()> is_ready = []() { return true; })
|
||||
: params(params), ctx_server(*ctx_server.impl), is_ready(is_ready) {
|
||||
server_routes(const common_params & params, server_context & ctx_server, std::function<bool()> is_ready = []() { return true; }, std::function<void()> on_shutdown = nullptr)
|
||||
: params(params), ctx_server(*ctx_server.impl), is_ready(is_ready), on_shutdown(on_shutdown) {
|
||||
init_routes();
|
||||
}
|
||||
|
||||
|
|
@ -80,6 +80,8 @@ struct server_routes {
|
|||
server_http_context::handler_t post_rerank;
|
||||
server_http_context::handler_t get_lora_adapters;
|
||||
server_http_context::handler_t post_lora_adapters;
|
||||
server_http_context::handler_t post_exit;
|
||||
|
||||
private:
|
||||
// TODO: move these outside of server_routes?
|
||||
std::unique_ptr<server_res_generator> handle_slots_save(const server_http_req & req, int id_slot);
|
||||
|
|
@ -90,4 +92,5 @@ private:
|
|||
const common_params & params;
|
||||
server_context_impl & ctx_server;
|
||||
std::function<bool()> is_ready;
|
||||
const std::function<void()> on_shutdown;
|
||||
};
|
||||
|
|
|
|||
|
|
@ -113,14 +113,29 @@ int main(int argc, char ** argv, char ** envp) {
|
|||
return 1;
|
||||
}
|
||||
|
||||
bool is_router_server = params.model.path.empty();
|
||||
|
||||
// Prepare the shutdown callback for the current mode. The reference captures below rely on
|
||||
// ctx_http and ctx_server living in main() and outliving both routes and the last use of the callback.
|
||||
std::function<void()> shutdown_cb;
|
||||
if (is_router_server) {
|
||||
shutdown_cb = [&ctx_http]() {
|
||||
ctx_http.stop();
|
||||
};
|
||||
} else {
|
||||
// ctx_server declared earlier and will outlive routes
|
||||
shutdown_cb = [&ctx_server]() {
|
||||
ctx_server.terminate();
|
||||
};
|
||||
}
|
||||
|
||||
//
|
||||
// Router
|
||||
//
|
||||
|
||||
// register API routes
|
||||
server_routes routes(params, ctx_server, [&ctx_http]() { return ctx_http.is_ready.load(); });
|
||||
server_routes routes(params, ctx_server, [&ctx_http]() { return ctx_http.is_ready.load(); }, shutdown_cb);
|
||||
|
||||
bool is_router_server = params.model.path.empty();
|
||||
std::optional<server_models_routes> models_routes{};
|
||||
if (is_router_server) {
|
||||
// setup server instances manager
|
||||
|
|
@ -191,6 +206,9 @@ int main(int argc, char ** argv, char ** envp) {
|
|||
ctx_http.get ("/slots", ex_wrapper(routes.get_slots));
|
||||
ctx_http.post("/slots/:id_slot", ex_wrapper(routes.post_slots));
|
||||
|
||||
// Exit endpoint
|
||||
ctx_http.post("/exit", ex_wrapper(routes.post_exit));
|
||||
|
||||
//
|
||||
// Start the server
|
||||
//
|
||||
|
|
|
|||
Loading…
Reference in New Issue