Merge 121e192865 into 58062860af
This commit is contained in:
commit
9a4e3e6f9d
|
|
@ -2668,6 +2668,12 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
|
||||||
params.endpoint_slots = value;
|
params.endpoint_slots = value;
|
||||||
}
|
}
|
||||||
).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_ENDPOINT_SLOTS"));
|
).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_ENDPOINT_SLOTS"));
|
||||||
|
// --endpoint-exit: opt-in switch for the POST /exit endpoint. The flag takes no value; passing it
// (or setting LLAMA_ARG_ENDPOINT_EXIT) sets params.endpoint_exit to true. Server example only.
add_opt(common_arg(
    {"--endpoint-exit"},
    string_format("enable POST /exit endpoint to shutdown the server (default: %s)", params.endpoint_exit ? "enabled" : "disabled"),
    [](common_params & params) {
        params.endpoint_exit = true;
    }
).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_ENDPOINT_EXIT"));
|
||||||
add_opt(common_arg(
|
add_opt(common_arg(
|
||||||
{"--slot-save-path"}, "PATH",
|
{"--slot-save-path"}, "PATH",
|
||||||
"path to save slot kv cache (default: disabled)",
|
"path to save slot kv cache (default: disabled)",
|
||||||
|
|
|
||||||
|
|
@ -489,6 +489,7 @@ struct common_params {
|
||||||
bool endpoint_slots = true;
|
bool endpoint_slots = true;
|
||||||
bool endpoint_props = false; // only control POST requests, not GET
|
bool endpoint_props = false; // only control POST requests, not GET
|
||||||
bool endpoint_metrics = false;
|
bool endpoint_metrics = false;
|
||||||
|
bool endpoint_exit = false;
|
||||||
|
|
||||||
// router server configs
|
// router server configs
|
||||||
std::string models_dir = ""; // directory containing models for the router server
|
std::string models_dir = ""; // directory containing models for the router server
|
||||||
|
|
|
||||||
|
|
@ -3590,6 +3590,59 @@ void server_routes::init_routes() {
|
||||||
res->ok(result->to_json());
|
res->ok(result->to_json());
|
||||||
return res;
|
return res;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// POST /exit handler: asks the server to shut down.
//   - rejected with ERROR_TYPE_NOT_SUPPORTED unless params.endpoint_exit is set
//   - the request body must be JSON carrying {"confirm": "shutdown"}; anything else is
//     rejected with ERROR_TYPE_INVALID_REQUEST
//   - rejected with ERROR_TYPE_NOT_SUPPORTED when no on_shutdown callback was supplied
//   - otherwise replies right away and runs on_shutdown from a detached thread
this->post_exit = [this](const server_http_req & req) {
    auto res = std::make_unique<server_res_generator>(ctx_server);

    if (!params.endpoint_exit) {
        SRV_WRN("%s: exit endpoint called but exit endpoint is not enabled\n", __func__);
        res->error(format_error_response("Exit endpoint is disabled.", ERROR_TYPE_NOT_SUPPORTED));
        return res;
    }

    // Require an explicit confirmation token in the request body so that a stray POST cannot
    // take the server down.
    try {
        const json body = json::parse(req.body);
        const std::string confirm = json_value(body, "confirm", std::string());

        if (confirm != "shutdown") {
            res->error(format_error_response("Missing or invalid confirmation. Send {\"confirm\": \"shutdown\"}",
                                             ERROR_TYPE_INVALID_REQUEST));
            return res;
        }
    } catch (const std::exception &) {
        // any exception raised while parsing the body or reading "confirm" ends up here
        res->error(format_error_response("Invalid request body. Expected JSON with {\"confirm\": \"shutdown\"}",
                                         ERROR_TYPE_INVALID_REQUEST));
        return res;
    }

    // Without a shutdown callback nothing would actually stop the server, so report that
    // instead of claiming that a shutdown was initiated.
    if (!this->on_shutdown) {
        SRV_WRN("%s: exit endpoint called but no shutdown callback is configured\n", __func__);
        res->error(format_error_response("Exit endpoint is unavailable: no shutdown handler configured.",
                                         ERROR_TYPE_NOT_SUPPORTED));
        return res;
    }

    SRV_INF("%s: exit endpoint called with valid confirmation token, initiating server shutdown...\n",
            __func__);

    res->ok({
        { "message", "Server shutdown initiated" },
        { "status",  "terminating" }
    });

    // Run the callback from a separate thread after a short delay so this handler can return
    // its response first (best effort: the delay is a grace period, not a guarantee).
    // The callback is copied into the closure so the thread does not touch `this`.
    std::thread([shutdown_cb = this->on_shutdown]() {
        std::this_thread::sleep_for(std::chrono::milliseconds(100));
        SRV_INF("%s: executing on_shutdown callback...\n", __func__);
        // never let an exception escape the thread entry point (that would call std::terminate)
        try {
            shutdown_cb();
        } catch (const std::exception & e) {
            SRV_ERR("%s: on_shutdown callback threw: %s\n", __func__, e.what());
        } catch (...) {
            SRV_ERR("%s: on_shutdown callback threw unknown exception\n", __func__);
        }
    }).detach();

    return res;
};
|
||||||
}
|
}
|
||||||
|
|
||||||
std::unique_ptr<server_res_generator> server_routes::handle_slots_save(const server_http_req & req, int id_slot) {
|
std::unique_ptr<server_res_generator> server_routes::handle_slots_save(const server_http_req & req, int id_slot) {
|
||||||
|
|
|
||||||
|
|
@ -51,8 +51,8 @@ struct server_context {
|
||||||
struct server_res_generator;
|
struct server_res_generator;
|
||||||
|
|
||||||
struct server_routes {
|
struct server_routes {
|
||||||
server_routes(const common_params & params, server_context & ctx_server, std::function<bool()> is_ready = []() { return true; })
|
server_routes(const common_params & params, server_context & ctx_server, std::function<bool()> is_ready = []() { return true; }, std::function<void()> on_shutdown = nullptr)
|
||||||
: params(params), ctx_server(*ctx_server.impl), is_ready(is_ready) {
|
: params(params), ctx_server(*ctx_server.impl), is_ready(is_ready), on_shutdown(on_shutdown) {
|
||||||
init_routes();
|
init_routes();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -80,6 +80,8 @@ struct server_routes {
|
||||||
server_http_context::handler_t post_rerank;
|
server_http_context::handler_t post_rerank;
|
||||||
server_http_context::handler_t get_lora_adapters;
|
server_http_context::handler_t get_lora_adapters;
|
||||||
server_http_context::handler_t post_lora_adapters;
|
server_http_context::handler_t post_lora_adapters;
|
||||||
|
server_http_context::handler_t post_exit;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
// TODO: move these outside of server_routes?
|
// TODO: move these outside of server_routes?
|
||||||
std::unique_ptr<server_res_generator> handle_slots_save(const server_http_req & req, int id_slot);
|
std::unique_ptr<server_res_generator> handle_slots_save(const server_http_req & req, int id_slot);
|
||||||
|
|
@ -90,4 +92,5 @@ private:
|
||||||
const common_params & params;
|
const common_params & params;
|
||||||
server_context_impl & ctx_server;
|
server_context_impl & ctx_server;
|
||||||
std::function<bool()> is_ready;
|
std::function<bool()> is_ready;
|
||||||
|
const std::function<void()> on_shutdown;
|
||||||
};
|
};
|
||||||
|
|
|
||||||
|
|
@ -113,14 +113,29 @@ int main(int argc, char ** argv, char ** envp) {
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool is_router_server = params.model.path.empty();
|
||||||
|
|
||||||
|
// prepare shutdown callback depending on mode (capturing by reference is fine here I think,
|
||||||
|
// ctx_http and ctx_server live in main and outlive routes).
|
||||||
|
std::function<void()> shutdown_cb;
|
||||||
|
if (is_router_server) {
|
||||||
|
shutdown_cb = [&ctx_http]() {
|
||||||
|
ctx_http.stop();
|
||||||
|
};
|
||||||
|
} else {
|
||||||
|
// ctx_server declared earlier and will outlive routes
|
||||||
|
shutdown_cb = [&ctx_server]() {
|
||||||
|
ctx_server.terminate();
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
//
|
//
|
||||||
// Router
|
// Router
|
||||||
//
|
//
|
||||||
|
|
||||||
// register API routes
|
// register API routes
|
||||||
server_routes routes(params, ctx_server, [&ctx_http]() { return ctx_http.is_ready.load(); });
|
server_routes routes(params, ctx_server, [&ctx_http]() { return ctx_http.is_ready.load(); }, shutdown_cb);
|
||||||
|
|
||||||
bool is_router_server = params.model.path.empty();
|
|
||||||
std::optional<server_models_routes> models_routes{};
|
std::optional<server_models_routes> models_routes{};
|
||||||
if (is_router_server) {
|
if (is_router_server) {
|
||||||
// setup server instances manager
|
// setup server instances manager
|
||||||
|
|
@ -191,6 +206,9 @@ int main(int argc, char ** argv, char ** envp) {
|
||||||
ctx_http.get ("/slots", ex_wrapper(routes.get_slots));
|
ctx_http.get ("/slots", ex_wrapper(routes.get_slots));
|
||||||
ctx_http.post("/slots/:id_slot", ex_wrapper(routes.post_slots));
|
ctx_http.post("/slots/:id_slot", ex_wrapper(routes.post_slots));
|
||||||
|
|
||||||
|
// Exit endpoint: POST /exit is routed to routes.post_exit, which itself rejects the request
// unless params.endpoint_exit is enabled and the body carries the confirmation token
|
||||||
|
ctx_http.post("/exit", ex_wrapper(routes.post_exit));
|
||||||
|
|
||||||
//
|
//
|
||||||
// Start the server
|
// Start the server
|
||||||
//
|
//
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue