Fix invalid pointer to shutdown_handler

This commit is contained in:
Xuan Son Nguyen 2025-11-30 15:31:05 +01:00
parent 7b28b5e16a
commit 4a1c05c383
3 changed files with 10 additions and 5 deletions

View File

@@ -570,7 +570,7 @@ server_http_res_ptr server_models::proxy_request(const server_http_req & req, co
return proxy;
}
void server_models::setup_child_server(const common_params & base_params, int router_port, const std::string & name, std::function<void(int)> & shutdown_handler) {
std::thread server_models::setup_child_server(const common_params & base_params, int router_port, const std::string & name, std::function<void(int)> & shutdown_handler) {
// send a notification to the router server that a model instance is ready
// TODO @ngxson : use HTTP client from libcommon
httplib::Client cli(base_params.hostname, router_port);
@@ -598,7 +598,7 @@ void server_models::setup_child_server(const common_params & base_params, int ro
}
// setup thread for monitoring stdin
std::thread([shutdown_handler]() {
return std::thread([shutdown_handler]() {
// wait for EOF on stdin
SRV_INF("%s", "child server monitoring thread started, waiting for EOF on stdin...\n");
bool eof = false;
@@ -619,7 +619,7 @@ void server_models::setup_child_server(const common_params & base_params, int ro
SRV_INF("%s", "EOF on stdin detected, forcing shutdown...\n");
exit(1);
}
}).detach();
});
}

View File

@@ -122,7 +122,8 @@ public:
server_http_res_ptr proxy_request(const server_http_req & req, const std::string & method, const std::string & name, bool update_last_used);
// notify the router server that a model instance is ready
static void setup_child_server(const common_params & base_params, int router_port, const std::string & name, std::function<void(int)> & shutdown_handler);
// return the monitoring thread (to be joined by the caller)
static std::thread setup_child_server(const common_params & base_params, int router_port, const std::string & name, std::function<void(int)> & shutdown_handler);
};
struct server_models_routes {

View File

@@ -276,8 +276,9 @@ int main(int argc, char ** argv, char ** envp) {
// optionally, notify router server that this instance is ready
const char * router_port = std::getenv("LLAMA_SERVER_ROUTER_PORT");
std::thread monitor_thread;
if (router_port != nullptr) {
server_models::setup_child_server(params, std::atoi(router_port), params.model_alias, shutdown_handler);
monitor_thread = server_models::setup_child_server(params, std::atoi(router_port), params.model_alias, shutdown_handler);
}
// this call blocks the main thread until queue_tasks.terminate() is called
@@ -287,6 +288,9 @@ int main(int argc, char ** argv, char ** envp) {
if (ctx_http.thread.joinable()) {
ctx_http.thread.join();
}
if (monitor_thread.joinable()) {
monitor_thread.join();
}
llama_memory_breakdown_print(ctx_server.get_llama_context());
}