Fix invalid pointer to shutdown_handler

2025-11-30 15:31:05 +01:00 · 2025-11-30 15:31:05 +01:00 · 4a1c05c383
parent 7b28b5e16a
commit 4a1c05c383
3 changed files with 10 additions and 5 deletions
--- a/tools/server/server-models.cpp
+++ b/tools/server/server-models.cpp
@@ -570,7 +570,7 @@ server_http_res_ptr server_models::proxy_request(const server_http_req & req, co
    return proxy;
 }

-void server_models::setup_child_server(const common_params & base_params, int router_port, const std::string & name, std::function<void(int)> & shutdown_handler) {
+std::thread server_models::setup_child_server(const common_params & base_params, int router_port, const std::string & name, std::function<void(int)> & shutdown_handler) {
    // send a notification to the router server that a model instance is ready
    // TODO @ngxson : use HTTP client from libcommon
    httplib::Client cli(base_params.hostname, router_port);
@@ -598,7 +598,7 @@ void server_models::setup_child_server(const common_params & base_params, int ro
    }

    // setup thread for monitoring stdin
-    std::thread([shutdown_handler]() {
+    return std::thread([shutdown_handler]() {
        // wait for EOF on stdin
        SRV_INF("%s", "child server monitoring thread started, waiting for EOF on stdin...\n");
        bool eof = false;
@@ -619,7 +619,7 @@ void server_models::setup_child_server(const common_params & base_params, int ro
            SRV_INF("%s", "EOF on stdin detected, forcing shutdown...\n");
            exit(1);
        }
-    }).detach();
+    });
 }


--- a/tools/server/server-models.h
+++ b/tools/server/server-models.h
@@ -122,7 +122,8 @@ public:
    server_http_res_ptr proxy_request(const server_http_req & req, const std::string & method, const std::string & name, bool update_last_used);

    // notify the router server that a model instance is ready
-    static void setup_child_server(const common_params & base_params, int router_port, const std::string & name, std::function<void(int)> & shutdown_handler);
+    // return the monitoring thread (to be joined by the caller)
+    static std::thread setup_child_server(const common_params & base_params, int router_port, const std::string & name, std::function<void(int)> & shutdown_handler);
 };

 struct server_models_routes {
--- a/tools/server/server.cpp
+++ b/tools/server/server.cpp
@@ -276,8 +276,9 @@ int main(int argc, char ** argv, char ** envp) {

        // optionally, notify router server that this instance is ready
        const char * router_port = std::getenv("LLAMA_SERVER_ROUTER_PORT");
+        std::thread monitor_thread;
        if (router_port != nullptr) {
-            server_models::setup_child_server(params, std::atoi(router_port), params.model_alias, shutdown_handler);
+            monitor_thread = server_models::setup_child_server(params, std::atoi(router_port), params.model_alias, shutdown_handler);
        }

        // this call blocks the main thread until queue_tasks.terminate() is called
@@ -287,6 +288,9 @@ int main(int argc, char ** argv, char ** envp) {
        if (ctx_http.thread.joinable()) {
            ctx_http.thread.join();
        }
+        if (monitor_thread.joinable()) {
+            monitor_thread.join();
+        }
        llama_memory_breakdown_print(ctx_server.get_llama_context());
    }