server: join the child-server stdin-monitor thread instead of detaching it

setup_child_server() previously spawned a detached thread to watch for EOF on
stdin; main() had no way to wait for it at shutdown. Change it to return the
std::thread so the caller (main in server.cpp) can join it after the HTTP
thread, and update the declaration in server-models.h accordingly.

NOTE(review): hunk context indentation below is reconstructed to the file's
4-space convention; verify against the target tree before applying.

diff --git a/tools/server/server-models.cpp b/tools/server/server-models.cpp
index 3a5bd2b215..ae2b78efba 100644
--- a/tools/server/server-models.cpp
+++ b/tools/server/server-models.cpp
@@ -570,7 +570,7 @@ server_http_res_ptr server_models::proxy_request(const server_http_req & req, co
     return proxy;
 }
 
-void server_models::setup_child_server(const common_params & base_params, int router_port, const std::string & name, std::function & shutdown_handler) {
+std::thread server_models::setup_child_server(const common_params & base_params, int router_port, const std::string & name, std::function & shutdown_handler) {
     // send a notification to the router server that a model instance is ready
     // TODO @ngxson : use HTTP client from libcommon
     httplib::Client cli(base_params.hostname, router_port);
@@ -598,7 +598,7 @@ void server_models::setup_child_server(const common_params & base_params, int ro
     }
 
     // setup thread for monitoring stdin
-    std::thread([shutdown_handler]() {
+    return std::thread([shutdown_handler]() {
         // wait for EOF on stdin
         SRV_INF("%s", "child server monitoring thread started, waiting for EOF on stdin...\n");
         bool eof = false;
@@ -619,7 +619,7 @@ void server_models::setup_child_server(const common_params & base_params, int ro
             SRV_INF("%s", "EOF on stdin detected, forcing shutdown...\n");
             exit(1);
         }
-    }).detach();
+    });
 }
 
diff --git a/tools/server/server-models.h b/tools/server/server-models.h
index 552e750f58..029ba2fad8 100644
--- a/tools/server/server-models.h
+++ b/tools/server/server-models.h
@@ -122,7 +122,8 @@ public:
     server_http_res_ptr proxy_request(const server_http_req & req, const std::string & method, const std::string & name, bool update_last_used);
 
     // notify the router server that a model instance is ready
-    static void setup_child_server(const common_params & base_params, int router_port, const std::string & name, std::function & shutdown_handler);
+    // return the monitoring thread (to be joined by the caller)
+    static std::thread setup_child_server(const common_params & base_params, int router_port, const std::string & name, std::function & shutdown_handler);
 };
 
 struct server_models_routes {
diff --git a/tools/server/server.cpp b/tools/server/server.cpp
index d78c21a875..950537d82d 100644
--- a/tools/server/server.cpp
+++ b/tools/server/server.cpp
@@ -276,8 +276,9 @@ int main(int argc, char ** argv, char ** envp) {
 
     // optionally, notify router server that this instance is ready
     const char * router_port = std::getenv("LLAMA_SERVER_ROUTER_PORT");
+    std::thread monitor_thread;
     if (router_port != nullptr) {
-        server_models::setup_child_server(params, std::atoi(router_port), params.model_alias, shutdown_handler);
+        monitor_thread = server_models::setup_child_server(params, std::atoi(router_port), params.model_alias, shutdown_handler);
     }
 
     // this call blocks the main thread until queue_tasks.terminate() is called
@@ -287,6 +288,9 @@ int main(int argc, char ** argv, char ** envp) {
     if (ctx_http.thread.joinable()) {
         ctx_http.thread.join();
     }
+    if (monitor_thread.joinable()) {
+        monitor_thread.join();
+    }
 
     llama_memory_breakdown_print(ctx_server.get_llama_context());
 }