server: use httplib dynamic threads (#20817)

* server: use httplib dynamic threads

* change to n_threads_http + 1024
This commit is contained in:
Xuan-Son Nguyen 2026-03-23 12:22:46 +01:00 committed by GitHub
parent e32d243849
commit 31a5cf4c3f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed files with 9 additions and 3 deletions

View File

@ -227,11 +227,17 @@ bool server_http_context::init(const common_params & params) {
int n_threads_http = params.n_threads_http;
if (n_threads_http < 1) {
// +2 threads for monitoring endpoints
n_threads_http = std::max(params.n_parallel + 2, (int32_t) std::thread::hardware_concurrency() - 1);
// +4 threads for monitoring, health and some threads reserved for MCP and other tasks in the future
n_threads_http = std::max(params.n_parallel + 4, (int32_t) std::thread::hardware_concurrency() - 1);
}
LOG_INF("%s: using %d threads for HTTP server\n", __func__, n_threads_http);
srv->new_task_queue = [n_threads_http] { return new httplib::ThreadPool(n_threads_http); };
srv->new_task_queue = [n_threads_http] {
// spawn n_threads_http fixed thread (always alive), while allow up to 1024 max possible additional threads
// when n_threads_http is used, server will create new "dynamic" threads that will be destroyed after processing each request
// ref: https://github.com/yhirose/cpp-httplib/pull/2368
size_t max_threads = (size_t)n_threads_http + 1024;
return new httplib::ThreadPool(n_threads_http, max_threads);
};
//
// Web UI setup