diff --git a/common/arg.cpp b/common/arg.cpp
index 5bab9abc77..538d2a4b0a 100644
--- a/common/arg.cpp
+++ b/common/arg.cpp
@@ -2807,6 +2807,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
params.port = value;
}
).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_PORT"));
+ add_opt(common_arg( // register the --reuse-port command-line flag
+ {"--reuse-port"}, // no value placeholder follows: the flag takes no argument
+ string_format("allow multiple sockets to bind to the same port (default: %s)", params.reuse_port ? "enabled" : "disabled"), // help text shows the value of params.reuse_port at registration time
+ [](common_params & params) {
+ params.reuse_port = true; // the handler can only switch the option on
+ }
+ ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_REUSE_PORT")); // tagged for the server example; associated env var is LLAMA_ARG_REUSE_PORT
add_opt(common_arg(
{"--path"}, "PATH",
string_format("path to serve static files from (default: %s)", params.public_path.c_str()),
diff --git a/common/common.h b/common/common.h
index fde5ba996e..57bd9cf905 100644
--- a/common/common.h
+++ b/common/common.h
@@ -573,6 +573,7 @@ struct common_params {
// server params
int32_t port = 8080; // server listens on this network port
+ bool reuse_port = false; // allow multiple sockets to bind to the same port
int32_t timeout_read = 600; // http read timeout in seconds
int32_t timeout_write = timeout_read; // http write timeout in seconds
int32_t n_threads_http = -1; // number of threads to process HTTP requests (TODO: support threadpool)
diff --git a/tools/server/README.md b/tools/server/README.md
index f99103a584..4d80b6c56e 100644
--- a/tools/server/README.md
+++ b/tools/server/README.md
@@ -188,6 +188,7 @@ For the full list of features, please refer to [server's changelog](https://gith
| `--tags STRING` | set model tags, comma-separated (informational, not used for routing)
(env: LLAMA_ARG_TAGS) |
| `--host HOST` | ip address to listen, or bind to an UNIX socket if the address ends with .sock (default: 127.0.0.1)
(env: LLAMA_ARG_HOST) |
| `--port PORT` | port to listen (default: 8080)
(env: LLAMA_ARG_PORT) |
+| `--reuse-port` | allow multiple sockets to bind to the same port (default: disabled)
(env: LLAMA_ARG_REUSE_PORT) |
| `--path PATH` | path to serve static files from (default: )
(env: LLAMA_ARG_STATIC_PATH) |
| `--api-prefix PREFIX` | prefix path the server serves from, without the trailing slash (default: )
(env: LLAMA_ARG_API_PREFIX) |
| `--webui-config JSON` | JSON that provides default WebUI settings (overrides WebUI defaults)
(env: LLAMA_ARG_WEBUI_CONFIG) |
diff --git a/tools/server/server-http.cpp b/tools/server/server-http.cpp
index 429cddcc2e..1dabaeee28 100644
--- a/tools/server/server-http.cpp
+++ b/tools/server/server-http.cpp
@@ -112,6 +112,25 @@ bool server_http_context::init(const common_params & params) {
// set timeouts and change hostname and port
srv->set_read_timeout (params.timeout_read);
srv->set_write_timeout(params.timeout_write);
+ // socket option callback: always request SO_REUSEADDR, and additionally
+ // SO_REUSEPORT when params.reuse_port is set (copied into the closure)
+ // NOTE(review): presumably invoked by the HTTP library on the listening socket before bind - confirm
+ srv->set_socket_options([reuse_port = params.reuse_port](socket_t sock) {
+     const int opt = 1;
+     // Winsock's setsockopt() takes const char *, which also converts implicitly to POSIX's const void *
+     const char * optval = reinterpret_cast<const char *>(&opt);
+     // a rejected option is reported but not treated as fatal
+     if (setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, optval, sizeof(opt)) != 0) {
+         LOG_WRN("%s: failed to set SO_REUSEADDR\n", __func__);
+     }
+     if (reuse_port) {
+#ifdef SO_REUSEPORT
+         if (setsockopt(sock, SOL_SOCKET, SO_REUSEPORT, optval, sizeof(opt)) != 0) {
+             LOG_WRN("%s: failed to set SO_REUSEPORT\n", __func__);
+         }
+#else
+         LOG_WRN("%s: SO_REUSEPORT is not supported\n", __func__);
+#endif
+     }
+ });
if (params.api_keys.size() == 1) {
auto key = params.api_keys[0];