server: fix duplicate HTTP headers in multiple models mode (#17698)

* llama-server: fix duplicate HTTP headers in multiple models mode (#17693)

* llama-server: address review feedback from ngxson

- restrict scope of header after std::move
- simplify header check (remove unordered_set)
This commit is contained in:
Pascal 2025-12-03 10:28:43 +01:00 committed by GitHub
parent 7ca5991d2b
commit 5ceed62421
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 47 additions and 10 deletions

View File

@ -7,6 +7,7 @@
#include <sheredom/subprocess.h> #include <sheredom/subprocess.h>
#include <functional> #include <functional>
#include <algorithm>
#include <thread> #include <thread>
#include <mutex> #include <mutex>
#include <condition_variable> #include <condition_variable>
@ -889,6 +890,28 @@ struct pipe_t {
} }
}; };
// Build a lowercase copy of `value`, leaving the input untouched.
// Each byte is passed through std::tolower as an unsigned char so that
// bytes with the high bit set are never handed over as negative values.
static std::string to_lower_copy(const std::string & value) {
    std::string result;
    result.reserve(value.size());
    for (const unsigned char ch : value) {
        result.push_back(static_cast<char>(std::tolower(ch)));
    }
    return result;
}
// Decide whether a response header coming back from a child server should be
// dropped instead of being forwarded by the router.
//
// The match is case-sensitive against lowercase literals, so `header_name`
// is expected to be lowercased by the caller beforehand.
//
// Returns true for:
//   - any header whose name starts with "access-control-" (the router injects
//     CORS headers and the child sends them too, which yields duplicates)
//   - "server", "transfer-encoding" and "keep-alive" (headers that end up
//     duplicated when the router forwards child responses)
static bool should_strip_proxy_header(const std::string & header_name) {
    // CORS family: prefix match at position 0
    static const char cors_prefix[] = "access-control-";
    if (header_name.compare(0, sizeof(cors_prefix) - 1, cors_prefix) == 0) {
        return true;
    }

    // Exact-name matches
    static const char * const duplicated_names[] = {
        "server",
        "transfer-encoding",
        "keep-alive",
    };
    for (const char * name : duplicated_names) {
        if (header_name == name) {
            return true;
        }
    }

    return false;
}
server_http_proxy::server_http_proxy( server_http_proxy::server_http_proxy(
const std::string & method, const std::string & method,
const std::string & host, const std::string & host,
@ -925,6 +948,14 @@ server_http_proxy::server_http_proxy(
msg_t msg; msg_t msg;
msg.status = response.status; msg.status = response.status;
for (const auto & [key, value] : response.headers) { for (const auto & [key, value] : response.headers) {
const auto lowered = to_lower_copy(key);
if (should_strip_proxy_header(lowered)) {
continue;
}
if (lowered == "content-type") {
msg.content_type = value;
continue;
}
msg.headers[key] = value; msg.headers[key] = value;
} }
return pipe->write(std::move(msg)); // send headers first return pipe->write(std::move(msg)); // send headers first
@ -932,7 +963,7 @@ server_http_proxy::server_http_proxy(
httplib::ContentReceiverWithProgress content_receiver = [pipe](const char * data, size_t data_length, size_t, size_t) { httplib::ContentReceiverWithProgress content_receiver = [pipe](const char * data, size_t data_length, size_t, size_t) {
// send data chunks // send data chunks
// returns false if pipe is closed / broken (signal to stop receiving) // returns false if pipe is closed / broken (signal to stop receiving)
return pipe->write({{}, 0, std::string(data, data_length)}); return pipe->write({{}, 0, std::string(data, data_length), ""});
}; };
// prepare the request to destination server // prepare the request to destination server
@ -955,8 +986,8 @@ server_http_proxy::server_http_proxy(
if (result.error() != httplib::Error::Success) { if (result.error() != httplib::Error::Success) {
auto err_str = httplib::to_string(result.error()); auto err_str = httplib::to_string(result.error());
SRV_ERR("http client error: %s\n", err_str.c_str()); SRV_ERR("http client error: %s\n", err_str.c_str());
pipe->write({{}, 500, ""}); // header pipe->write({{}, 500, "", ""}); // header
pipe->write({{}, 0, "proxy error: " + err_str}); // body pipe->write({{}, 0, "proxy error: " + err_str, ""}); // body
} }
pipe->close_write(); // signal EOF to reader pipe->close_write(); // signal EOF to reader
SRV_DBG("%s", "client request thread ended\n"); SRV_DBG("%s", "client request thread ended\n");
@ -964,12 +995,17 @@ server_http_proxy::server_http_proxy(
this->thread.detach(); this->thread.detach();
// wait for the first chunk (headers) // wait for the first chunk (headers)
msg_t header; {
if (pipe->read(header, should_stop)) { msg_t header;
SRV_DBG("%s", "received response headers\n"); if (pipe->read(header, should_stop)) {
this->status = header.status; SRV_DBG("%s", "received response headers\n");
this->headers = header.headers; this->status = header.status;
} else { this->headers = std::move(header.headers);
SRV_DBG("%s", "no response headers received (request cancelled?)\n"); if (!header.content_type.empty()) {
this->content_type = std::move(header.content_type);
}
} else {
SRV_DBG("%s", "no response headers received (request cancelled?)\n");
}
} }
} }

View File

@ -170,5 +170,6 @@ private:
std::map<std::string, std::string> headers; std::map<std::string, std::string> headers;
int status = 0; int status = 0;
std::string data; std::string data;
std::string content_type;
}; };
}; };