From aec7bdb42b33b3177ae232ba36d08535eeaae1fe Mon Sep 17 00:00:00 2001
From: Kritavya
Date: Sat, 13 Dec 2025 21:51:25 +0530
Subject: [PATCH] server: add /v1/metrics endpoint

Add a GET /v1/metrics handler that returns a small JSON status object:

  - "status":      always "online"
  - "uptime_sec":  whole seconds between the recorded server start
                   timestamp and the time of the request
  - "system_info": the string returned by common_params_get_system_info(),
                   included only when it is non-empty

main() records the start timestamp with llama_time_us() and passes it to
server_routes. When no timestamp is passed (t_start == 0), server_routes
records its own construction time instead, so the reported uptime is
never measured from the clock's zero point.

---
Review notes:
- Unlike /metrics, /v1/metrics is not gated by params.endpoint_metrics;
  confirm that exposing it unconditionally is intended.
- The route comment calls /v1/metrics a public endpoint, but this patch
  does not touch any API-key allowlist; verify the comment against the
  HTTP middleware before merging.
- The From: header carries no e-mail address, and the index lines still
  show the blob ids of the original submission.

 tools/server/server-context.cpp | 21 +++++++++++++++++++++
 tools/server/server-context.h   | 11 +++++++++--
 tools/server/server.cpp         |  6 +++++-
 3 files changed, 35 insertions(+), 3 deletions(-)

diff --git a/tools/server/server-context.cpp b/tools/server/server-context.cpp
index 90898b5ec4..ac3c263b8b 100644
--- a/tools/server/server-context.cpp
+++ b/tools/server/server-context.cpp
@@ -2912,6 +2912,27 @@ void server_routes::init_routes() {
         return res;
     };
 
+    // GET /v1/metrics: JSON status summary (status, uptime, optional system info)
+    this->get_v1_metrics = [this](const server_http_req &) {
+        auto res = std::make_unique<server_res_generator>(ctx_server);
+
+        // whole seconds elapsed since the recorded server start (timestamps are in microseconds)
+        const int64_t uptime_sec = (llama_time_us() - t_server_start) / 1000000;
+
+        json data = {
+            {"status", "online"},
+            {"uptime_sec", uptime_sec}
+        };
+
+        // system_info is left out of the response when the string is empty
+        if (!system_info_str.empty()) {
+            data["system_info"] = system_info_str;
+        }
+
+        res->ok(data);
+        return res;
+    };
+
     this->get_metrics = [this](const server_http_req &) {
         auto res = std::make_unique<server_res_generator>(ctx_server);
         if (!params.endpoint_metrics) {
diff --git a/tools/server/server-context.h b/tools/server/server-context.h
index 230b25952e..571d5debf7 100644
--- a/tools/server/server-context.h
+++ b/tools/server/server-context.h
@@ -51,8 +51,12 @@ struct server_context {
 struct server_res_generator;
 
 struct server_routes {
-    server_routes(const common_params & params, server_context & ctx_server, std::function<bool()> is_ready = []() { return true; })
-        : params(params), ctx_server(*ctx_server.impl), is_ready(is_ready) {
+    // t_start: server start timestamp from llama_time_us(); 0 (the default) means "not provided"
+    server_routes(const common_params & params, server_context & ctx_server, std::function<bool()> is_ready = []() { return true; }, int64_t t_start = 0)
+        : params(params), ctx_server(*ctx_server.impl), is_ready(is_ready),
+          // fall back to construction time so uptime is never measured from the clock's zero point
+          t_server_start(t_start != 0 ? t_start : llama_time_us()),
+          system_info_str(common_params_get_system_info(params)) {
         init_routes();
     }
 
@@ -60,6 +64,7 @@ struct server_routes {
     // handlers using lambda function, so that they can capture `this` without `std::bind`
     server_http_context::handler_t get_health;
     server_http_context::handler_t get_metrics;
+    server_http_context::handler_t get_v1_metrics;
     server_http_context::handler_t get_slots;
     server_http_context::handler_t post_slots;
     server_http_context::handler_t get_props;
@@ -90,4 +95,6 @@ private:
     const common_params & params;
     server_context_impl & ctx_server;
     std::function<bool()> is_ready;
+    int64_t t_server_start;      // server start timestamp, on the llama_time_us() clock
+    std::string system_info_str; // result of common_params_get_system_info(params), computed once at construction
 };
diff --git a/tools/server/server.cpp b/tools/server/server.cpp
index d5bef3df44..bd413fb92b 100644
--- a/tools/server/server.cpp
+++ b/tools/server/server.cpp
@@ -108,12 +108,15 @@ int main(int argc, char ** argv, char ** envp) {
         return 1;
     }
 
+    // Capture server start time for metrics
+    int64_t t_server_start = llama_time_us();
+
     //
     // Router
     //
 
     // register API routes
-    server_routes routes(params, ctx_server, [&ctx_http]() { return ctx_http.is_ready.load(); });
+    server_routes routes(params, ctx_server, [&ctx_http]() { return ctx_http.is_ready.load(); }, t_server_start);
 
     bool is_router_server = params.model.path.empty();
     std::optional<server_models_routes> models_routes{};
@@ -153,6 +156,7 @@ int main(int argc, char ** argv, char ** envp) {
 
     ctx_http.get ("/health",              ex_wrapper(routes.get_health)); // public endpoint (no API key check)
     ctx_http.get ("/v1/health",           ex_wrapper(routes.get_health)); // public endpoint (no API key check)
+    ctx_http.get ("/v1/metrics",          ex_wrapper(routes.get_v1_metrics)); // public endpoint (no API key check)
     ctx_http.get ("/metrics",             ex_wrapper(routes.get_metrics));
     ctx_http.get ("/props",               ex_wrapper(routes.get_props));
     ctx_http.post("/props",               ex_wrapper(routes.post_props));