server: Introduce LLAMA_BUILD_WEBUI build flag to allow disabling the embedded Web UI (#20158)

* Introduce LLAMA_SERVER_NO_WEBUI
* LLAMA_SERVER_NO_WEBUI → LLAMA_BUILD_WEBUI
* LLAMA_BUILD_WEBUI ON by default, not based on LLAMA_STANDALONE
* Missed this
* Add useWebUi to package.nix
2026-03-27 11:25:55 -05:00 · 2026-03-27 11:25:55 -05:00 · ff934e29bc
parent ee051c1e4e
commit ff934e29bc
4 changed files with 32 additions and 15 deletions
--- a/.devops/nix/package.nix
+++ b/.devops/nix/package.nix
@ -41,6 +41,7 @@
  effectiveStdenv ? if useCuda then cudaPackages.backendStdenv else stdenv,
  enableStatic ? effectiveStdenv.hostPlatform.isStatic,
  precompileMetalShaders ? false,
  useWebUi ? true,
 }:
 let
@ -164,6 +165,7 @@ effectiveStdenv.mkDerivation (finalAttrs: {
  cmakeFlags =
    [
      (cmakeBool "LLAMA_BUILD_SERVER" true)
      (cmakeBool "LLAMA_BUILD_WEBUI" useWebUi)
      (cmakeBool "BUILD_SHARED_LIBS" (!enableStatic))
      (cmakeBool "CMAKE_SKIP_BUILD_RPATH" true)
      (cmakeBool "GGML_NATIVE" false)
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -108,6 +108,7 @@ option(LLAMA_BUILD_TESTS    "llama: build tests"          ${LLAMA_STANDALONE})
 option(LLAMA_BUILD_TOOLS    "llama: build tools"          ${LLAMA_STANDALONE})
 option(LLAMA_BUILD_EXAMPLES "llama: build examples"       ${LLAMA_STANDALONE})
 option(LLAMA_BUILD_SERVER   "llama: build server example" ${LLAMA_STANDALONE})
 option(LLAMA_BUILD_WEBUI    "llama: build the embedded Web UI for server"  ON)
 option(LLAMA_TOOLS_INSTALL  "llama: install tools"        ${LLAMA_TOOLS_INSTALL_DEFAULT})
 option(LLAMA_TESTS_INSTALL  "llama: install tests"        ON)
--- a/tools/server/CMakeLists.txt
+++ b/tools/server/CMakeLists.txt
@ -37,22 +37,29 @@ set(TARGET_SRCS
    server-models.cpp
    server-models.h
 )
 set(PUBLIC_ASSETS
    index.html.gz
    loading.html
 )
-foreach(asset ${PUBLIC_ASSETS})
+option(LLAMA_BUILD_WEBUI "Build the embedded Web UI" ON)
-    set(input "${CMAKE_CURRENT_SOURCE_DIR}/public/${asset}")
+
-    set(output "${CMAKE_CURRENT_BINARY_DIR}/${asset}.hpp")
+if (LLAMA_BUILD_WEBUI)
-    list(APPEND TARGET_SRCS ${output})
+    set(PUBLIC_ASSETS
-    add_custom_command(
+        index.html.gz
-        DEPENDS "${input}"
+        loading.html
        OUTPUT "${output}"
        COMMAND "${CMAKE_COMMAND}" "-DINPUT=${input}" "-DOUTPUT=${output}" -P "${PROJECT_SOURCE_DIR}/scripts/xxd.cmake"
    )
-    set_source_files_properties(${output} PROPERTIES GENERATED TRUE)
+
-endforeach()
+    foreach(asset ${PUBLIC_ASSETS})
        set(input "${CMAKE_CURRENT_SOURCE_DIR}/public/${asset}")
        set(output "${CMAKE_CURRENT_BINARY_DIR}/${asset}.hpp")
        list(APPEND TARGET_SRCS ${output})
        add_custom_command(
            DEPENDS "${input}"
            OUTPUT "${output}"
            COMMAND "${CMAKE_COMMAND}" "-DINPUT=${input}" "-DOUTPUT=${output}" -P "${PROJECT_SOURCE_DIR}/scripts/xxd.cmake"
        )
        set_source_files_properties(${output} PROPERTIES GENERATED TRUE)
    endforeach()
    add_definitions(-DLLAMA_BUILD_WEBUI)
 else()
 endif()
 add_executable(${TARGET} ${TARGET_SRCS})
 install(TARGETS ${TARGET} RUNTIME)
--- a/tools/server/server-http.cpp
+++ b/tools/server/server-http.cpp
@ -8,9 +8,11 @@
 #include <string>
 #include <thread>
 #ifdef LLAMA_BUILD_WEBUI
 // auto generated files (see README.md for details)
 #include "index.html.gz.hpp"
 #include "loading.html.hpp"
 #endif
 //
 // HTTP implementation using cpp-httplib
@ -181,11 +183,14 @@ bool server_http_context::init(const common_params & params) {
    auto middleware_server_state = [this](const httplib::Request & req, httplib::Response & res) {
        bool ready = is_ready.load();
        if (!ready) {
 #ifdef LLAMA_BUILD_WEBUI
            auto tmp = string_split<std::string>(req.path, '.');
            if (req.path == "/" || tmp.back() == "html") {
                res.status = 503;
                res.set_content(reinterpret_cast<const char*>(loading_html), loading_html_len, "text/html; charset=utf-8");
-            } else {
+            } else
 #endif
            {
                // no endpoints is allowed to be accessed when the server is not ready
                // this is to prevent any data races or inconsistent states
                res.status = 503;
@ -255,6 +260,7 @@ bool server_http_context::init(const common_params & params) {
                return 1;
            }
        } else {
 #ifdef LLAMA_BUILD_WEBUI
            // using embedded static index.html
            srv->Get(params.api_prefix + "/", [](const httplib::Request & req, httplib::Response & res) {
                if (req.get_header_value("Accept-Encoding").find("gzip") == std::string::npos) {
@ -268,6 +274,7 @@ bool server_http_context::init(const common_params & params) {
                }
                return false;
            });
 #endif
        }
    }
    return true;