diff --git a/.github/actions/windows-setup-curl/action.yml b/.github/actions/windows-setup-curl/action.yml
deleted file mode 100644
index 446f799fac..0000000000
--- a/.github/actions/windows-setup-curl/action.yml
+++ /dev/null
@@ -1,30 +0,0 @@
-name: 'Windows - Setup CURL'
-description: 'Composite action, to be reused in other workflow'
-inputs:
-  curl_version:
-    description: 'CURL version'
-    required: false
-    default: '8.6.0_6'
-  architecture:
-    description: 'Architecture of the libcurl to download'
-    required: false
-    default: 'win64'
-outputs:
-  curl_path:
-    description: "Path to the downloaded libcurl"
-    value: ${{ steps.get_libcurl.outputs.curl_path }}
-
-runs:
-  using: "composite"
-  steps:
-    - name: libCURL
-      id: get_libcurl
-      shell: powershell
-      env:
-        CURL_VERSION: ${{ inputs.curl_version }}
-        ARCHITECTURE: ${{ inputs.architecture }}
-      run: |
-        curl.exe -o $env:RUNNER_TEMP/curl.zip -L "https://curl.se/windows/dl-${env:CURL_VERSION}/curl-${env:CURL_VERSION}-${env:ARCHITECTURE}-mingw.zip"
-        mkdir $env:RUNNER_TEMP/libcurl
-        tar.exe -xvf $env:RUNNER_TEMP/curl.zip --strip-components=1 -C $env:RUNNER_TEMP/libcurl
-        echo "curl_path=$env:RUNNER_TEMP/libcurl" >> $env:GITHUB_OUTPUT
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 446a3750d7..3c89b4fab6 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -1463,12 +1463,14 @@ jobs:
             "${{ steps.cann-image.outputs.image }}" \
             bash -lc '
               set -e
-              yum install -y --setopt=install_weak_deps=False --setopt=tsflags=nodocs git gcc gcc-c++ make cmake libcurl-devel
+              yum install -y --setopt=install_weak_deps=False --setopt=tsflags=nodocs git gcc gcc-c++ make cmake openssl-devel
               yum clean all && rm -rf /var/cache/yum
               git config --global --add safe.directory "/workspace"
               export LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:${ASCEND_TOOLKIT_HOME}/$(uname -m)-linux/devlib/:${LD_LIBRARY_PATH}
               cmake -S . -B build \
                 -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \
+                -DLLAMA_CURL=OFF \
+                -DLLAMA_OPENSSL=ON \
                 -DGGML_CANN=on \
                 -DSOC_TYPE=${SOC_TYPE}
               cmake --build build -j $(nproc)
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index bf5ebb7559..35e1fae697 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -37,13 +37,6 @@ jobs:
           key: macOS-latest-cmake-arm64
           evict-old-files: 1d

-      - name: Dependencies
-        id: depends
-        continue-on-error: true
-        run: |
-          brew update
-          brew install curl
-
       - name: Build
         id: cmake_build
         run: |
@@ -52,6 +45,8 @@ jobs:
             -DCMAKE_INSTALL_RPATH='@loader_path' \
             -DCMAKE_BUILD_WITH_INSTALL_RPATH=ON \
             -DLLAMA_FATAL_WARNINGS=ON \
+            -DLLAMA_CURL=OFF \
+            -DLLAMA_BUILD_BORINGSSL=ON \
             -DGGML_METAL_USE_BF16=ON \
             -DGGML_METAL_EMBED_LIBRARY=ON \
             -DGGML_RPC=ON \
@@ -90,13 +85,6 @@ jobs:
           key: macOS-latest-cmake-x64
           evict-old-files: 1d

-      - name: Dependencies
-        id: depends
-        continue-on-error: true
-        run: |
-          brew update
-          brew install curl
-
       - name: Build
         id: cmake_build
         run: |
@@ -107,6 +95,8 @@ jobs:
             -DCMAKE_INSTALL_RPATH='@loader_path' \
             -DCMAKE_BUILD_WITH_INSTALL_RPATH=ON \
             -DLLAMA_FATAL_WARNINGS=ON \
+            -DLLAMA_CURL=OFF \
+            -DLLAMA_BUILD_BORINGSSL=ON \
             -DGGML_METAL=OFF \
             -DGGML_RPC=ON \
             -DCMAKE_OSX_DEPLOYMENT_TARGET=13.3
@@ -159,7 +149,7 @@ jobs:
         id: depends
         run: |
           sudo apt-get update
-          sudo apt-get install build-essential libcurl4-openssl-dev
+          sudo apt-get install build-essential libssl-dev

       - name: Build
         id: cmake_build
@@ -171,6 +161,8 @@ jobs:
             -DGGML_NATIVE=OFF \
             -DGGML_CPU_ALL_VARIANTS=ON \
             -DLLAMA_FATAL_WARNINGS=ON \
+            -DLLAMA_CURL=OFF \
+            -DLLAMA_OPENSSL=ON \
             ${{ env.CMAKE_ARGS }}
           cmake --build build --config Release -j $(nproc)

@@ -212,7 +204,7 @@ jobs:
           wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | sudo apt-key add -
           sudo wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list
           sudo apt-get update -y
-          sudo apt-get install -y build-essential mesa-vulkan-drivers vulkan-sdk libcurl4-openssl-dev
+          sudo apt-get install -y build-essential mesa-vulkan-drivers vulkan-sdk libssl-dev

       - name: Build
         id: cmake_build
@@ -220,6 +212,8 @@ jobs:
           cmake -B build \
             -DCMAKE_INSTALL_RPATH='$ORIGIN' \
             -DCMAKE_BUILD_WITH_INSTALL_RPATH=ON \
+            -DLLAMA_CURL=OFF \
+            -DLLAMA_OPENSSL=ON \
             -DGGML_BACKEND_DL=ON \
             -DGGML_NATIVE=OFF \
             -DGGML_CPU_ALL_VARIANTS=ON \
@@ -269,34 +263,24 @@ jobs:
         run: |
           choco install ninja

-      - name: libCURL
-        id: get_libcurl
-        uses: ./.github/actions/windows-setup-curl
-        with:
-          architecture: ${{ matrix.arch == 'x64' && 'win64' || 'win64a' }}
-
       - name: Build
         shell: cmd
-        env:
-          CURL_PATH: ${{ steps.get_libcurl.outputs.curl_path }}
         run: |
           call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" ${{ matrix.arch == 'x64' && 'x64' || 'amd64_arm64' }}
           cmake -S . -B build -G "Ninja Multi-Config" ^
             -D CMAKE_TOOLCHAIN_FILE=cmake/${{ matrix.arch }}-windows-llvm.cmake ^
+            -DLLAMA_CURL=OFF ^
+            -DLLAMA_BUILD_BORINGSSL=ON ^
             -DGGML_NATIVE=OFF ^
             -DGGML_BACKEND_DL=ON ^
             -DGGML_CPU_ALL_VARIANTS=${{ matrix.arch == 'x64' && 'ON' || 'OFF' }} ^
             -DGGML_OPENMP=ON ^
-            -DCURL_LIBRARY="%CURL_PATH%/lib/libcurl.dll.a" -DCURL_INCLUDE_DIR="%CURL_PATH%/include" ^
             ${{ env.CMAKE_ARGS }}
           cmake --build build --config Release

       - name: Pack artifacts
         id: pack_artifacts
-        env:
-          CURL_PATH: ${{ steps.get_libcurl.outputs.curl_path }}
         run: |
-          Copy-Item $env:CURL_PATH\bin\libcurl-${{ matrix.arch }}.dll .\build\bin\Release\
           Copy-Item "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Redist\MSVC\14.44.35112\debug_nonredist\${{ matrix.arch }}\Microsoft.VC143.OpenMP.LLVM\libomp140.${{ matrix.arch == 'x64' && 'x86_64' || 'aarch64' }}.dll" .\build\bin\Release\
           7z a -snl llama-bin-win-cpu-${{ matrix.arch }}.zip .\build\bin\Release\*

@@ -744,12 +728,14 @@ jobs:
             "${{ steps.cann-image.outputs.image }}" \
             bash -lc '
               set -e
-              yum install -y --setopt=install_weak_deps=False --setopt=tsflags=nodocs git gcc gcc-c++ make cmake libcurl-devel
+              yum install -y --setopt=install_weak_deps=False --setopt=tsflags=nodocs git gcc gcc-c++ make cmake openssl-devel
               yum clean all && rm -rf /var/cache/yum
               git config --global --add safe.directory "/workspace"
               export LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:${ASCEND_TOOLKIT_HOME}/$(uname -m)-linux/devlib/:${LD_LIBRARY_PATH}
               cmake -S . -B build \
                 -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \
+                -DLLAMA_CURL=OFF \
+                -DLLAMA_OPENSSL=ON \
                 -DGGML_CANN=on \
                 -DSOC_TYPE=${SOC_TYPE}
               cmake --build build -j $(nproc)
diff --git a/common/arg.cpp b/common/arg.cpp
index f2675f842a..4b96c312f3 100644
--- a/common/arg.cpp
+++ b/common/arg.cpp
@@ -2877,10 +2877,18 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
             params.n_threads_http = value;
         }
     ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_THREADS_HTTP"));
+    add_opt(common_arg(
+        {"--cache-prompt"},
+        {"--no-cache-prompt"},
+        string_format("whether to enable prompt caching (default: %s)", params.cache_prompt ? "enabled" : "disabled"),
+        [](common_params & params, bool value) {
+            params.cache_prompt = value;
+        }
+    ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_CACHE_PROMPT"));
     add_opt(common_arg(
         {"--cache-reuse"}, "N",
         string_format(
-            "min chunk size to attempt reusing from the cache via KV shifting (default: %d)\n"
+            "min chunk size to attempt reusing from the cache via KV shifting, requires prompt caching to be enabled (default: %d)\n"
             "[(card)](https://ggml.ai/f0.png)", params.n_cache_reuse
         ),
         [](common_params & params, int value) {
diff --git a/common/common.h b/common/common.h
index b3ac04c4ae..e60087dea3 100644
--- a/common/common.h
+++ b/common/common.h
@@ -476,6 +476,7 @@ struct common_params {
     int32_t timeout_write = timeout_read; // http write timeout in seconds
     int32_t n_threads_http = -1; // number of threads to process HTTP requests (TODO: support threadpool)
     int32_t n_cache_reuse = 0; // min chunk size to reuse from the cache via KV shifting
+    bool cache_prompt = true; // whether to enable prompt caching

     int32_t n_ctx_checkpoints = 8; // max number of context checkpoints per slot
     int32_t cache_ram_mib = 8192; // -1 = no limit, 0 - disable, 1 = 1 MiB, etc.
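// Illustration, not part of the patch: a minimal sketch of how the new server-wide
// --cache-prompt / --no-cache-prompt default (common_params::cache_prompt above) is
// meant to interact with the per-request "cache_prompt" field. The helper name
// effective_cache_prompt and the std::optional stand-in for the request JSON are
// hypothetical; the server itself does the equivalent via
// json_value(data, "cache_prompt", defaults.cache_prompt) in the server-task.cpp hunk below.
#include <cstdio>
#include <optional>

struct server_defaults {
    bool cache_prompt = true; // filled from the --cache-prompt / LLAMA_ARG_CACHE_PROMPT setting
};

// hypothetical helper: the request value wins when present, otherwise the server default applies
static bool effective_cache_prompt(std::optional<bool> request_value, const server_defaults & defaults) {
    return request_value.value_or(defaults.cache_prompt);
}

int main() {
    server_defaults defaults;
    defaults.cache_prompt = false; // server started with --no-cache-prompt

    std::printf("%d\n", effective_cache_prompt(std::nullopt, defaults)); // 0: server default applies
    std::printf("%d\n", effective_cache_prompt(true, defaults));         // 1: request override still wins
    return 0;
}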
diff --git a/scripts/sync_vendor.py b/scripts/sync_vendor.py
index 95377b29f7..c3fbbc20b3 100755
--- a/scripts/sync_vendor.py
+++ b/scripts/sync_vendor.py
@@ -16,8 +16,8 @@ vendor = {
     # "https://github.com/mackron/miniaudio/raw/refs/tags/0.11.23/miniaudio.h": "vendor/miniaudio/miniaudio.h",
     "https://github.com/mackron/miniaudio/raw/669ed3e844524fcd883231b13095baee9f6de304/miniaudio.h": "vendor/miniaudio/miniaudio.h",

-    "https://raw.githubusercontent.com/yhirose/cpp-httplib/refs/tags/v0.30.0/httplib.h": "vendor/cpp-httplib/httplib.h",
-    "https://raw.githubusercontent.com/yhirose/cpp-httplib/refs/tags/v0.30.0/LICENSE": "vendor/cpp-httplib/LICENSE",
+    "https://raw.githubusercontent.com/yhirose/cpp-httplib/refs/tags/v0.30.1/httplib.h": "vendor/cpp-httplib/httplib.h",
+    "https://raw.githubusercontent.com/yhirose/cpp-httplib/refs/tags/v0.30.1/LICENSE": "vendor/cpp-httplib/LICENSE",

     "https://raw.githubusercontent.com/sheredom/subprocess.h/b49c56e9fe214488493021017bf3954b91c7c1f5/subprocess.h": "vendor/sheredom/subprocess.h",
 }
diff --git a/tools/server/server-task.cpp b/tools/server/server-task.cpp
index ed4f6546ea..aa4590e4ec 100644
--- a/tools/server/server-task.cpp
+++ b/tools/server/server-task.cpp
@@ -160,6 +160,7 @@ task_params server_task::params_from_json_cmpl(
     defaults.n_keep = params_base.n_keep;
     defaults.n_predict = params_base.n_predict;
     defaults.n_cache_reuse = params_base.n_cache_reuse;
+    defaults.cache_prompt = params_base.cache_prompt;
     defaults.antiprompt = params_base.antiprompt;

     // enabling this will output extra debug information in the HTTP responses from the server
@@ -169,7 +170,7 @@ task_params server_task::params_from_json_cmpl(
     params.stream = json_value(data, "stream", false);
     auto stream_opt = json_value(data, "stream_options", json::object());
     params.include_usage = json_value(stream_opt, "include_usage", false);
-    params.cache_prompt = json_value(data, "cache_prompt", true);
+    params.cache_prompt = json_value(data, "cache_prompt", defaults.cache_prompt);
     params.return_tokens = json_value(data, "return_tokens", false);
     params.return_progress = json_value(data, "return_progress", false);
     params.n_predict = json_value(data, "n_predict", json_value(data, "max_tokens", defaults.n_predict));
diff --git a/vendor/cpp-httplib/httplib.cpp b/vendor/cpp-httplib/httplib.cpp
index a437a36ed7..d707e65fd3 100644
--- a/vendor/cpp-httplib/httplib.cpp
+++ b/vendor/cpp-httplib/httplib.cpp
@@ -1138,6 +1138,7 @@ int getaddrinfo_with_timeout(const char *node, const char *service,
   return ret;

 #elif TARGET_OS_MAC
+  if (!node) { return EAI_NONAME; }
   // macOS implementation using CFHost API for asynchronous DNS resolution
   CFStringRef hostname_ref = CFStringCreateWithCString(
       kCFAllocatorDefault, node, kCFStringEncodingUTF8);
@@ -5569,14 +5570,11 @@ bool Server::read_content(Stream &strm, Request &req, Response &res) {
       strm, req, res,
       // Regular
       [&](const char *buf, size_t n) {
-        // Prevent arithmetic overflow when checking sizes.
-        // Avoid computing (req.body.size() + n) directly because
-        // adding two unsigned `size_t` values can wrap around and
-        // produce a small result instead of indicating overflow.
-        // Instead, check using subtraction: ensure `n` does not
-        // exceed the remaining capacity `max_size() - size()`.
-        if (req.body.size() >= req.body.max_size() ||
-            n > req.body.max_size() - req.body.size()) {
+        // Limit decompressed body size to payload_max_length_ to protect
+        // against "zip bomb" attacks where a small compressed payload
+        // decompresses to a massive size.
+        if (req.body.size() + n > payload_max_length_ ||
+            req.body.size() + n > req.body.max_size()) {
           return false;
         }
         req.body.append(buf, n);
diff --git a/vendor/cpp-httplib/httplib.h b/vendor/cpp-httplib/httplib.h
index 43cdbc5832..613020d12c 100644
--- a/vendor/cpp-httplib/httplib.h
+++ b/vendor/cpp-httplib/httplib.h
@@ -8,8 +8,8 @@
 #ifndef CPPHTTPLIB_HTTPLIB_H
 #define CPPHTTPLIB_HTTPLIB_H

-#define CPPHTTPLIB_VERSION "0.30.0"
-#define CPPHTTPLIB_VERSION_NUM "0x001E00"
+#define CPPHTTPLIB_VERSION "0.30.1"
+#define CPPHTTPLIB_VERSION_NUM "0x001E01"

 /*
  * Platform compatibility check
@@ -205,7 +205,10 @@
 #pragma comment(lib, "ws2_32.lib")

+#ifndef _SSIZE_T_DEFINED
 using ssize_t = __int64;
+#define _SSIZE_T_DEFINED
+#endif

 #endif // _MSC_VER

 #ifndef S_ISREG
@@ -2443,16 +2446,20 @@ namespace detail {

 #if defined(_WIN32)
 inline std::wstring u8string_to_wstring(const char *s) {
-  std::wstring ws;
+  if (!s) { return std::wstring(); }
+
   auto len = static_cast<int>(strlen(s));
+  if (!len) { return std::wstring(); }
+
   auto wlen = ::MultiByteToWideChar(CP_UTF8, 0, s, len, nullptr, 0);
-  if (wlen > 0) {
-    ws.resize(wlen);
-    wlen = ::MultiByteToWideChar(
-        CP_UTF8, 0, s, len,
-        const_cast<LPWSTR>(reinterpret_cast<LPCWSTR>(ws.data())), wlen);
-    if (wlen != static_cast<int>(ws.size())) { ws.clear(); }
-  }
+  if (!wlen) { return std::wstring(); }
+
+  std::wstring ws;
+  ws.resize(wlen);
+  wlen = ::MultiByteToWideChar(
+      CP_UTF8, 0, s, len,
+      const_cast<LPWSTR>(reinterpret_cast<LPCWSTR>(ws.data())), wlen);
+  if (wlen != static_cast<int>(ws.size())) { ws.clear(); }
   return ws;
 }
 #endif
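// Illustration, not part of the patch: a minimal sketch of the bounded-append idea
// behind the Server::read_content() change above — the accumulated (decompressed)
// request body is rejected once it would exceed the configured payload limit, so a
// small compressed payload cannot expand without bound. The helper name
// append_with_limit is hypothetical; httplib performs the equivalent check inline
// against payload_max_length_.
#include <cstddef>
#include <string>

static bool append_with_limit(std::string & body, const char * buf, std::size_t n,
                              std::size_t payload_max_length) {
    // reject once the total would exceed the payload limit or the string's own capacity
    if (body.size() + n > payload_max_length ||
        body.size() + n > body.max_size()) {
        return false; // caller treats the request as failed (payload too large)
    }
    body.append(buf, n);
    return true;
}

int main() {
    std::string body;
    const char chunk[1024] = {};
    // with a 4 KiB limit, the fifth 1 KiB chunk is rejected
    for (int i = 0; i < 5; ++i) {
        if (!append_with_limit(body, chunk, sizeof(chunk), 4 * 1024)) {
            return 1;
        }
    }
    return 0;
}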