Merge branch 'master' of https://github.com/ggml-org/llama.cpp into paddleocr-vl
This commit is contained in:
commit
6634ff16ea
|
|
@ -1,30 +0,0 @@
|
|||
name: 'Windows - Setup CURL'
|
||||
description: 'Composite action, to be reused in other workflow'
|
||||
inputs:
|
||||
curl_version:
|
||||
description: 'CURL version'
|
||||
required: false
|
||||
default: '8.6.0_6'
|
||||
architecture:
|
||||
description: 'Architecture of the libcurl to download'
|
||||
required: false
|
||||
default: 'win64'
|
||||
outputs:
|
||||
curl_path:
|
||||
description: "Path to the downloaded libcurl"
|
||||
value: ${{ steps.get_libcurl.outputs.curl_path }}
|
||||
|
||||
runs:
|
||||
using: "composite"
|
||||
steps:
|
||||
- name: libCURL
|
||||
id: get_libcurl
|
||||
shell: powershell
|
||||
env:
|
||||
CURL_VERSION: ${{ inputs.curl_version }}
|
||||
ARCHITECTURE: ${{ inputs.architecture }}
|
||||
run: |
|
||||
curl.exe -o $env:RUNNER_TEMP/curl.zip -L "https://curl.se/windows/dl-${env:CURL_VERSION}/curl-${env:CURL_VERSION}-${env:ARCHITECTURE}-mingw.zip"
|
||||
mkdir $env:RUNNER_TEMP/libcurl
|
||||
tar.exe -xvf $env:RUNNER_TEMP/curl.zip --strip-components=1 -C $env:RUNNER_TEMP/libcurl
|
||||
echo "curl_path=$env:RUNNER_TEMP/libcurl" >> $env:GITHUB_OUTPUT
|
||||
|
|
@ -1463,12 +1463,14 @@ jobs:
|
|||
"${{ steps.cann-image.outputs.image }}" \
|
||||
bash -lc '
|
||||
set -e
|
||||
yum install -y --setopt=install_weak_deps=False --setopt=tsflags=nodocs git gcc gcc-c++ make cmake libcurl-devel
|
||||
yum install -y --setopt=install_weak_deps=False --setopt=tsflags=nodocs git gcc gcc-c++ make cmake openssl-devel
|
||||
yum clean all && rm -rf /var/cache/yum
|
||||
git config --global --add safe.directory "/workspace"
|
||||
export LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:${ASCEND_TOOLKIT_HOME}/$(uname -m)-linux/devlib/:${LD_LIBRARY_PATH}
|
||||
cmake -S . -B build \
|
||||
-DCMAKE_BUILD_TYPE=${BUILD_TYPE} \
|
||||
-DLLAMA_CURL=OFF \
|
||||
-DLLAMA_OPENSSL=ON \
|
||||
-DGGML_CANN=on \
|
||||
-DSOC_TYPE=${SOC_TYPE}
|
||||
cmake --build build -j $(nproc)
|
||||
|
|
|
|||
|
|
@ -37,13 +37,6 @@ jobs:
|
|||
key: macOS-latest-cmake-arm64
|
||||
evict-old-files: 1d
|
||||
|
||||
- name: Dependencies
|
||||
id: depends
|
||||
continue-on-error: true
|
||||
run: |
|
||||
brew update
|
||||
brew install curl
|
||||
|
||||
- name: Build
|
||||
id: cmake_build
|
||||
run: |
|
||||
|
|
@ -52,6 +45,8 @@ jobs:
|
|||
-DCMAKE_INSTALL_RPATH='@loader_path' \
|
||||
-DCMAKE_BUILD_WITH_INSTALL_RPATH=ON \
|
||||
-DLLAMA_FATAL_WARNINGS=ON \
|
||||
-DLLAMA_CURL=OFF \
|
||||
-DLLAMA_BUILD_BORINGSSL=ON \
|
||||
-DGGML_METAL_USE_BF16=ON \
|
||||
-DGGML_METAL_EMBED_LIBRARY=ON \
|
||||
-DGGML_RPC=ON \
|
||||
|
|
@ -90,13 +85,6 @@ jobs:
|
|||
key: macOS-latest-cmake-x64
|
||||
evict-old-files: 1d
|
||||
|
||||
- name: Dependencies
|
||||
id: depends
|
||||
continue-on-error: true
|
||||
run: |
|
||||
brew update
|
||||
brew install curl
|
||||
|
||||
- name: Build
|
||||
id: cmake_build
|
||||
run: |
|
||||
|
|
@ -107,6 +95,8 @@ jobs:
|
|||
-DCMAKE_INSTALL_RPATH='@loader_path' \
|
||||
-DCMAKE_BUILD_WITH_INSTALL_RPATH=ON \
|
||||
-DLLAMA_FATAL_WARNINGS=ON \
|
||||
-DLLAMA_CURL=OFF \
|
||||
-DLLAMA_BUILD_BORINGSSL=ON \
|
||||
-DGGML_METAL=OFF \
|
||||
-DGGML_RPC=ON \
|
||||
-DCMAKE_OSX_DEPLOYMENT_TARGET=13.3
|
||||
|
|
@ -159,7 +149,7 @@ jobs:
|
|||
id: depends
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install build-essential libcurl4-openssl-dev
|
||||
sudo apt-get install build-essential libssl-dev
|
||||
|
||||
- name: Build
|
||||
id: cmake_build
|
||||
|
|
@ -171,6 +161,8 @@ jobs:
|
|||
-DGGML_NATIVE=OFF \
|
||||
-DGGML_CPU_ALL_VARIANTS=ON \
|
||||
-DLLAMA_FATAL_WARNINGS=ON \
|
||||
-DLLAMA_CURL=OFF \
|
||||
-DLLAMA_OPENSSL=ON \
|
||||
${{ env.CMAKE_ARGS }}
|
||||
cmake --build build --config Release -j $(nproc)
|
||||
|
||||
|
|
@ -212,7 +204,7 @@ jobs:
|
|||
wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | sudo apt-key add -
|
||||
sudo wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list
|
||||
sudo apt-get update -y
|
||||
sudo apt-get install -y build-essential mesa-vulkan-drivers vulkan-sdk libcurl4-openssl-dev
|
||||
sudo apt-get install -y build-essential mesa-vulkan-drivers vulkan-sdk libssl-dev
|
||||
|
||||
- name: Build
|
||||
id: cmake_build
|
||||
|
|
@ -220,6 +212,8 @@ jobs:
|
|||
cmake -B build \
|
||||
-DCMAKE_INSTALL_RPATH='$ORIGIN' \
|
||||
-DCMAKE_BUILD_WITH_INSTALL_RPATH=ON \
|
||||
-DLLAMA_CURL=OFF \
|
||||
-DLLAMA_OPENSSL=ON \
|
||||
-DGGML_BACKEND_DL=ON \
|
||||
-DGGML_NATIVE=OFF \
|
||||
-DGGML_CPU_ALL_VARIANTS=ON \
|
||||
|
|
@ -269,34 +263,24 @@ jobs:
|
|||
run: |
|
||||
choco install ninja
|
||||
|
||||
- name: libCURL
|
||||
id: get_libcurl
|
||||
uses: ./.github/actions/windows-setup-curl
|
||||
with:
|
||||
architecture: ${{ matrix.arch == 'x64' && 'win64' || 'win64a' }}
|
||||
|
||||
- name: Build
|
||||
shell: cmd
|
||||
env:
|
||||
CURL_PATH: ${{ steps.get_libcurl.outputs.curl_path }}
|
||||
run: |
|
||||
call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" ${{ matrix.arch == 'x64' && 'x64' || 'amd64_arm64' }}
|
||||
cmake -S . -B build -G "Ninja Multi-Config" ^
|
||||
-D CMAKE_TOOLCHAIN_FILE=cmake/${{ matrix.arch }}-windows-llvm.cmake ^
|
||||
-DLLAMA_CURL=OFF ^
|
||||
-DLLAMA_BUILD_BORINGSSL=ON ^
|
||||
-DGGML_NATIVE=OFF ^
|
||||
-DGGML_BACKEND_DL=ON ^
|
||||
-DGGML_CPU_ALL_VARIANTS=${{ matrix.arch == 'x64' && 'ON' || 'OFF' }} ^
|
||||
-DGGML_OPENMP=ON ^
|
||||
-DCURL_LIBRARY="%CURL_PATH%/lib/libcurl.dll.a" -DCURL_INCLUDE_DIR="%CURL_PATH%/include" ^
|
||||
${{ env.CMAKE_ARGS }}
|
||||
cmake --build build --config Release
|
||||
|
||||
- name: Pack artifacts
|
||||
id: pack_artifacts
|
||||
env:
|
||||
CURL_PATH: ${{ steps.get_libcurl.outputs.curl_path }}
|
||||
run: |
|
||||
Copy-Item $env:CURL_PATH\bin\libcurl-${{ matrix.arch }}.dll .\build\bin\Release\
|
||||
Copy-Item "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Redist\MSVC\14.44.35112\debug_nonredist\${{ matrix.arch }}\Microsoft.VC143.OpenMP.LLVM\libomp140.${{ matrix.arch == 'x64' && 'x86_64' || 'aarch64' }}.dll" .\build\bin\Release\
|
||||
7z a -snl llama-bin-win-cpu-${{ matrix.arch }}.zip .\build\bin\Release\*
|
||||
|
||||
|
|
@ -744,12 +728,14 @@ jobs:
|
|||
"${{ steps.cann-image.outputs.image }}" \
|
||||
bash -lc '
|
||||
set -e
|
||||
yum install -y --setopt=install_weak_deps=False --setopt=tsflags=nodocs git gcc gcc-c++ make cmake libcurl-devel
|
||||
yum install -y --setopt=install_weak_deps=False --setopt=tsflags=nodocs git gcc gcc-c++ make cmake openssl-devel
|
||||
yum clean all && rm -rf /var/cache/yum
|
||||
git config --global --add safe.directory "/workspace"
|
||||
export LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:${ASCEND_TOOLKIT_HOME}/$(uname -m)-linux/devlib/:${LD_LIBRARY_PATH}
|
||||
cmake -S . -B build \
|
||||
-DCMAKE_BUILD_TYPE=${BUILD_TYPE} \
|
||||
-DLLAMA_CURL=OFF \
|
||||
-DLLAMA_OPENSSL=ON \
|
||||
-DGGML_CANN=on \
|
||||
-DSOC_TYPE=${SOC_TYPE}
|
||||
cmake --build build -j $(nproc)
|
||||
|
|
|
|||
|
|
@ -2877,10 +2877,18 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
|
|||
params.n_threads_http = value;
|
||||
}
|
||||
).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_THREADS_HTTP"));
|
||||
add_opt(common_arg(
|
||||
{"--cache-prompt"},
|
||||
{"--no-cache-prompt"},
|
||||
string_format("whether to enable prompt caching (default: %s)", params.cache_prompt ? "enabled" : "disabled"),
|
||||
[](common_params & params, bool value) {
|
||||
params.cache_prompt = value;
|
||||
}
|
||||
).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_CACHE_PROMPT"));
|
||||
add_opt(common_arg(
|
||||
{"--cache-reuse"}, "N",
|
||||
string_format(
|
||||
"min chunk size to attempt reusing from the cache via KV shifting (default: %d)\n"
|
||||
"min chunk size to attempt reusing from the cache via KV shifting, requires prompt caching to be enabled (default: %d)\n"
|
||||
"[(card)](https://ggml.ai/f0.png)", params.n_cache_reuse
|
||||
),
|
||||
[](common_params & params, int value) {
|
||||
|
|
|
|||
|
|
@ -476,6 +476,7 @@ struct common_params {
|
|||
int32_t timeout_write = timeout_read; // http write timeout in seconds
|
||||
int32_t n_threads_http = -1; // number of threads to process HTTP requests (TODO: support threadpool)
|
||||
int32_t n_cache_reuse = 0; // min chunk size to reuse from the cache via KV shifting
|
||||
bool cache_prompt = true; // whether to enable prompt caching
|
||||
int32_t n_ctx_checkpoints = 8; // max number of context checkpoints per slot
|
||||
int32_t cache_ram_mib = 8192; // -1 = no limit, 0 - disable, 1 = 1 MiB, etc.
|
||||
|
||||
|
|
|
|||
|
|
@ -16,8 +16,8 @@ vendor = {
|
|||
# "https://github.com/mackron/miniaudio/raw/refs/tags/0.11.23/miniaudio.h": "vendor/miniaudio/miniaudio.h",
|
||||
"https://github.com/mackron/miniaudio/raw/669ed3e844524fcd883231b13095baee9f6de304/miniaudio.h": "vendor/miniaudio/miniaudio.h",
|
||||
|
||||
"https://raw.githubusercontent.com/yhirose/cpp-httplib/refs/tags/v0.30.0/httplib.h": "vendor/cpp-httplib/httplib.h",
|
||||
"https://raw.githubusercontent.com/yhirose/cpp-httplib/refs/tags/v0.30.0/LICENSE": "vendor/cpp-httplib/LICENSE",
|
||||
"https://raw.githubusercontent.com/yhirose/cpp-httplib/refs/tags/v0.30.1/httplib.h": "vendor/cpp-httplib/httplib.h",
|
||||
"https://raw.githubusercontent.com/yhirose/cpp-httplib/refs/tags/v0.30.1/LICENSE": "vendor/cpp-httplib/LICENSE",
|
||||
|
||||
"https://raw.githubusercontent.com/sheredom/subprocess.h/b49c56e9fe214488493021017bf3954b91c7c1f5/subprocess.h": "vendor/sheredom/subprocess.h",
|
||||
}
|
||||
|
|
|
|||
|
|
@ -160,6 +160,7 @@ task_params server_task::params_from_json_cmpl(
|
|||
defaults.n_keep = params_base.n_keep;
|
||||
defaults.n_predict = params_base.n_predict;
|
||||
defaults.n_cache_reuse = params_base.n_cache_reuse;
|
||||
defaults.cache_prompt = params_base.cache_prompt;
|
||||
defaults.antiprompt = params_base.antiprompt;
|
||||
|
||||
// enabling this will output extra debug information in the HTTP responses from the server
|
||||
|
|
@ -169,7 +170,7 @@ task_params server_task::params_from_json_cmpl(
|
|||
params.stream = json_value(data, "stream", false);
|
||||
auto stream_opt = json_value(data, "stream_options", json::object());
|
||||
params.include_usage = json_value(stream_opt, "include_usage", false);
|
||||
params.cache_prompt = json_value(data, "cache_prompt", true);
|
||||
params.cache_prompt = json_value(data, "cache_prompt", defaults.cache_prompt);
|
||||
params.return_tokens = json_value(data, "return_tokens", false);
|
||||
params.return_progress = json_value(data, "return_progress", false);
|
||||
params.n_predict = json_value(data, "n_predict", json_value(data, "max_tokens", defaults.n_predict));
|
||||
|
|
|
|||
|
|
@ -1138,6 +1138,7 @@ int getaddrinfo_with_timeout(const char *node, const char *service,
|
|||
|
||||
return ret;
|
||||
#elif TARGET_OS_MAC
|
||||
if (!node) { return EAI_NONAME; }
|
||||
// macOS implementation using CFHost API for asynchronous DNS resolution
|
||||
CFStringRef hostname_ref = CFStringCreateWithCString(
|
||||
kCFAllocatorDefault, node, kCFStringEncodingUTF8);
|
||||
|
|
@ -5569,14 +5570,11 @@ bool Server::read_content(Stream &strm, Request &req, Response &res) {
|
|||
strm, req, res,
|
||||
// Regular
|
||||
[&](const char *buf, size_t n) {
|
||||
// Prevent arithmetic overflow when checking sizes.
|
||||
// Avoid computing (req.body.size() + n) directly because
|
||||
// adding two unsigned `size_t` values can wrap around and
|
||||
// produce a small result instead of indicating overflow.
|
||||
// Instead, check using subtraction: ensure `n` does not
|
||||
// exceed the remaining capacity `max_size() - size()`.
|
||||
if (req.body.size() >= req.body.max_size() ||
|
||||
n > req.body.max_size() - req.body.size()) {
|
||||
// Limit decompressed body size to payload_max_length_ to protect
|
||||
// against "zip bomb" attacks where a small compressed payload
|
||||
// decompresses to a massive size.
|
||||
if (req.body.size() + n > payload_max_length_ ||
|
||||
req.body.size() + n > req.body.max_size()) {
|
||||
return false;
|
||||
}
|
||||
req.body.append(buf, n);
|
||||
|
|
|
|||
|
|
@ -8,8 +8,8 @@
|
|||
#ifndef CPPHTTPLIB_HTTPLIB_H
|
||||
#define CPPHTTPLIB_HTTPLIB_H
|
||||
|
||||
#define CPPHTTPLIB_VERSION "0.30.0"
|
||||
#define CPPHTTPLIB_VERSION_NUM "0x001E00"
|
||||
#define CPPHTTPLIB_VERSION "0.30.1"
|
||||
#define CPPHTTPLIB_VERSION_NUM "0x001E01"
|
||||
|
||||
/*
|
||||
* Platform compatibility check
|
||||
|
|
@ -205,7 +205,10 @@
|
|||
|
||||
#pragma comment(lib, "ws2_32.lib")
|
||||
|
||||
#ifndef _SSIZE_T_DEFINED
|
||||
using ssize_t = __int64;
|
||||
#define _SSIZE_T_DEFINED
|
||||
#endif
|
||||
#endif // _MSC_VER
|
||||
|
||||
#ifndef S_ISREG
|
||||
|
|
@ -2443,16 +2446,20 @@ namespace detail {
|
|||
|
||||
#if defined(_WIN32)
|
||||
inline std::wstring u8string_to_wstring(const char *s) {
|
||||
std::wstring ws;
|
||||
if (!s) { return std::wstring(); }
|
||||
|
||||
auto len = static_cast<int>(strlen(s));
|
||||
if (!len) { return std::wstring(); }
|
||||
|
||||
auto wlen = ::MultiByteToWideChar(CP_UTF8, 0, s, len, nullptr, 0);
|
||||
if (wlen > 0) {
|
||||
ws.resize(wlen);
|
||||
wlen = ::MultiByteToWideChar(
|
||||
CP_UTF8, 0, s, len,
|
||||
const_cast<LPWSTR>(reinterpret_cast<LPCWSTR>(ws.data())), wlen);
|
||||
if (wlen != static_cast<int>(ws.size())) { ws.clear(); }
|
||||
}
|
||||
if (!wlen) { return std::wstring(); }
|
||||
|
||||
std::wstring ws;
|
||||
ws.resize(wlen);
|
||||
wlen = ::MultiByteToWideChar(
|
||||
CP_UTF8, 0, s, len,
|
||||
const_cast<LPWSTR>(reinterpret_cast<LPCWSTR>(ws.data())), wlen);
|
||||
if (wlen != static_cast<int>(ws.size())) { ws.clear(); }
|
||||
return ws;
|
||||
}
|
||||
#endif
|
||||
|
|
|
|||
Loading…
Reference in New Issue