Merge branch 'master' into feat-rdma-9493
This commit is contained in:
commit
28f18da663
|
|
@ -0,0 +1,57 @@
|
||||||
|
name: CI (3rd-party)
|
||||||
|
|
||||||
|
on:
|
||||||
|
workflow_dispatch: # allows manual triggering
|
||||||
|
push:
|
||||||
|
branches:
|
||||||
|
- master
|
||||||
|
paths: [
|
||||||
|
'.github/workflows/build-3rd-party.yml',
|
||||||
|
'**/CMakeLists.txt',
|
||||||
|
'**/*.cmake',
|
||||||
|
'**/*.h',
|
||||||
|
'**/*.hpp',
|
||||||
|
'**/*.c',
|
||||||
|
'**/*.cpp'
|
||||||
|
]
|
||||||
|
|
||||||
|
concurrency:
|
||||||
|
group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
|
||||||
|
cancel-in-progress: true
|
||||||
|
|
||||||
|
env:
|
||||||
|
GGML_NLOOP: 3
|
||||||
|
GGML_N_THREADS: 1
|
||||||
|
LLAMA_LOG_COLORS: 1
|
||||||
|
LLAMA_LOG_PREFIX: 1
|
||||||
|
LLAMA_LOG_TIMESTAMPS: 1
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
ubuntu-24-llguidance:
|
||||||
|
runs-on: ${{ 'ubuntu-24.04-arm' || 'ubuntu-24.04' }}
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- name: Clone
|
||||||
|
id: checkout
|
||||||
|
uses: actions/checkout@v6
|
||||||
|
|
||||||
|
- name: Dependencies
|
||||||
|
id: depends
|
||||||
|
run: |
|
||||||
|
sudo apt-get update
|
||||||
|
sudo apt-get install build-essential libssl-dev
|
||||||
|
|
||||||
|
- name: Build
|
||||||
|
id: cmake_build
|
||||||
|
run: |
|
||||||
|
cmake -B build \
|
||||||
|
-DLLAMA_FATAL_WARNINGS=ON \
|
||||||
|
-DLLAMA_LLGUIDANCE=ON
|
||||||
|
cmake --build build --config Release -j $(nproc)
|
||||||
|
|
||||||
|
- name: Test
|
||||||
|
id: cmake_test
|
||||||
|
run: |
|
||||||
|
cd build
|
||||||
|
ctest -L main --verbose --timeout 900
|
||||||
|
|
||||||
|
|
@ -0,0 +1,140 @@
|
||||||
|
name: CI (android)
|
||||||
|
|
||||||
|
on:
|
||||||
|
workflow_dispatch: # allows manual triggering
|
||||||
|
push:
|
||||||
|
branches:
|
||||||
|
- master
|
||||||
|
paths: [
|
||||||
|
'.github/workflows/build-android.yml',
|
||||||
|
'**/CMakeLists.txt',
|
||||||
|
'**/*.cmake',
|
||||||
|
'**/*.h',
|
||||||
|
'**/*.hpp',
|
||||||
|
'**/*.c',
|
||||||
|
'**/*.cpp'
|
||||||
|
]
|
||||||
|
|
||||||
|
pull_request:
|
||||||
|
types: [opened, synchronize, reopened]
|
||||||
|
paths: [
|
||||||
|
'.github/workflows/build-android.yml',
|
||||||
|
'examples/llama.android/**'
|
||||||
|
]
|
||||||
|
|
||||||
|
concurrency:
|
||||||
|
group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
|
||||||
|
cancel-in-progress: true
|
||||||
|
|
||||||
|
env:
|
||||||
|
GGML_NLOOP: 3
|
||||||
|
GGML_N_THREADS: 1
|
||||||
|
LLAMA_LOG_COLORS: 1
|
||||||
|
LLAMA_LOG_PREFIX: 1
|
||||||
|
LLAMA_LOG_TIMESTAMPS: 1
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
android:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- name: Clone
|
||||||
|
uses: actions/checkout@v6
|
||||||
|
|
||||||
|
# Disabled due to size (400MB) and always 0 cache hits
|
||||||
|
# - name: ccache
|
||||||
|
# uses: ggml-org/ccache-action@v1.2.16
|
||||||
|
# with:
|
||||||
|
# key: android-build
|
||||||
|
# evict-old-files: 1d
|
||||||
|
|
||||||
|
- name: Set up JDK
|
||||||
|
uses: actions/setup-java@v5
|
||||||
|
with:
|
||||||
|
java-version: 17
|
||||||
|
distribution: zulu
|
||||||
|
|
||||||
|
- name: Setup Android SDK
|
||||||
|
uses: android-actions/setup-android@v3
|
||||||
|
with:
|
||||||
|
log-accepted-android-sdk-licenses: false
|
||||||
|
|
||||||
|
- name: Build
|
||||||
|
run: |
|
||||||
|
cd examples/llama.android
|
||||||
|
./gradlew build --no-daemon
|
||||||
|
|
||||||
|
android-ndk:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
|
||||||
|
env:
|
||||||
|
OPENCL_VERSION: 2025.07.22
|
||||||
|
|
||||||
|
strategy:
|
||||||
|
matrix:
|
||||||
|
include:
|
||||||
|
- build: 'arm64-cpu'
|
||||||
|
defines: '-D ANDROID_ABI=arm64-v8a -D ANDROID_PLATFORM=android-31 -D CMAKE_TOOLCHAIN_FILE=${ANDROID_NDK_ROOT}/build/cmake/android.toolchain.cmake -D GGML_NATIVE=OFF -DGGML_CPU_ARM_ARCH=armv8.5-a+fp16+i8mm -G Ninja -D LLAMA_OPENSSL=OFF -D GGML_OPENMP=OFF'
|
||||||
|
- build: 'arm64-snapdragon'
|
||||||
|
defines: '--preset arm64-android-snapdragon-release'
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- name: Clone
|
||||||
|
id: checkout
|
||||||
|
uses: actions/checkout@v6
|
||||||
|
|
||||||
|
- name: Install OpenCL Headers and Libs
|
||||||
|
id: install_opencl
|
||||||
|
if: ${{ matrix.build == 'arm64-snapdragon' }}
|
||||||
|
run: |
|
||||||
|
mkdir opencl
|
||||||
|
curl -L -o opencl/clhpp.tar.gz https://github.com/KhronosGroup/OpenCL-CLHPP/archive/refs/tags/v${OPENCL_VERSION}.tar.gz
|
||||||
|
curl -L -o opencl/headers.tar.gz https://github.com/KhronosGroup/OpenCL-Headers/archive/refs/tags/v${OPENCL_VERSION}.tar.gz
|
||||||
|
curl -L -o opencl/icd-loader.tar.gz https://github.com/KhronosGroup/OpenCL-ICD-Loader/archive/refs/tags/v${OPENCL_VERSION}.tar.gz
|
||||||
|
tar -xaf opencl/headers.tar.gz -C opencl
|
||||||
|
tar -xaf opencl/clhpp.tar.gz -C opencl
|
||||||
|
tar -xaf opencl/icd-loader.tar.gz -C opencl
|
||||||
|
sudo cp -r opencl/OpenCL-Headers-${OPENCL_VERSION}/CL ${ANDROID_NDK_ROOT}/toolchains/llvm/prebuilt/linux-x86_64/sysroot/usr/include
|
||||||
|
sudo cp -r opencl/OpenCL-CLHPP-${OPENCL_VERSION}/include/CL/* ${ANDROID_NDK_ROOT}/toolchains/llvm/prebuilt/linux-x86_64/sysroot/usr/include/CL
|
||||||
|
cd opencl/OpenCL-ICD-Loader-${OPENCL_VERSION}
|
||||||
|
cmake -B build -G Ninja -DCMAKE_BUILD_TYPE=Release -DCMAKE_TOOLCHAIN_FILE=${ANDROID_NDK_ROOT}/build/cmake/android.toolchain.cmake -DOPENCL_ICD_LOADER_HEADERS_DIR=${ANDROID_NDK_ROOT}/toolchains/llvm/prebuilt/linux-x86_64/sysroot/usr/include -DANDROID_ABI=arm64-v8a -DANDROID_PLATFORM=31 -DANDROID_STL=c++_shared
|
||||||
|
cmake --build build
|
||||||
|
sudo cp build/libOpenCL.so ${ANDROID_NDK_ROOT}/toolchains/llvm/prebuilt/linux-x86_64/sysroot/usr/lib/aarch64-linux-android
|
||||||
|
rm -rf opencl
|
||||||
|
|
||||||
|
- name: Install Hexagon SDK
|
||||||
|
id: install_hexsdk
|
||||||
|
if: ${{ matrix.build == 'arm64-snapdragon' }}
|
||||||
|
env:
|
||||||
|
HEXSDK_VER: 6.4.0.2
|
||||||
|
HEXTLS_VER: 19.0.04
|
||||||
|
run: |
|
||||||
|
curl -L -o hex-sdk.tar.gz https://github.com/snapdragon-toolchain/hexagon-sdk/releases/download/v$HEXSDK_VER/hexagon-sdk-v$HEXSDK_VER-amd64-lnx.tar.xz
|
||||||
|
mkdir hex-sdk
|
||||||
|
tar -xaf hex-sdk.tar.gz -C hex-sdk
|
||||||
|
ls -l hex-sdk
|
||||||
|
sudo mv hex-sdk /opt/hexagon
|
||||||
|
echo "HEXAGON_SDK_ROOT=/opt/hexagon/$HEXSDK_VER" >> "$GITHUB_ENV"
|
||||||
|
echo "HEXAGON_TOOLS_ROOT=/opt/hexagon/$HEXSDK_VER/tools/HEXAGON_Tools/$HEXTLS_VER" >> "$GITHUB_ENV"
|
||||||
|
echo "DEFAULT_HLOS_ARCH=64" >> "$GITHUB_ENV"
|
||||||
|
echo "DEFAULT_TOOLS_VARIANT=toolv19" >> "$GITHUB_ENV"
|
||||||
|
echo "DEFAULT_NO_QURT_INC=0" >> "$GITHUB_ENV"
|
||||||
|
echo "DEFAULT_DSP_ARCH=v73" >> "$GITHUB_ENV"
|
||||||
|
|
||||||
|
- name: Update CMake presets
|
||||||
|
id: update_presets
|
||||||
|
if: ${{ matrix.build == 'arm64-snapdragon' }}
|
||||||
|
run: |
|
||||||
|
cp docs/backend/snapdragon/CMakeUserPresets.json .
|
||||||
|
|
||||||
|
- name: Build
|
||||||
|
id: ndk_build
|
||||||
|
run: |
|
||||||
|
cmake ${{ matrix.defines }} -B build
|
||||||
|
cmake --build build
|
||||||
|
cmake --install build --prefix pkg-adb/llama.cpp
|
||||||
|
|
||||||
|
- name: Test
|
||||||
|
id: cmake_test
|
||||||
|
run: |
|
||||||
|
echo "FIXME: test on devices"
|
||||||
|
|
@ -0,0 +1,214 @@
|
||||||
|
name: CI (apple)
|
||||||
|
|
||||||
|
on:
|
||||||
|
workflow_dispatch: # allows manual triggering
|
||||||
|
push:
|
||||||
|
branches:
|
||||||
|
- master
|
||||||
|
paths: [
|
||||||
|
'.github/workflows/build-apple.yml',
|
||||||
|
'**/CMakeLists.txt',
|
||||||
|
'**/*.cmake',
|
||||||
|
'**/*.h',
|
||||||
|
'**/*.hpp',
|
||||||
|
'**/*.c',
|
||||||
|
'**/*.cpp',
|
||||||
|
'**/*.swift',
|
||||||
|
'**/*.m',
|
||||||
|
'**/*.metal'
|
||||||
|
]
|
||||||
|
|
||||||
|
pull_request:
|
||||||
|
types: [opened, synchronize, reopened]
|
||||||
|
paths: [
|
||||||
|
'.github/workflows/build-apple.yml',
|
||||||
|
'ggml/src/ggml-metal/**'
|
||||||
|
]
|
||||||
|
|
||||||
|
concurrency:
|
||||||
|
group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
|
||||||
|
cancel-in-progress: true
|
||||||
|
|
||||||
|
env:
|
||||||
|
GGML_NLOOP: 3
|
||||||
|
GGML_N_THREADS: 1
|
||||||
|
LLAMA_LOG_COLORS: 1
|
||||||
|
LLAMA_LOG_PREFIX: 1
|
||||||
|
LLAMA_LOG_TIMESTAMPS: 1
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
macOS-latest-ios:
|
||||||
|
runs-on: macos-latest
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- name: Clone
|
||||||
|
id: checkout
|
||||||
|
uses: actions/checkout@v6
|
||||||
|
|
||||||
|
- name: ccache
|
||||||
|
uses: ggml-org/ccache-action@v1.2.16
|
||||||
|
with:
|
||||||
|
key: macOS-latest-ios
|
||||||
|
evict-old-files: 1d
|
||||||
|
save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}
|
||||||
|
|
||||||
|
- name: Build
|
||||||
|
id: cmake_build
|
||||||
|
run: |
|
||||||
|
sysctl -a
|
||||||
|
cmake -B build -G Xcode \
|
||||||
|
-DGGML_METAL_USE_BF16=ON \
|
||||||
|
-DGGML_METAL_EMBED_LIBRARY=ON \
|
||||||
|
-DLLAMA_BUILD_COMMON=OFF \
|
||||||
|
-DLLAMA_BUILD_EXAMPLES=OFF \
|
||||||
|
-DLLAMA_BUILD_TOOLS=OFF \
|
||||||
|
-DLLAMA_BUILD_TESTS=OFF \
|
||||||
|
-DLLAMA_BUILD_SERVER=OFF \
|
||||||
|
-DCMAKE_SYSTEM_NAME=iOS \
|
||||||
|
-DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 \
|
||||||
|
-DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml
|
||||||
|
cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO
|
||||||
|
|
||||||
|
macos-latest-ios-xcode:
|
||||||
|
runs-on: macos-latest
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- name: Checkout code
|
||||||
|
uses: actions/checkout@v6
|
||||||
|
|
||||||
|
- name: Setup Xcode
|
||||||
|
uses: ggml-org/setup-xcode@v1
|
||||||
|
with:
|
||||||
|
xcode-version: latest-stable
|
||||||
|
|
||||||
|
- name: Build
|
||||||
|
id: cmake_build
|
||||||
|
run: |
|
||||||
|
sysctl -a
|
||||||
|
cmake -B build -G Xcode \
|
||||||
|
-DGGML_METAL_USE_BF16=ON \
|
||||||
|
-DGGML_METAL_EMBED_LIBRARY=ON \
|
||||||
|
-DLLAMA_OPENSSL=OFF \
|
||||||
|
-DLLAMA_BUILD_EXAMPLES=OFF \
|
||||||
|
-DLLAMA_BUILD_TOOLS=OFF \
|
||||||
|
-DLLAMA_BUILD_TESTS=OFF \
|
||||||
|
-DLLAMA_BUILD_SERVER=OFF \
|
||||||
|
-DCMAKE_SYSTEM_NAME=iOS \
|
||||||
|
-DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 \
|
||||||
|
-DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml
|
||||||
|
cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO
|
||||||
|
|
||||||
|
- name: xcodebuild for swift package
|
||||||
|
id: xcodebuild
|
||||||
|
run: |
|
||||||
|
./build-xcframework.sh
|
||||||
|
|
||||||
|
- name: Upload xcframework artifact
|
||||||
|
uses: actions/upload-artifact@v6
|
||||||
|
with:
|
||||||
|
name: llama-xcframework
|
||||||
|
path: build-apple/llama.xcframework/
|
||||||
|
retention-days: 1
|
||||||
|
|
||||||
|
- name: Build Xcode project
|
||||||
|
run: |
|
||||||
|
xcodebuild -downloadPlatform iOS
|
||||||
|
xcodebuild -project examples/llama.swiftui/llama.swiftui.xcodeproj -scheme llama.swiftui -sdk iphoneos CODE_SIGNING_REQUIRED=NO CODE_SIGN_IDENTITY= -destination 'generic/platform=iOS' FRAMEWORK_FOLDER_PATH=./build-ios build
|
||||||
|
|
||||||
|
macOS-latest-tvos:
|
||||||
|
runs-on: macos-latest
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- name: Clone
|
||||||
|
id: checkout
|
||||||
|
uses: actions/checkout@v6
|
||||||
|
|
||||||
|
- name: ccache
|
||||||
|
uses: ggml-org/ccache-action@v1.2.16
|
||||||
|
with:
|
||||||
|
key: macOS-latest-tvos
|
||||||
|
evict-old-files: 1d
|
||||||
|
save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}
|
||||||
|
|
||||||
|
- name: Build
|
||||||
|
id: cmake_build
|
||||||
|
run: |
|
||||||
|
sysctl -a
|
||||||
|
cmake -B build -G Xcode \
|
||||||
|
-DGGML_METAL_USE_BF16=ON \
|
||||||
|
-DGGML_METAL_EMBED_LIBRARY=ON \
|
||||||
|
-DLLAMA_BUILD_COMMON=OFF \
|
||||||
|
-DLLAMA_BUILD_EXAMPLES=OFF \
|
||||||
|
-DLLAMA_BUILD_TOOLS=OFF \
|
||||||
|
-DLLAMA_BUILD_TESTS=OFF \
|
||||||
|
-DLLAMA_BUILD_SERVER=OFF \
|
||||||
|
-DCMAKE_SYSTEM_NAME=tvOS \
|
||||||
|
-DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 \
|
||||||
|
-DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml
|
||||||
|
cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO
|
||||||
|
|
||||||
|
macOS-latest-visionos:
|
||||||
|
runs-on: macos-latest
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- name: Clone
|
||||||
|
id: checkout
|
||||||
|
uses: actions/checkout@v6
|
||||||
|
|
||||||
|
- name: Build
|
||||||
|
id: cmake_build
|
||||||
|
run: |
|
||||||
|
sysctl -a
|
||||||
|
cmake -B build -G Xcode \
|
||||||
|
-DGGML_METAL_USE_BF16=ON \
|
||||||
|
-DGGML_METAL_EMBED_LIBRARY=ON \
|
||||||
|
-DLLAMA_BUILD_COMMON=OFF \
|
||||||
|
-DLLAMA_BUILD_EXAMPLES=OFF \
|
||||||
|
-DLLAMA_BUILD_TOOLS=OFF \
|
||||||
|
-DLLAMA_BUILD_TESTS=OFF \
|
||||||
|
-DLLAMA_BUILD_SERVER=OFF \
|
||||||
|
-DCMAKE_SYSTEM_NAME=visionOS \
|
||||||
|
-DCMAKE_OSX_DEPLOYMENT_TARGET=1.0 \
|
||||||
|
-DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml
|
||||||
|
cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO
|
||||||
|
|
||||||
|
macOS-latest-swift:
|
||||||
|
runs-on: macos-latest
|
||||||
|
needs: macos-latest-ios-xcode
|
||||||
|
|
||||||
|
strategy:
|
||||||
|
matrix:
|
||||||
|
destination: ['generic/platform=macOS', 'generic/platform=iOS', 'generic/platform=tvOS']
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- name: Clone
|
||||||
|
id: checkout
|
||||||
|
uses: actions/checkout@v6
|
||||||
|
|
||||||
|
- name: ccache
|
||||||
|
uses: ggml-org/ccache-action@v1.2.16
|
||||||
|
with:
|
||||||
|
key: macOS-latest-swift
|
||||||
|
evict-old-files: 1d
|
||||||
|
save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}
|
||||||
|
|
||||||
|
- name: Download xcframework artifact
|
||||||
|
uses: actions/download-artifact@v7
|
||||||
|
with:
|
||||||
|
name: llama-xcframework
|
||||||
|
path: build-apple/llama.xcframework/
|
||||||
|
|
||||||
|
- name: Build llama.cpp with CMake
|
||||||
|
id: cmake_build
|
||||||
|
run: |
|
||||||
|
sysctl -a
|
||||||
|
cmake -B build -G Xcode \
|
||||||
|
-DGGML_METAL_USE_BF16=ON \
|
||||||
|
-DGGML_METAL_EMBED_LIBRARY=ON \
|
||||||
|
-DLLAMA_OPENSSL=OFF \
|
||||||
|
-DLLAMA_BUILD_EXAMPLES=OFF \
|
||||||
|
-DLLAMA_BUILD_TOOLS=OFF \
|
||||||
|
-DLLAMA_BUILD_TESTS=OFF \
|
||||||
|
-DLLAMA_BUILD_SERVER=OFF \
|
||||||
|
-DCMAKE_OSX_ARCHITECTURES="arm64;x86_64"
|
||||||
|
cmake --build build --config Release -j $(sysctl -n hw.logicalcpu)
|
||||||
|
|
@ -37,31 +37,31 @@ jobs:
|
||||||
path: ./vulkan_sdk
|
path: ./vulkan_sdk
|
||||||
version: ${{ env.VULKAN_SDK_VERSION }}
|
version: ${{ env.VULKAN_SDK_VERSION }}
|
||||||
|
|
||||||
ubuntu-24-spacemit-cache:
|
#ubuntu-24-spacemit-cache:
|
||||||
runs-on: ubuntu-24.04
|
# runs-on: ubuntu-24.04
|
||||||
|
|
||||||
env:
|
# env:
|
||||||
# Make sure this is in sync with build-linux-cross.yml
|
# # Make sure this is in sync with build-cross.yml
|
||||||
SPACEMIT_IME_TOOLCHAIN_VERSION: "1.1.2"
|
# SPACEMIT_IME_TOOLCHAIN_VERSION: "1.1.2"
|
||||||
|
|
||||||
steps:
|
# steps:
|
||||||
- name: Clone
|
# - name: Clone
|
||||||
id: checkout
|
# id: checkout
|
||||||
uses: actions/checkout@v6
|
# uses: actions/checkout@v6
|
||||||
|
|
||||||
- name: Setup Cache
|
# - name: Setup Cache
|
||||||
uses: actions/cache@v5
|
# uses: actions/cache@v5
|
||||||
id: cache-toolchain
|
# id: cache-toolchain
|
||||||
with:
|
# with:
|
||||||
path: ./spacemit_toolchain
|
# path: ./spacemit_toolchain
|
||||||
key: spacemit-ime-toolchain-v${{ env.SPACEMIT_IME_TOOLCHAIN_VERSION }}-${{ runner.os }}
|
# key: spacemit-ime-toolchain-v${{ env.SPACEMIT_IME_TOOLCHAIN_VERSION }}-${{ runner.os }}
|
||||||
|
|
||||||
- name: Setup SpacemiT Toolchain
|
# - name: Setup SpacemiT Toolchain
|
||||||
if: steps.cache-toolchain.outputs.cache-hit != 'true'
|
# if: steps.cache-toolchain.outputs.cache-hit != 'true'
|
||||||
uses: ./.github/actions/linux-setup-spacemit
|
# uses: ./.github/actions/linux-setup-spacemit
|
||||||
with:
|
# with:
|
||||||
path: ./spacemit_toolchain
|
# path: ./spacemit_toolchain
|
||||||
version: ${{ env.SPACEMIT_IME_TOOLCHAIN_VERSION }}
|
# version: ${{ env.SPACEMIT_IME_TOOLCHAIN_VERSION }}
|
||||||
|
|
||||||
ubuntu-24-openvino-cache:
|
ubuntu-24-openvino-cache:
|
||||||
runs-on: ubuntu-24.04
|
runs-on: ubuntu-24.04
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,102 @@
|
||||||
|
name: CI (cann)
|
||||||
|
|
||||||
|
on:
|
||||||
|
workflow_dispatch: # allows manual triggering
|
||||||
|
push:
|
||||||
|
branches:
|
||||||
|
- master
|
||||||
|
paths: [
|
||||||
|
'.github/workflows/build-cann.yml',
|
||||||
|
'**/CMakeLists.txt',
|
||||||
|
'**/*.cmake',
|
||||||
|
'**/*.h',
|
||||||
|
'**/*.hpp',
|
||||||
|
'**/*.c',
|
||||||
|
'**/*.cpp'
|
||||||
|
]
|
||||||
|
|
||||||
|
pull_request:
|
||||||
|
types: [opened, synchronize, reopened]
|
||||||
|
paths: [
|
||||||
|
'.github/workflows/build-cann.yml',
|
||||||
|
'ggml/src/ggml-cann/**'
|
||||||
|
]
|
||||||
|
|
||||||
|
concurrency:
|
||||||
|
group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
|
||||||
|
cancel-in-progress: true
|
||||||
|
|
||||||
|
env:
|
||||||
|
GGML_NLOOP: 3
|
||||||
|
GGML_N_THREADS: 1
|
||||||
|
LLAMA_LOG_COLORS: 1
|
||||||
|
LLAMA_LOG_PREFIX: 1
|
||||||
|
LLAMA_LOG_TIMESTAMPS: 1
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
openEuler-latest-cann:
|
||||||
|
defaults:
|
||||||
|
run:
|
||||||
|
shell: bash -el {0}
|
||||||
|
strategy:
|
||||||
|
matrix:
|
||||||
|
arch: [x86, aarch64]
|
||||||
|
chip_type: ['910b', '310p']
|
||||||
|
build: ['Release']
|
||||||
|
use_acl_graph: ['on', 'off']
|
||||||
|
exclude:
|
||||||
|
# 310P does not support USE_ACL_GRAPH=on
|
||||||
|
- chip_type: '310p'
|
||||||
|
use_acl_graph: 'on'
|
||||||
|
runs-on: ${{ matrix.arch == 'aarch64' && 'ubuntu-24.04-arm' || 'ubuntu-24.04' }}
|
||||||
|
steps:
|
||||||
|
- name: Checkout
|
||||||
|
uses: actions/checkout@v6
|
||||||
|
with:
|
||||||
|
fetch-depth: 0
|
||||||
|
|
||||||
|
- name: Free up disk space
|
||||||
|
uses: ggml-org/free-disk-space@v1.3.1
|
||||||
|
with:
|
||||||
|
tool-cache: true
|
||||||
|
|
||||||
|
- name: Set container image
|
||||||
|
id: cann-image
|
||||||
|
run: |
|
||||||
|
image="ascendai/cann:${{ matrix.chip_type == '910b' && '8.3.rc2-910b-openeuler24.03-py3.11' || '8.3.rc2-310p-openeuler24.03-py3.11' }}"
|
||||||
|
echo "image=${image}" >> "${GITHUB_OUTPUT}"
|
||||||
|
|
||||||
|
- name: Pull container image
|
||||||
|
run: docker pull "${{ steps.cann-image.outputs.image }}"
|
||||||
|
|
||||||
|
- name: Build
|
||||||
|
env:
|
||||||
|
BUILD_TYPE: ${{ matrix.build }}
|
||||||
|
SOC_TYPE: ascend${{ matrix.chip_type }}
|
||||||
|
USE_ACL_GRAPH: ${{ matrix.use_acl_graph }}
|
||||||
|
run: |
|
||||||
|
HOST_UID=$(id -u)
|
||||||
|
HOST_GID=$(id -g)
|
||||||
|
|
||||||
|
docker run --rm \
|
||||||
|
-v "${PWD}:/workspace" \
|
||||||
|
-w /workspace \
|
||||||
|
-e SOC_TYPE=${SOC_TYPE} \
|
||||||
|
-e BUILD_TYPE=${BUILD_TYPE} \
|
||||||
|
-e USE_ACL_GRAPH=${USE_ACL_GRAPH} \
|
||||||
|
"${{ steps.cann-image.outputs.image }}" \
|
||||||
|
bash -lc '
|
||||||
|
set -e
|
||||||
|
yum install -y --setopt=install_weak_deps=False --setopt=tsflags=nodocs git gcc gcc-c++ make cmake openssl-devel
|
||||||
|
yum clean all && rm -rf /var/cache/yum
|
||||||
|
git config --global --add safe.directory "/workspace"
|
||||||
|
export LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:${ASCEND_TOOLKIT_HOME}/$(uname -m)-linux/devlib/:${LD_LIBRARY_PATH}
|
||||||
|
cmake -S . -B build \
|
||||||
|
-DCMAKE_BUILD_TYPE=${BUILD_TYPE} \
|
||||||
|
-DGGML_CANN=on \
|
||||||
|
-DSOC_TYPE=${SOC_TYPE} \
|
||||||
|
-DUSE_ACL_GRAPH=${USE_ACL_GRAPH}
|
||||||
|
cmake --build build -j $(nproc)
|
||||||
|
|
||||||
|
chown -R '"${HOST_UID}"':'"${HOST_GID}"' /workspace/build
|
||||||
|
'
|
||||||
|
|
@ -1,7 +1,24 @@
|
||||||
name: Build on Linux using cross-compiler
|
name: CI (cross)
|
||||||
on:
|
on:
|
||||||
|
# limited triggers due to the low importance of these workflows: manual dispatch, path-filtered pushes to master, and a weekly schedule
|
||||||
|
# TODO: for regular runs, provision dedicated self-hosted runners
|
||||||
workflow_dispatch:
|
workflow_dispatch:
|
||||||
workflow_call:
|
push:
|
||||||
|
branches:
|
||||||
|
- master
|
||||||
|
paths: [
|
||||||
|
'.github/workflows/build-cross.yml',
|
||||||
|
'ggml/src/spacemit/*',
|
||||||
|
'ggml/src/arch/loongarch/*'
|
||||||
|
]
|
||||||
|
# run once every week
|
||||||
|
schedule:
|
||||||
|
- cron: '0 0 * * 0'
|
||||||
|
|
||||||
|
concurrency:
|
||||||
|
group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
|
||||||
|
cancel-in-progress: true
|
||||||
|
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
# ubuntu-24-riscv64-cpu-cross:
|
# ubuntu-24-riscv64-cpu-cross:
|
||||||
|
|
@ -264,15 +281,15 @@ jobs:
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v6
|
- uses: actions/checkout@v6
|
||||||
|
|
||||||
- name: Use SpacemiT Toolchain Cache
|
#- name: Use SpacemiT Toolchain Cache
|
||||||
uses: actions/cache@v5
|
# uses: actions/cache@v5
|
||||||
id: cache-toolchain
|
# id: cache-toolchain
|
||||||
with:
|
# with:
|
||||||
path: ./spacemit_toolchain
|
# path: ./spacemit_toolchain
|
||||||
key: spacemit-ime-toolchain-v${{ env.SPACEMIT_IME_TOOLCHAIN_VERSION }}-${{ runner.os }}
|
# key: spacemit-ime-toolchain-v${{ env.SPACEMIT_IME_TOOLCHAIN_VERSION }}-${{ runner.os }}
|
||||||
|
|
||||||
- name: Setup SpacemiT Toolchain
|
- name: Setup SpacemiT Toolchain
|
||||||
if: steps.cache-toolchain.outputs.cache-hit != 'true'
|
#if: steps.cache-toolchain.outputs.cache-hit != 'true'
|
||||||
uses: ./.github/actions/linux-setup-spacemit
|
uses: ./.github/actions/linux-setup-spacemit
|
||||||
with:
|
with:
|
||||||
path: ./spacemit_toolchain
|
path: ./spacemit_toolchain
|
||||||
|
|
@ -0,0 +1,72 @@
|
||||||
|
name: CI (msys)
|
||||||
|
|
||||||
|
on:
|
||||||
|
# only manual and weekly scheduled triggers due to the low importance of these workflows
|
||||||
|
# TODO: for regular runs, provision dedicated self-hosted runners
|
||||||
|
workflow_dispatch:
|
||||||
|
# run once every week
|
||||||
|
schedule:
|
||||||
|
- cron: '0 0 * * 0'
|
||||||
|
|
||||||
|
concurrency:
|
||||||
|
group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
|
||||||
|
cancel-in-progress: true
|
||||||
|
|
||||||
|
env:
|
||||||
|
GGML_NLOOP: 3
|
||||||
|
GGML_N_THREADS: 1
|
||||||
|
LLAMA_LOG_COLORS: 1
|
||||||
|
LLAMA_LOG_PREFIX: 1
|
||||||
|
LLAMA_LOG_TIMESTAMPS: 1
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
windows-msys2:
|
||||||
|
runs-on: windows-2025
|
||||||
|
|
||||||
|
strategy:
|
||||||
|
fail-fast: false
|
||||||
|
matrix:
|
||||||
|
include:
|
||||||
|
- { sys: UCRT64, env: ucrt-x86_64, build: Release }
|
||||||
|
- { sys: CLANG64, env: clang-x86_64, build: Release }
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- name: Clone
|
||||||
|
uses: actions/checkout@v6
|
||||||
|
|
||||||
|
#- name: ccache
|
||||||
|
# uses: ggml-org/ccache-action@v1.2.16
|
||||||
|
# with:
|
||||||
|
# key: windows-msys2
|
||||||
|
# variant: ccache
|
||||||
|
# evict-old-files: 1d
|
||||||
|
# save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}
|
||||||
|
|
||||||
|
- name: Setup ${{ matrix.sys }}
|
||||||
|
uses: msys2/setup-msys2@v2
|
||||||
|
with:
|
||||||
|
update: true
|
||||||
|
msystem: ${{matrix.sys}}
|
||||||
|
install: >-
|
||||||
|
base-devel
|
||||||
|
git
|
||||||
|
mingw-w64-${{matrix.env}}-toolchain
|
||||||
|
mingw-w64-${{matrix.env}}-cmake
|
||||||
|
mingw-w64-${{matrix.env}}-openblas
|
||||||
|
|
||||||
|
- name: Build using CMake
|
||||||
|
shell: msys2 {0}
|
||||||
|
run: |
|
||||||
|
cmake -B build
|
||||||
|
cmake --build build --config ${{ matrix.build }} -j $(nproc)
|
||||||
|
|
||||||
|
- name: Clean after building using CMake
|
||||||
|
shell: msys2 {0}
|
||||||
|
run: |
|
||||||
|
rm -rf build
|
||||||
|
|
||||||
|
- name: Build using CMake w/ OpenBLAS
|
||||||
|
shell: msys2 {0}
|
||||||
|
run: |
|
||||||
|
cmake -B build -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
|
||||||
|
cmake --build build --config ${{ matrix.build }} -j $(nproc)
|
||||||
|
|
@ -0,0 +1,136 @@
|
||||||
|
name: CI (riscv)
|
||||||
|
|
||||||
|
on:
|
||||||
|
workflow_dispatch: # allows manual triggering
|
||||||
|
push:
|
||||||
|
branches:
|
||||||
|
- master
|
||||||
|
paths: [
|
||||||
|
'.github/workflows/build-riscv.yml',
|
||||||
|
'**/CMakeLists.txt',
|
||||||
|
'**/*.cmake',
|
||||||
|
'**/*.h',
|
||||||
|
'**/*.hpp',
|
||||||
|
'**/*.c',
|
||||||
|
'**/*.cpp'
|
||||||
|
]
|
||||||
|
|
||||||
|
pull_request:
|
||||||
|
types: [opened, synchronize, reopened]
|
||||||
|
paths: [
|
||||||
|
'.github/workflows/build-riscv.yml',
|
||||||
|
'ggml/src/ggml-cpu/arch/riscv/**'
|
||||||
|
]
|
||||||
|
|
||||||
|
concurrency:
|
||||||
|
group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
|
||||||
|
cancel-in-progress: true
|
||||||
|
|
||||||
|
env:
|
||||||
|
GGML_NLOOP: 3
|
||||||
|
GGML_N_THREADS: 1
|
||||||
|
LLAMA_LOG_COLORS: 1
|
||||||
|
LLAMA_LOG_PREFIX: 1
|
||||||
|
LLAMA_LOG_TIMESTAMPS: 1
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
ubuntu-riscv64-native-sanitizer:
|
||||||
|
runs-on: RISCV64
|
||||||
|
|
||||||
|
continue-on-error: true
|
||||||
|
|
||||||
|
strategy:
|
||||||
|
matrix:
|
||||||
|
sanitizer: [ADDRESS, THREAD, UNDEFINED]
|
||||||
|
build_type: [Debug]
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- name: Install dependencies
|
||||||
|
run: |
|
||||||
|
sudo apt-get update
|
||||||
|
|
||||||
|
# Install necessary packages
|
||||||
|
sudo apt-get install -y libatomic1 libtsan2 gcc-14 g++-14 rustup cmake build-essential wget ccache git-lfs
|
||||||
|
|
||||||
|
# Set gcc-14 and g++-14 as the default compilers
|
||||||
|
sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-14 100
|
||||||
|
sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-14 100
|
||||||
|
sudo ln -sf /usr/bin/gcc-14 /usr/bin/gcc
|
||||||
|
sudo ln -sf /usr/bin/g++-14 /usr/bin/g++
|
||||||
|
|
||||||
|
# Install Rust stable version
|
||||||
|
rustup install stable
|
||||||
|
rustup default stable
|
||||||
|
|
||||||
|
git lfs install
|
||||||
|
|
||||||
|
- name: GCC version check
|
||||||
|
run: |
|
||||||
|
gcc --version
|
||||||
|
g++ --version
|
||||||
|
|
||||||
|
- name: Clone
|
||||||
|
id: checkout
|
||||||
|
uses: actions/checkout@v6
|
||||||
|
|
||||||
|
- name: Setup ccache
|
||||||
|
run: |
|
||||||
|
# Unique cache directory per matrix combination
|
||||||
|
export CCACHE_DIR="$HOME/.ccache/sanitizer-${{ matrix.sanitizer }}-${{ matrix.build_type }}"
|
||||||
|
mkdir -p "$CCACHE_DIR"
|
||||||
|
|
||||||
|
# Configure ccache
|
||||||
|
ccache --set-config=max_size=5G
|
||||||
|
ccache --set-config=compression=true
|
||||||
|
ccache --set-config=compression_level=6
|
||||||
|
ccache --set-config=cache_dir="$CCACHE_DIR"
|
||||||
|
ccache --set-config=sloppiness=file_macro,time_macros,include_file_mtime,include_file_ctime
|
||||||
|
ccache --set-config=hash_dir=false
|
||||||
|
|
||||||
|
# Export for subsequent steps
|
||||||
|
echo "CCACHE_DIR=$CCACHE_DIR" >> $GITHUB_ENV
|
||||||
|
echo "PATH=/usr/lib/ccache:$PATH" >> $GITHUB_ENV
|
||||||
|
|
||||||
|
- name: Build
|
||||||
|
id: cmake_build
|
||||||
|
if: ${{ matrix.sanitizer != 'THREAD' }}
|
||||||
|
run: |
|
||||||
|
cmake -B build \
|
||||||
|
-DLLAMA_OPENSSL=OFF \
|
||||||
|
-DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \
|
||||||
|
-DGGML_OPENMP=ON \
|
||||||
|
-DLLAMA_BUILD_EXAMPLES=ON \
|
||||||
|
-DLLAMA_BUILD_TOOLS=ON \
|
||||||
|
-DLLAMA_BUILD_TESTS=OFF \
|
||||||
|
-DCMAKE_C_COMPILER_LAUNCHER=ccache \
|
||||||
|
-DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
|
||||||
|
-DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON \
|
||||||
|
-DCMAKE_C_COMPILER=riscv64-linux-gnu-gcc-14 \
|
||||||
|
-DCMAKE_CXX_COMPILER=riscv64-linux-gnu-g++-14
|
||||||
|
|
||||||
|
cmake --build build --config ${{ matrix.build_type }} -j $(nproc)
|
||||||
|
|
||||||
|
- name: Build (no OpenMP)
|
||||||
|
id: cmake_build_no_openmp
|
||||||
|
if: ${{ matrix.sanitizer == 'THREAD' }}
|
||||||
|
run: |
|
||||||
|
cmake -B build \
|
||||||
|
-DLLAMA_OPENSSL=OFF \
|
||||||
|
-DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \
|
||||||
|
-DGGML_OPENMP=OFF \
|
||||||
|
-DLLAMA_BUILD_EXAMPLES=ON \
|
||||||
|
-DLLAMA_BUILD_TOOLS=ON \
|
||||||
|
-DLLAMA_BUILD_TESTS=OFF \
|
||||||
|
-DCMAKE_C_COMPILER_LAUNCHER=ccache \
|
||||||
|
-DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
|
||||||
|
-DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON \
|
||||||
|
-DCMAKE_C_COMPILER=riscv64-linux-gnu-gcc-14 \
|
||||||
|
-DCMAKE_CXX_COMPILER=riscv64-linux-gnu-g++-14
|
||||||
|
|
||||||
|
cmake --build build --config ${{ matrix.build_type }} -j $(nproc)
|
||||||
|
|
||||||
|
- name: Test
|
||||||
|
id: cmake_test
|
||||||
|
run: |
|
||||||
|
cd build
|
||||||
|
ctest -L main --verbose --timeout 900
|
||||||
|
|
@ -0,0 +1,87 @@
|
||||||
|
name: CI (sanitize)
|
||||||
|
|
||||||
|
on:
|
||||||
|
workflow_dispatch: # allows manual triggering
|
||||||
|
push:
|
||||||
|
branches:
|
||||||
|
- master
|
||||||
|
paths: [
|
||||||
|
'.github/workflows/build-sanitize.yml',
|
||||||
|
'**/CMakeLists.txt',
|
||||||
|
'**/*.cmake',
|
||||||
|
'**/*.h',
|
||||||
|
'**/*.hpp',
|
||||||
|
'**/*.c',
|
||||||
|
'**/*.cpp'
|
||||||
|
]
|
||||||
|
|
||||||
|
concurrency:
|
||||||
|
group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
|
||||||
|
cancel-in-progress: true
|
||||||
|
|
||||||
|
env:
|
||||||
|
GGML_NLOOP: 3
|
||||||
|
GGML_N_THREADS: 1
|
||||||
|
LLAMA_LOG_COLORS: 1
|
||||||
|
LLAMA_LOG_PREFIX: 1
|
||||||
|
LLAMA_LOG_TIMESTAMPS: 1
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
ubuntu-latest-sanitizer:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
|
||||||
|
continue-on-error: true
|
||||||
|
|
||||||
|
strategy:
|
||||||
|
matrix:
|
||||||
|
sanitizer: [ADDRESS, THREAD, UNDEFINED]
|
||||||
|
build_type: [Debug]
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- name: Clone
|
||||||
|
id: checkout
|
||||||
|
uses: actions/checkout@v6
|
||||||
|
|
||||||
|
- name: ccache
|
||||||
|
uses: ggml-org/ccache-action@v1.2.16
|
||||||
|
with:
|
||||||
|
key: ubuntu-latest-sanitizer-${{ matrix.sanitizer }}
|
||||||
|
evict-old-files: 1d
|
||||||
|
save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}
|
||||||
|
|
||||||
|
- name: Dependencies
|
||||||
|
id: depends
|
||||||
|
run: |
|
||||||
|
sudo apt-get update
|
||||||
|
sudo apt-get install build-essential libssl-dev
|
||||||
|
|
||||||
|
- name: Build
|
||||||
|
id: cmake_build
|
||||||
|
if: ${{ matrix.sanitizer != 'THREAD' }}
|
||||||
|
run: |
|
||||||
|
cmake -B build \
|
||||||
|
-DLLAMA_FATAL_WARNINGS=ON \
|
||||||
|
-DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON \
|
||||||
|
-DGGML_SANITIZE_${{ matrix.sanitizer }}=ON \
|
||||||
|
-DCMAKE_BUILD_TYPE=${{ matrix.build_type }}
|
||||||
|
|
||||||
|
cmake --build build --config ${{ matrix.build_type }} -j $(nproc)
|
||||||
|
|
||||||
|
- name: Build (no OpenMP)
|
||||||
|
id: cmake_build_no_openmp
|
||||||
|
if: ${{ matrix.sanitizer == 'THREAD' }}
|
||||||
|
run: |
|
||||||
|
cmake -B build \
|
||||||
|
-DLLAMA_FATAL_WARNINGS=ON \
|
||||||
|
-DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON \
|
||||||
|
-DGGML_SANITIZE_${{ matrix.sanitizer }}=ON \
|
||||||
|
-DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \
|
||||||
|
-DGGML_OPENMP=OFF
|
||||||
|
|
||||||
|
cmake --build build --config ${{ matrix.build_type }} -j $(nproc)
|
||||||
|
|
||||||
|
- name: Test
|
||||||
|
id: cmake_test
|
||||||
|
run: |
|
||||||
|
cd build
|
||||||
|
ctest -L main --verbose --timeout 900
|
||||||
|
|
@ -222,15 +222,7 @@ jobs:
|
||||||
id: checkout
|
id: checkout
|
||||||
uses: actions/checkout@v6
|
uses: actions/checkout@v6
|
||||||
|
|
||||||
- name: Use OpenVINO Toolkit Cache
|
|
||||||
uses: actions/cache@v5
|
|
||||||
id: cache-openvino
|
|
||||||
with:
|
|
||||||
path: ./openvino_toolkit
|
|
||||||
key: openvino-toolkit-v${{ env.OPENVINO_VERSION_FULL }}-${{ runner.os }}
|
|
||||||
|
|
||||||
- name: Setup OpenVINO Toolkit
|
- name: Setup OpenVINO Toolkit
|
||||||
if: steps.cache-openvino.outputs.cache-hit != 'true'
|
|
||||||
uses: ./.github/actions/linux-setup-openvino
|
uses: ./.github/actions/linux-setup-openvino
|
||||||
with:
|
with:
|
||||||
path: ./openvino_toolkit
|
path: ./openvino_toolkit
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,96 @@
|
||||||
|
name: CI (vulkan)
|
||||||
|
|
||||||
|
on:
|
||||||
|
workflow_dispatch: # allows manual triggering
|
||||||
|
push:
|
||||||
|
branches:
|
||||||
|
- master
|
||||||
|
paths: [
|
||||||
|
'.github/workflows/build-vulkan.yml',
|
||||||
|
'**/CMakeLists.txt',
|
||||||
|
'**/.cmake',
|
||||||
|
'**/*.h',
|
||||||
|
'**/*.hpp',
|
||||||
|
'**/*.c',
|
||||||
|
'**/*.cpp',
|
||||||
|
'**/*.comp',
|
||||||
|
'**/*.glsl'
|
||||||
|
]
|
||||||
|
|
||||||
|
pull_request:
|
||||||
|
types: [opened, synchronize, reopened]
|
||||||
|
paths: [
|
||||||
|
'.github/workflows/build-vulkan.yml',
|
||||||
|
'ggml/src/ggml-vulkan/**'
|
||||||
|
]
|
||||||
|
|
||||||
|
concurrency:
|
||||||
|
group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
|
||||||
|
cancel-in-progress: true
|
||||||
|
|
||||||
|
env:
|
||||||
|
GGML_NLOOP: 3
|
||||||
|
GGML_N_THREADS: 1
|
||||||
|
LLAMA_LOG_COLORS: 1
|
||||||
|
LLAMA_LOG_PREFIX: 1
|
||||||
|
LLAMA_LOG_TIMESTAMPS: 1
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
ubuntu-24-vulkan-llvmpipe:
|
||||||
|
runs-on: ubuntu-24.04
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- name: Clone
|
||||||
|
id: checkout
|
||||||
|
uses: actions/checkout@v6
|
||||||
|
|
||||||
|
- name: ccache
|
||||||
|
uses: ggml-org/ccache-action@v1.2.16
|
||||||
|
with:
|
||||||
|
key: ubuntu-24-vulkan-llvmpipe
|
||||||
|
evict-old-files: 1d
|
||||||
|
save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}
|
||||||
|
|
||||||
|
- name: Dependencies
|
||||||
|
id: depends
|
||||||
|
run: |
|
||||||
|
sudo add-apt-repository -y ppa:kisak/kisak-mesa
|
||||||
|
sudo apt-get update -y
|
||||||
|
sudo apt-get install -y build-essential mesa-vulkan-drivers libxcb-xinput0 libxcb-xinerama0 libxcb-cursor-dev libssl-dev
|
||||||
|
|
||||||
|
- name: Get latest Vulkan SDK version
|
||||||
|
id: vulkan_sdk_version
|
||||||
|
run: |
|
||||||
|
echo "VULKAN_SDK_VERSION=$(curl https://vulkan.lunarg.com/sdk/latest/linux.txt)" >> "$GITHUB_ENV"
|
||||||
|
|
||||||
|
- name: Use Vulkan SDK Cache
|
||||||
|
uses: actions/cache@v5
|
||||||
|
id: cache-sdk
|
||||||
|
with:
|
||||||
|
path: ./vulkan_sdk
|
||||||
|
key: vulkan-sdk-${{ env.VULKAN_SDK_VERSION }}-${{ runner.os }}
|
||||||
|
|
||||||
|
- name: Setup Vulkan SDK
|
||||||
|
if: steps.cache-sdk.outputs.cache-hit != 'true'
|
||||||
|
uses: ./.github/actions/linux-setup-vulkan-llvmpipe
|
||||||
|
with:
|
||||||
|
path: ./vulkan_sdk
|
||||||
|
version: ${{ env.VULKAN_SDK_VERSION }}
|
||||||
|
|
||||||
|
- name: Build
|
||||||
|
id: cmake_build
|
||||||
|
run: |
|
||||||
|
source ./vulkan_sdk/setup-env.sh
|
||||||
|
cmake -B build \
|
||||||
|
-DGGML_VULKAN=ON
|
||||||
|
cmake --build build --config Release -j $(nproc)
|
||||||
|
|
||||||
|
- name: Test
|
||||||
|
id: cmake_test
|
||||||
|
run: |
|
||||||
|
cd build
|
||||||
|
export GGML_VK_VISIBLE_DEVICES=0
|
||||||
|
export GGML_VK_DISABLE_F16=1
|
||||||
|
export GGML_VK_DISABLE_COOPMAT=1
|
||||||
|
# This is using llvmpipe and runs slower than other backends
|
||||||
|
ctest -L main --verbose --timeout 4800
|
||||||
File diff suppressed because it is too large
Load Diff
|
|
@ -4,10 +4,16 @@ on:
|
||||||
push:
|
push:
|
||||||
branches:
|
branches:
|
||||||
- master
|
- master
|
||||||
paths: ['.github/workflows/python-lint.yml', '**/*.py']
|
paths: [
|
||||||
|
'.github/workflows/python-lint.yml',
|
||||||
|
'**/*.py'
|
||||||
|
]
|
||||||
pull_request:
|
pull_request:
|
||||||
types: [opened, synchronize, reopened]
|
types: [opened, synchronize, reopened]
|
||||||
paths: ['.github/workflows/python-lint.yml', '**/*.py']
|
paths: [
|
||||||
|
'.github/workflows/python-lint.yml',
|
||||||
|
'**/*.py'
|
||||||
|
]
|
||||||
|
|
||||||
concurrency:
|
concurrency:
|
||||||
group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
|
group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
|
||||||
|
|
|
||||||
|
|
@ -10,7 +10,22 @@ on:
|
||||||
push:
|
push:
|
||||||
branches:
|
branches:
|
||||||
- master
|
- master
|
||||||
paths: ['.github/workflows/release.yml', '**/CMakeLists.txt', '**/.cmake', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.cuh', '**/*.swift', '**/*.m', '**/*.metal', '**/*.comp']
|
paths: [
|
||||||
|
'.github/workflows/release.yml',
|
||||||
|
'**/CMakeLists.txt',
|
||||||
|
'**/.cmake',
|
||||||
|
'**/*.h',
|
||||||
|
'**/*.hpp',
|
||||||
|
'**/*.c',
|
||||||
|
'**/*.cpp',
|
||||||
|
'**/*.cu',
|
||||||
|
'**/*.cuh',
|
||||||
|
'**/*.swift',
|
||||||
|
'**/*.m',
|
||||||
|
'**/*.metal',
|
||||||
|
'**/*.comp',
|
||||||
|
'**/*.glsl'
|
||||||
|
]
|
||||||
|
|
||||||
concurrency:
|
concurrency:
|
||||||
group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
|
group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
|
||||||
|
|
@ -34,7 +49,7 @@ jobs:
|
||||||
- name: ccache
|
- name: ccache
|
||||||
uses: ggml-org/ccache-action@v1.2.16
|
uses: ggml-org/ccache-action@v1.2.16
|
||||||
with:
|
with:
|
||||||
key: macOS-latest-cmake-arm64
|
key: macOS-latest-arm64
|
||||||
evict-old-files: 1d
|
evict-old-files: 1d
|
||||||
|
|
||||||
- name: Build
|
- name: Build
|
||||||
|
|
@ -81,7 +96,7 @@ jobs:
|
||||||
- name: ccache
|
- name: ccache
|
||||||
uses: ggml-org/ccache-action@v1.2.16
|
uses: ggml-org/ccache-action@v1.2.16
|
||||||
with:
|
with:
|
||||||
key: macOS-latest-cmake-x64
|
key: macOS-latest-x64
|
||||||
evict-old-files: 1d
|
evict-old-files: 1d
|
||||||
|
|
||||||
- name: Build
|
- name: Build
|
||||||
|
|
@ -140,7 +155,7 @@ jobs:
|
||||||
- name: ccache
|
- name: ccache
|
||||||
uses: ggml-org/ccache-action@v1.2.16
|
uses: ggml-org/ccache-action@v1.2.16
|
||||||
with:
|
with:
|
||||||
key: ubuntu-cpu-cmake-${{ matrix.build }}
|
key: ubuntu-cpu-${{ matrix.build }}
|
||||||
evict-old-files: 1d
|
evict-old-files: 1d
|
||||||
|
|
||||||
- name: Dependencies
|
- name: Dependencies
|
||||||
|
|
@ -191,7 +206,7 @@ jobs:
|
||||||
- name: ccache
|
- name: ccache
|
||||||
uses: ggml-org/ccache-action@v1.2.16
|
uses: ggml-org/ccache-action@v1.2.16
|
||||||
with:
|
with:
|
||||||
key: ubuntu-22-cmake-vulkan
|
key: ubuntu-22-vulkan
|
||||||
evict-old-files: 1d
|
evict-old-files: 1d
|
||||||
|
|
||||||
- name: Dependencies
|
- name: Dependencies
|
||||||
|
|
@ -256,7 +271,7 @@ jobs:
|
||||||
- name: ccache
|
- name: ccache
|
||||||
uses: ggml-org/ccache-action@v1.2.16
|
uses: ggml-org/ccache-action@v1.2.16
|
||||||
with:
|
with:
|
||||||
key: ubuntu-24-cmake-openvino-release-no-preset-v1
|
key: ubuntu-24-openvino-release-no-preset-v1
|
||||||
evict-old-files: 1d
|
evict-old-files: 1d
|
||||||
|
|
||||||
- name: Dependencies
|
- name: Dependencies
|
||||||
|
|
@ -329,7 +344,7 @@ jobs:
|
||||||
- name: ccache
|
- name: ccache
|
||||||
uses: ggml-org/ccache-action@v1.2.16
|
uses: ggml-org/ccache-action@v1.2.16
|
||||||
with:
|
with:
|
||||||
key: windows-latest-cmake-cpu-${{ matrix.arch }}
|
key: windows-latest-cpu-${{ matrix.arch }}
|
||||||
variant: ccache
|
variant: ccache
|
||||||
evict-old-files: 1d
|
evict-old-files: 1d
|
||||||
|
|
||||||
|
|
@ -390,7 +405,7 @@ jobs:
|
||||||
- name: ccache
|
- name: ccache
|
||||||
uses: ggml-org/ccache-action@v1.2.16
|
uses: ggml-org/ccache-action@v1.2.16
|
||||||
with:
|
with:
|
||||||
key: windows-latest-cmake-${{ matrix.backend }}-${{ matrix.arch }}
|
key: windows-latest-${{ matrix.backend }}-${{ matrix.arch }}
|
||||||
variant: ccache
|
variant: ccache
|
||||||
evict-old-files: 1d
|
evict-old-files: 1d
|
||||||
|
|
||||||
|
|
@ -536,7 +551,7 @@ jobs:
|
||||||
- name: ccache
|
- name: ccache
|
||||||
uses: ggml-org/ccache-action@v1.2.16
|
uses: ggml-org/ccache-action@v1.2.16
|
||||||
with:
|
with:
|
||||||
key: windows-latest-cmake-sycl
|
key: windows-latest-sycl
|
||||||
variant: ccache
|
variant: ccache
|
||||||
evict-old-files: 1d
|
evict-old-files: 1d
|
||||||
|
|
||||||
|
|
@ -616,7 +631,7 @@ jobs:
|
||||||
- name: ccache
|
- name: ccache
|
||||||
uses: ggml-org/ccache-action@v1.2.16
|
uses: ggml-org/ccache-action@v1.2.16
|
||||||
with:
|
with:
|
||||||
key: ubuntu-rocm-cmake-${{ matrix.ROCM_VERSION }}-${{ matrix.build }}
|
key: ubuntu-rocm-${{ matrix.ROCM_VERSION }}-${{ matrix.build }}
|
||||||
evict-old-files: 1d
|
evict-old-files: 1d
|
||||||
|
|
||||||
- name: Dependencies
|
- name: Dependencies
|
||||||
|
|
@ -726,7 +741,7 @@ jobs:
|
||||||
- name: ccache
|
- name: ccache
|
||||||
uses: ggml-org/ccache-action@v1.2.16
|
uses: ggml-org/ccache-action@v1.2.16
|
||||||
with:
|
with:
|
||||||
key: windows-latest-cmake-hip-${{ env.HIPSDK_INSTALLER_VERSION }}-${{ matrix.name }}-x64
|
key: windows-latest-hip-${{ env.HIPSDK_INSTALLER_VERSION }}-${{ matrix.name }}-x64
|
||||||
evict-old-files: 1d
|
evict-old-files: 1d
|
||||||
|
|
||||||
- name: Install ROCm
|
- name: Install ROCm
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,105 @@
|
||||||
|
name: Server (sanitize)
|
||||||
|
|
||||||
|
on:
|
||||||
|
workflow_dispatch: # allows manual triggering
|
||||||
|
inputs:
|
||||||
|
sha:
|
||||||
|
description: 'Commit SHA1 to build'
|
||||||
|
required: false
|
||||||
|
type: string
|
||||||
|
slow_tests:
|
||||||
|
description: 'Run slow tests'
|
||||||
|
required: true
|
||||||
|
type: boolean
|
||||||
|
push:
|
||||||
|
branches:
|
||||||
|
- master
|
||||||
|
paths: [
|
||||||
|
'.github/workflows/server-sanitize.yml',
|
||||||
|
'**/CMakeLists.txt',
|
||||||
|
'**/Makefile',
|
||||||
|
'**/*.h',
|
||||||
|
'**/*.hpp',
|
||||||
|
'**/*.c',
|
||||||
|
'**/*.cpp',
|
||||||
|
'tools/server/**.*'
|
||||||
|
]
|
||||||
|
|
||||||
|
env:
|
||||||
|
LLAMA_LOG_COLORS: 1
|
||||||
|
LLAMA_LOG_PREFIX: 1
|
||||||
|
LLAMA_LOG_TIMESTAMPS: 1
|
||||||
|
LLAMA_LOG_VERBOSITY: 10
|
||||||
|
|
||||||
|
concurrency:
|
||||||
|
group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || github.run_id }}
|
||||||
|
cancel-in-progress: true
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
server:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
|
||||||
|
strategy:
|
||||||
|
matrix:
|
||||||
|
sanitizer: [ADDRESS, UNDEFINED] # THREAD is very slow
|
||||||
|
build_type: [RelWithDebInfo]
|
||||||
|
fail-fast: false
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- name: Dependencies
|
||||||
|
id: depends
|
||||||
|
run: |
|
||||||
|
sudo apt-get update
|
||||||
|
sudo apt-get -y install \
|
||||||
|
build-essential \
|
||||||
|
xxd \
|
||||||
|
git \
|
||||||
|
cmake \
|
||||||
|
curl \
|
||||||
|
wget \
|
||||||
|
language-pack-en \
|
||||||
|
libssl-dev
|
||||||
|
|
||||||
|
- name: Clone
|
||||||
|
id: checkout
|
||||||
|
uses: actions/checkout@v6
|
||||||
|
with:
|
||||||
|
fetch-depth: 0
|
||||||
|
ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }}
|
||||||
|
|
||||||
|
- name: Build
|
||||||
|
id: cmake_build
|
||||||
|
run: |
|
||||||
|
cmake -B build \
|
||||||
|
-DLLAMA_BUILD_BORINGSSL=ON \
|
||||||
|
-DGGML_SCHED_NO_REALLOC=ON \
|
||||||
|
-DGGML_SANITIZE_ADDRESS=${{ matrix.sanitizer == 'ADDRESS' }} \
|
||||||
|
-DGGML_SANITIZE_THREAD=${{ matrix.sanitizer == 'THREAD' }} \
|
||||||
|
-DGGML_SANITIZE_UNDEFINED=${{ matrix.sanitizer == 'UNDEFINED' }} \
|
||||||
|
-DLLAMA_SANITIZE_ADDRESS=${{ matrix.sanitizer == 'ADDRESS' }} \
|
||||||
|
-DLLAMA_SANITIZE_THREAD=${{ matrix.sanitizer == 'THREAD' }} \
|
||||||
|
-DLLAMA_SANITIZE_UNDEFINED=${{ matrix.sanitizer == 'UNDEFINED' }}
|
||||||
|
cmake --build build --config ${{ matrix.build_type }} -j $(nproc) --target llama-server
|
||||||
|
|
||||||
|
- name: Python setup
|
||||||
|
id: setup_python
|
||||||
|
uses: actions/setup-python@v6
|
||||||
|
with:
|
||||||
|
python-version: '3.11'
|
||||||
|
pip-install: -r tools/server/tests/requirements.txt
|
||||||
|
|
||||||
|
- name: Tests
|
||||||
|
id: server_integration_tests
|
||||||
|
if: ${{ (!matrix.disabled_on_pr || !github.event.pull_request) }}
|
||||||
|
run: |
|
||||||
|
cd tools/server/tests
|
||||||
|
export ${{ matrix.extra_args }}
|
||||||
|
pytest -v -x -m "not slow"
|
||||||
|
|
||||||
|
- name: Slow tests
|
||||||
|
id: server_integration_tests_slow
|
||||||
|
if: ${{ (github.event.schedule || github.event.inputs.slow_tests == 'true') && matrix.build_type == 'Release' }}
|
||||||
|
run: |
|
||||||
|
cd tools/server/tests
|
||||||
|
export ${{ matrix.extra_args }}
|
||||||
|
SLOW_TESTS=1 pytest -v -x
|
||||||
|
|
@ -14,7 +14,19 @@ on:
|
||||||
push:
|
push:
|
||||||
branches:
|
branches:
|
||||||
- master
|
- master
|
||||||
paths: ['.github/workflows/server-self-hosted.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'tools/server/**.*']
|
paths: [
|
||||||
|
'.github/workflows/server-self-hosted.yml',
|
||||||
|
'**/CMakeLists.txt',
|
||||||
|
'**/Makefile',
|
||||||
|
'**/*.h',
|
||||||
|
'**/*.hpp',
|
||||||
|
'**/*.c',
|
||||||
|
'**/*.cpp',
|
||||||
|
'**/*.cu',
|
||||||
|
'**/*.swift',
|
||||||
|
'**/*.m',
|
||||||
|
'tools/server/**.*'
|
||||||
|
]
|
||||||
|
|
||||||
env:
|
env:
|
||||||
LLAMA_LOG_COLORS: 1
|
LLAMA_LOG_COLORS: 1
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,3 @@
|
||||||
# Server WebUI build and tests
|
|
||||||
name: Server WebUI
|
name: Server WebUI
|
||||||
|
|
||||||
on:
|
on:
|
||||||
|
|
@ -11,10 +10,20 @@ on:
|
||||||
push:
|
push:
|
||||||
branches:
|
branches:
|
||||||
- master
|
- master
|
||||||
paths: ['.github/workflows/server-webui.yml', 'tools/server/webui/**.*', 'tools/server/tests/**.*', 'tools/server/public/**']
|
paths: [
|
||||||
|
'.github/workflows/server-webui.yml',
|
||||||
|
'tools/server/webui/**.*',
|
||||||
|
'tools/server/tests/**.*',
|
||||||
|
'tools/server/public/**'
|
||||||
|
]
|
||||||
pull_request:
|
pull_request:
|
||||||
types: [opened, synchronize, reopened]
|
types: [opened, synchronize, reopened]
|
||||||
paths: ['.github/workflows/server-webui.yml', 'tools/server/webui/**.*', 'tools/server/tests/**.*', 'tools/server/public/**']
|
paths: [
|
||||||
|
'.github/workflows/server-webui.yml',
|
||||||
|
'tools/server/webui/**.*',
|
||||||
|
'tools/server/tests/**.*',
|
||||||
|
'tools/server/public/**'
|
||||||
|
]
|
||||||
|
|
||||||
env:
|
env:
|
||||||
LLAMA_LOG_COLORS: 1
|
LLAMA_LOG_COLORS: 1
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,3 @@
|
||||||
# Server build and tests
|
|
||||||
name: Server
|
name: Server
|
||||||
|
|
||||||
on:
|
on:
|
||||||
|
|
@ -15,10 +14,34 @@ on:
|
||||||
push:
|
push:
|
||||||
branches:
|
branches:
|
||||||
- master
|
- master
|
||||||
paths: ['.github/workflows/server.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'tools/server/**.*']
|
paths: [
|
||||||
|
'.github/workflows/server.yml',
|
||||||
|
'**/CMakeLists.txt',
|
||||||
|
'**/Makefile',
|
||||||
|
'**/*.h',
|
||||||
|
'**/*.hpp',
|
||||||
|
'**/*.c',
|
||||||
|
'**/*.cpp',
|
||||||
|
'**/*.cu',
|
||||||
|
'**/*.swift',
|
||||||
|
'**/*.m',
|
||||||
|
'tools/server/**.*'
|
||||||
|
]
|
||||||
pull_request:
|
pull_request:
|
||||||
types: [opened, synchronize, reopened]
|
types: [opened, synchronize, reopened]
|
||||||
paths: ['.github/workflows/server.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'tools/server/**.*']
|
paths: [
|
||||||
|
'.github/workflows/server.yml',
|
||||||
|
'**/CMakeLists.txt',
|
||||||
|
'**/Makefile',
|
||||||
|
'**/*.h',
|
||||||
|
'**/*.hpp',
|
||||||
|
'**/*.c',
|
||||||
|
'**/*.cpp',
|
||||||
|
'**/*.cu',
|
||||||
|
'**/*.swift',
|
||||||
|
'**/*.m',
|
||||||
|
'tools/server/**.*'
|
||||||
|
]
|
||||||
|
|
||||||
env:
|
env:
|
||||||
LLAMA_LOG_COLORS: 1
|
LLAMA_LOG_COLORS: 1
|
||||||
|
|
@ -34,17 +57,18 @@ jobs:
|
||||||
server:
|
server:
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
|
|
||||||
|
name: server (${{ matrix.wf_name }})
|
||||||
strategy:
|
strategy:
|
||||||
matrix:
|
matrix:
|
||||||
sanitizer: [ADDRESS, UNDEFINED] # THREAD is very slow
|
build_type: [Release]
|
||||||
build_type: [RelWithDebInfo]
|
wf_name: ["default"]
|
||||||
include:
|
include:
|
||||||
- build_type: Release
|
- build_type: Release
|
||||||
sanitizer: ""
|
|
||||||
extra_args: ""
|
extra_args: ""
|
||||||
|
wf_name: "default"
|
||||||
- build_type: Release
|
- build_type: Release
|
||||||
sanitizer: ""
|
|
||||||
extra_args: "LLAMA_ARG_BACKEND_SAMPLING=1"
|
extra_args: "LLAMA_ARG_BACKEND_SAMPLING=1"
|
||||||
|
wf_name: "backend-sampling"
|
||||||
fail-fast: false
|
fail-fast: false
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
|
|
@ -74,13 +98,7 @@ jobs:
|
||||||
run: |
|
run: |
|
||||||
cmake -B build \
|
cmake -B build \
|
||||||
-DLLAMA_BUILD_BORINGSSL=ON \
|
-DLLAMA_BUILD_BORINGSSL=ON \
|
||||||
-DGGML_SCHED_NO_REALLOC=ON \
|
-DGGML_SCHED_NO_REALLOC=ON
|
||||||
-DGGML_SANITIZE_ADDRESS=${{ matrix.sanitizer == 'ADDRESS' }} \
|
|
||||||
-DGGML_SANITIZE_THREAD=${{ matrix.sanitizer == 'THREAD' }} \
|
|
||||||
-DGGML_SANITIZE_UNDEFINED=${{ matrix.sanitizer == 'UNDEFINED' }} \
|
|
||||||
-DLLAMA_SANITIZE_ADDRESS=${{ matrix.sanitizer == 'ADDRESS' }} \
|
|
||||||
-DLLAMA_SANITIZE_THREAD=${{ matrix.sanitizer == 'THREAD' }} \
|
|
||||||
-DLLAMA_SANITIZE_UNDEFINED=${{ matrix.sanitizer == 'UNDEFINED' }}
|
|
||||||
cmake --build build --config ${{ matrix.build_type }} -j $(nproc) --target llama-server
|
cmake --build build --config ${{ matrix.build_type }} -j $(nproc) --target llama-server
|
||||||
|
|
||||||
- name: Python setup
|
- name: Python setup
|
||||||
|
|
|
||||||
56
CODEOWNERS
56
CODEOWNERS
|
|
@ -2,29 +2,13 @@
|
||||||
# multiple collaborators per item can be specified
|
# multiple collaborators per item can be specified
|
||||||
|
|
||||||
/.devops/*.Dockerfile @ngxson
|
/.devops/*.Dockerfile @ngxson
|
||||||
/.github/actions/ @CISC
|
/.github/actions/ @ggml-org/ci
|
||||||
/.github/workflows/ @CISC
|
/.github/workflows/ @ggml-org/ci
|
||||||
/ci/ @ggerganov
|
/ci/ @ggerganov
|
||||||
/cmake/ @ggerganov
|
/cmake/ @ggerganov
|
||||||
/common/CMakeLists.txt @ggerganov
|
/common/ @ggml-org/llama-common
|
||||||
/common/arg.* @ggerganov
|
/common/jinja/ @CISC
|
||||||
/common/base64.hpp.* @ggerganov
|
|
||||||
/common/build-info.* @ggerganov
|
|
||||||
/common/chat.* @pwilkin
|
|
||||||
/common/chat-auto*.* @pwilkin
|
|
||||||
/common/chat-diff-analyzer.* @pwilkin
|
|
||||||
/common/chat-peg-parser.* @aldehir
|
|
||||||
/common/common.* @ggerganov
|
|
||||||
/common/console.* @ggerganov
|
|
||||||
/common/http.* @angt
|
|
||||||
/common/jinja/ @ngxson @CISC @aldehir
|
|
||||||
/common/llguidance.* @ggerganov
|
|
||||||
/common/log.* @ggerganov
|
|
||||||
/common/ngram-map.* @srogmann
|
/common/ngram-map.* @srogmann
|
||||||
/common/peg-parser.* @aldehir
|
|
||||||
/common/sampling.* @ggerganov
|
|
||||||
/common/speculative.* @ggerganov
|
|
||||||
/common/unicode.* @aldehir
|
|
||||||
/convert_*.py @CISC
|
/convert_*.py @CISC
|
||||||
/examples/batched.swift/ @ggerganov
|
/examples/batched.swift/ @ggerganov
|
||||||
/examples/batched/ @ggerganov
|
/examples/batched/ @ggerganov
|
||||||
|
|
@ -51,29 +35,27 @@
|
||||||
/examples/speculative/ @ggerganov
|
/examples/speculative/ @ggerganov
|
||||||
/ggml/cmake/ @ggerganov
|
/ggml/cmake/ @ggerganov
|
||||||
/ggml/include/ @ggerganov
|
/ggml/include/ @ggerganov
|
||||||
|
/ggml/src/ggml-cann/ @ggml-org/ggml-cann
|
||||||
/ggml/src/ggml-common.h @ggerganov
|
/ggml/src/ggml-common.h @ggerganov
|
||||||
/ggml/src/ggml-cpu/ @ggerganov
|
/ggml/src/ggml-cpu/ @ggerganov
|
||||||
/ggml/src/ggml-cpu/spacemit/ @alex-spacemit
|
/ggml/src/ggml-cpu/spacemit/ @alex-spacemit
|
||||||
/ggml/src/ggml-cuda/fattn* @JohannesGaessler
|
/ggml/src/ggml-cuda/ @ggml-org/ggml-cuda
|
||||||
/ggml/src/ggml-cuda/mmf.* @JohannesGaessler @am17an
|
|
||||||
/ggml/src/ggml-cuda/mmq.* @JohannesGaessler
|
|
||||||
/ggml/src/ggml-cuda/mmvf.* @JohannesGaessler
|
|
||||||
/ggml/src/ggml-cuda/mmvq.* @JohannesGaessler
|
|
||||||
/ggml/src/ggml-cuda/fattn-wmma* @IMbackK
|
/ggml/src/ggml-cuda/fattn-wmma* @IMbackK
|
||||||
/ggml/src/ggml-hip/ @IMbackK
|
/ggml/src/ggml-hip/ @IMbackK
|
||||||
/ggml/src/ggml-cuda/vendors/hip.h @IMbackK
|
/ggml/src/ggml-cuda/vendors/hip.h @IMbackK
|
||||||
/ggml/src/ggml-impl.h @ggerganov
|
/ggml/src/ggml-impl.h @ggerganov
|
||||||
/ggml/src/ggml-metal/ @ggerganov
|
/ggml/src/ggml-metal/ @ggml-org/ggml-metal
|
||||||
/ggml/src/ggml-opencl/ @lhez @max-krasnyansky
|
/ggml/src/ggml-opencl/ @ggml-org/ggml-opencl
|
||||||
/ggml/src/ggml-hexagon/ @max-krasnyansky @lhez
|
/ggml/src/ggml-hexagon/ @ggml-org/ggml-hexagon
|
||||||
/ggml/src/ggml-opt.cpp @JohannesGaessler
|
/ggml/src/ggml-opt.cpp @JohannesGaessler
|
||||||
/ggml/src/ggml-quants.* @ggerganov
|
/ggml/src/ggml-quants.* @ggerganov
|
||||||
/ggml/src/ggml-rpc/ @rgerganov
|
/ggml/src/ggml-rpc/ @ggml-org/ggml-rpc
|
||||||
|
/ggml/src/ggml-sycl/ @ggml-org/ggml-sycl
|
||||||
/ggml/src/ggml-threading.* @ggerganov
|
/ggml/src/ggml-threading.* @ggerganov
|
||||||
/ggml/src/ggml-vulkan/ @0cc4m
|
/ggml/src/ggml-vulkan/ @ggml-org/ggml-vulkan
|
||||||
/ggml/src/ggml-virtgpu/ @kpouget
|
/ggml/src/ggml-virtgpu/ @kpouget
|
||||||
/ggml/src/ggml-webgpu/ @reeselevine
|
/ggml/src/ggml-webgpu/ @ggml-org/ggml-webgpu
|
||||||
/ggml/src/ggml-zdnn/ @taronaeo @Andreas-Krebbel @AlekseiNikiforovIBM
|
/ggml/src/ggml-zdnn/ @ggml-org/ggml-zdnn @Andreas-Krebbel @AlekseiNikiforovIBM
|
||||||
/ggml/src/ggml-openvino/ @cavusmustafa @wine99
|
/ggml/src/ggml-openvino/ @cavusmustafa @wine99
|
||||||
/ggml/src/ggml.c @ggerganov
|
/ggml/src/ggml.c @ggerganov
|
||||||
/ggml/src/ggml.cpp @ggerganov
|
/ggml/src/ggml.cpp @ggerganov
|
||||||
|
|
@ -93,16 +75,18 @@
|
||||||
/src/models/ @CISC
|
/src/models/ @CISC
|
||||||
/tests/ @ggerganov
|
/tests/ @ggerganov
|
||||||
/tests/test-chat.* @pwilkin
|
/tests/test-chat.* @pwilkin
|
||||||
|
/tests/test-llama-archs.cpp @JohannesGaessler
|
||||||
/tools/batched-bench/ @ggerganov
|
/tools/batched-bench/ @ggerganov
|
||||||
/tools/cli/ @ngxson
|
/tools/cli/ @ngxson
|
||||||
/tools/completion/ @ggerganov
|
/tools/completion/ @ggerganov
|
||||||
/tools/mtmd/ @ngxson
|
/tools/mtmd/ @ggml-org/llama-mtmd
|
||||||
/tools/perplexity/ @ggerganov
|
/tools/perplexity/ @ggerganov
|
||||||
/tools/parser/ @pwilkin
|
/tools/parser/ @pwilkin
|
||||||
/tools/quantize/ @ggerganov
|
/tools/quantize/ @ggerganov
|
||||||
/tools/rpc/ @rgerganov
|
/tools/rpc/ @ggml-org/ggml-rpc
|
||||||
/tools/server/* @ngxson @ggerganov # no subdir
|
/tools/server/* @ggml-org/llama-server # no subdir
|
||||||
/tools/server/webui/ @allozaur
|
/tools/server/tests/ @ggml-org/llama-server
|
||||||
|
/tools/server/webui/ @ggml-org/llama-webui
|
||||||
/tools/tokenize/ @ggerganov
|
/tools/tokenize/ @ggerganov
|
||||||
/tools/tts/ @ggerganov
|
/tools/tts/ @ggerganov
|
||||||
/vendor/ @ggerganov
|
/vendor/ @ggerganov
|
||||||
|
|
|
||||||
|
|
@ -479,6 +479,7 @@ analyze_content::analyze_content(const common_chat_template & tmpl, const analyz
|
||||||
|
|
||||||
if (!comparison_with_tools || !comparison_with_reasoning) {
|
if (!comparison_with_tools || !comparison_with_reasoning) {
|
||||||
LOG_DBG(ANSI_ORANGE "%s: Template application failed\n" ANSI_RESET, __func__);
|
LOG_DBG(ANSI_ORANGE "%s: Template application failed\n" ANSI_RESET, __func__);
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
const auto & diff_tools = comparison_with_tools->diff;
|
const auto & diff_tools = comparison_with_tools->diff;
|
||||||
|
|
@ -911,8 +912,10 @@ void analyze_tools::extract_function_markers() {
|
||||||
// we'll have to rely on an extra diff with no-calls version
|
// we'll have to rely on an extra diff with no-calls version
|
||||||
auto notool_comp = compare_variants(
|
auto notool_comp = compare_variants(
|
||||||
*tmpl, params, [&](template_params & p) { p.messages = json::array({ user_msg, assistant_nocall }); });
|
*tmpl, params, [&](template_params & p) { p.messages = json::array({ user_msg, assistant_nocall }); });
|
||||||
auto nt_diff = notool_comp->diff;
|
if (notool_comp) {
|
||||||
closer_suffix = nt_diff.left.substr(nt_diff.left.find("YYYY") + 4);
|
auto nt_diff = notool_comp->diff;
|
||||||
|
closer_suffix = nt_diff.left.substr(nt_diff.left.find("YYYY") + 4);
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
closer_suffix = diff.suffix.substr(0, diff.suffix.find(suffix_marker));
|
closer_suffix = diff.suffix.substr(0, diff.suffix.find(suffix_marker));
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -892,7 +892,7 @@ void launch_fattn(
|
||||||
const int ntiles_x = ((Q->ne[1] + ncols1 - 1) / ncols1);
|
const int ntiles_x = ((Q->ne[1] + ncols1 - 1) / ncols1);
|
||||||
const int gqa_ratio = Q->ne[2] / K->ne[2];
|
const int gqa_ratio = Q->ne[2] / K->ne[2];
|
||||||
const int ntiles_z_gqa = ((gqa_ratio + ncols2 - 1) / ncols2);
|
const int ntiles_z_gqa = ((gqa_ratio + ncols2 - 1) / ncols2);
|
||||||
const int ntiles_total = ntiles_x * ntiles_z_gqa * K->ne[2] * Q->ne[3];
|
const int ntiles_dst = ntiles_x * ntiles_z_gqa * K->ne[2] * Q->ne[3];
|
||||||
|
|
||||||
// Optional optimization where the mask is scanned to determine whether part of the calculation can be skipped.
|
// Optional optimization where the mask is scanned to determine whether part of the calculation can be skipped.
|
||||||
// Only worth the overhead if there is at least one FATTN_KQ_STRIDE x FATTN_KQ_STRIDE square to be skipped or
|
// Only worth the overhead if there is at least one FATTN_KQ_STRIDE x FATTN_KQ_STRIDE square to be skipped or
|
||||||
|
|
@ -919,37 +919,37 @@ void launch_fattn(
|
||||||
GGML_ASSERT(max_blocks_per_sm > 0);
|
GGML_ASSERT(max_blocks_per_sm > 0);
|
||||||
int parallel_blocks = max_blocks_per_sm;
|
int parallel_blocks = max_blocks_per_sm;
|
||||||
|
|
||||||
|
const int ntiles_KV = (K->ne[1] + nbatch_fa - 1) / nbatch_fa; // Max. number of parallel blocks limited by KV cache length.
|
||||||
|
|
||||||
dim3 blocks_num;
|
dim3 blocks_num;
|
||||||
if (stream_k) {
|
if (stream_k) {
|
||||||
// For short contexts it can be faster to have the SMs work on whole tiles because this lets us skip the fixup.
|
// For short contexts it can be faster to have the SMs work on whole tiles because this lets us skip the fixup.
|
||||||
const int max_blocks = max_blocks_per_sm*nsm;
|
const int max_blocks = max_blocks_per_sm*nsm;
|
||||||
const int tiles_nwaves = (ntiles_total + max_blocks - 1) / max_blocks;
|
const int tiles_nwaves = (ntiles_dst + max_blocks - 1) / max_blocks;
|
||||||
const int tiles_efficiency_percent = 100 * ntiles_total / (max_blocks*tiles_nwaves);
|
const int tiles_efficiency_percent = 100 * ntiles_dst / (max_blocks*tiles_nwaves);
|
||||||
|
|
||||||
const int nblocks_stream_k = max_blocks;
|
const int nblocks_stream_k = std::min(max_blocks, ntiles_KV*ntiles_dst);
|
||||||
|
|
||||||
const bool use_stream_k = cc >= GGML_CUDA_CC_ADA_LOVELACE || amd_wmma_available(cc) || tiles_efficiency_percent < 75;
|
const bool use_stream_k = cc >= GGML_CUDA_CC_ADA_LOVELACE || amd_wmma_available(cc) || tiles_efficiency_percent < 75;
|
||||||
|
|
||||||
blocks_num.x = use_stream_k ? nblocks_stream_k : ntiles_total;
|
blocks_num.x = use_stream_k ? nblocks_stream_k : ntiles_dst;
|
||||||
blocks_num.y = 1;
|
blocks_num.y = 1;
|
||||||
blocks_num.z = 1;
|
blocks_num.z = 1;
|
||||||
|
|
||||||
if (ntiles_total % blocks_num.x != 0) { // Fixup is only needed if the SMs work on fractional tiles.
|
if (ntiles_dst % blocks_num.x != 0) { // Fixup is only needed if the SMs work on fractional tiles.
|
||||||
dst_tmp_meta.alloc((size_t(blocks_num.x) * ncols * (2 + DV/2)));
|
dst_tmp_meta.alloc((size_t(blocks_num.x) * ncols * (2 + DV/2)));
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
const int ntiles_KQ = (K->ne[1] + nbatch_fa - 1) / nbatch_fa; // Max. number of parallel blocks limited by tensor size.
|
|
||||||
|
|
||||||
// parallel_blocks must not be larger than what the tensor size allows:
|
// parallel_blocks must not be larger than what the tensor size allows:
|
||||||
parallel_blocks = std::min(parallel_blocks, ntiles_KQ);
|
parallel_blocks = std::min(parallel_blocks, ntiles_KV);
|
||||||
|
|
||||||
// If ntiles_total % blocks_per_wave != 0 then some efficiency is lost due to tail effects.
|
// If ntiles_dst % blocks_per_wave != 0 then some efficiency is lost due to tail effects.
|
||||||
// Test whether parallel_blocks can be set to a higher value for better efficiency.
|
// Test whether parallel_blocks can be set to a higher value for better efficiency.
|
||||||
const int blocks_per_wave = nsm * max_blocks_per_sm;
|
const int blocks_per_wave = nsm * max_blocks_per_sm;
|
||||||
int nwaves_best = 0;
|
int nwaves_best = 0;
|
||||||
int efficiency_percent_best = 0;
|
int efficiency_percent_best = 0;
|
||||||
for (int parallel_blocks_test = parallel_blocks; parallel_blocks_test <= ntiles_KQ; ++parallel_blocks_test) {
|
for (int parallel_blocks_test = parallel_blocks; parallel_blocks_test <= ntiles_KV; ++parallel_blocks_test) {
|
||||||
const int nblocks_total = ntiles_total * parallel_blocks_test;
|
const int nblocks_total = ntiles_dst * parallel_blocks_test;
|
||||||
const int nwaves = (nblocks_total + blocks_per_wave - 1) / blocks_per_wave;
|
const int nwaves = (nblocks_total + blocks_per_wave - 1) / blocks_per_wave;
|
||||||
const int efficiency_percent = 100 * nblocks_total / (nwaves*blocks_per_wave);
|
const int efficiency_percent = 100 * nblocks_total / (nwaves*blocks_per_wave);
|
||||||
|
|
||||||
|
|
@ -1015,7 +1015,7 @@ void launch_fattn(
|
||||||
CUDA_CHECK(cudaGetLastError());
|
CUDA_CHECK(cudaGetLastError());
|
||||||
|
|
||||||
if (stream_k) {
|
if (stream_k) {
|
||||||
if (ntiles_total % blocks_num.x != 0) { // Fixup is only needed if the SMs work on fractional tiles.
|
if (ntiles_dst % blocks_num.x != 0) { // Fixup is only needed if the SMs work on fractional tiles.
|
||||||
const dim3 block_dim_combine(DV, 1, 1);
|
const dim3 block_dim_combine(DV, 1, 1);
|
||||||
const dim3 blocks_num_combine = {blocks_num.x, ncols1, ncols2};
|
const dim3 blocks_num_combine = {blocks_num.x, ncols1, ncols2};
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -124,7 +124,10 @@ static cudaError_t ggml_cuda_device_malloc(void ** ptr, size_t size, int device)
|
||||||
err = cudaMallocManaged(ptr, size);
|
err = cudaMallocManaged(ptr, size);
|
||||||
#if defined(GGML_USE_HIP)
|
#if defined(GGML_USE_HIP)
|
||||||
if (err == hipSuccess) {
|
if (err == hipSuccess) {
|
||||||
CUDA_CHECK(cudaMemAdvise(*ptr, size, hipMemAdviseSetCoarseGrain, device));
|
// hipMemAdviseSetCoarseGrain is an optional performance hint;
|
||||||
|
// ignore errors (e.g. hipErrorInvalidValue on some APU/iGPU configs).
|
||||||
|
cudaMemAdvise(*ptr, size, hipMemAdviseSetCoarseGrain, device);
|
||||||
|
(void)hipGetLastError(); // clear any error
|
||||||
}
|
}
|
||||||
|
|
||||||
// fall back to cudaMalloc if not supported (e.g. on Windows)
|
// fall back to cudaMalloc if not supported (e.g. on Windows)
|
||||||
|
|
@ -251,11 +254,6 @@ static ggml_cuda_device_info ggml_cuda_init() {
|
||||||
info.devices[id].supports_cooperative_launch = false;
|
info.devices[id].supports_cooperative_launch = false;
|
||||||
#endif // !(GGML_USE_MUSA)
|
#endif // !(GGML_USE_MUSA)
|
||||||
|
|
||||||
// cudaMemGetInfo returns info for the current device
|
|
||||||
size_t free_mem;
|
|
||||||
CUDA_CHECK(cudaSetDevice(id));
|
|
||||||
CUDA_CHECK(cudaMemGetInfo(&free_mem, NULL));
|
|
||||||
|
|
||||||
#if defined(GGML_USE_HIP)
|
#if defined(GGML_USE_HIP)
|
||||||
info.devices[id].smpbo = prop.sharedMemPerBlock;
|
info.devices[id].smpbo = prop.sharedMemPerBlock;
|
||||||
|
|
||||||
|
|
@ -270,25 +268,25 @@ static ggml_cuda_device_info ggml_cuda_init() {
|
||||||
info.devices[id].cc += prop.minor * 0x10;
|
info.devices[id].cc += prop.minor * 0x10;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
GGML_LOG_INFO(" Device %d: %s, %s (0x%x), VMM: %s, Wave Size: %d, VRAM: %zu MiB (%zu MiB free)\n",
|
GGML_LOG_INFO(" Device %d: %s, %s (0x%x), VMM: %s, Wave Size: %d, VRAM: %zu MiB\n",
|
||||||
id, prop.name, prop.gcnArchName, info.devices[id].cc & 0xffff,
|
id, prop.name, prop.gcnArchName, info.devices[id].cc & 0xffff,
|
||||||
device_vmm ? "yes" : "no", prop.warpSize,
|
device_vmm ? "yes" : "no", prop.warpSize,
|
||||||
(size_t)(prop.totalGlobalMem / (1024 * 1024)), free_mem / (1024 * 1024));
|
(size_t)(prop.totalGlobalMem / (1024 * 1024)));
|
||||||
#elif defined(GGML_USE_MUSA)
|
#elif defined(GGML_USE_MUSA)
|
||||||
// FIXME: Ensure compatibility with varying warp sizes across different MUSA archs.
|
// FIXME: Ensure compatibility with varying warp sizes across different MUSA archs.
|
||||||
info.devices[id].warp_size = 32;
|
info.devices[id].warp_size = 32;
|
||||||
info.devices[id].smpbo = prop.sharedMemPerBlockOptin;
|
info.devices[id].smpbo = prop.sharedMemPerBlockOptin;
|
||||||
info.devices[id].cc = GGML_CUDA_CC_OFFSET_MTHREADS + prop.major * 0x100;
|
info.devices[id].cc = GGML_CUDA_CC_OFFSET_MTHREADS + prop.major * 0x100;
|
||||||
info.devices[id].cc += prop.minor * 0x10;
|
info.devices[id].cc += prop.minor * 0x10;
|
||||||
GGML_LOG_INFO(" Device %d: %s, compute capability %d.%d, VMM: %s, VRAM: %zu MiB (%zu MiB free)\n",
|
GGML_LOG_INFO(" Device %d: %s, compute capability %d.%d, VMM: %s, VRAM: %zu MiB\n",
|
||||||
id, prop.name, prop.major, prop.minor, device_vmm ? "yes" : "no",
|
id, prop.name, prop.major, prop.minor, device_vmm ? "yes" : "no",
|
||||||
(size_t)(prop.totalGlobalMem / (1024 * 1024)), free_mem / (1024 * 1024));
|
(size_t)(prop.totalGlobalMem / (1024 * 1024)));
|
||||||
#else
|
#else
|
||||||
info.devices[id].smpbo = prop.sharedMemPerBlockOptin;
|
info.devices[id].smpbo = prop.sharedMemPerBlockOptin;
|
||||||
info.devices[id].cc = 100*prop.major + 10*prop.minor;
|
info.devices[id].cc = 100*prop.major + 10*prop.minor;
|
||||||
GGML_LOG_INFO(" Device %d: %s, compute capability %d.%d, VMM: %s, VRAM: %zu MiB (%zu MiB free)\n",
|
GGML_LOG_INFO(" Device %d: %s, compute capability %d.%d, VMM: %s, VRAM: %zu MiB\n",
|
||||||
id, prop.name, prop.major, prop.minor, device_vmm ? "yes" : "no",
|
id, prop.name, prop.major, prop.minor, device_vmm ? "yes" : "no",
|
||||||
(size_t)(prop.totalGlobalMem / (1024 * 1024)), free_mem / (1024 * 1024));
|
(size_t)(prop.totalGlobalMem / (1024 * 1024)));
|
||||||
std::string device_name(prop.name);
|
std::string device_name(prop.name);
|
||||||
if (device_name == "NVIDIA GeForce MX450") {
|
if (device_name == "NVIDIA GeForce MX450") {
|
||||||
turing_devices_without_mma.push_back({ id, device_name });
|
turing_devices_without_mma.push_back({ id, device_name });
|
||||||
|
|
@ -303,6 +301,7 @@ static ggml_cuda_device_info ggml_cuda_init() {
|
||||||
// TODO: Check for future drivers the default scheduling strategy and
|
// TODO: Check for future drivers the default scheduling strategy and
|
||||||
// remove this call again when cudaDeviceScheduleSpin is default.
|
// remove this call again when cudaDeviceScheduleSpin is default.
|
||||||
if (prop.major == 12 && prop.minor == 1) {
|
if (prop.major == 12 && prop.minor == 1) {
|
||||||
|
CUDA_CHECK(cudaSetDevice(id));
|
||||||
CUDA_CHECK(cudaSetDeviceFlags(cudaDeviceScheduleSpin));
|
CUDA_CHECK(cudaSetDeviceFlags(cudaDeviceScheduleSpin));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -55,7 +55,7 @@ void gated_delta_net_sycl(const float * q,
|
||||||
#pragma unroll
|
#pragma unroll
|
||||||
for (int r = 0; r < rows_per_lane; r++) {
|
for (int r = 0; r < rows_per_lane; r++) {
|
||||||
const int i = r * warp_size + lane;
|
const int i = r * warp_size + lane;
|
||||||
s_shard[r] = curr_state[i * S_v + col];
|
s_shard[r] = curr_state[col * S_v + i];
|
||||||
}
|
}
|
||||||
|
|
||||||
for (int t = 0; t < n_tokens; t++) {
|
for (int t = 0; t < n_tokens; t++) {
|
||||||
|
|
@ -137,7 +137,7 @@ void gated_delta_net_sycl(const float * q,
|
||||||
#pragma unroll
|
#pragma unroll
|
||||||
for (int r = 0; r < rows_per_lane; r++) {
|
for (int r = 0; r < rows_per_lane; r++) {
|
||||||
const int i = r * warp_size + lane;
|
const int i = r * warp_size + lane;
|
||||||
state[i * S_v + col] = s_shard[r];
|
state[col * S_v + i] = s_shard[r];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -5,7 +5,7 @@ import os
|
||||||
import sys
|
import sys
|
||||||
import subprocess
|
import subprocess
|
||||||
|
|
||||||
HTTPLIB_VERSION = "refs/tags/v0.37.2"
|
HTTPLIB_VERSION = "refs/tags/v0.38.0"
|
||||||
|
|
||||||
vendor = {
|
vendor = {
|
||||||
"https://github.com/nlohmann/json/releases/latest/download/json.hpp": "vendor/nlohmann/json.hpp",
|
"https://github.com/nlohmann/json/releases/latest/download/json.hpp": "vendor/nlohmann/json.hpp",
|
||||||
|
|
|
||||||
|
|
@ -563,7 +563,7 @@ def test_cancel_request():
|
||||||
except requests.exceptions.ReadTimeout:
|
except requests.exceptions.ReadTimeout:
|
||||||
pass # expected
|
pass # expected
|
||||||
# make sure the slot is free
|
# make sure the slot is free
|
||||||
time.sleep(1) # wait for HTTP_POLLING_SECONDS
|
time.sleep(2)
|
||||||
res = server.make_request("GET", "/slots")
|
res = server.make_request("GET", "/slots")
|
||||||
assert res.body[0]["is_processing"] == False
|
assert res.body[0]["is_processing"] == False
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1025,6 +1025,30 @@ bool is_valid_path(const std::string &path) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool canonicalize_path(const char *path, std::string &resolved) {
|
||||||
|
#if defined(_WIN32)
|
||||||
|
char buf[_MAX_PATH];
|
||||||
|
if (_fullpath(buf, path, _MAX_PATH) == nullptr) { return false; }
|
||||||
|
resolved = buf;
|
||||||
|
#else
|
||||||
|
char buf[PATH_MAX];
|
||||||
|
if (realpath(path, buf) == nullptr) { return false; }
|
||||||
|
resolved = buf;
|
||||||
|
#endif
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool is_path_within_base(const std::string &resolved_path,
|
||||||
|
const std::string &resolved_base) {
|
||||||
|
#if defined(_WIN32)
|
||||||
|
return _strnicmp(resolved_path.c_str(), resolved_base.c_str(),
|
||||||
|
resolved_base.size()) == 0;
|
||||||
|
#else
|
||||||
|
return strncmp(resolved_path.c_str(), resolved_base.c_str(),
|
||||||
|
resolved_base.size()) == 0;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
FileStat::FileStat(const std::string &path) {
|
FileStat::FileStat(const std::string &path) {
|
||||||
#if defined(_WIN32)
|
#if defined(_WIN32)
|
||||||
auto wpath = u8string_to_wstring(path.c_str());
|
auto wpath = u8string_to_wstring(path.c_str());
|
||||||
|
|
@ -2627,33 +2651,114 @@ bool can_compress_content_type(const std::string &content_type) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool parse_quality(const char *b, const char *e, std::string &token,
|
||||||
|
double &quality) {
|
||||||
|
quality = 1.0;
|
||||||
|
token.clear();
|
||||||
|
|
||||||
|
// Split on first ';': left = token name, right = parameters
|
||||||
|
const char *params_b = nullptr;
|
||||||
|
std::size_t params_len = 0;
|
||||||
|
|
||||||
|
divide(
|
||||||
|
b, static_cast<std::size_t>(e - b), ';',
|
||||||
|
[&](const char *lb, std::size_t llen, const char *rb, std::size_t rlen) {
|
||||||
|
auto r = trim(lb, lb + llen, 0, llen);
|
||||||
|
if (r.first < r.second) { token.assign(lb + r.first, lb + r.second); }
|
||||||
|
params_b = rb;
|
||||||
|
params_len = rlen;
|
||||||
|
});
|
||||||
|
|
||||||
|
if (token.empty()) { return false; }
|
||||||
|
if (params_len == 0) { return true; }
|
||||||
|
|
||||||
|
// Scan parameters for q= (stops on first match)
|
||||||
|
bool invalid = false;
|
||||||
|
split_find(params_b, params_b + params_len, ';',
|
||||||
|
(std::numeric_limits<size_t>::max)(),
|
||||||
|
[&](const char *pb, const char *pe) -> bool {
|
||||||
|
// Match exactly "q=" or "Q=" (not "query=" etc.)
|
||||||
|
auto len = static_cast<size_t>(pe - pb);
|
||||||
|
if (len < 2) { return false; }
|
||||||
|
if ((pb[0] != 'q' && pb[0] != 'Q') || pb[1] != '=') {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Trim the value portion
|
||||||
|
auto r = trim(pb, pe, 2, len);
|
||||||
|
if (r.first >= r.second) {
|
||||||
|
invalid = true;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
double v = 0.0;
|
||||||
|
auto res = from_chars(pb + r.first, pb + r.second, v);
|
||||||
|
if (res.ec != std::errc{} || v < 0.0 || v > 1.0) {
|
||||||
|
invalid = true;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
quality = v;
|
||||||
|
return true;
|
||||||
|
});
|
||||||
|
|
||||||
|
return !invalid;
|
||||||
|
}
|
||||||
|
|
||||||
EncodingType encoding_type(const Request &req, const Response &res) {
|
EncodingType encoding_type(const Request &req, const Response &res) {
|
||||||
auto ret =
|
if (!can_compress_content_type(res.get_header_value("Content-Type"))) {
|
||||||
detail::can_compress_content_type(res.get_header_value("Content-Type"));
|
return EncodingType::None;
|
||||||
if (!ret) { return EncodingType::None; }
|
}
|
||||||
|
|
||||||
const auto &s = req.get_header_value("Accept-Encoding");
|
const auto &s = req.get_header_value("Accept-Encoding");
|
||||||
(void)(s);
|
if (s.empty()) { return EncodingType::None; }
|
||||||
|
|
||||||
|
// Single-pass: iterate tokens and track the best supported encoding.
|
||||||
|
// Server preference breaks ties (br > gzip > zstd).
|
||||||
|
EncodingType best = EncodingType::None;
|
||||||
|
double best_q = 0.0; // q=0 means "not acceptable"
|
||||||
|
|
||||||
|
// Server preference: Brotli > Gzip > Zstd (lower = more preferred)
|
||||||
|
auto priority = [](EncodingType t) -> int {
|
||||||
|
switch (t) {
|
||||||
|
case EncodingType::Brotli: return 0;
|
||||||
|
case EncodingType::Gzip: return 1;
|
||||||
|
case EncodingType::Zstd: return 2;
|
||||||
|
default: return 3;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
std::string name;
|
||||||
|
split(s.data(), s.data() + s.size(), ',', [&](const char *b, const char *e) {
|
||||||
|
double quality = 1.0;
|
||||||
|
if (!parse_quality(b, e, name, quality)) { return; }
|
||||||
|
if (quality <= 0.0) { return; }
|
||||||
|
|
||||||
|
EncodingType type = EncodingType::None;
|
||||||
#ifdef CPPHTTPLIB_BROTLI_SUPPORT
|
#ifdef CPPHTTPLIB_BROTLI_SUPPORT
|
||||||
// TODO: 'Accept-Encoding' has br, not br;q=0
|
if (case_ignore::equal(name, "br")) { type = EncodingType::Brotli; }
|
||||||
ret = s.find("br") != std::string::npos;
|
|
||||||
if (ret) { return EncodingType::Brotli; }
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef CPPHTTPLIB_ZLIB_SUPPORT
|
#ifdef CPPHTTPLIB_ZLIB_SUPPORT
|
||||||
// TODO: 'Accept-Encoding' has gzip, not gzip;q=0
|
if (type == EncodingType::None && case_ignore::equal(name, "gzip")) {
|
||||||
ret = s.find("gzip") != std::string::npos;
|
type = EncodingType::Gzip;
|
||||||
if (ret) { return EncodingType::Gzip; }
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef CPPHTTPLIB_ZSTD_SUPPORT
|
#ifdef CPPHTTPLIB_ZSTD_SUPPORT
|
||||||
// TODO: 'Accept-Encoding' has zstd, not zstd;q=0
|
if (type == EncodingType::None && case_ignore::equal(name, "zstd")) {
|
||||||
ret = s.find("zstd") != std::string::npos;
|
type = EncodingType::Zstd;
|
||||||
if (ret) { return EncodingType::Zstd; }
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
return EncodingType::None;
|
if (type == EncodingType::None) { return; }
|
||||||
|
|
||||||
|
// Higher q-value wins; for equal q, server preference breaks ties
|
||||||
|
if (quality > best_q ||
|
||||||
|
(quality == best_q && priority(type) < priority(best))) {
|
||||||
|
best_q = quality;
|
||||||
|
best = type;
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
return best;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool nocompressor::compress(const char *data, size_t data_length,
|
bool nocompressor::compress(const char *data, size_t data_length,
|
||||||
|
|
@ -2937,6 +3042,21 @@ create_decompressor(const std::string &encoding) {
|
||||||
return decompressor;
|
return decompressor;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Returns the best available compressor and its Content-Encoding name.
|
||||||
|
// Priority: Brotli > Gzip > Zstd (matches server-side preference).
|
||||||
|
std::pair<std::unique_ptr<compressor>, const char *>
|
||||||
|
create_compressor() {
|
||||||
|
#ifdef CPPHTTPLIB_BROTLI_SUPPORT
|
||||||
|
return {detail::make_unique<brotli_compressor>(), "br"};
|
||||||
|
#elif defined(CPPHTTPLIB_ZLIB_SUPPORT)
|
||||||
|
return {detail::make_unique<gzip_compressor>(), "gzip"};
|
||||||
|
#elif defined(CPPHTTPLIB_ZSTD_SUPPORT)
|
||||||
|
return {detail::make_unique<zstd_compressor>(), "zstd"};
|
||||||
|
#else
|
||||||
|
return {nullptr, nullptr};
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
bool is_prohibited_header_name(const std::string &name) {
|
bool is_prohibited_header_name(const std::string &name) {
|
||||||
using udl::operator""_t;
|
using udl::operator""_t;
|
||||||
|
|
||||||
|
|
@ -3769,7 +3889,7 @@ bool parse_accept_header(const std::string &s,
|
||||||
struct AcceptEntry {
|
struct AcceptEntry {
|
||||||
std::string media_type;
|
std::string media_type;
|
||||||
double quality;
|
double quality;
|
||||||
int order; // Original order in header
|
int order;
|
||||||
};
|
};
|
||||||
|
|
||||||
std::vector<AcceptEntry> entries;
|
std::vector<AcceptEntry> entries;
|
||||||
|
|
@ -3787,48 +3907,12 @@ bool parse_accept_header(const std::string &s,
|
||||||
}
|
}
|
||||||
|
|
||||||
AcceptEntry accept_entry;
|
AcceptEntry accept_entry;
|
||||||
accept_entry.quality = 1.0; // Default quality
|
|
||||||
accept_entry.order = order++;
|
accept_entry.order = order++;
|
||||||
|
|
||||||
// Find q= parameter
|
if (!parse_quality(entry.data(), entry.data() + entry.size(),
|
||||||
auto q_pos = entry.find(";q=");
|
accept_entry.media_type, accept_entry.quality)) {
|
||||||
if (q_pos == std::string::npos) { q_pos = entry.find("; q="); }
|
has_invalid_entry = true;
|
||||||
|
return;
|
||||||
if (q_pos != std::string::npos) {
|
|
||||||
// Extract media type (before q parameter)
|
|
||||||
accept_entry.media_type = trim_copy(entry.substr(0, q_pos));
|
|
||||||
|
|
||||||
// Extract quality value
|
|
||||||
auto q_start = entry.find('=', q_pos) + 1;
|
|
||||||
auto q_end = entry.find(';', q_start);
|
|
||||||
if (q_end == std::string::npos) { q_end = entry.length(); }
|
|
||||||
|
|
||||||
std::string quality_str =
|
|
||||||
trim_copy(entry.substr(q_start, q_end - q_start));
|
|
||||||
if (quality_str.empty()) {
|
|
||||||
has_invalid_entry = true;
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
{
|
|
||||||
double v = 0.0;
|
|
||||||
auto res = detail::from_chars(
|
|
||||||
quality_str.data(), quality_str.data() + quality_str.size(), v);
|
|
||||||
if (res.ec == std::errc{}) {
|
|
||||||
accept_entry.quality = v;
|
|
||||||
} else {
|
|
||||||
has_invalid_entry = true;
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// Check if quality is in valid range [0.0, 1.0]
|
|
||||||
if (accept_entry.quality < 0.0 || accept_entry.quality > 1.0) {
|
|
||||||
has_invalid_entry = true;
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// No quality parameter, use entire entry as media type
|
|
||||||
accept_entry.media_type = entry;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Remove additional parameters from media type
|
// Remove additional parameters from media type
|
||||||
|
|
@ -5481,7 +5565,8 @@ std::string decode_path_component(const std::string &component) {
|
||||||
// Unicode %uXXXX encoding
|
// Unicode %uXXXX encoding
|
||||||
auto val = 0;
|
auto val = 0;
|
||||||
if (detail::from_hex_to_i(component, i + 2, 4, val)) {
|
if (detail::from_hex_to_i(component, i + 2, 4, val)) {
|
||||||
// 4 digits Unicode codes
|
// 4 digits Unicode codes: val is 0x0000-0xFFFF (from 4 hex digits),
|
||||||
|
// so to_utf8 writes at most 3 bytes. buff[4] is safe.
|
||||||
char buff[4];
|
char buff[4];
|
||||||
size_t len = detail::to_utf8(val, buff);
|
size_t len = detail::to_utf8(val, buff);
|
||||||
if (len > 0) { result.append(buff, len); }
|
if (len > 0) { result.append(buff, len); }
|
||||||
|
|
@ -5586,6 +5671,30 @@ std::string decode_query_component(const std::string &component,
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::string sanitize_filename(const std::string &filename) {
|
||||||
|
// Extract basename: find the last path separator (/ or \)
|
||||||
|
auto pos = filename.find_last_of("/\\");
|
||||||
|
auto result =
|
||||||
|
(pos != std::string::npos) ? filename.substr(pos + 1) : filename;
|
||||||
|
|
||||||
|
// Strip null bytes
|
||||||
|
result.erase(std::remove(result.begin(), result.end(), '\0'), result.end());
|
||||||
|
|
||||||
|
// Trim whitespace
|
||||||
|
{
|
||||||
|
auto start = result.find_first_not_of(" \t");
|
||||||
|
auto end = result.find_last_not_of(" \t");
|
||||||
|
result = (start == std::string::npos)
|
||||||
|
? ""
|
||||||
|
: result.substr(start, end - start + 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Reject . and ..
|
||||||
|
if (result == "." || result == "..") { return ""; }
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
std::string append_query_params(const std::string &path,
|
std::string append_query_params(const std::string &path,
|
||||||
const Params ¶ms) {
|
const Params ¶ms) {
|
||||||
std::string path_with_query = path;
|
std::string path_with_query = path;
|
||||||
|
|
@ -6714,7 +6823,18 @@ bool Server::set_mount_point(const std::string &mount_point,
|
||||||
if (stat.is_dir()) {
|
if (stat.is_dir()) {
|
||||||
std::string mnt = !mount_point.empty() ? mount_point : "/";
|
std::string mnt = !mount_point.empty() ? mount_point : "/";
|
||||||
if (!mnt.empty() && mnt[0] == '/') {
|
if (!mnt.empty() && mnt[0] == '/') {
|
||||||
base_dirs_.push_back({std::move(mnt), dir, std::move(headers)});
|
std::string resolved_base;
|
||||||
|
if (detail::canonicalize_path(dir.c_str(), resolved_base)) {
|
||||||
|
#if defined(_WIN32)
|
||||||
|
if (resolved_base.back() != '\\' && resolved_base.back() != '/') {
|
||||||
|
resolved_base += '\\';
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
if (resolved_base.back() != '/') { resolved_base += '/'; }
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
base_dirs_.push_back(
|
||||||
|
{std::move(mnt), dir, std::move(resolved_base), std::move(headers)});
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -6874,6 +6994,20 @@ Server &Server::set_payload_max_length(size_t length) {
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Server &Server::set_websocket_ping_interval(time_t sec) {
|
||||||
|
websocket_ping_interval_sec_ = sec;
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class Rep, class Period>
|
||||||
|
Server &Server::set_websocket_ping_interval(
|
||||||
|
const std::chrono::duration<Rep, Period> &duration) {
|
||||||
|
detail::duration_to_sec_and_usec(duration, [&](time_t sec, time_t /*usec*/) {
|
||||||
|
set_websocket_ping_interval(sec);
|
||||||
|
});
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
bool Server::bind_to_port(const std::string &host, int port,
|
bool Server::bind_to_port(const std::string &host, int port,
|
||||||
int socket_flags) {
|
int socket_flags) {
|
||||||
auto ret = bind_internal(host, port, socket_flags);
|
auto ret = bind_internal(host, port, socket_flags);
|
||||||
|
|
@ -7294,6 +7428,18 @@ bool Server::handle_file_request(Request &req, Response &res) {
|
||||||
auto path = entry.base_dir + sub_path;
|
auto path = entry.base_dir + sub_path;
|
||||||
if (path.back() == '/') { path += "index.html"; }
|
if (path.back() == '/') { path += "index.html"; }
|
||||||
|
|
||||||
|
// Defense-in-depth: is_valid_path blocks ".." traversal in the URL,
|
||||||
|
// but symlinks/junctions can still escape the base directory.
|
||||||
|
if (!entry.resolved_base_dir.empty()) {
|
||||||
|
std::string resolved_path;
|
||||||
|
if (detail::canonicalize_path(path.c_str(), resolved_path) &&
|
||||||
|
!detail::is_path_within_base(resolved_path,
|
||||||
|
entry.resolved_base_dir)) {
|
||||||
|
res.status = StatusCode::Forbidden_403;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
detail::FileStat stat(path);
|
detail::FileStat stat(path);
|
||||||
|
|
||||||
if (stat.is_dir()) {
|
if (stat.is_dir()) {
|
||||||
|
|
@ -8012,7 +8158,7 @@ Server::process_request(Stream &strm, const std::string &remote_addr,
|
||||||
{
|
{
|
||||||
// Use WebSocket-specific read timeout instead of HTTP timeout
|
// Use WebSocket-specific read timeout instead of HTTP timeout
|
||||||
strm.set_read_timeout(CPPHTTPLIB_WEBSOCKET_READ_TIMEOUT_SECOND, 0);
|
strm.set_read_timeout(CPPHTTPLIB_WEBSOCKET_READ_TIMEOUT_SECOND, 0);
|
||||||
ws::WebSocket ws(strm, req, true);
|
ws::WebSocket ws(strm, req, true, websocket_ping_interval_sec_);
|
||||||
entry.handler(req, ws);
|
entry.handler(req, ws);
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
|
|
@ -8256,6 +8402,13 @@ bool ClientImpl::ensure_socket_connection(Socket &socket, Error &error) {
|
||||||
return create_and_connect_socket(socket, error);
|
return create_and_connect_socket(socket, error);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool ClientImpl::setup_proxy_connection(
|
||||||
|
Socket & /*socket*/,
|
||||||
|
std::chrono::time_point<std::chrono::steady_clock> /*start_time*/,
|
||||||
|
Response & /*res*/, bool & /*success*/, Error & /*error*/) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
void ClientImpl::shutdown_ssl(Socket & /*socket*/,
|
void ClientImpl::shutdown_ssl(Socket & /*socket*/,
|
||||||
bool /*shutdown_gracefully*/) {
|
bool /*shutdown_gracefully*/) {
|
||||||
// If there are any requests in flight from threads other than us, then it's
|
// If there are any requests in flight from threads other than us, then it's
|
||||||
|
|
@ -8377,27 +8530,14 @@ bool ClientImpl::send_(Request &req, Response &res, Error &error) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef CPPHTTPLIB_SSL_ENABLED
|
{
|
||||||
// TODO: refactoring
|
auto success = true;
|
||||||
if (is_ssl()) {
|
if (!setup_proxy_connection(socket_, req.start_time_, res, success,
|
||||||
auto &scli = static_cast<SSLClient &>(*this);
|
error)) {
|
||||||
if (!proxy_host_.empty() && proxy_port_ != -1) {
|
if (!success) { output_error_log(error, &req); }
|
||||||
auto success = false;
|
return success;
|
||||||
if (!scli.connect_with_proxy(socket_, req.start_time_, res, success,
|
|
||||||
error)) {
|
|
||||||
if (!success) { output_error_log(error, &req); }
|
|
||||||
return success;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!proxy_host_.empty() && proxy_port_ != -1) {
|
|
||||||
if (!scli.initialize_ssl(socket_, error)) {
|
|
||||||
output_error_log(error, &req);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Mark the current socket as being in use so that it cannot be closed by
|
// Mark the current socket as being in use so that it cannot be closed by
|
||||||
|
|
@ -8558,17 +8698,15 @@ ClientImpl::open_stream(const std::string &method, const std::string &path,
|
||||||
return handle;
|
return handle;
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef CPPHTTPLIB_SSL_ENABLED
|
{
|
||||||
if (is_ssl()) {
|
auto success = true;
|
||||||
auto &scli = static_cast<SSLClient &>(*this);
|
auto start_time = std::chrono::steady_clock::now();
|
||||||
if (!proxy_host_.empty() && proxy_port_ != -1) {
|
if (!setup_proxy_connection(socket_, start_time, *handle.response,
|
||||||
if (!scli.initialize_ssl(socket_, handle.error)) {
|
success, handle.error)) {
|
||||||
handle.response.reset();
|
if (!success) { handle.response.reset(); }
|
||||||
return handle;
|
return handle;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
transfer_socket_ownership_to_handle(handle);
|
transfer_socket_ownership_to_handle(handle);
|
||||||
|
|
@ -8847,7 +8985,7 @@ bool ClientImpl::handle_request(Stream &strm, Request &req,
|
||||||
|
|
||||||
if (res.get_header_value("Connection") == "close" ||
|
if (res.get_header_value("Connection") == "close" ||
|
||||||
(res.version == "HTTP/1.0" && res.reason != "Connection established")) {
|
(res.version == "HTTP/1.0" && res.reason != "Connection established")) {
|
||||||
// TODO this requires a not-entirely-obvious chain of calls to be correct
|
// NOTE: this requires a not-entirely-obvious chain of calls to be correct
|
||||||
// for this to be safe.
|
// for this to be safe.
|
||||||
|
|
||||||
// This is safe to call because handle_request is only called by send_
|
// This is safe to call because handle_request is only called by send_
|
||||||
|
|
@ -9086,14 +9224,9 @@ bool ClientImpl::write_content_with_provider(Stream &strm,
|
||||||
auto is_shutting_down = []() { return false; };
|
auto is_shutting_down = []() { return false; };
|
||||||
|
|
||||||
if (req.is_chunked_content_provider_) {
|
if (req.is_chunked_content_provider_) {
|
||||||
// TODO: Brotli support
|
auto compressor = compress_ ? detail::create_compressor().first
|
||||||
std::unique_ptr<detail::compressor> compressor;
|
: std::unique_ptr<detail::compressor>();
|
||||||
#ifdef CPPHTTPLIB_ZLIB_SUPPORT
|
if (!compressor) {
|
||||||
if (compress_) {
|
|
||||||
compressor = detail::make_unique<detail::gzip_compressor>();
|
|
||||||
} else
|
|
||||||
#endif
|
|
||||||
{
|
|
||||||
compressor = detail::make_unique<detail::nocompressor>();
|
compressor = detail::make_unique<detail::nocompressor>();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -9324,14 +9457,15 @@ ClientImpl::send_with_content_provider_and_receiver(
|
||||||
Error &error) {
|
Error &error) {
|
||||||
if (!content_type.empty()) { req.set_header("Content-Type", content_type); }
|
if (!content_type.empty()) { req.set_header("Content-Type", content_type); }
|
||||||
|
|
||||||
#ifdef CPPHTTPLIB_ZLIB_SUPPORT
|
auto enc = compress_
|
||||||
if (compress_) { req.set_header("Content-Encoding", "gzip"); }
|
? detail::create_compressor()
|
||||||
#endif
|
: std::pair<std::unique_ptr<detail::compressor>, const char *>(
|
||||||
|
nullptr, nullptr);
|
||||||
|
|
||||||
#ifdef CPPHTTPLIB_ZLIB_SUPPORT
|
if (enc.second) { req.set_header("Content-Encoding", enc.second); }
|
||||||
if (compress_ && !content_provider_without_length) {
|
|
||||||
// TODO: Brotli support
|
if (enc.first && !content_provider_without_length) {
|
||||||
detail::gzip_compressor compressor;
|
auto &compressor = enc.first;
|
||||||
|
|
||||||
if (content_provider) {
|
if (content_provider) {
|
||||||
auto ok = true;
|
auto ok = true;
|
||||||
|
|
@ -9342,7 +9476,7 @@ ClientImpl::send_with_content_provider_and_receiver(
|
||||||
if (ok) {
|
if (ok) {
|
||||||
auto last = offset + data_len == content_length;
|
auto last = offset + data_len == content_length;
|
||||||
|
|
||||||
auto ret = compressor.compress(
|
auto ret = compressor->compress(
|
||||||
data, data_len, last,
|
data, data_len, last,
|
||||||
[&](const char *compressed_data, size_t compressed_data_len) {
|
[&](const char *compressed_data, size_t compressed_data_len) {
|
||||||
req.body.append(compressed_data, compressed_data_len);
|
req.body.append(compressed_data, compressed_data_len);
|
||||||
|
|
@ -9366,19 +9500,17 @@ ClientImpl::send_with_content_provider_and_receiver(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
if (!compressor.compress(body, content_length, true,
|
if (!compressor->compress(body, content_length, true,
|
||||||
[&](const char *data, size_t data_len) {
|
[&](const char *data, size_t data_len) {
|
||||||
req.body.append(data, data_len);
|
req.body.append(data, data_len);
|
||||||
return true;
|
return true;
|
||||||
})) {
|
})) {
|
||||||
error = Error::Compression;
|
error = Error::Compression;
|
||||||
output_error_log(error, &req);
|
output_error_log(error, &req);
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else
|
} else {
|
||||||
#endif
|
|
||||||
{
|
|
||||||
if (content_provider) {
|
if (content_provider) {
|
||||||
req.content_length_ = content_length;
|
req.content_length_ = content_length;
|
||||||
req.content_provider_ = std::move(content_provider);
|
req.content_provider_ = std::move(content_provider);
|
||||||
|
|
@ -11545,6 +11677,24 @@ bool SSLClient::create_and_connect_socket(Socket &socket, Error &error) {
|
||||||
return ClientImpl::create_and_connect_socket(socket, error);
|
return ClientImpl::create_and_connect_socket(socket, error);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool SSLClient::setup_proxy_connection(
|
||||||
|
Socket &socket,
|
||||||
|
std::chrono::time_point<std::chrono::steady_clock> start_time,
|
||||||
|
Response &res, bool &success, Error &error) {
|
||||||
|
if (proxy_host_.empty() || proxy_port_ == -1) { return true; }
|
||||||
|
|
||||||
|
if (!connect_with_proxy(socket, start_time, res, success, error)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!initialize_ssl(socket, error)) {
|
||||||
|
success = false;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
// Assumes that socket_mutex_ is locked and that there are no requests in
|
// Assumes that socket_mutex_ is locked and that there are no requests in
|
||||||
// flight
|
// flight
|
||||||
bool SSLClient::connect_with_proxy(
|
bool SSLClient::connect_with_proxy(
|
||||||
|
|
@ -16061,11 +16211,11 @@ WebSocket::~WebSocket() {
|
||||||
}
|
}
|
||||||
|
|
||||||
void WebSocket::start_heartbeat() {
|
void WebSocket::start_heartbeat() {
|
||||||
|
if (ping_interval_sec_ == 0) { return; }
|
||||||
ping_thread_ = std::thread([this]() {
|
ping_thread_ = std::thread([this]() {
|
||||||
std::unique_lock<std::mutex> lock(ping_mutex_);
|
std::unique_lock<std::mutex> lock(ping_mutex_);
|
||||||
while (!closed_) {
|
while (!closed_) {
|
||||||
ping_cv_.wait_for(lock, std::chrono::seconds(
|
ping_cv_.wait_for(lock, std::chrono::seconds(ping_interval_sec_));
|
||||||
CPPHTTPLIB_WEBSOCKET_PING_INTERVAL_SECOND));
|
|
||||||
if (closed_) { break; }
|
if (closed_) { break; }
|
||||||
lock.unlock();
|
lock.unlock();
|
||||||
if (!send_frame(Opcode::Ping, nullptr, 0)) {
|
if (!send_frame(Opcode::Ping, nullptr, 0)) {
|
||||||
|
|
@ -16203,7 +16353,8 @@ bool WebSocketClient::connect() {
|
||||||
Request req;
|
Request req;
|
||||||
req.method = "GET";
|
req.method = "GET";
|
||||||
req.path = path_;
|
req.path = path_;
|
||||||
ws_ = std::unique_ptr<WebSocket>(new WebSocket(std::move(strm), req, false));
|
ws_ = std::unique_ptr<WebSocket>(
|
||||||
|
new WebSocket(std::move(strm), req, false, websocket_ping_interval_sec_));
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -16243,6 +16394,10 @@ void WebSocketClient::set_write_timeout(time_t sec, time_t usec) {
|
||||||
write_timeout_usec_ = usec;
|
write_timeout_usec_ = usec;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void WebSocketClient::set_websocket_ping_interval(time_t sec) {
|
||||||
|
websocket_ping_interval_sec_ = sec;
|
||||||
|
}
|
||||||
|
|
||||||
#ifdef CPPHTTPLIB_SSL_ENABLED
|
#ifdef CPPHTTPLIB_SSL_ENABLED
|
||||||
|
|
||||||
void WebSocketClient::set_ca_cert_path(const std::string &path) {
|
void WebSocketClient::set_ca_cert_path(const std::string &path) {
|
||||||
|
|
|
||||||
|
|
@ -8,8 +8,8 @@
|
||||||
#ifndef CPPHTTPLIB_HTTPLIB_H
|
#ifndef CPPHTTPLIB_HTTPLIB_H
|
||||||
#define CPPHTTPLIB_HTTPLIB_H
|
#define CPPHTTPLIB_HTTPLIB_H
|
||||||
|
|
||||||
#define CPPHTTPLIB_VERSION "0.37.2"
|
#define CPPHTTPLIB_VERSION "0.38.0"
|
||||||
#define CPPHTTPLIB_VERSION_NUM "0x002502"
|
#define CPPHTTPLIB_VERSION_NUM "0x002600"
|
||||||
|
|
||||||
#ifdef _WIN32
|
#ifdef _WIN32
|
||||||
#if defined(_WIN32_WINNT) && _WIN32_WINNT < 0x0A00
|
#if defined(_WIN32_WINNT) && _WIN32_WINNT < 0x0A00
|
||||||
|
|
@ -1666,6 +1666,11 @@ public:
|
||||||
|
|
||||||
Server &set_payload_max_length(size_t length);
|
Server &set_payload_max_length(size_t length);
|
||||||
|
|
||||||
|
Server &set_websocket_ping_interval(time_t sec);
|
||||||
|
template <class Rep, class Period>
|
||||||
|
Server &set_websocket_ping_interval(
|
||||||
|
const std::chrono::duration<Rep, Period> &duration);
|
||||||
|
|
||||||
bool bind_to_port(const std::string &host, int port, int socket_flags = 0);
|
bool bind_to_port(const std::string &host, int port, int socket_flags = 0);
|
||||||
int bind_to_any_port(const std::string &host, int socket_flags = 0);
|
int bind_to_any_port(const std::string &host, int socket_flags = 0);
|
||||||
bool listen_after_bind();
|
bool listen_after_bind();
|
||||||
|
|
@ -1700,6 +1705,8 @@ protected:
|
||||||
time_t idle_interval_sec_ = CPPHTTPLIB_IDLE_INTERVAL_SECOND;
|
time_t idle_interval_sec_ = CPPHTTPLIB_IDLE_INTERVAL_SECOND;
|
||||||
time_t idle_interval_usec_ = CPPHTTPLIB_IDLE_INTERVAL_USECOND;
|
time_t idle_interval_usec_ = CPPHTTPLIB_IDLE_INTERVAL_USECOND;
|
||||||
size_t payload_max_length_ = CPPHTTPLIB_PAYLOAD_MAX_LENGTH;
|
size_t payload_max_length_ = CPPHTTPLIB_PAYLOAD_MAX_LENGTH;
|
||||||
|
time_t websocket_ping_interval_sec_ =
|
||||||
|
CPPHTTPLIB_WEBSOCKET_PING_INTERVAL_SECOND;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
using Handlers =
|
using Handlers =
|
||||||
|
|
@ -1769,6 +1776,7 @@ private:
|
||||||
struct MountPointEntry {
|
struct MountPointEntry {
|
||||||
std::string mount_point;
|
std::string mount_point;
|
||||||
std::string base_dir;
|
std::string base_dir;
|
||||||
|
std::string resolved_base_dir;
|
||||||
Headers headers;
|
Headers headers;
|
||||||
};
|
};
|
||||||
std::vector<MountPointEntry> base_dirs_;
|
std::vector<MountPointEntry> base_dirs_;
|
||||||
|
|
@ -2186,6 +2194,10 @@ protected:
|
||||||
|
|
||||||
virtual bool create_and_connect_socket(Socket &socket, Error &error);
|
virtual bool create_and_connect_socket(Socket &socket, Error &error);
|
||||||
virtual bool ensure_socket_connection(Socket &socket, Error &error);
|
virtual bool ensure_socket_connection(Socket &socket, Error &error);
|
||||||
|
virtual bool setup_proxy_connection(
|
||||||
|
Socket &socket,
|
||||||
|
std::chrono::time_point<std::chrono::steady_clock> start_time,
|
||||||
|
Response &res, bool &success, Error &error);
|
||||||
|
|
||||||
// All of:
|
// All of:
|
||||||
// shutdown_ssl
|
// shutdown_ssl
|
||||||
|
|
@ -2712,6 +2724,10 @@ private:
|
||||||
std::function<bool(Stream &strm)> callback) override;
|
std::function<bool(Stream &strm)> callback) override;
|
||||||
bool is_ssl() const override;
|
bool is_ssl() const override;
|
||||||
|
|
||||||
|
bool setup_proxy_connection(
|
||||||
|
Socket &socket,
|
||||||
|
std::chrono::time_point<std::chrono::steady_clock> start_time,
|
||||||
|
Response &res, bool &success, Error &error) override;
|
||||||
bool connect_with_proxy(
|
bool connect_with_proxy(
|
||||||
Socket &sock,
|
Socket &sock,
|
||||||
std::chrono::time_point<std::chrono::steady_clock> start_time,
|
std::chrono::time_point<std::chrono::steady_clock> start_time,
|
||||||
|
|
@ -2911,6 +2927,8 @@ std::string encode_query_component(const std::string &component,
|
||||||
std::string decode_query_component(const std::string &component,
|
std::string decode_query_component(const std::string &component,
|
||||||
bool plus_as_space = true);
|
bool plus_as_space = true);
|
||||||
|
|
||||||
|
std::string sanitize_filename(const std::string &filename);
|
||||||
|
|
||||||
std::string append_query_params(const std::string &path, const Params &params);
|
std::string append_query_params(const std::string &path, const Params &params);
|
||||||
|
|
||||||
std::pair<std::string, std::string> make_range_header(const Ranges &ranges);
|
std::pair<std::string, std::string> make_range_header(const Ranges &ranges);
|
||||||
|
|
@ -3714,15 +3732,19 @@ private:
|
||||||
friend class httplib::Server;
|
friend class httplib::Server;
|
||||||
friend class WebSocketClient;
|
friend class WebSocketClient;
|
||||||
|
|
||||||
WebSocket(Stream &strm, const Request &req, bool is_server)
|
WebSocket(
|
||||||
: strm_(strm), req_(req), is_server_(is_server) {
|
Stream &strm, const Request &req, bool is_server,
|
||||||
|
time_t ping_interval_sec = CPPHTTPLIB_WEBSOCKET_PING_INTERVAL_SECOND)
|
||||||
|
: strm_(strm), req_(req), is_server_(is_server),
|
||||||
|
ping_interval_sec_(ping_interval_sec) {
|
||||||
start_heartbeat();
|
start_heartbeat();
|
||||||
}
|
}
|
||||||
|
|
||||||
WebSocket(std::unique_ptr<Stream> &&owned_strm, const Request &req,
|
WebSocket(
|
||||||
bool is_server)
|
std::unique_ptr<Stream> &&owned_strm, const Request &req, bool is_server,
|
||||||
|
time_t ping_interval_sec = CPPHTTPLIB_WEBSOCKET_PING_INTERVAL_SECOND)
|
||||||
: strm_(*owned_strm), owned_strm_(std::move(owned_strm)), req_(req),
|
: strm_(*owned_strm), owned_strm_(std::move(owned_strm)), req_(req),
|
||||||
is_server_(is_server) {
|
is_server_(is_server), ping_interval_sec_(ping_interval_sec) {
|
||||||
start_heartbeat();
|
start_heartbeat();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -3733,6 +3755,7 @@ private:
|
||||||
std::unique_ptr<Stream> owned_strm_;
|
std::unique_ptr<Stream> owned_strm_;
|
||||||
Request req_;
|
Request req_;
|
||||||
bool is_server_;
|
bool is_server_;
|
||||||
|
time_t ping_interval_sec_;
|
||||||
std::atomic<bool> closed_{false};
|
std::atomic<bool> closed_{false};
|
||||||
std::mutex write_mutex_;
|
std::mutex write_mutex_;
|
||||||
std::thread ping_thread_;
|
std::thread ping_thread_;
|
||||||
|
|
@ -3761,6 +3784,7 @@ public:
|
||||||
const std::string &subprotocol() const;
|
const std::string &subprotocol() const;
|
||||||
void set_read_timeout(time_t sec, time_t usec = 0);
|
void set_read_timeout(time_t sec, time_t usec = 0);
|
||||||
void set_write_timeout(time_t sec, time_t usec = 0);
|
void set_write_timeout(time_t sec, time_t usec = 0);
|
||||||
|
void set_websocket_ping_interval(time_t sec);
|
||||||
|
|
||||||
#ifdef CPPHTTPLIB_SSL_ENABLED
|
#ifdef CPPHTTPLIB_SSL_ENABLED
|
||||||
void set_ca_cert_path(const std::string &path);
|
void set_ca_cert_path(const std::string &path);
|
||||||
|
|
@ -3784,6 +3808,8 @@ private:
|
||||||
time_t read_timeout_usec_ = 0;
|
time_t read_timeout_usec_ = 0;
|
||||||
time_t write_timeout_sec_ = CPPHTTPLIB_CLIENT_WRITE_TIMEOUT_SECOND;
|
time_t write_timeout_sec_ = CPPHTTPLIB_CLIENT_WRITE_TIMEOUT_SECOND;
|
||||||
time_t write_timeout_usec_ = CPPHTTPLIB_CLIENT_WRITE_TIMEOUT_USECOND;
|
time_t write_timeout_usec_ = CPPHTTPLIB_CLIENT_WRITE_TIMEOUT_USECOND;
|
||||||
|
time_t websocket_ping_interval_sec_ =
|
||||||
|
CPPHTTPLIB_WEBSOCKET_PING_INTERVAL_SECOND;
|
||||||
|
|
||||||
#ifdef CPPHTTPLIB_SSL_ENABLED
|
#ifdef CPPHTTPLIB_SSL_ENABLED
|
||||||
bool is_ssl_ = false;
|
bool is_ssl_ = false;
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue