diff --git a/.github/workflows/build-3rd-party.yml b/.github/workflows/build-3rd-party.yml new file mode 100644 index 0000000000..642d978644 --- /dev/null +++ b/.github/workflows/build-3rd-party.yml @@ -0,0 +1,57 @@ +name: CI (3rd-party) + +on: + workflow_dispatch: # allows manual triggering + push: + branches: + - master + paths: [ + '.github/workflows/build-3rd-party.yml', + '**/CMakeLists.txt', + '**/.cmake', + '**/*.h', + '**/*.hpp', + '**/*.c', + '**/*.cpp' + ] + +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }} + cancel-in-progress: true + +env: + GGML_NLOOP: 3 + GGML_N_THREADS: 1 + LLAMA_LOG_COLORS: 1 + LLAMA_LOG_PREFIX: 1 + LLAMA_LOG_TIMESTAMPS: 1 + +jobs: + ubuntu-24-llguidance: + runs-on: ${{ 'ubuntu-24.04-arm' || 'ubuntu-24.04' }} + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v6 + + - name: Dependencies + id: depends + run: | + sudo apt-get update + sudo apt-get install build-essential libssl-dev + + - name: Build + id: cmake_build + run: | + cmake -B build \ + -DLLAMA_FATAL_WARNINGS=ON \ + -DLLAMA_LLGUIDANCE=ON + cmake --build build --config Release -j $(nproc) + + - name: Test + id: cmake_test + run: | + cd build + ctest -L main --verbose --timeout 900 + diff --git a/.github/workflows/build-android.yml b/.github/workflows/build-android.yml new file mode 100644 index 0000000000..cd9d99ffab --- /dev/null +++ b/.github/workflows/build-android.yml @@ -0,0 +1,140 @@ +name: CI (android) + +on: + workflow_dispatch: # allows manual triggering + push: + branches: + - master + paths: [ + '.github/workflows/build-android.yml', + '**/CMakeLists.txt', + '**/.cmake', + '**/*.h', + '**/*.hpp', + '**/*.c', + '**/*.cpp' + ] + + pull_request: + types: [opened, synchronize, reopened] + paths: [ + '.github/workflows/build-android.yml', + 'examples/llama.android/**' + ] + +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }} + cancel-in-progress: true + +env: + GGML_NLOOP: 3 + GGML_N_THREADS: 1 + LLAMA_LOG_COLORS: 1 + LLAMA_LOG_PREFIX: 1 + LLAMA_LOG_TIMESTAMPS: 1 + +jobs: + android: + runs-on: ubuntu-latest + + steps: + - name: Clone + uses: actions/checkout@v6 + + # Disabled due to size (400MB) and always 0 cache hits + # - name: ccache + # uses: ggml-org/ccache-action@v1.2.16 + # with: + # key: android-build + # evict-old-files: 1d + + - name: Set up JDK + uses: actions/setup-java@v5 + with: + java-version: 17 + distribution: zulu + + - name: Setup Android SDK + uses: android-actions/setup-android@v3 + with: + log-accepted-android-sdk-licenses: false + + - name: Build + run: | + cd examples/llama.android + ./gradlew build --no-daemon + + android-ndk: + runs-on: ubuntu-latest + + env: + OPENCL_VERSION: 2025.07.22 + + strategy: + matrix: + include: + - build: 'arm64-cpu' + defines: '-D ANDROID_ABI=arm64-v8a -D ANDROID_PLATFORM=android-31 -D CMAKE_TOOLCHAIN_FILE=${ANDROID_NDK_ROOT}/build/cmake/android.toolchain.cmake -D GGML_NATIVE=OFF -DGGML_CPU_ARM_ARCH=armv8.5-a+fp16+i8mm -G Ninja -D LLAMA_OPENSSL=OFF -D GGML_OPENMP=OFF' + - build: 'arm64-snapdragon' + defines: '--preset arm64-android-snapdragon-release' + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v6 + + - name: Install OpenCL Headers and Libs + id: install_opencl + if: ${{ matrix.build == 'arm64-snapdragon' }} + run: | + mkdir opencl + curl -L -o opencl/clhpp.tar.gz https://github.com/KhronosGroup/OpenCL-CLHPP/archive/refs/tags/v${OPENCL_VERSION}.tar.gz + curl -L -o opencl/headers.tar.gz https://github.com/KhronosGroup/OpenCL-Headers/archive/refs/tags/v${OPENCL_VERSION}.tar.gz + curl -L -o opencl/icd-loader.tar.gz https://github.com/KhronosGroup/OpenCL-ICD-Loader/archive/refs/tags/v${OPENCL_VERSION}.tar.gz + tar -xaf opencl/headers.tar.gz -C opencl + tar -xaf opencl/clhpp.tar.gz -C opencl + tar -xaf opencl/icd-loader.tar.gz -C opencl + sudo cp -r opencl/OpenCL-Headers-${OPENCL_VERSION}/CL ${ANDROID_NDK_ROOT}/toolchains/llvm/prebuilt/linux-x86_64/sysroot/usr/include + sudo cp -r opencl/OpenCL-CLHPP-${OPENCL_VERSION}/include/CL/* ${ANDROID_NDK_ROOT}/toolchains/llvm/prebuilt/linux-x86_64/sysroot/usr/include/CL + cd opencl/OpenCL-ICD-Loader-${OPENCL_VERSION} + cmake -B build -G Ninja -DCMAKE_BUILD_TYPE=Release -DCMAKE_TOOLCHAIN_FILE=${ANDROID_NDK_ROOT}/build/cmake/android.toolchain.cmake -DOPENCL_ICD_LOADER_HEADERS_DIR=${ANDROID_NDK_ROOT}/toolchains/llvm/prebuilt/linux-x86_64/sysroot/usr/include -DANDROID_ABI=arm64-v8a -DANDROID_PLATFORM=31 -DANDROID_STL=c++_shared + cmake --build build + sudo cp build/libOpenCL.so ${ANDROID_NDK_ROOT}/toolchains/llvm/prebuilt/linux-x86_64/sysroot/usr/lib/aarch64-linux-android + rm -rf opencl + + - name: Install Hexagon SDK + id: install_hexsdk + if: ${{ matrix.build == 'arm64-snapdragon' }} + env: + HEXSDK_VER: 6.4.0.2 + HEXTLS_VER: 19.0.04 + run: | + curl -L -o hex-sdk.tar.gz https://github.com/snapdragon-toolchain/hexagon-sdk/releases/download/v$HEXSDK_VER/hexagon-sdk-v$HEXSDK_VER-amd64-lnx.tar.xz + mkdir hex-sdk + tar -xaf hex-sdk.tar.gz -C hex-sdk + ls -l hex-sdk + sudo mv hex-sdk /opt/hexagon + echo "HEXAGON_SDK_ROOT=/opt/hexagon/$HEXSDK_VER" >> "$GITHUB_ENV" + echo "HEXAGON_TOOLS_ROOT=/opt/hexagon/$HEXSDK_VER/tools/HEXAGON_Tools/$HEXTLS_VER" >> "$GITHUB_ENV" + echo "DEFAULT_HLOS_ARCH=64" >> "$GITHUB_ENV" + echo "DEFAULT_TOOLS_VARIANT=toolv19" >> "$GITHUB_ENV" + echo "DEFAULT_NO_QURT_INC=0" >> "$GITHUB_ENV" + echo "DEFAULT_DSP_ARCH=v73" >> "$GITHUB_ENV" + + - name: Update CMake presets + id: update_presets + if: ${{ matrix.build == 'arm64-snapdragon' }} + run: | + cp docs/backend/snapdragon/CMakeUserPresets.json . + + - name: Build + id: ndk_build + run: | + cmake ${{ matrix.defines }} -B build + cmake --build build + cmake --install build --prefix pkg-adb/llama.cpp + + - name: Test + id: cmake_test + run: | + echo "FIXME: test on devices" diff --git a/.github/workflows/build-apple.yml b/.github/workflows/build-apple.yml new file mode 100644 index 0000000000..51f0ef2302 --- /dev/null +++ b/.github/workflows/build-apple.yml @@ -0,0 +1,214 @@ +name: CI (apple) + +on: + workflow_dispatch: # allows manual triggering + push: + branches: + - master + paths: [ + '.github/workflows/build-apple.yml', + '**/CMakeLists.txt', + '**/.cmake', + '**/*.h', + '**/*.hpp', + '**/*.c', + '**/*.cpp', + '**/*.swift', + '**/*.m', + '**/*.metal' + ] + + pull_request: + types: [opened, synchronize, reopened] + paths: [ + '.github/workflows/build-apple.yml', + 'ggml/src/ggml-metal/**' + ] + +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }} + cancel-in-progress: true + +env: + GGML_NLOOP: 3 + GGML_N_THREADS: 1 + LLAMA_LOG_COLORS: 1 + LLAMA_LOG_PREFIX: 1 + LLAMA_LOG_TIMESTAMPS: 1 + +jobs: + macOS-latest-ios: + runs-on: macos-latest + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v6 + + - name: ccache + uses: ggml-org/ccache-action@v1.2.16 + with: + key: macOS-latest-ios + evict-old-files: 1d + save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} + + - name: Build + id: cmake_build + run: | + sysctl -a + cmake -B build -G Xcode \ + -DGGML_METAL_USE_BF16=ON \ + -DGGML_METAL_EMBED_LIBRARY=ON \ + -DLLAMA_BUILD_COMMON=OFF \ + -DLLAMA_BUILD_EXAMPLES=OFF \ + -DLLAMA_BUILD_TOOLS=OFF \ + -DLLAMA_BUILD_TESTS=OFF \ + -DLLAMA_BUILD_SERVER=OFF \ + -DCMAKE_SYSTEM_NAME=iOS \ + -DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 \ + -DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml + cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO + + macos-latest-ios-xcode: + runs-on: macos-latest + + steps: + - name: Checkout code + uses: actions/checkout@v6 + + - name: Setup Xcode + uses: ggml-org/setup-xcode@v1 + with: + xcode-version: latest-stable + + - name: Build + id: cmake_build + run: | + sysctl -a + cmake -B build -G Xcode \ + -DGGML_METAL_USE_BF16=ON \ + -DGGML_METAL_EMBED_LIBRARY=ON \ + -DLLAMA_OPENSSL=OFF \ + -DLLAMA_BUILD_EXAMPLES=OFF \ + -DLLAMA_BUILD_TOOLS=OFF \ + -DLLAMA_BUILD_TESTS=OFF \ + -DLLAMA_BUILD_SERVER=OFF \ + -DCMAKE_SYSTEM_NAME=iOS \ + -DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 \ + -DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml + cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO + + - name: xcodebuild for swift package + id: xcodebuild + run: | + ./build-xcframework.sh + + - name: Upload xcframework artifact + uses: actions/upload-artifact@v6 + with: + name: llama-xcframework + path: build-apple/llama.xcframework/ + retention-days: 1 + + - name: Build Xcode project + run: | + xcodebuild -downloadPlatform iOS + xcodebuild -project examples/llama.swiftui/llama.swiftui.xcodeproj -scheme llama.swiftui -sdk iphoneos CODE_SIGNING_REQUIRED=NO CODE_SIGN_IDENTITY= -destination 'generic/platform=iOS' FRAMEWORK_FOLDER_PATH=./build-ios build + + macOS-latest-tvos: + runs-on: macos-latest + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v6 + + - name: ccache + uses: ggml-org/ccache-action@v1.2.16 + with: + key: macOS-latest-tvos + evict-old-files: 1d + save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} + + - name: Build + id: cmake_build + run: | + sysctl -a + cmake -B build -G Xcode \ + -DGGML_METAL_USE_BF16=ON \ + -DGGML_METAL_EMBED_LIBRARY=ON \ + -DLLAMA_BUILD_COMMON=OFF \ + -DLLAMA_BUILD_EXAMPLES=OFF \ + -DLLAMA_BUILD_TOOLS=OFF \ + -DLLAMA_BUILD_TESTS=OFF \ + -DLLAMA_BUILD_SERVER=OFF \ + -DCMAKE_SYSTEM_NAME=tvOS \ + -DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 \ + -DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml + cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO + + macOS-latest-visionos: + runs-on: macos-latest + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v6 + + - name: Build + id: cmake_build + run: | + sysctl -a + cmake -B build -G Xcode \ + -DGGML_METAL_USE_BF16=ON \ + -DGGML_METAL_EMBED_LIBRARY=ON \ + -DLLAMA_BUILD_COMMON=OFF \ + -DLLAMA_BUILD_EXAMPLES=OFF \ + -DLLAMA_BUILD_TOOLS=OFF \ + -DLLAMA_BUILD_TESTS=OFF \ + -DLLAMA_BUILD_SERVER=OFF \ + -DCMAKE_SYSTEM_NAME=visionOS \ + -DCMAKE_OSX_DEPLOYMENT_TARGET=1.0 \ + -DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml + cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO + + macOS-latest-swift: + runs-on: macos-latest + needs: macos-latest-ios-xcode + + strategy: + matrix: + destination: ['generic/platform=macOS', 'generic/platform=iOS', 'generic/platform=tvOS'] + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v6 + + - name: ccache + uses: ggml-org/ccache-action@v1.2.16 + with: + key: macOS-latest-swift + evict-old-files: 1d + save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} + + - name: Download xcframework artifact + uses: actions/download-artifact@v7 + with: + name: llama-xcframework + path: build-apple/llama.xcframework/ + + - name: Build llama.cpp with CMake + id: cmake_build + run: | + sysctl -a + cmake -B build -G Xcode \ + -DGGML_METAL_USE_BF16=ON \ + -DGGML_METAL_EMBED_LIBRARY=ON \ + -DLLAMA_OPENSSL=OFF \ + -DLLAMA_BUILD_EXAMPLES=OFF \ + -DLLAMA_BUILD_TOOLS=OFF \ + -DLLAMA_BUILD_TESTS=OFF \ + -DLLAMA_BUILD_SERVER=OFF \ + -DCMAKE_OSX_ARCHITECTURES="arm64;x86_64" + cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) diff --git a/.github/workflows/build-cache.yml b/.github/workflows/build-cache.yml index dffbf2b4ab..bc0a92c7fc 100644 --- a/.github/workflows/build-cache.yml +++ b/.github/workflows/build-cache.yml @@ -37,31 +37,31 @@ jobs: path: ./vulkan_sdk version: ${{ env.VULKAN_SDK_VERSION }} - ubuntu-24-spacemit-cache: - runs-on: ubuntu-24.04 + #ubuntu-24-spacemit-cache: + # runs-on: ubuntu-24.04 - env: - # Make sure this is in sync with build-linux-cross.yml - SPACEMIT_IME_TOOLCHAIN_VERSION: "1.1.2" + # env: + # # Make sure this is in sync with build-linux-cross.yml + # SPACEMIT_IME_TOOLCHAIN_VERSION: "1.1.2" - steps: - - name: Clone - id: checkout - uses: actions/checkout@v6 + # steps: + # - name: Clone + # id: checkout + # uses: actions/checkout@v6 - - name: Setup Cache - uses: actions/cache@v5 - id: cache-toolchain - with: - path: ./spacemit_toolchain - key: spacemit-ime-toolchain-v${{ env.SPACEMIT_IME_TOOLCHAIN_VERSION }}-${{ runner.os }} + # - name: Setup Cache + # uses: actions/cache@v5 + # id: cache-toolchain + # with: + # path: ./spacemit_toolchain + # key: spacemit-ime-toolchain-v${{ env.SPACEMIT_IME_TOOLCHAIN_VERSION }}-${{ runner.os }} - - name: Setup SpacemiT Toolchain - if: steps.cache-toolchain.outputs.cache-hit != 'true' - uses: ./.github/actions/linux-setup-spacemit - with: - path: ./spacemit_toolchain - version: ${{ env.SPACEMIT_IME_TOOLCHAIN_VERSION }} + # - name: Setup SpacemiT Toolchain + # if: steps.cache-toolchain.outputs.cache-hit != 'true' + # uses: ./.github/actions/linux-setup-spacemit + # with: + # path: ./spacemit_toolchain + # version: ${{ env.SPACEMIT_IME_TOOLCHAIN_VERSION }} ubuntu-24-openvino-cache: runs-on: ubuntu-24.04 diff --git a/.github/workflows/build-cann.yml b/.github/workflows/build-cann.yml new file mode 100644 index 0000000000..de641ca148 --- /dev/null +++ b/.github/workflows/build-cann.yml @@ -0,0 +1,102 @@ +name: CI (cann) + +on: + workflow_dispatch: # allows manual triggering + push: + branches: + - master + paths: [ + '.github/workflows/build-cann.yml', + '**/CMakeLists.txt', + '**/.cmake', + '**/*.h', + '**/*.hpp', + '**/*.c', + '**/*.cpp' + ] + + pull_request: + types: [opened, synchronize, reopened] + paths: [ + '.github/workflows/build-cann.yml', + 'ggml/src/ggml-cann/**' + ] + +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }} + cancel-in-progress: true + +env: + GGML_NLOOP: 3 + GGML_N_THREADS: 1 + LLAMA_LOG_COLORS: 1 + LLAMA_LOG_PREFIX: 1 + LLAMA_LOG_TIMESTAMPS: 1 + +jobs: + openEuler-latest-cann: + defaults: + run: + shell: bash -el {0} + strategy: + matrix: + arch: [x86, aarch64] + chip_type: ['910b', '310p'] + build: ['Release'] + use_acl_graph: ['on', 'off'] + exclude: + # 310P does not support USE_ACL_GRAPH=on + - chip_type: '310p' + use_acl_graph: 'on' + runs-on: ${{ matrix.arch == 'aarch64' && 'ubuntu-24.04-arm' || 'ubuntu-24.04' }} + steps: + - name: Checkout + uses: actions/checkout@v6 + with: + fetch-depth: 0 + + - name: Free up disk space + uses: ggml-org/free-disk-space@v1.3.1 + with: + tool-cache: true + + - name: Set container image + id: cann-image + run: | + image="ascendai/cann:${{ matrix.chip_type == '910b' && '8.3.rc2-910b-openeuler24.03-py3.11' || '8.3.rc2-310p-openeuler24.03-py3.11' }}" + echo "image=${image}" >> "${GITHUB_OUTPUT}" + + - name: Pull container image + run: docker pull "${{ steps.cann-image.outputs.image }}" + + - name: Build + env: + BUILD_TYPE: ${{ matrix.build }} + SOC_TYPE: ascend${{ matrix.chip_type }} + USE_ACL_GRAPH: ${{ matrix.use_acl_graph }} + run: | + HOST_UID=$(id -u) + HOST_GID=$(id -g) + + docker run --rm \ + -v "${PWD}:/workspace" \ + -w /workspace \ + -e SOC_TYPE=${SOC_TYPE} \ + -e BUILD_TYPE=${BUILD_TYPE} \ + -e USE_ACL_GRAPH=${USE_ACL_GRAPH} \ + "${{ steps.cann-image.outputs.image }}" \ + bash -lc ' + set -e + yum install -y --setopt=install_weak_deps=False --setopt=tsflags=nodocs git gcc gcc-c++ make cmake openssl-devel + yum clean all && rm -rf /var/cache/yum + git config --global --add safe.directory "/workspace" + export LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:${ASCEND_TOOLKIT_HOME}/$(uname -m)-linux/devlib/:${LD_LIBRARY_PATH} + cmake -S . -B build \ + -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \ + -DGGML_CANN=on \ + -DSOC_TYPE=${SOC_TYPE} \ + -DUSE_ACL_GRAPH=${USE_ACL_GRAPH} + cmake --build build -j $(nproc) + + chown -R '"${HOST_UID}"':'"${HOST_GID}"' /workspace/build + ' diff --git a/.github/workflows/build-linux-cross.yml b/.github/workflows/build-cross.yml similarity index 93% rename from .github/workflows/build-linux-cross.yml rename to .github/workflows/build-cross.yml index dbcc1ee2ae..74508129ac 100644 --- a/.github/workflows/build-linux-cross.yml +++ b/.github/workflows/build-cross.yml @@ -1,7 +1,24 @@ -name: Build on Linux using cross-compiler +name: CI (cross) on: + # only manual triggers due to low-importance of the workflows + # TODO: for regular runs, provision dedicated self-hosted runners workflow_dispatch: - workflow_call: + push: + branches: + - master + paths: [ + '.github/workflows/build-cross.yml', + 'ggml/src/spacemit/*', + 'ggml/src/arch/loongarch/*' + ] + # run once every week + schedule: + - cron: '0 0 * * 0' + +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }} + cancel-in-progress: true + jobs: # ubuntu-24-riscv64-cpu-cross: @@ -264,15 +281,15 @@ jobs: steps: - uses: actions/checkout@v6 - - name: Use SpacemiT Toolchain Cache - uses: actions/cache@v5 - id: cache-toolchain - with: - path: ./spacemit_toolchain - key: spacemit-ime-toolchain-v${{ env.SPACEMIT_IME_TOOLCHAIN_VERSION }}-${{ runner.os }} + #- name: Use SpacemiT Toolchain Cache + # uses: actions/cache@v5 + # id: cache-toolchain + # with: + # path: ./spacemit_toolchain + # key: spacemit-ime-toolchain-v${{ env.SPACEMIT_IME_TOOLCHAIN_VERSION }}-${{ runner.os }} - name: Setup SpacemiT Toolchain - if: steps.cache-toolchain.outputs.cache-hit != 'true' + #if: steps.cache-toolchain.outputs.cache-hit != 'true' uses: ./.github/actions/linux-setup-spacemit with: path: ./spacemit_toolchain diff --git a/.github/workflows/build-msys.yml b/.github/workflows/build-msys.yml new file mode 100644 index 0000000000..431d9b6a53 --- /dev/null +++ b/.github/workflows/build-msys.yml @@ -0,0 +1,72 @@ +name: CI (msys) + +on: + # only manual triggers due to low-importance of the workflows + # TODO: for regular runs, provision dedicated self-hosted runners + workflow_dispatch: + # run once every week + schedule: + - cron: '0 0 * * 0' + +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }} + cancel-in-progress: true + +env: + GGML_NLOOP: 3 + GGML_N_THREADS: 1 + LLAMA_LOG_COLORS: 1 + LLAMA_LOG_PREFIX: 1 + LLAMA_LOG_TIMESTAMPS: 1 + +jobs: + windows-msys2: + runs-on: windows-2025 + + strategy: + fail-fast: false + matrix: + include: + - { sys: UCRT64, env: ucrt-x86_64, build: Release } + - { sys: CLANG64, env: clang-x86_64, build: Release } + + steps: + - name: Clone + uses: actions/checkout@v6 + + #- name: ccache + # uses: ggml-org/ccache-action@v1.2.16 + # with: + # key: windows-msys2 + # variant: ccache + # evict-old-files: 1d + # save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} + + - name: Setup ${{ matrix.sys }} + uses: msys2/setup-msys2@v2 + with: + update: true + msystem: ${{matrix.sys}} + install: >- + base-devel + git + mingw-w64-${{matrix.env}}-toolchain + mingw-w64-${{matrix.env}}-cmake + mingw-w64-${{matrix.env}}-openblas + + - name: Build using CMake + shell: msys2 {0} + run: | + cmake -B build + cmake --build build --config ${{ matrix.build }} -j $(nproc) + + - name: Clean after building using CMake + shell: msys2 {0} + run: | + rm -rf build + + - name: Build using CMake w/ OpenBLAS + shell: msys2 {0} + run: | + cmake -B build -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS + cmake --build build --config ${{ matrix.build }} -j $(nproc) diff --git a/.github/workflows/build-riscv.yml b/.github/workflows/build-riscv.yml new file mode 100644 index 0000000000..36a3a1155a --- /dev/null +++ b/.github/workflows/build-riscv.yml @@ -0,0 +1,136 @@ +name: CI (riscv) + +on: + workflow_dispatch: # allows manual triggering + push: + branches: + - master + paths: [ + '.github/workflows/build-riscv.yml', + '**/CMakeLists.txt', + '**/.cmake', + '**/*.h', + '**/*.hpp', + '**/*.c', + '**/*.cpp' + ] + + pull_request: + types: [opened, synchronize, reopened] + paths: [ + '.github/workflows/build-riscv.yml', + 'ggml/src/ggml-cpu/arch/riscv/**' + ] + +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }} + cancel-in-progress: true + +env: + GGML_NLOOP: 3 + GGML_N_THREADS: 1 + LLAMA_LOG_COLORS: 1 + LLAMA_LOG_PREFIX: 1 + LLAMA_LOG_TIMESTAMPS: 1 + +jobs: + ubuntu-riscv64-native-sanitizer: + runs-on: RISCV64 + + continue-on-error: true + + strategy: + matrix: + sanitizer: [ADDRESS, THREAD, UNDEFINED] + build_type: [Debug] + + steps: + - name: Install dependencies + run: | + sudo apt-get update + + # Install necessary packages + sudo apt-get install -y libatomic1 libtsan2 gcc-14 g++-14 rustup cmake build-essential wget ccache git-lfs + + # Set gcc-14 and g++-14 as the default compilers + sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-14 100 + sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-14 100 + sudo ln -sf /usr/bin/gcc-14 /usr/bin/gcc + sudo ln -sf /usr/bin/g++-14 /usr/bin/g++ + + # Install Rust stable version + rustup install stable + rustup default stable + + git lfs install + + - name: GCC version check + run: | + gcc --version + g++ --version + + - name: Clone + id: checkout + uses: actions/checkout@v6 + + - name: Setup ccache + run: | + # Unique cache directory per matrix combination + export CCACHE_DIR="$HOME/.ccache/sanitizer-${{ matrix.sanitizer }}-${{ matrix.build_type }}" + mkdir -p "$CCACHE_DIR" + + # Configure ccache + ccache --set-config=max_size=5G + ccache --set-config=compression=true + ccache --set-config=compression_level=6 + ccache --set-config=cache_dir="$CCACHE_DIR" + ccache --set-config=sloppiness=file_macro,time_macros,include_file_mtime,include_file_ctime + ccache --set-config=hash_dir=false + + # Export for subsequent steps + echo "CCACHE_DIR=$CCACHE_DIR" >> $GITHUB_ENV + echo "PATH=/usr/lib/ccache:$PATH" >> $GITHUB_ENV + + - name: Build + id: cmake_build + if: ${{ matrix.sanitizer != 'THREAD' }} + run: | + cmake -B build \ + -DLLAMA_OPENSSL=OFF \ + -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \ + -DGGML_OPENMP=ON \ + -DLLAMA_BUILD_EXAMPLES=ON \ + -DLLAMA_BUILD_TOOLS=ON \ + -DLLAMA_BUILD_TESTS=OFF \ + -DCMAKE_C_COMPILER_LAUNCHER=ccache \ + -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ + -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON \ + -DCMAKE_C_COMPILER=riscv64-linux-gnu-gcc-14 \ + -DCMAKE_CXX_COMPILER=riscv64-linux-gnu-g++-14 + + cmake --build build --config ${{ matrix.build_type }} -j $(nproc) + + - name: Build (no OpenMP) + id: cmake_build_no_openmp + if: ${{ matrix.sanitizer == 'THREAD' }} + run: | + cmake -B build \ + -DLLAMA_OPENSSL=OFF \ + -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \ + -DGGML_OPENMP=OFF \ + -DLLAMA_BUILD_EXAMPLES=ON \ + -DLLAMA_BUILD_TOOLS=ON \ + -DLLAMA_BUILD_TESTS=OFF \ + -DCMAKE_C_COMPILER_LAUNCHER=ccache \ + -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ + -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON \ + -DCMAKE_C_COMPILER=riscv64-linux-gnu-gcc-14 \ + -DCMAKE_CXX_COMPILER=riscv64-linux-gnu-g++-14 + + cmake --build build --config ${{ matrix.build_type }} -j $(nproc) + + - name: Test + id: cmake_test + run: | + cd build + ctest -L main --verbose --timeout 900 diff --git a/.github/workflows/build-sanitize.yml b/.github/workflows/build-sanitize.yml new file mode 100644 index 0000000000..0b17857504 --- /dev/null +++ b/.github/workflows/build-sanitize.yml @@ -0,0 +1,87 @@ +name: CI (sanitize) + +on: + workflow_dispatch: # allows manual triggering + push: + branches: + - master + paths: [ + '.github/workflows/build-sanitize.yml', + '**/CMakeLists.txt', + '**/.cmake', + '**/*.h', + '**/*.hpp', + '**/*.c', + '**/*.cpp' + ] + +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }} + cancel-in-progress: true + +env: + GGML_NLOOP: 3 + GGML_N_THREADS: 1 + LLAMA_LOG_COLORS: 1 + LLAMA_LOG_PREFIX: 1 + LLAMA_LOG_TIMESTAMPS: 1 + +jobs: + ubuntu-latest-sanitizer: + runs-on: ubuntu-latest + + continue-on-error: true + + strategy: + matrix: + sanitizer: [ADDRESS, THREAD, UNDEFINED] + build_type: [Debug] + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v6 + + - name: ccache + uses: ggml-org/ccache-action@v1.2.16 + with: + key: ubuntu-latest-sanitizer-${{ matrix.sanitizer }} + evict-old-files: 1d + save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} + + - name: Dependencies + id: depends + run: | + sudo apt-get update + sudo apt-get install build-essential libssl-dev + + - name: Build + id: cmake_build + if: ${{ matrix.sanitizer != 'THREAD' }} + run: | + cmake -B build \ + -DLLAMA_FATAL_WARNINGS=ON \ + -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON \ + -DGGML_SANITIZE_${{ matrix.sanitizer }}=ON \ + -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} + + cmake --build build --config ${{ matrix.build_type }} -j $(nproc) + + - name: Build (no OpenMP) + id: cmake_build_no_openmp + if: ${{ matrix.sanitizer == 'THREAD' }} + run: | + cmake -B build \ + -DLLAMA_FATAL_WARNINGS=ON \ + -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON \ + -DGGML_SANITIZE_${{ matrix.sanitizer }}=ON \ + -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \ + -DGGML_OPENMP=OFF + + cmake --build build --config ${{ matrix.build_type }} -j $(nproc) + + - name: Test + id: cmake_test + run: | + cd build + ctest -L main --verbose --timeout 900 diff --git a/.github/workflows/build-self-hosted.yml b/.github/workflows/build-self-hosted.yml index eba06b96bc..7c7710fe45 100644 --- a/.github/workflows/build-self-hosted.yml +++ b/.github/workflows/build-self-hosted.yml @@ -222,15 +222,7 @@ jobs: id: checkout uses: actions/checkout@v6 - - name: Use OpenVINO Toolkit Cache - uses: actions/cache@v5 - id: cache-openvino - with: - path: ./openvino_toolkit - key: openvino-toolkit-v${{ env.OPENVINO_VERSION_FULL }}-${{ runner.os }} - - name: Setup OpenVINO Toolkit - if: steps.cache-openvino.outputs.cache-hit != 'true' uses: ./.github/actions/linux-setup-openvino with: path: ./openvino_toolkit diff --git a/.github/workflows/build-vulkan.yml b/.github/workflows/build-vulkan.yml new file mode 100644 index 0000000000..b25ec51471 --- /dev/null +++ b/.github/workflows/build-vulkan.yml @@ -0,0 +1,96 @@ +name: CI (vulkan) + +on: + workflow_dispatch: # allows manual triggering + push: + branches: + - master + paths: [ + '.github/workflows/build-vulkan.yml', + '**/CMakeLists.txt', + '**/.cmake', + '**/*.h', + '**/*.hpp', + '**/*.c', + '**/*.cpp', + '**/*.comp', + '**/*.glsl' + ] + + pull_request: + types: [opened, synchronize, reopened] + paths: [ + '.github/workflows/build-vulkan.yml', + 'ggml/src/ggml-vulkan/**' + ] + +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }} + cancel-in-progress: true + +env: + GGML_NLOOP: 3 + GGML_N_THREADS: 1 + LLAMA_LOG_COLORS: 1 + LLAMA_LOG_PREFIX: 1 + LLAMA_LOG_TIMESTAMPS: 1 + +jobs: + ubuntu-24-vulkan-llvmpipe: + runs-on: ubuntu-24.04 + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v6 + + - name: ccache + uses: ggml-org/ccache-action@v1.2.16 + with: + key: ubuntu-24-vulkan-llvmpipe + evict-old-files: 1d + save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} + + - name: Dependencies + id: depends + run: | + sudo add-apt-repository -y ppa:kisak/kisak-mesa + sudo apt-get update -y + sudo apt-get install -y build-essential mesa-vulkan-drivers libxcb-xinput0 libxcb-xinerama0 libxcb-cursor-dev libssl-dev + + - name: Get latest Vulkan SDK version + id: vulkan_sdk_version + run: | + echo "VULKAN_SDK_VERSION=$(curl https://vulkan.lunarg.com/sdk/latest/linux.txt)" >> "$GITHUB_ENV" + + - name: Use Vulkan SDK Cache + uses: actions/cache@v5 + id: cache-sdk + with: + path: ./vulkan_sdk + key: vulkan-sdk-${{ env.VULKAN_SDK_VERSION }}-${{ runner.os }} + + - name: Setup Vulkan SDK + if: steps.cache-sdk.outputs.cache-hit != 'true' + uses: ./.github/actions/linux-setup-vulkan-llvmpipe + with: + path: ./vulkan_sdk + version: ${{ env.VULKAN_SDK_VERSION }} + + - name: Build + id: cmake_build + run: | + source ./vulkan_sdk/setup-env.sh + cmake -B build \ + -DGGML_VULKAN=ON + cmake --build build --config Release -j $(nproc) + + - name: Test + id: cmake_test + run: | + cd build + export GGML_VK_VISIBLE_DEVICES=0 + export GGML_VK_DISABLE_F16=1 + export GGML_VK_DISABLE_COOPMAT=1 + # This is using llvmpipe and runs slower than other backends + ctest -L main --verbose --timeout 4800 diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 460a770122..fef08d4c00 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -7,7 +7,6 @@ on: - master paths: [ '.github/workflows/build.yml', - '.github/workflows/build-linux-cross.yml', '.github/workflows/build-cmake-pkg.yml', '**/CMakeLists.txt', '**/.cmake', @@ -29,7 +28,6 @@ on: types: [opened, synchronize, reopened] paths: [ '.github/workflows/build.yml', - '.github/workflows/build-linux-cross.yml', '.github/workflows/build-cmake-pkg.yml', '**/CMakeLists.txt', '**/.cmake', @@ -59,7 +57,10 @@ env: LLAMA_LOG_TIMESTAMPS: 1 jobs: - macOS-latest-cmake-arm64: + build-cmake-pkg: + uses: ./.github/workflows/build-cmake-pkg.yml + + macOS-latest-arm64: runs-on: macos-latest steps: @@ -70,7 +71,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.16 with: - key: macOS-latest-cmake-arm64 + key: macOS-latest-arm64 evict-old-files: 1d save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} @@ -95,7 +96,7 @@ jobs: cd build ctest -L main -E "test-llama-archs" --verbose --timeout 900 - macOS-latest-cmake-x64: + macOS-latest-x64: runs-on: macos-15-intel steps: @@ -106,7 +107,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.16 with: - key: macOS-latest-cmake-x64 + key: macOS-latest-x64 evict-old-files: 1d save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} @@ -131,7 +132,7 @@ jobs: cd build ctest -L main --verbose --timeout 900 - macOS-latest-cmake-arm64-webgpu: + macOS-latest-arm64-webgpu: runs-on: macos-latest steps: @@ -142,7 +143,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.16 with: - key: macOS-latest-cmake-arm64-webgpu + key: macOS-latest-arm64-webgpu evict-old-files: 1d save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} @@ -173,7 +174,7 @@ jobs: cd build ctest -L main --verbose --timeout 900 - ubuntu-cpu-cmake: + ubuntu-cpu: strategy: matrix: include: @@ -196,7 +197,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.16 with: - key: ubuntu-cpu-cmake-${{ matrix.build }} + key: ubuntu-cpu-${{ matrix.build }} evict-old-files: 1d save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} @@ -258,98 +259,11 @@ jobs: wget https://huggingface.co/ggml-org/models/resolve/main/tinyllamas/stories260K-be.gguf ./bin/llama-completion -m stories260K-be.gguf -p "One day, Lily met a Shoggoth" -n 500 -c 256 - ubuntu-latest-cmake-sanitizer: + ubuntu-latest-rpc: runs-on: ubuntu-latest continue-on-error: true - strategy: - matrix: - sanitizer: [ADDRESS, THREAD, UNDEFINED] - build_type: [Debug] - - steps: - - name: Clone - id: checkout - uses: actions/checkout@v6 - - - name: ccache - uses: ggml-org/ccache-action@v1.2.16 - with: - key: ubuntu-latest-cmake-sanitizer-${{ matrix.sanitizer }} - evict-old-files: 1d - save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} - - - name: Dependencies - id: depends - run: | - sudo apt-get update - sudo apt-get install build-essential libssl-dev - - - name: Build - id: cmake_build - if: ${{ matrix.sanitizer != 'THREAD' }} - run: | - cmake -B build \ - -DLLAMA_FATAL_WARNINGS=ON \ - -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON \ - -DGGML_SANITIZE_${{ matrix.sanitizer }}=ON \ - -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} - - cmake --build build --config ${{ matrix.build_type }} -j $(nproc) - - - name: Build (no OpenMP) - id: cmake_build_no_openmp - if: ${{ matrix.sanitizer == 'THREAD' }} - run: | - cmake -B build \ - -DLLAMA_FATAL_WARNINGS=ON \ - -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON \ - -DGGML_SANITIZE_${{ matrix.sanitizer }}=ON \ - -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \ - -DGGML_OPENMP=OFF - - cmake --build build --config ${{ matrix.build_type }} -j $(nproc) - - - name: Test - id: cmake_test - run: | - cd build - ctest -L main --verbose --timeout 900 - - ubuntu-24-llguidance: - runs-on: ${{ 'ubuntu-24.04-arm' || 'ubuntu-24.04' }} - - steps: - - name: Clone - id: checkout - uses: actions/checkout@v6 - - - name: Dependencies - id: depends - run: | - sudo apt-get update - sudo apt-get install build-essential libssl-dev - - - name: Build - id: cmake_build - run: | - cmake -B build \ - -DLLAMA_FATAL_WARNINGS=ON \ - -DLLAMA_LLGUIDANCE=ON - cmake --build build --config Release -j $(nproc) - - - name: Test - id: cmake_test - run: | - cd build - ctest -L main --verbose --timeout 900 - - ubuntu-24-cmake-rpc: - runs-on: ${{ 'ubuntu-24.04-arm' || 'ubuntu-24.04' }} - - continue-on-error: true - steps: - name: Clone id: checkout @@ -374,7 +288,7 @@ jobs: cd build ctest -L main --verbose - ubuntu-24-cmake-vulkan-deb: + ubuntu-24-vulkan: runs-on: ${{ 'ubuntu-24.04-arm' || 'ubuntu-24.04' }} steps: @@ -401,7 +315,7 @@ jobs: run: | cmake --build build -j $(nproc) - ubuntu-24-cmake-vulkan: + ubuntu-24-webgpu: runs-on: ubuntu-24.04 steps: @@ -412,66 +326,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.16 with: - key: ubuntu-24-cmake-vulkan - evict-old-files: 1d - save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} - - - name: Dependencies - id: depends - run: | - sudo add-apt-repository -y ppa:kisak/kisak-mesa - sudo apt-get update -y - sudo apt-get install -y build-essential mesa-vulkan-drivers libxcb-xinput0 libxcb-xinerama0 libxcb-cursor-dev libssl-dev - - - name: Get latest Vulkan SDK version - id: vulkan_sdk_version - run: | - echo "VULKAN_SDK_VERSION=$(curl https://vulkan.lunarg.com/sdk/latest/linux.txt)" >> "$GITHUB_ENV" - - - name: Use Vulkan SDK Cache - uses: actions/cache@v5 - id: cache-sdk - with: - path: ./vulkan_sdk - key: vulkan-sdk-${{ env.VULKAN_SDK_VERSION }}-${{ runner.os }} - - - name: Setup Vulkan SDK - if: steps.cache-sdk.outputs.cache-hit != 'true' - uses: ./.github/actions/linux-setup-vulkan - with: - path: ./vulkan_sdk - version: ${{ env.VULKAN_SDK_VERSION }} - - - name: Build - id: cmake_build - run: | - source ./vulkan_sdk/setup-env.sh - cmake -B build \ - -DGGML_VULKAN=ON - cmake --build build --config Release -j $(nproc) - - - name: Test - id: cmake_test - run: | - cd build - export GGML_VK_VISIBLE_DEVICES=0 - export GGML_VK_DISABLE_F16=1 - export GGML_VK_DISABLE_COOPMAT=1 - # This is using llvmpipe and runs slower than other backends - ctest -L main --verbose --timeout 4800 - - ubuntu-24-cmake-webgpu: - runs-on: ubuntu-24.04 - - steps: - - name: Clone - id: checkout - uses: actions/checkout@v6 - - - name: ccache - uses: ggml-org/ccache-action@v1.2.16 - with: - key: ubuntu-24-cmake-webgpu + key: ubuntu-24-webgpu evict-old-files: 1d save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} @@ -531,7 +386,7 @@ jobs: # This is using llvmpipe and runs slower than other backends ctest -L main --verbose --timeout 900 - ubuntu-24-wasm-webgpu: + ubuntu-24-webgpu-wasm: runs-on: ${{ 'ubuntu-24.04-arm' || 'ubuntu-24.04' }} steps: @@ -565,7 +420,7 @@ jobs: cmake --build build-wasm --target test-backend-ops -j $(nproc) - ubuntu-22-cmake-hip: + ubuntu-22-hip: runs-on: ubuntu-22.04 container: rocm/dev-ubuntu-22.04:6.1.2 @@ -583,7 +438,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.16 with: - key: ubuntu-22-cmake-hip + key: ubuntu-22-hip evict-old-files: 1d save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} @@ -596,7 +451,7 @@ jobs: -DGGML_HIP=ON cmake --build build --config Release -j $(nproc) - ubuntu-22-cmake-musa: + ubuntu-22-musa: runs-on: ubuntu-22.04 container: mthreads/musa:rc4.3.0-devel-ubuntu22.04-amd64 @@ -614,7 +469,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.16 with: - key: ubuntu-22-cmake-musa + key: ubuntu-22-musa evict-old-files: 1d save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} @@ -625,7 +480,7 @@ jobs: -DGGML_MUSA=ON cmake --build build --config Release -j $(nproc) - ubuntu-22-cmake-sycl: + ubuntu-22-sycl: runs-on: ubuntu-22.04 continue-on-error: true @@ -660,7 +515,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.16 with: - key: ubuntu-22-cmake-sycl + key: ubuntu-22-sycl evict-old-files: 1d save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} @@ -674,7 +529,7 @@ jobs: -DCMAKE_CXX_COMPILER=icpx cmake --build build --config Release -j $(nproc) - ubuntu-22-cmake-sycl-fp16: + ubuntu-22-sycl-fp16: runs-on: ubuntu-22.04 continue-on-error: true @@ -709,7 +564,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.16 with: - key: ubuntu-22-cmake-sycl-fp16 + key: ubuntu-22-sycl-fp16 evict-old-files: 1d save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} @@ -724,8 +579,8 @@ jobs: -DGGML_SYCL_F16=ON cmake --build build --config Release -j $(nproc) - ubuntu-24-cmake-openvino: - name: ubuntu-24-cmake-openvino-${{ matrix.openvino_device }} + ubuntu-24-openvino: + name: ubuntu-24-openvino-${{ matrix.openvino_device }} strategy: matrix: include: @@ -749,10 +604,12 @@ jobs: uses: actions/checkout@v6 - name: ccache + if: runner.environment == 'github-hosted' uses: ggml-org/ccache-action@v1.2.16 with: - key: ubuntu-24-cmake-openvino-${{ matrix.variant }}-no-preset-v1 + key: ubuntu-24-openvino-${{ matrix.variant }}-no-preset-v1 evict-old-files: 1d + save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} - name: Dependencies id: depends @@ -762,6 +619,7 @@ jobs: sudo apt-get install -y ocl-icd-opencl-dev opencl-headers opencl-clhpp-headers intel-opencl-icd - name: Use OpenVINO Toolkit Cache + if: runner.environment == 'github-hosted' uses: actions/cache@v5 id: cache-openvino with: @@ -801,194 +659,7 @@ jobs: fi ctest --test-dir build/ReleaseOV -L main -E "test-llama-archs" --verbose --timeout 2000 - build-linux-cross: - uses: ./.github/workflows/build-linux-cross.yml - - build-cmake-pkg: - uses: ./.github/workflows/build-cmake-pkg.yml - - macOS-latest-cmake-ios: - runs-on: macos-latest - - steps: - - name: Clone - id: checkout - uses: actions/checkout@v6 - - - name: ccache - uses: ggml-org/ccache-action@v1.2.16 - with: - key: macOS-latest-cmake-ios - evict-old-files: 1d - save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} - - - name: Build - id: cmake_build - run: | - sysctl -a - cmake -B build -G Xcode \ - -DGGML_METAL_USE_BF16=ON \ - -DGGML_METAL_EMBED_LIBRARY=ON \ - -DLLAMA_BUILD_COMMON=OFF \ - -DLLAMA_BUILD_EXAMPLES=OFF \ - -DLLAMA_BUILD_TOOLS=OFF \ - -DLLAMA_BUILD_TESTS=OFF \ - -DLLAMA_BUILD_SERVER=OFF \ - -DCMAKE_SYSTEM_NAME=iOS \ - -DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 \ - -DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml - cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO - - macOS-latest-cmake-tvos: - runs-on: macos-latest - - steps: - - name: Clone - id: checkout - uses: actions/checkout@v6 - - - name: ccache - uses: ggml-org/ccache-action@v1.2.16 - with: - key: macOS-latest-cmake-tvos - evict-old-files: 1d - save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} - - - name: Build - id: cmake_build - run: | - sysctl -a - cmake -B build -G Xcode \ - -DGGML_METAL_USE_BF16=ON \ - -DGGML_METAL_EMBED_LIBRARY=ON \ - -DLLAMA_BUILD_COMMON=OFF \ - -DLLAMA_BUILD_EXAMPLES=OFF \ - -DLLAMA_BUILD_TOOLS=OFF \ - -DLLAMA_BUILD_TESTS=OFF \ - -DLLAMA_BUILD_SERVER=OFF \ - -DCMAKE_SYSTEM_NAME=tvOS \ - -DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 \ - -DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml - cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO - - macOS-latest-cmake-visionos: - runs-on: macos-latest - - steps: - - name: Clone - id: checkout - uses: actions/checkout@v6 - - - name: Build - id: cmake_build - run: | - sysctl -a - cmake -B build -G Xcode \ - -DGGML_METAL_USE_BF16=ON \ - -DGGML_METAL_EMBED_LIBRARY=ON \ - -DLLAMA_BUILD_COMMON=OFF \ - -DLLAMA_BUILD_EXAMPLES=OFF \ - -DLLAMA_BUILD_TOOLS=OFF \ - -DLLAMA_BUILD_TESTS=OFF \ - -DLLAMA_BUILD_SERVER=OFF \ - -DCMAKE_SYSTEM_NAME=visionOS \ - -DCMAKE_OSX_DEPLOYMENT_TARGET=1.0 \ - -DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml - cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO - - macOS-latest-swift: - runs-on: macos-latest - needs: ios-xcode-build - - strategy: - matrix: - destination: ['generic/platform=macOS', 'generic/platform=iOS', 'generic/platform=tvOS'] - - steps: - - name: Clone - id: checkout - uses: actions/checkout@v6 - - - name: ccache - uses: ggml-org/ccache-action@v1.2.16 - with: - key: macOS-latest-swift - evict-old-files: 1d - save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} - - - name: Download xcframework artifact - uses: actions/download-artifact@v7 - with: - name: llama-xcframework - path: build-apple/llama.xcframework/ - - - name: Build llama.cpp with CMake - id: cmake_build - run: | - sysctl -a - cmake -B build -G Xcode \ - -DGGML_METAL_USE_BF16=ON \ - -DGGML_METAL_EMBED_LIBRARY=ON \ - -DLLAMA_OPENSSL=OFF \ - -DLLAMA_BUILD_EXAMPLES=OFF \ - -DLLAMA_BUILD_TOOLS=OFF \ - -DLLAMA_BUILD_TESTS=OFF \ - -DLLAMA_BUILD_SERVER=OFF \ - -DCMAKE_OSX_ARCHITECTURES="arm64;x86_64" - cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) - - windows-msys2: - runs-on: windows-2025 - - strategy: - fail-fast: false - matrix: - include: - - { sys: UCRT64, env: ucrt-x86_64, build: Release } - - { sys: CLANG64, env: clang-x86_64, build: Release } - - steps: - - name: Clone - uses: actions/checkout@v6 - - - name: ccache - uses: ggml-org/ccache-action@v1.2.16 - with: - key: windows-msys2 - variant: ccache - evict-old-files: 1d - save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} - - - name: Setup ${{ matrix.sys }} - uses: msys2/setup-msys2@v2 - with: - update: true - msystem: ${{matrix.sys}} - install: >- - base-devel - git - mingw-w64-${{matrix.env}}-toolchain - mingw-w64-${{matrix.env}}-cmake - mingw-w64-${{matrix.env}}-openblas - - - name: Build using CMake - shell: msys2 {0} - run: | - cmake -B build - cmake --build build --config ${{ matrix.build }} -j $(nproc) - - - name: Clean after building using CMake - shell: msys2 {0} - run: | - rm -rf build - - - name: Build using CMake w/ OpenBLAS - shell: msys2 {0} - run: | - cmake -B build -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS - cmake --build build --config ${{ matrix.build }} -j $(nproc) - - windows-latest-cmake: + windows-latest: runs-on: windows-2025 env: @@ -1023,7 +694,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.16 with: - key: windows-latest-cmake-${{ matrix.build }} + key: windows-latest-${{ matrix.build }} variant: ccache evict-old-files: 1d save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} @@ -1110,7 +781,7 @@ jobs: # $env:LLAMA_SKIP_TESTS_SLOW_ON_EMULATOR = 1 # & $sde -future -- ctest -L main -C Release --verbose --timeout 900 - ubuntu-latest-cmake-cuda: + ubuntu-latest-cuda: runs-on: ubuntu-latest container: nvidia/cuda:12.6.2-devel-ubuntu24.04 @@ -1129,7 +800,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.16 with: - key: ubuntu-latest-cmake-cuda + key: ubuntu-latest-cuda evict-old-files: 1d save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} @@ -1146,7 +817,7 @@ jobs: -DGGML_CUDA_CUB_3DOT2=ON cmake --build build - windows-2022-cmake-cuda: + windows-2022-cuda: runs-on: windows-2022 strategy: @@ -1195,7 +866,7 @@ jobs: cmake --build build --config Release -j %NINJA_JOBS% -t ggml cmake --build build --config Release - windows-latest-cmake-sycl: + windows-latest-sycl: runs-on: windows-2022 defaults: @@ -1214,7 +885,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.16 with: - key: windows-latest-cmake-sycl + key: windows-latest-sycl variant: ccache evict-old-files: 1d save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} @@ -1229,7 +900,7 @@ jobs: id: cmake_build run: examples/sycl/win-build-sycl.bat - windows-latest-cmake-hip: + windows-latest-hip: runs-on: windows-2022 env: @@ -1296,223 +967,95 @@ jobs: -DGGML_RPC=ON cmake --build build -j ${env:NUMBER_OF_PROCESSORS} - ios-xcode-build: - runs-on: macos-latest + ubuntu-cpu-riscv64-native: + runs-on: RISCV64 steps: - - name: Checkout code - uses: actions/checkout@v6 - - - name: Setup Xcode - uses: maxim-lobanov/setup-xcode@v1 - with: - xcode-version: latest-stable - - - name: Build - id: cmake_build + - name: Install dependencies run: | - sysctl -a - cmake -B build -G Xcode \ - -DGGML_METAL_USE_BF16=ON \ - -DGGML_METAL_EMBED_LIBRARY=ON \ - -DLLAMA_OPENSSL=OFF \ - -DLLAMA_BUILD_EXAMPLES=OFF \ - -DLLAMA_BUILD_TOOLS=OFF \ - -DLLAMA_BUILD_TESTS=OFF \ - -DLLAMA_BUILD_SERVER=OFF \ - -DCMAKE_SYSTEM_NAME=iOS \ - -DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 \ - -DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml - cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO + sudo apt-get update - - name: xcodebuild for swift package - id: xcodebuild - run: | - ./build-xcframework.sh + # Install necessary packages + sudo apt-get install -y libatomic1 libtsan2 gcc-14 g++-14 rustup cmake build-essential libssl-dev wget ccache git-lfs - - name: Upload xcframework artifact - uses: actions/upload-artifact@v6 - with: - name: llama-xcframework - path: build-apple/llama.xcframework/ - retention-days: 1 + # Set gcc-14 and g++-14 as the default compilers + sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-14 100 + sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-14 100 + sudo ln -sf /usr/bin/gcc-14 /usr/bin/gcc + sudo ln -sf /usr/bin/g++-14 /usr/bin/g++ - - name: Build Xcode project - run: | - xcodebuild -downloadPlatform iOS - xcodebuild -project examples/llama.swiftui/llama.swiftui.xcodeproj -scheme llama.swiftui -sdk iphoneos CODE_SIGNING_REQUIRED=NO CODE_SIGN_IDENTITY= -destination 'generic/platform=iOS' FRAMEWORK_FOLDER_PATH=./build-ios build + # Install Rust stable version + rustup install stable + rustup default stable - android-build: - runs-on: ubuntu-latest + git lfs install - steps: - - name: Clone - uses: actions/checkout@v6 - - # Disabled due to size (400MB) and always 0 cache hits - # - name: ccache - # uses: ggml-org/ccache-action@v1.2.16 - # with: - # key: android-build - # evict-old-files: 1d - - - name: Set up JDK - uses: actions/setup-java@v5 - with: - java-version: 17 - distribution: zulu - - - name: Setup Android SDK - uses: android-actions/setup-android@v3 - with: - log-accepted-android-sdk-licenses: false - - - name: Build - run: | - cd examples/llama.android - ./gradlew build --no-daemon - - android-ndk-build: - runs-on: ubuntu-latest - - env: - OPENCL_VERSION: 2025.07.22 - - strategy: - matrix: - include: - - build: 'arm64-cpu' - defines: '-D ANDROID_ABI=arm64-v8a -D ANDROID_PLATFORM=android-31 -D CMAKE_TOOLCHAIN_FILE=${ANDROID_NDK_ROOT}/build/cmake/android.toolchain.cmake -D GGML_NATIVE=OFF -DGGML_CPU_ARM_ARCH=armv8.5-a+fp16+i8mm -G Ninja -D LLAMA_OPENSSL=OFF -D GGML_OPENMP=OFF' - - build: 'arm64-snapdragon' - defines: '--preset arm64-android-snapdragon-release' - - steps: - name: Clone id: checkout uses: actions/checkout@v6 - - name: Install OpenCL Headers and Libs - id: install_opencl - if: ${{ matrix.build == 'arm64-snapdragon' }} + - name: Check environment run: | - mkdir opencl - curl -L -o opencl/clhpp.tar.gz https://github.com/KhronosGroup/OpenCL-CLHPP/archive/refs/tags/v${OPENCL_VERSION}.tar.gz - curl -L -o opencl/headers.tar.gz https://github.com/KhronosGroup/OpenCL-Headers/archive/refs/tags/v${OPENCL_VERSION}.tar.gz - curl -L -o opencl/icd-loader.tar.gz https://github.com/KhronosGroup/OpenCL-ICD-Loader/archive/refs/tags/v${OPENCL_VERSION}.tar.gz - tar -xaf opencl/headers.tar.gz -C opencl - tar -xaf opencl/clhpp.tar.gz -C opencl - tar -xaf opencl/icd-loader.tar.gz -C opencl - sudo cp -r opencl/OpenCL-Headers-${OPENCL_VERSION}/CL ${ANDROID_NDK_ROOT}/toolchains/llvm/prebuilt/linux-x86_64/sysroot/usr/include - sudo cp -r opencl/OpenCL-CLHPP-${OPENCL_VERSION}/include/CL/* ${ANDROID_NDK_ROOT}/toolchains/llvm/prebuilt/linux-x86_64/sysroot/usr/include/CL - cd opencl/OpenCL-ICD-Loader-${OPENCL_VERSION} - cmake -B build -G Ninja -DCMAKE_BUILD_TYPE=Release -DCMAKE_TOOLCHAIN_FILE=${ANDROID_NDK_ROOT}/build/cmake/android.toolchain.cmake -DOPENCL_ICD_LOADER_HEADERS_DIR=${ANDROID_NDK_ROOT}/toolchains/llvm/prebuilt/linux-x86_64/sysroot/usr/include -DANDROID_ABI=arm64-v8a -DANDROID_PLATFORM=31 -DANDROID_STL=c++_shared - cmake --build build - sudo cp build/libOpenCL.so ${ANDROID_NDK_ROOT}/toolchains/llvm/prebuilt/linux-x86_64/sysroot/usr/lib/aarch64-linux-android - rm -rf opencl + uname -a + gcc --version + g++ --version + ldd --version + cmake --version + rustc --version - - name: Install Hexagon SDK - id: install_hexsdk - if: ${{ matrix.build == 'arm64-snapdragon' }} - env: - HEXSDK_VER: 6.4.0.2 - HEXTLS_VER: 19.0.04 + - name: Setup ccache run: | - curl -L -o hex-sdk.tar.gz https://github.com/snapdragon-toolchain/hexagon-sdk/releases/download/v$HEXSDK_VER/hexagon-sdk-v$HEXSDK_VER-amd64-lnx.tar.xz - mkdir hex-sdk - tar -xaf hex-sdk.tar.gz -C hex-sdk - ls -l hex-sdk - sudo mv hex-sdk /opt/hexagon - echo "HEXAGON_SDK_ROOT=/opt/hexagon/$HEXSDK_VER" >> "$GITHUB_ENV" - echo "HEXAGON_TOOLS_ROOT=/opt/hexagon/$HEXSDK_VER/tools/HEXAGON_Tools/$HEXTLS_VER" >> "$GITHUB_ENV" - echo "DEFAULT_HLOS_ARCH=64" >> "$GITHUB_ENV" - echo "DEFAULT_TOOLS_VARIANT=toolv19" >> "$GITHUB_ENV" - echo "DEFAULT_NO_QURT_INC=0" >> "$GITHUB_ENV" - echo "DEFAULT_DSP_ARCH=v73" >> "$GITHUB_ENV" + # Set unique cache directory for this job + export CCACHE_DIR="$HOME/.ccache/cpu-cmake-rv64-native" + mkdir -p "$CCACHE_DIR" - - name: Update CMake presets - id: update_presets - if: ${{ matrix.build == 'arm64-snapdragon' }} - run: | - cp docs/backend/snapdragon/CMakeUserPresets.json . + # Configure ccache for optimal performance + ccache --set-config=max_size=5G + ccache --set-config=compression=true + ccache --set-config=compression_level=6 + ccache --set-config=cache_dir="$CCACHE_DIR" + + # Enable more aggressive caching + ccache --set-config=sloppiness=file_macro,time_macros,include_file_mtime,include_file_ctime + ccache --set-config=hash_dir=false + + # Export for subsequent steps + echo "CCACHE_DIR=$CCACHE_DIR" >> $GITHUB_ENV + echo "PATH=/usr/lib/ccache:$PATH" >> $GITHUB_ENV - name: Build - id: ndk_build + id: cmake_build run: | - cmake ${{ matrix.defines }} -B build - cmake --build build - cmake --install build --prefix pkg-adb/llama.cpp + cmake -B build \ + -DCMAKE_BUILD_TYPE=Release \ + -DGGML_OPENMP=OFF \ + -DLLAMA_BUILD_EXAMPLES=ON \ + -DLLAMA_BUILD_TOOLS=ON \ + -DLLAMA_BUILD_TESTS=ON \ + -DCMAKE_C_COMPILER_LAUNCHER=ccache \ + -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ + -DGGML_RPC=ON \ + -DCMAKE_C_COMPILER=riscv64-linux-gnu-gcc-14 \ + -DCMAKE_CXX_COMPILER=riscv64-linux-gnu-g++-14 + + cmake --build build --config Release -j $(nproc) - name: Test id: cmake_test run: | - echo "FIXME: test on devices" + cd build + ctest -L main --verbose --timeout 900 - openEuler-latest-cmake-cann: - defaults: - run: - shell: bash -el {0} - strategy: - matrix: - arch: [x86, aarch64] - chip_type: ['910b', '310p'] - build: ['Release'] - use_acl_graph: ['on', 'off'] - exclude: - # 310P does not support USE_ACL_GRAPH=on - - chip_type: '310p' - use_acl_graph: 'on' - runs-on: ${{ matrix.arch == 'aarch64' && 'ubuntu-24.04-arm' || 'ubuntu-24.04' }} - steps: - - name: Checkout - uses: actions/checkout@v6 - with: - fetch-depth: 0 - - - name: Free up disk space - uses: ggml-org/free-disk-space@v1.3.1 - with: - tool-cache: true - - - name: Set container image - id: cann-image + - name: Test llama2c conversion + id: llama2c_test run: | - image="ascendai/cann:${{ matrix.chip_type == '910b' && '8.3.rc2-910b-openeuler24.03-py3.11' || '8.3.rc2-310p-openeuler24.03-py3.11' }}" - echo "image=${image}" >> "${GITHUB_OUTPUT}" - - - name: Pull container image - run: docker pull "${{ steps.cann-image.outputs.image }}" - - - name: Build - env: - BUILD_TYPE: ${{ matrix.build }} - SOC_TYPE: ascend${{ matrix.chip_type }} - USE_ACL_GRAPH: ${{ matrix.use_acl_graph }} - run: | - HOST_UID=$(id -u) - HOST_GID=$(id -g) - - docker run --rm \ - -v "${PWD}:/workspace" \ - -w /workspace \ - -e SOC_TYPE=${SOC_TYPE} \ - -e BUILD_TYPE=${BUILD_TYPE} \ - -e USE_ACL_GRAPH=${USE_ACL_GRAPH} \ - "${{ steps.cann-image.outputs.image }}" \ - bash -lc ' - set -e - yum install -y --setopt=install_weak_deps=False --setopt=tsflags=nodocs git gcc gcc-c++ make cmake openssl-devel - yum clean all && rm -rf /var/cache/yum - git config --global --add safe.directory "/workspace" - export LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:${ASCEND_TOOLKIT_HOME}/$(uname -m)-linux/devlib/:${LD_LIBRARY_PATH} - cmake -S . -B build \ - -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \ - -DGGML_CANN=on \ - -DSOC_TYPE=${SOC_TYPE} \ - -DUSE_ACL_GRAPH=${USE_ACL_GRAPH} - cmake --build build -j $(nproc) - - chown -R '"${HOST_UID}"':'"${HOST_GID}"' /workspace/build - ' + cd build + echo "Fetch tokenizer" + wget https://huggingface.co/karpathy/tinyllamas/resolve/main/stories260K/tok512.bin + echo "Fetch llama2c model" + wget https://huggingface.co/karpathy/tinyllamas/resolve/main/stories260K/stories260K.bin + ./bin/llama-convert-llama2c-to-ggml --copy-vocab-from-model ./tok512.bin --llama2c-model stories260K.bin --llama2c-output-model stories260K.gguf + ./bin/llama-completion -m stories260K.gguf -p "One day, Lily met a Shoggoth" -n 500 -c 256 # TODO: simplify the following workflows using a matrix # TODO: run lighter CI on PRs and the full CI only on master (if needed) @@ -1672,342 +1215,7 @@ jobs: run: | GG_BUILD_KLEIDIAI=1 GG_BUILD_EXTRA_TESTS_0=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt - ubuntu-cpu-cmake-riscv64-native: - runs-on: RISCV64 - - steps: - - name: Install dependencies - run: | - sudo apt-get update - - # Install necessary packages - sudo apt-get install -y libatomic1 libtsan2 gcc-14 g++-14 rustup cmake build-essential libssl-dev wget ccache git-lfs - - # Set gcc-14 and g++-14 as the default compilers - sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-14 100 - sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-14 100 - sudo ln -sf /usr/bin/gcc-14 /usr/bin/gcc - sudo ln -sf /usr/bin/g++-14 /usr/bin/g++ - - # Install Rust stable version - rustup install stable - rustup default stable - - git lfs install - - - name: Clone - id: checkout - uses: actions/checkout@v6 - - - name: Check environment - run: | - uname -a - gcc --version - g++ --version - ldd --version - cmake --version - rustc --version - - - name: Setup ccache - run: | - # Set unique cache directory for this job - export CCACHE_DIR="$HOME/.ccache/cpu-cmake-rv64-native" - mkdir -p "$CCACHE_DIR" - - # Configure ccache for optimal performance - ccache --set-config=max_size=5G - ccache --set-config=compression=true - ccache --set-config=compression_level=6 - ccache --set-config=cache_dir="$CCACHE_DIR" - - # Enable more aggressive caching - ccache --set-config=sloppiness=file_macro,time_macros,include_file_mtime,include_file_ctime - ccache --set-config=hash_dir=false - - # Export for subsequent steps - echo "CCACHE_DIR=$CCACHE_DIR" >> $GITHUB_ENV - echo "PATH=/usr/lib/ccache:$PATH" >> $GITHUB_ENV - - - name: Build - id: cmake_build - run: | - cmake -B build \ - -DCMAKE_BUILD_TYPE=Release \ - -DGGML_OPENMP=OFF \ - -DLLAMA_BUILD_EXAMPLES=ON \ - -DLLAMA_BUILD_TOOLS=ON \ - -DLLAMA_BUILD_TESTS=ON \ - -DCMAKE_C_COMPILER_LAUNCHER=ccache \ - -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ - -DGGML_RPC=ON \ - -DCMAKE_C_COMPILER=riscv64-linux-gnu-gcc-14 \ - -DCMAKE_CXX_COMPILER=riscv64-linux-gnu-g++-14 - - cmake --build build --config Release -j $(nproc) - - - name: Test - id: cmake_test - run: | - cd build - ctest -L main --verbose --timeout 900 - - - name: Test llama2c conversion - id: llama2c_test - run: | - cd build - echo "Fetch tokenizer" - wget https://huggingface.co/karpathy/tinyllamas/resolve/main/stories260K/tok512.bin - echo "Fetch llama2c model" - wget https://huggingface.co/karpathy/tinyllamas/resolve/main/stories260K/stories260K.bin - ./bin/llama-convert-llama2c-to-ggml --copy-vocab-from-model ./tok512.bin --llama2c-model stories260K.bin --llama2c-output-model stories260K.gguf - ./bin/llama-completion -m stories260K.gguf -p "One day, Lily met a Shoggoth" -n 500 -c 256 - - ubuntu-cmake-sanitizer-riscv64-native: - runs-on: RISCV64 - - continue-on-error: true - - strategy: - matrix: - sanitizer: [ADDRESS, THREAD, UNDEFINED] - build_type: [Debug] - - steps: - - name: Install dependencies - run: | - sudo apt-get update - - # Install necessary packages - sudo apt-get install -y libatomic1 libtsan2 gcc-14 g++-14 rustup cmake build-essential wget ccache git-lfs - - # Set gcc-14 and g++-14 as the default compilers - sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-14 100 - sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-14 100 - sudo ln -sf /usr/bin/gcc-14 /usr/bin/gcc - sudo ln -sf /usr/bin/g++-14 /usr/bin/g++ - - # Install Rust stable version - rustup install stable - rustup default stable - - git lfs install - - - name: GCC version check - run: | - gcc --version - g++ --version - - - name: Clone - id: checkout - uses: actions/checkout@v6 - - - name: Setup ccache - run: | - # Unique cache directory per matrix combination - export CCACHE_DIR="$HOME/.ccache/sanitizer-${{ matrix.sanitizer }}-${{ matrix.build_type }}" - mkdir -p "$CCACHE_DIR" - - # Configure ccache - ccache --set-config=max_size=5G - ccache --set-config=compression=true - ccache --set-config=compression_level=6 - ccache --set-config=cache_dir="$CCACHE_DIR" - ccache --set-config=sloppiness=file_macro,time_macros,include_file_mtime,include_file_ctime - ccache --set-config=hash_dir=false - - # Export for subsequent steps - echo "CCACHE_DIR=$CCACHE_DIR" >> $GITHUB_ENV - echo "PATH=/usr/lib/ccache:$PATH" >> $GITHUB_ENV - - - name: Build - id: cmake_build - if: ${{ matrix.sanitizer != 'THREAD' }} - run: | - cmake -B build \ - -DLLAMA_OPENSSL=OFF \ - -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \ - -DGGML_OPENMP=ON \ - -DLLAMA_BUILD_EXAMPLES=ON \ - -DLLAMA_BUILD_TOOLS=ON \ - -DLLAMA_BUILD_TESTS=OFF \ - -DCMAKE_C_COMPILER_LAUNCHER=ccache \ - -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ - -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON \ - -DCMAKE_C_COMPILER=riscv64-linux-gnu-gcc-14 \ - -DCMAKE_CXX_COMPILER=riscv64-linux-gnu-g++-14 - - cmake --build build --config ${{ matrix.build_type }} -j $(nproc) - - - name: Build (no OpenMP) - id: cmake_build_no_openmp - if: ${{ matrix.sanitizer == 'THREAD' }} - run: | - cmake -B build \ - -DLLAMA_OPENSSL=OFF \ - -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \ - -DGGML_OPENMP=OFF \ - -DLLAMA_BUILD_EXAMPLES=ON \ - -DLLAMA_BUILD_TOOLS=ON \ - -DLLAMA_BUILD_TESTS=OFF \ - -DCMAKE_C_COMPILER_LAUNCHER=ccache \ - -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ - -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON \ - -DCMAKE_C_COMPILER=riscv64-linux-gnu-gcc-14 \ - -DCMAKE_CXX_COMPILER=riscv64-linux-gnu-g++-14 - - cmake --build build --config ${{ matrix.build_type }} -j $(nproc) - - - name: Test - id: cmake_test - run: | - cd build - ctest -L main --verbose --timeout 900 - - - ubuntu-llguidance-riscv64-native: - runs-on: RISCV64 - steps: - - name: Install dependencies - run: | - sudo apt-get update - - # Install necessary packages - sudo apt-get install -y libatomic1 libtsan2 gcc-14 g++-14 rustup cmake build-essential wget ccache git-lfs - - # Set gcc-14 and g++-14 as the default compilers - sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-14 100 - sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-14 100 - sudo ln -sf /usr/bin/gcc-14 /usr/bin/gcc - sudo ln -sf /usr/bin/g++-14 /usr/bin/g++ - - # Install Rust stable version - rustup install stable - rustup default stable - - git lfs install - - - name: GCC version check - run: | - gcc --version - g++ --version - - - name: Clone - id: checkout - uses: actions/checkout@v6 - - - name: Setup ccache - run: | - export CCACHE_DIR="$HOME/.ccache/llguidance-riscv64" - mkdir -p "$CCACHE_DIR" - - ccache --set-config=max_size=5G - ccache --set-config=compression=true - ccache --set-config=compression_level=6 - ccache --set-config=cache_dir="$CCACHE_DIR" - ccache --set-config=sloppiness=file_macro,time_macros,include_file_mtime,include_file_ctime - ccache --set-config=hash_dir=false - - echo "CCACHE_DIR=$CCACHE_DIR" >> $GITHUB_ENV - echo "PATH=/usr/lib/ccache:$PATH" >> $GITHUB_ENV - - - name: Build - id: cmake_build - run: | - cmake -B build \ - -DLLAMA_OPENSSL=OFF \ - -DCMAKE_BUILD_TYPE=Release \ - -DGGML_OPENMP=OFF \ - -DLLAMA_BUILD_EXAMPLES=ON \ - -DLLAMA_BUILD_TOOLS=ON \ - -DLLAMA_BUILD_TESTS=OFF \ - -DCMAKE_C_COMPILER_LAUNCHER=ccache \ - -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ - -DLLAMA_LLGUIDANCE=ON \ - -DCMAKE_C_COMPILER=riscv64-linux-gnu-gcc-14 \ - -DCMAKE_CXX_COMPILER=riscv64-linux-gnu-g++-14 - - cmake --build build --config Release -j $(nproc) - - - name: Test - id: cmake_test - run: | - cd build - ctest -L main --verbose --timeout 900 - - - ubuntu-cmake-rpc-riscv64-native: - runs-on: RISCV64 - - continue-on-error: true - - steps: - - name: Install dependencies - run: | - sudo apt-get update - - # Install necessary packages - sudo apt-get install -y libatomic1 libtsan2 gcc-14 g++-14 rustup cmake build-essential libssl-dev wget ccache git-lfs - - # Set gcc-14 and g++-14 as the default compilers - sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-14 100 - sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-14 100 - sudo ln -sf /usr/bin/gcc-14 /usr/bin/gcc - sudo ln -sf /usr/bin/g++-14 /usr/bin/g++ - - # Install Rust stable version - rustup install stable - rustup default stable - - git lfs install - - - name: GCC version check - run: | - gcc --version - g++ --version - - - name: Clone - id: checkout - uses: actions/checkout@v6 - - - name: Setup ccache - run: | - export CCACHE_DIR="$HOME/.ccache/rpc-riscv64" - mkdir -p "$CCACHE_DIR" - - ccache --set-config=max_size=5G - ccache --set-config=compression=true - ccache --set-config=compression_level=6 - ccache --set-config=cache_dir="$CCACHE_DIR" - ccache --set-config=sloppiness=file_macro,time_macros,include_file_mtime,include_file_ctime - ccache --set-config=hash_dir=false - - echo "CCACHE_DIR=$CCACHE_DIR" >> $GITHUB_ENV - echo "PATH=/usr/lib/ccache:$PATH" >> $GITHUB_ENV - - - name: Build - id: cmake_build - run: | - cmake -B build \ - -DCMAKE_BUILD_TYPE=Release \ - -DGGML_OPENMP=OFF \ - -DLLAMA_BUILD_EXAMPLES=ON \ - -DLLAMA_BUILD_TOOLS=ON \ - -DLLAMA_BUILD_TESTS=ON \ - -DCMAKE_C_COMPILER_LAUNCHER=ccache \ - -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ - -DCMAKE_C_COMPILER=riscv64-linux-gnu-gcc-14 \ - -DCMAKE_CXX_COMPILER=riscv64-linux-gnu-g++-14 \ - -DGGML_RPC=ON - - cmake --build build --config Release -j $(nproc) - - - name: Test - id: cmake_test - run: | - cd build - ctest -L main --verbose - - ggml-ci-arm64-graviton4-kleidiai: + ggml-ci-arm64-cpu-kleidiai-graviton4: runs-on: ah-ubuntu_22_04-c8g_8x steps: @@ -2044,7 +1252,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.16 with: - key: ggml-ci-arm64-graviton4-kleidiai + key: ggml-ci-arm64-cpu-kleidiai-graviton4 evict-old-files: 1d save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} diff --git a/.github/workflows/python-lint.yml b/.github/workflows/python-lint.yml index 8d1dd7a7d5..e21b3b6568 100644 --- a/.github/workflows/python-lint.yml +++ b/.github/workflows/python-lint.yml @@ -4,10 +4,16 @@ on: push: branches: - master - paths: ['.github/workflows/python-lint.yml', '**/*.py'] + paths: [ + '.github/workflows/python-lint.yml', + '**/*.py' + ] pull_request: types: [opened, synchronize, reopened] - paths: ['.github/workflows/python-lint.yml', '**/*.py'] + paths: [ + '.github/workflows/python-lint.yml', + '**/*.py' + ] concurrency: group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }} diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index f329630071..b0f2714ffd 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -10,7 +10,22 @@ on: push: branches: - master - paths: ['.github/workflows/release.yml', '**/CMakeLists.txt', '**/.cmake', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.cuh', '**/*.swift', '**/*.m', '**/*.metal', '**/*.comp'] + paths: [ + '.github/workflows/release.yml', + '**/CMakeLists.txt', + '**/.cmake', + '**/*.h', + '**/*.hpp', + '**/*.c', + '**/*.cpp', + '**/*.cu', + '**/*.cuh', + '**/*.swift', + '**/*.m', + '**/*.metal', + '**/*.comp', + '**/*.glsl' + ] concurrency: group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }} @@ -34,7 +49,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.16 with: - key: macOS-latest-cmake-arm64 + key: macOS-latest-arm64 evict-old-files: 1d - name: Build @@ -81,7 +96,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.16 with: - key: macOS-latest-cmake-x64 + key: macOS-latest-x64 evict-old-files: 1d - name: Build @@ -140,7 +155,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.16 with: - key: ubuntu-cpu-cmake-${{ matrix.build }} + key: ubuntu-cpu-${{ matrix.build }} evict-old-files: 1d - name: Dependencies @@ -191,7 +206,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.16 with: - key: ubuntu-22-cmake-vulkan + key: ubuntu-22-vulkan evict-old-files: 1d - name: Dependencies @@ -256,7 +271,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.16 with: - key: ubuntu-24-cmake-openvino-release-no-preset-v1 + key: ubuntu-24-openvino-release-no-preset-v1 evict-old-files: 1d - name: Dependencies @@ -329,7 +344,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.16 with: - key: windows-latest-cmake-cpu-${{ matrix.arch }} + key: windows-latest-cpu-${{ matrix.arch }} variant: ccache evict-old-files: 1d @@ -390,7 +405,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.16 with: - key: windows-latest-cmake-${{ matrix.backend }}-${{ matrix.arch }} + key: windows-latest-${{ matrix.backend }}-${{ matrix.arch }} variant: ccache evict-old-files: 1d @@ -536,7 +551,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.16 with: - key: windows-latest-cmake-sycl + key: windows-latest-sycl variant: ccache evict-old-files: 1d @@ -616,7 +631,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.16 with: - key: ubuntu-rocm-cmake-${{ matrix.ROCM_VERSION }}-${{ matrix.build }} + key: ubuntu-rocm-${{ matrix.ROCM_VERSION }}-${{ matrix.build }} evict-old-files: 1d - name: Dependencies @@ -726,7 +741,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.16 with: - key: windows-latest-cmake-hip-${{ env.HIPSDK_INSTALLER_VERSION }}-${{ matrix.name }}-x64 + key: windows-latest-hip-${{ env.HIPSDK_INSTALLER_VERSION }}-${{ matrix.name }}-x64 evict-old-files: 1d - name: Install ROCm diff --git a/.github/workflows/server-sanitize.yml b/.github/workflows/server-sanitize.yml new file mode 100644 index 0000000000..4c9f447cf8 --- /dev/null +++ b/.github/workflows/server-sanitize.yml @@ -0,0 +1,105 @@ +name: Server (sanitize) + +on: + workflow_dispatch: # allows manual triggering + inputs: + sha: + description: 'Commit SHA1 to build' + required: false + type: string + slow_tests: + description: 'Run slow tests' + required: true + type: boolean + push: + branches: + - master + paths: [ + '.github/workflows/server-sanitize.yml', + '**/CMakeLists.txt', + '**/Makefile', + '**/*.h', + '**/*.hpp', + '**/*.c', + '**/*.cpp', + 'tools/server/**.*' + ] + +env: + LLAMA_LOG_COLORS: 1 + LLAMA_LOG_PREFIX: 1 + LLAMA_LOG_TIMESTAMPS: 1 + LLAMA_LOG_VERBOSITY: 10 + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || github.run_id }} + cancel-in-progress: true + +jobs: + server: + runs-on: ubuntu-latest + + strategy: + matrix: + sanitizer: [ADDRESS, UNDEFINED] # THREAD is very slow + build_type: [RelWithDebInfo] + fail-fast: false + + steps: + - name: Dependencies + id: depends + run: | + sudo apt-get update + sudo apt-get -y install \ + build-essential \ + xxd \ + git \ + cmake \ + curl \ + wget \ + language-pack-en \ + libssl-dev + + - name: Clone + id: checkout + uses: actions/checkout@v6 + with: + fetch-depth: 0 + ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }} + + - name: Build + id: cmake_build + run: | + cmake -B build \ + -DLLAMA_BUILD_BORINGSSL=ON \ + -DGGML_SCHED_NO_REALLOC=ON \ + -DGGML_SANITIZE_ADDRESS=${{ matrix.sanitizer == 'ADDRESS' }} \ + -DGGML_SANITIZE_THREAD=${{ matrix.sanitizer == 'THREAD' }} \ + -DGGML_SANITIZE_UNDEFINED=${{ matrix.sanitizer == 'UNDEFINED' }} \ + -DLLAMA_SANITIZE_ADDRESS=${{ matrix.sanitizer == 'ADDRESS' }} \ + -DLLAMA_SANITIZE_THREAD=${{ matrix.sanitizer == 'THREAD' }} \ + -DLLAMA_SANITIZE_UNDEFINED=${{ matrix.sanitizer == 'UNDEFINED' }} + cmake --build build --config ${{ matrix.build_type }} -j $(nproc) --target llama-server + + - name: Python setup + id: setup_python + uses: actions/setup-python@v6 + with: + python-version: '3.11' + pip-install: -r tools/server/tests/requirements.txt + + - name: Tests + id: server_integration_tests + if: ${{ (!matrix.disabled_on_pr || !github.event.pull_request) }} + run: | + cd tools/server/tests + export ${{ matrix.extra_args }} + pytest -v -x -m "not slow" + + - name: Slow tests + id: server_integration_tests_slow + if: ${{ (github.event.schedule || github.event.inputs.slow_tests == 'true') && matrix.build_type == 'Release' }} + run: | + cd tools/server/tests + export ${{ matrix.extra_args }} + SLOW_TESTS=1 pytest -v -x diff --git a/.github/workflows/server-self-hosted.yml b/.github/workflows/server-self-hosted.yml index a11aea7e89..29bd79690a 100644 --- a/.github/workflows/server-self-hosted.yml +++ b/.github/workflows/server-self-hosted.yml @@ -14,7 +14,19 @@ on: push: branches: - master - paths: ['.github/workflows/server-self-hosted.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'tools/server/**.*'] + paths: [ + '.github/workflows/server-self-hosted.yml', + '**/CMakeLists.txt', + '**/Makefile', + '**/*.h', + '**/*.hpp', + '**/*.c', + '**/*.cpp', + '**/*.cu', + '**/*.swift', + '**/*.m', + 'tools/server/**.*' + ] env: LLAMA_LOG_COLORS: 1 diff --git a/.github/workflows/server-webui.yml b/.github/workflows/server-webui.yml index 4d560ff58d..492107ffd8 100644 --- a/.github/workflows/server-webui.yml +++ b/.github/workflows/server-webui.yml @@ -1,4 +1,3 @@ -# Server WebUI build and tests name: Server WebUI on: @@ -11,10 +10,20 @@ on: push: branches: - master - paths: ['.github/workflows/server-webui.yml', 'tools/server/webui/**.*', 'tools/server/tests/**.*', 'tools/server/public/**'] + paths: [ + '.github/workflows/server-webui.yml', + 'tools/server/webui/**.*', + 'tools/server/tests/**.*', + 'tools/server/public/**' + ] pull_request: types: [opened, synchronize, reopened] - paths: ['.github/workflows/server-webui.yml', 'tools/server/webui/**.*', 'tools/server/tests/**.*', 'tools/server/public/**'] + paths: [ + '.github/workflows/server-webui.yml', + 'tools/server/webui/**.*', + 'tools/server/tests/**.*', + 'tools/server/public/**' + ] env: LLAMA_LOG_COLORS: 1 diff --git a/.github/workflows/server.yml b/.github/workflows/server.yml index 99d05226ba..750c29f08e 100644 --- a/.github/workflows/server.yml +++ b/.github/workflows/server.yml @@ -1,4 +1,3 @@ -# Server build and tests name: Server on: @@ -15,10 +14,34 @@ on: push: branches: - master - paths: ['.github/workflows/server.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'tools/server/**.*'] + paths: [ + '.github/workflows/server.yml', + '**/CMakeLists.txt', + '**/Makefile', + '**/*.h', + '**/*.hpp', + '**/*.c', + '**/*.cpp', + '**/*.cu', + '**/*.swift', + '**/*.m', + 'tools/server/**.*' + ] pull_request: types: [opened, synchronize, reopened] - paths: ['.github/workflows/server.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'tools/server/**.*'] + paths: [ + '.github/workflows/server.yml', + '**/CMakeLists.txt', + '**/Makefile', + '**/*.h', + '**/*.hpp', + '**/*.c', + '**/*.cpp', + '**/*.cu', + '**/*.swift', + '**/*.m', + 'tools/server/**.*' + ] env: LLAMA_LOG_COLORS: 1 @@ -34,17 +57,18 @@ jobs: server: runs-on: ubuntu-latest + name: server (${{ matrix.wf_name }}) strategy: matrix: - sanitizer: [ADDRESS, UNDEFINED] # THREAD is very slow - build_type: [RelWithDebInfo] + build_type: [Release] + wf_name: ["default"] include: - build_type: Release - sanitizer: "" extra_args: "" + wf_name: "default" - build_type: Release - sanitizer: "" extra_args: "LLAMA_ARG_BACKEND_SAMPLING=1" + wf_name: "backend-sampling" fail-fast: false steps: @@ -74,13 +98,7 @@ jobs: run: | cmake -B build \ -DLLAMA_BUILD_BORINGSSL=ON \ - -DGGML_SCHED_NO_REALLOC=ON \ - -DGGML_SANITIZE_ADDRESS=${{ matrix.sanitizer == 'ADDRESS' }} \ - -DGGML_SANITIZE_THREAD=${{ matrix.sanitizer == 'THREAD' }} \ - -DGGML_SANITIZE_UNDEFINED=${{ matrix.sanitizer == 'UNDEFINED' }} \ - -DLLAMA_SANITIZE_ADDRESS=${{ matrix.sanitizer == 'ADDRESS' }} \ - -DLLAMA_SANITIZE_THREAD=${{ matrix.sanitizer == 'THREAD' }} \ - -DLLAMA_SANITIZE_UNDEFINED=${{ matrix.sanitizer == 'UNDEFINED' }} + -DGGML_SCHED_NO_REALLOC=ON cmake --build build --config ${{ matrix.build_type }} -j $(nproc) --target llama-server - name: Python setup diff --git a/CODEOWNERS b/CODEOWNERS index 29673a7b26..4257f5927a 100644 --- a/CODEOWNERS +++ b/CODEOWNERS @@ -2,29 +2,13 @@ # multiplie collaborators per item can be specified /.devops/*.Dockerfile @ngxson -/.github/actions/ @CISC -/.github/workflows/ @CISC +/.github/actions/ @ggml-org/ci +/.github/workflows/ @ggml-org/ci /ci/ @ggerganov /cmake/ @ggerganov -/common/CMakeLists.txt @ggerganov -/common/arg.* @ggerganov -/common/base64.hpp.* @ggerganov -/common/build-info.* @ggerganov -/common/chat.* @pwilkin -/common/chat-auto*.* @pwilkin -/common/chat-diff-analyzer.* @pwilkin -/common/chat-peg-parser.* @aldehir -/common/common.* @ggerganov -/common/console.* @ggerganov -/common/http.* @angt -/common/jinja/ @ngxson @CISC @aldehir -/common/llguidance.* @ggerganov -/common/log.* @ggerganov +/common/ @ggml-org/llama-common +/common/jinja/ @CISC /common/ngram-map.* @srogmann -/common/peg-parser.* @aldehir -/common/sampling.* @ggerganov -/common/speculative.* @ggerganov -/common/unicode.* @aldehir /convert_*.py @CISC /examples/batched.swift/ @ggerganov /examples/batched/ @ggerganov @@ -51,29 +35,27 @@ /examples/speculative/ @ggerganov /ggml/cmake/ @ggerganov /ggml/include/ @ggerganov +/ggml/src/ggml-cann/ @ggml-org/ggml-cann /ggml/src/ggml-common.h @ggerganov /ggml/src/ggml-cpu/ @ggerganov /ggml/src/ggml-cpu/spacemit/ @alex-spacemit -/ggml/src/ggml-cuda/fattn* @JohannesGaessler -/ggml/src/ggml-cuda/mmf.* @JohannesGaessler @am17an -/ggml/src/ggml-cuda/mmq.* @JohannesGaessler -/ggml/src/ggml-cuda/mmvf.* @JohannesGaessler -/ggml/src/ggml-cuda/mmvq.* @JohannesGaessler +/ggml/src/ggml-cuda/ @ggml-org/ggml-cuda /ggml/src/ggml-cuda/fattn-wmma* @IMbackK /ggml/src/ggml-hip/ @IMbackK /ggml/src/ggml-cuda/vendors/hip.h @IMbackK /ggml/src/ggml-impl.h @ggerganov -/ggml/src/ggml-metal/ @ggerganov -/ggml/src/ggml-opencl/ @lhez @max-krasnyansky -/ggml/src/ggml-hexagon/ @max-krasnyansky @lhez +/ggml/src/ggml-metal/ @ggml-org/ggml-metal +/ggml/src/ggml-opencl/ @ggml-org/ggml-opencl +/ggml/src/ggml-hexagon/ @ggml-org/ggml-hexagon /ggml/src/ggml-opt.cpp @JohannesGaessler /ggml/src/ggml-quants.* @ggerganov -/ggml/src/ggml-rpc/ @rgerganov +/ggml/src/ggml-rpc/ @ggml-org/ggml-rpc +/ggml/src/ggml-sycl/ @ggml-org/ggml-sycl /ggml/src/ggml-threading.* @ggerganov -/ggml/src/ggml-vulkan/ @0cc4m +/ggml/src/ggml-vulkan/ @ggml-org/ggml-vulkan /ggml/src/ggml-virtgpu/ @kpouget -/ggml/src/ggml-webgpu/ @reeselevine -/ggml/src/ggml-zdnn/ @taronaeo @Andreas-Krebbel @AlekseiNikiforovIBM +/ggml/src/ggml-webgpu/ @ggml-org/ggml-webgpu +/ggml/src/ggml-zdnn/ @ggml-org/ggml-zdnn @Andreas-Krebbel @AlekseiNikiforovIBM /ggml/src/ggml-openvino/ @cavusmustafa @wine99 /ggml/src/ggml.c @ggerganov /ggml/src/ggml.cpp @ggerganov @@ -93,16 +75,18 @@ /src/models/ @CISC /tests/ @ggerganov /tests/test-chat.* @pwilkin +/tests/test-llama-archs.cpp @JohannesGaessler /tools/batched-bench/ @ggerganov /tools/cli/ @ngxson /tools/completion/ @ggerganov -/tools/mtmd/ @ngxson +/tools/mtmd/ @ggml-org/llama-mtmd /tools/perplexity/ @ggerganov /tools/parser/ @pwilkin /tools/quantize/ @ggerganov -/tools/rpc/ @rgerganov -/tools/server/* @ngxson @ggerganov # no subdir -/tools/server/webui/ @allozaur +/tools/rpc/ @ggml-org/ggml-rpc +/tools/server/* @ggml-org/llama-server # no subdir +/tools/server/tests/ @ggml-org/llama-server +/tools/server/webui/ @ggml-org/llama-webui /tools/tokenize/ @ggerganov /tools/tts/ @ggerganov /vendor/ @ggerganov diff --git a/common/chat-diff-analyzer.cpp b/common/chat-diff-analyzer.cpp index 4068340a5c..05b3b6b6a8 100644 --- a/common/chat-diff-analyzer.cpp +++ b/common/chat-diff-analyzer.cpp @@ -479,6 +479,7 @@ analyze_content::analyze_content(const common_chat_template & tmpl, const analyz if (!comparison_with_tools || !comparison_with_reasoning) { LOG_DBG(ANSI_ORANGE "%s: Template application failed\n" ANSI_RESET, __func__); + return; } const auto & diff_tools = comparison_with_tools->diff; @@ -911,8 +912,10 @@ void analyze_tools::extract_function_markers() { // we'll have to rely on an extra diff with no-calls version auto notool_comp = compare_variants( *tmpl, params, [&](template_params & p) { p.messages = json::array({ user_msg, assistant_nocall }); }); - auto nt_diff = notool_comp->diff; - closer_suffix = nt_diff.left.substr(nt_diff.left.find("YYYY") + 4); + if (notool_comp) { + auto nt_diff = notool_comp->diff; + closer_suffix = nt_diff.left.substr(nt_diff.left.find("YYYY") + 4); + } } else { closer_suffix = diff.suffix.substr(0, diff.suffix.find(suffix_marker)); } diff --git a/ggml/src/ggml-cuda/fattn-common.cuh b/ggml/src/ggml-cuda/fattn-common.cuh index b6a7460da8..e9abdf288c 100644 --- a/ggml/src/ggml-cuda/fattn-common.cuh +++ b/ggml/src/ggml-cuda/fattn-common.cuh @@ -892,7 +892,7 @@ void launch_fattn( const int ntiles_x = ((Q->ne[1] + ncols1 - 1) / ncols1); const int gqa_ratio = Q->ne[2] / K->ne[2]; const int ntiles_z_gqa = ((gqa_ratio + ncols2 - 1) / ncols2); - const int ntiles_total = ntiles_x * ntiles_z_gqa * K->ne[2] * Q->ne[3]; + const int ntiles_dst = ntiles_x * ntiles_z_gqa * K->ne[2] * Q->ne[3]; // Optional optimization where the mask is scanned to determine whether part of the calculation can be skipped. // Only worth the overhead if there is at lease one FATTN_KQ_STRIDE x FATTN_KQ_STRIDE square to be skipped or @@ -919,37 +919,37 @@ void launch_fattn( GGML_ASSERT(max_blocks_per_sm > 0); int parallel_blocks = max_blocks_per_sm; + const int ntiles_KV = (K->ne[1] + nbatch_fa - 1) / nbatch_fa; // Max. number of parallel blocks limited by KV cache length. + dim3 blocks_num; if (stream_k) { // For short contexts it can be faster to have the SMs work on whole tiles because this lets us skip the fixup. const int max_blocks = max_blocks_per_sm*nsm; - const int tiles_nwaves = (ntiles_total + max_blocks - 1) / max_blocks; - const int tiles_efficiency_percent = 100 * ntiles_total / (max_blocks*tiles_nwaves); + const int tiles_nwaves = (ntiles_dst + max_blocks - 1) / max_blocks; + const int tiles_efficiency_percent = 100 * ntiles_dst / (max_blocks*tiles_nwaves); - const int nblocks_stream_k = max_blocks; + const int nblocks_stream_k = std::min(max_blocks, ntiles_KV*ntiles_dst); const bool use_stream_k = cc >= GGML_CUDA_CC_ADA_LOVELACE || amd_wmma_available(cc) || tiles_efficiency_percent < 75; - blocks_num.x = use_stream_k ? nblocks_stream_k : ntiles_total; + blocks_num.x = use_stream_k ? nblocks_stream_k : ntiles_dst; blocks_num.y = 1; blocks_num.z = 1; - if (ntiles_total % blocks_num.x != 0) { // Fixup is only needed if the SMs work on fractional tiles. + if (ntiles_dst % blocks_num.x != 0) { // Fixup is only needed if the SMs work on fractional tiles. dst_tmp_meta.alloc((size_t(blocks_num.x) * ncols * (2 + DV/2))); } } else { - const int ntiles_KQ = (K->ne[1] + nbatch_fa - 1) / nbatch_fa; // Max. number of parallel blocks limited by tensor size. - // parallel_blocks must not be larger than what the tensor size allows: - parallel_blocks = std::min(parallel_blocks, ntiles_KQ); + parallel_blocks = std::min(parallel_blocks, ntiles_KV); // If ntiles_total % blocks_per_wave != 0 then some efficiency is lost due to tail effects. // Test whether parallel_blocks can be set to a higher value for better efficiency. const int blocks_per_wave = nsm * max_blocks_per_sm; int nwaves_best = 0; int efficiency_percent_best = 0; - for (int parallel_blocks_test = parallel_blocks; parallel_blocks_test <= ntiles_KQ; ++parallel_blocks_test) { - const int nblocks_total = ntiles_total * parallel_blocks_test; + for (int parallel_blocks_test = parallel_blocks; parallel_blocks_test <= ntiles_KV; ++parallel_blocks_test) { + const int nblocks_total = ntiles_dst * parallel_blocks_test; const int nwaves = (nblocks_total + blocks_per_wave - 1) / blocks_per_wave; const int efficiency_percent = 100 * nblocks_total / (nwaves*blocks_per_wave); @@ -1015,7 +1015,7 @@ void launch_fattn( CUDA_CHECK(cudaGetLastError()); if (stream_k) { - if (ntiles_total % blocks_num.x != 0) { // Fixup is only needed if the SMs work on fractional tiles. + if (ntiles_dst % blocks_num.x != 0) { // Fixup is only needed if the SMs work on fractional tiles. const dim3 block_dim_combine(DV, 1, 1); const dim3 blocks_num_combine = {blocks_num.x, ncols1, ncols2}; diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu index ce7a80acde..5a0be4a472 100644 --- a/ggml/src/ggml-cuda/ggml-cuda.cu +++ b/ggml/src/ggml-cuda/ggml-cuda.cu @@ -124,7 +124,10 @@ static cudaError_t ggml_cuda_device_malloc(void ** ptr, size_t size, int device) err = cudaMallocManaged(ptr, size); #if defined(GGML_USE_HIP) if (err == hipSuccess) { - CUDA_CHECK(cudaMemAdvise(*ptr, size, hipMemAdviseSetCoarseGrain, device)); + // hipMemAdviseSetCoarseGrain is an optional performance hint; + // ignore errors (e.g. hipErrorInvalidValue on some APU/iGPU configs). + cudaMemAdvise(*ptr, size, hipMemAdviseSetCoarseGrain, device); + (void)hipGetLastError(); // clear any error } // fall back to cudaMalloc if not supported (e.g. on Windows) @@ -251,11 +254,6 @@ static ggml_cuda_device_info ggml_cuda_init() { info.devices[id].supports_cooperative_launch = false; #endif // !(GGML_USE_MUSA) - // cudaMemGetInfo returns info for the current device - size_t free_mem; - CUDA_CHECK(cudaSetDevice(id)); - CUDA_CHECK(cudaMemGetInfo(&free_mem, NULL)); - #if defined(GGML_USE_HIP) info.devices[id].smpbo = prop.sharedMemPerBlock; @@ -270,25 +268,25 @@ static ggml_cuda_device_info ggml_cuda_init() { info.devices[id].cc += prop.minor * 0x10; } } - GGML_LOG_INFO(" Device %d: %s, %s (0x%x), VMM: %s, Wave Size: %d, VRAM: %zu MiB (%zu MiB free)\n", + GGML_LOG_INFO(" Device %d: %s, %s (0x%x), VMM: %s, Wave Size: %d, VRAM: %zu MiB\n", id, prop.name, prop.gcnArchName, info.devices[id].cc & 0xffff, device_vmm ? "yes" : "no", prop.warpSize, - (size_t)(prop.totalGlobalMem / (1024 * 1024)), free_mem / (1024 * 1024)); + (size_t)(prop.totalGlobalMem / (1024 * 1024))); #elif defined(GGML_USE_MUSA) // FIXME: Ensure compatibility with varying warp sizes across different MUSA archs. info.devices[id].warp_size = 32; info.devices[id].smpbo = prop.sharedMemPerBlockOptin; info.devices[id].cc = GGML_CUDA_CC_OFFSET_MTHREADS + prop.major * 0x100; info.devices[id].cc += prop.minor * 0x10; - GGML_LOG_INFO(" Device %d: %s, compute capability %d.%d, VMM: %s, VRAM: %zu MiB (%zu MiB free)\n", + GGML_LOG_INFO(" Device %d: %s, compute capability %d.%d, VMM: %s, VRAM: %zu MiB\n", id, prop.name, prop.major, prop.minor, device_vmm ? "yes" : "no", - (size_t)(prop.totalGlobalMem / (1024 * 1024)), free_mem / (1024 * 1024)); + (size_t)(prop.totalGlobalMem / (1024 * 1024))); #else info.devices[id].smpbo = prop.sharedMemPerBlockOptin; info.devices[id].cc = 100*prop.major + 10*prop.minor; - GGML_LOG_INFO(" Device %d: %s, compute capability %d.%d, VMM: %s, VRAM: %zu MiB (%zu MiB free)\n", + GGML_LOG_INFO(" Device %d: %s, compute capability %d.%d, VMM: %s, VRAM: %zu MiB\n", id, prop.name, prop.major, prop.minor, device_vmm ? "yes" : "no", - (size_t)(prop.totalGlobalMem / (1024 * 1024)), free_mem / (1024 * 1024)); + (size_t)(prop.totalGlobalMem / (1024 * 1024))); std::string device_name(prop.name); if (device_name == "NVIDIA GeForce MX450") { turing_devices_without_mma.push_back({ id, device_name }); @@ -303,6 +301,7 @@ static ggml_cuda_device_info ggml_cuda_init() { // TODO: Check for future drivers the default scheduling strategy and // remove this call again when cudaDeviceScheduleSpin is default. if (prop.major == 12 && prop.minor == 1) { + CUDA_CHECK(cudaSetDevice(id)); CUDA_CHECK(cudaSetDeviceFlags(cudaDeviceScheduleSpin)); } diff --git a/ggml/src/ggml-sycl/gated_delta_net.cpp b/ggml/src/ggml-sycl/gated_delta_net.cpp index 8c76afbd57..648455c134 100644 --- a/ggml/src/ggml-sycl/gated_delta_net.cpp +++ b/ggml/src/ggml-sycl/gated_delta_net.cpp @@ -55,7 +55,7 @@ void gated_delta_net_sycl(const float * q, #pragma unroll for (int r = 0; r < rows_per_lane; r++) { const int i = r * warp_size + lane; - s_shard[r] = curr_state[i * S_v + col]; + s_shard[r] = curr_state[col * S_v + i]; } for (int t = 0; t < n_tokens; t++) { @@ -137,7 +137,7 @@ void gated_delta_net_sycl(const float * q, #pragma unroll for (int r = 0; r < rows_per_lane; r++) { const int i = r * warp_size + lane; - state[i * S_v + col] = s_shard[r]; + state[col * S_v + i] = s_shard[r]; } } diff --git a/scripts/sync_vendor.py b/scripts/sync_vendor.py index 1a87d73563..4d254afcd6 100755 --- a/scripts/sync_vendor.py +++ b/scripts/sync_vendor.py @@ -5,7 +5,7 @@ import os import sys import subprocess -HTTPLIB_VERSION = "refs/tags/v0.37.2" +HTTPLIB_VERSION = "refs/tags/v0.38.0" vendor = { "https://github.com/nlohmann/json/releases/latest/download/json.hpp": "vendor/nlohmann/json.hpp", diff --git a/tools/server/tests/unit/test_completion.py b/tools/server/tests/unit/test_completion.py index 2a980601ec..61042da55c 100644 --- a/tools/server/tests/unit/test_completion.py +++ b/tools/server/tests/unit/test_completion.py @@ -563,7 +563,7 @@ def test_cancel_request(): except requests.exceptions.ReadTimeout: pass # expected # make sure the slot is free - time.sleep(1) # wait for HTTP_POLLING_SECONDS + time.sleep(2) res = server.make_request("GET", "/slots") assert res.body[0]["is_processing"] == False diff --git a/vendor/cpp-httplib/httplib.cpp b/vendor/cpp-httplib/httplib.cpp index 41e7a361c0..fa0718218e 100644 --- a/vendor/cpp-httplib/httplib.cpp +++ b/vendor/cpp-httplib/httplib.cpp @@ -1025,6 +1025,30 @@ bool is_valid_path(const std::string &path) { return true; } +bool canonicalize_path(const char *path, std::string &resolved) { +#if defined(_WIN32) + char buf[_MAX_PATH]; + if (_fullpath(buf, path, _MAX_PATH) == nullptr) { return false; } + resolved = buf; +#else + char buf[PATH_MAX]; + if (realpath(path, buf) == nullptr) { return false; } + resolved = buf; +#endif + return true; +} + +bool is_path_within_base(const std::string &resolved_path, + const std::string &resolved_base) { +#if defined(_WIN32) + return _strnicmp(resolved_path.c_str(), resolved_base.c_str(), + resolved_base.size()) == 0; +#else + return strncmp(resolved_path.c_str(), resolved_base.c_str(), + resolved_base.size()) == 0; +#endif +} + FileStat::FileStat(const std::string &path) { #if defined(_WIN32) auto wpath = u8string_to_wstring(path.c_str()); @@ -2627,33 +2651,114 @@ bool can_compress_content_type(const std::string &content_type) { } } +bool parse_quality(const char *b, const char *e, std::string &token, + double &quality) { + quality = 1.0; + token.clear(); + + // Split on first ';': left = token name, right = parameters + const char *params_b = nullptr; + std::size_t params_len = 0; + + divide( + b, static_cast(e - b), ';', + [&](const char *lb, std::size_t llen, const char *rb, std::size_t rlen) { + auto r = trim(lb, lb + llen, 0, llen); + if (r.first < r.second) { token.assign(lb + r.first, lb + r.second); } + params_b = rb; + params_len = rlen; + }); + + if (token.empty()) { return false; } + if (params_len == 0) { return true; } + + // Scan parameters for q= (stops on first match) + bool invalid = false; + split_find(params_b, params_b + params_len, ';', + (std::numeric_limits::max)(), + [&](const char *pb, const char *pe) -> bool { + // Match exactly "q=" or "Q=" (not "query=" etc.) + auto len = static_cast(pe - pb); + if (len < 2) { return false; } + if ((pb[0] != 'q' && pb[0] != 'Q') || pb[1] != '=') { + return false; + } + + // Trim the value portion + auto r = trim(pb, pe, 2, len); + if (r.first >= r.second) { + invalid = true; + return true; + } + + double v = 0.0; + auto res = from_chars(pb + r.first, pb + r.second, v); + if (res.ec != std::errc{} || v < 0.0 || v > 1.0) { + invalid = true; + return true; + } + quality = v; + return true; + }); + + return !invalid; +} + EncodingType encoding_type(const Request &req, const Response &res) { - auto ret = - detail::can_compress_content_type(res.get_header_value("Content-Type")); - if (!ret) { return EncodingType::None; } + if (!can_compress_content_type(res.get_header_value("Content-Type"))) { + return EncodingType::None; + } const auto &s = req.get_header_value("Accept-Encoding"); - (void)(s); + if (s.empty()) { return EncodingType::None; } + // Single-pass: iterate tokens and track the best supported encoding. + // Server preference breaks ties (br > gzip > zstd). + EncodingType best = EncodingType::None; + double best_q = 0.0; // q=0 means "not acceptable" + + // Server preference: Brotli > Gzip > Zstd (lower = more preferred) + auto priority = [](EncodingType t) -> int { + switch (t) { + case EncodingType::Brotli: return 0; + case EncodingType::Gzip: return 1; + case EncodingType::Zstd: return 2; + default: return 3; + } + }; + + std::string name; + split(s.data(), s.data() + s.size(), ',', [&](const char *b, const char *e) { + double quality = 1.0; + if (!parse_quality(b, e, name, quality)) { return; } + if (quality <= 0.0) { return; } + + EncodingType type = EncodingType::None; #ifdef CPPHTTPLIB_BROTLI_SUPPORT - // TODO: 'Accept-Encoding' has br, not br;q=0 - ret = s.find("br") != std::string::npos; - if (ret) { return EncodingType::Brotli; } + if (case_ignore::equal(name, "br")) { type = EncodingType::Brotli; } #endif - #ifdef CPPHTTPLIB_ZLIB_SUPPORT - // TODO: 'Accept-Encoding' has gzip, not gzip;q=0 - ret = s.find("gzip") != std::string::npos; - if (ret) { return EncodingType::Gzip; } + if (type == EncodingType::None && case_ignore::equal(name, "gzip")) { + type = EncodingType::Gzip; + } #endif - #ifdef CPPHTTPLIB_ZSTD_SUPPORT - // TODO: 'Accept-Encoding' has zstd, not zstd;q=0 - ret = s.find("zstd") != std::string::npos; - if (ret) { return EncodingType::Zstd; } + if (type == EncodingType::None && case_ignore::equal(name, "zstd")) { + type = EncodingType::Zstd; + } #endif - return EncodingType::None; + if (type == EncodingType::None) { return; } + + // Higher q-value wins; for equal q, server preference breaks ties + if (quality > best_q || + (quality == best_q && priority(type) < priority(best))) { + best_q = quality; + best = type; + } + }); + + return best; } bool nocompressor::compress(const char *data, size_t data_length, @@ -2937,6 +3042,21 @@ create_decompressor(const std::string &encoding) { return decompressor; } +// Returns the best available compressor and its Content-Encoding name. +// Priority: Brotli > Gzip > Zstd (matches server-side preference). +std::pair, const char *> +create_compressor() { +#ifdef CPPHTTPLIB_BROTLI_SUPPORT + return {detail::make_unique(), "br"}; +#elif defined(CPPHTTPLIB_ZLIB_SUPPORT) + return {detail::make_unique(), "gzip"}; +#elif defined(CPPHTTPLIB_ZSTD_SUPPORT) + return {detail::make_unique(), "zstd"}; +#else + return {nullptr, nullptr}; +#endif +} + bool is_prohibited_header_name(const std::string &name) { using udl::operator""_t; @@ -3769,7 +3889,7 @@ bool parse_accept_header(const std::string &s, struct AcceptEntry { std::string media_type; double quality; - int order; // Original order in header + int order; }; std::vector entries; @@ -3787,48 +3907,12 @@ bool parse_accept_header(const std::string &s, } AcceptEntry accept_entry; - accept_entry.quality = 1.0; // Default quality accept_entry.order = order++; - // Find q= parameter - auto q_pos = entry.find(";q="); - if (q_pos == std::string::npos) { q_pos = entry.find("; q="); } - - if (q_pos != std::string::npos) { - // Extract media type (before q parameter) - accept_entry.media_type = trim_copy(entry.substr(0, q_pos)); - - // Extract quality value - auto q_start = entry.find('=', q_pos) + 1; - auto q_end = entry.find(';', q_start); - if (q_end == std::string::npos) { q_end = entry.length(); } - - std::string quality_str = - trim_copy(entry.substr(q_start, q_end - q_start)); - if (quality_str.empty()) { - has_invalid_entry = true; - return; - } - - { - double v = 0.0; - auto res = detail::from_chars( - quality_str.data(), quality_str.data() + quality_str.size(), v); - if (res.ec == std::errc{}) { - accept_entry.quality = v; - } else { - has_invalid_entry = true; - return; - } - } - // Check if quality is in valid range [0.0, 1.0] - if (accept_entry.quality < 0.0 || accept_entry.quality > 1.0) { - has_invalid_entry = true; - return; - } - } else { - // No quality parameter, use entire entry as media type - accept_entry.media_type = entry; + if (!parse_quality(entry.data(), entry.data() + entry.size(), + accept_entry.media_type, accept_entry.quality)) { + has_invalid_entry = true; + return; } // Remove additional parameters from media type @@ -5481,7 +5565,8 @@ std::string decode_path_component(const std::string &component) { // Unicode %uXXXX encoding auto val = 0; if (detail::from_hex_to_i(component, i + 2, 4, val)) { - // 4 digits Unicode codes + // 4 digits Unicode codes: val is 0x0000-0xFFFF (from 4 hex digits), + // so to_utf8 writes at most 3 bytes. buff[4] is safe. char buff[4]; size_t len = detail::to_utf8(val, buff); if (len > 0) { result.append(buff, len); } @@ -5586,6 +5671,30 @@ std::string decode_query_component(const std::string &component, return result; } +std::string sanitize_filename(const std::string &filename) { + // Extract basename: find the last path separator (/ or \) + auto pos = filename.find_last_of("/\\"); + auto result = + (pos != std::string::npos) ? filename.substr(pos + 1) : filename; + + // Strip null bytes + result.erase(std::remove(result.begin(), result.end(), '\0'), result.end()); + + // Trim whitespace + { + auto start = result.find_first_not_of(" \t"); + auto end = result.find_last_not_of(" \t"); + result = (start == std::string::npos) + ? "" + : result.substr(start, end - start + 1); + } + + // Reject . and .. + if (result == "." || result == "..") { return ""; } + + return result; +} + std::string append_query_params(const std::string &path, const Params ¶ms) { std::string path_with_query = path; @@ -6714,7 +6823,18 @@ bool Server::set_mount_point(const std::string &mount_point, if (stat.is_dir()) { std::string mnt = !mount_point.empty() ? mount_point : "/"; if (!mnt.empty() && mnt[0] == '/') { - base_dirs_.push_back({std::move(mnt), dir, std::move(headers)}); + std::string resolved_base; + if (detail::canonicalize_path(dir.c_str(), resolved_base)) { +#if defined(_WIN32) + if (resolved_base.back() != '\\' && resolved_base.back() != '/') { + resolved_base += '\\'; + } +#else + if (resolved_base.back() != '/') { resolved_base += '/'; } +#endif + } + base_dirs_.push_back( + {std::move(mnt), dir, std::move(resolved_base), std::move(headers)}); return true; } } @@ -6874,6 +6994,20 @@ Server &Server::set_payload_max_length(size_t length) { return *this; } +Server &Server::set_websocket_ping_interval(time_t sec) { + websocket_ping_interval_sec_ = sec; + return *this; +} + +template +Server &Server::set_websocket_ping_interval( + const std::chrono::duration &duration) { + detail::duration_to_sec_and_usec(duration, [&](time_t sec, time_t /*usec*/) { + set_websocket_ping_interval(sec); + }); + return *this; +} + bool Server::bind_to_port(const std::string &host, int port, int socket_flags) { auto ret = bind_internal(host, port, socket_flags); @@ -7294,6 +7428,18 @@ bool Server::handle_file_request(Request &req, Response &res) { auto path = entry.base_dir + sub_path; if (path.back() == '/') { path += "index.html"; } + // Defense-in-depth: is_valid_path blocks ".." traversal in the URL, + // but symlinks/junctions can still escape the base directory. + if (!entry.resolved_base_dir.empty()) { + std::string resolved_path; + if (detail::canonicalize_path(path.c_str(), resolved_path) && + !detail::is_path_within_base(resolved_path, + entry.resolved_base_dir)) { + res.status = StatusCode::Forbidden_403; + return true; + } + } + detail::FileStat stat(path); if (stat.is_dir()) { @@ -8012,7 +8158,7 @@ Server::process_request(Stream &strm, const std::string &remote_addr, { // Use WebSocket-specific read timeout instead of HTTP timeout strm.set_read_timeout(CPPHTTPLIB_WEBSOCKET_READ_TIMEOUT_SECOND, 0); - ws::WebSocket ws(strm, req, true); + ws::WebSocket ws(strm, req, true, websocket_ping_interval_sec_); entry.handler(req, ws); } return true; @@ -8256,6 +8402,13 @@ bool ClientImpl::ensure_socket_connection(Socket &socket, Error &error) { return create_and_connect_socket(socket, error); } +bool ClientImpl::setup_proxy_connection( + Socket & /*socket*/, + std::chrono::time_point /*start_time*/, + Response & /*res*/, bool & /*success*/, Error & /*error*/) { + return true; +} + void ClientImpl::shutdown_ssl(Socket & /*socket*/, bool /*shutdown_gracefully*/) { // If there are any requests in flight from threads other than us, then it's @@ -8377,27 +8530,14 @@ bool ClientImpl::send_(Request &req, Response &res, Error &error) { return false; } -#ifdef CPPHTTPLIB_SSL_ENABLED - // TODO: refactoring - if (is_ssl()) { - auto &scli = static_cast(*this); - if (!proxy_host_.empty() && proxy_port_ != -1) { - auto success = false; - if (!scli.connect_with_proxy(socket_, req.start_time_, res, success, - error)) { - if (!success) { output_error_log(error, &req); } - return success; - } - } - - if (!proxy_host_.empty() && proxy_port_ != -1) { - if (!scli.initialize_ssl(socket_, error)) { - output_error_log(error, &req); - return false; - } + { + auto success = true; + if (!setup_proxy_connection(socket_, req.start_time_, res, success, + error)) { + if (!success) { output_error_log(error, &req); } + return success; } } -#endif } // Mark the current socket as being in use so that it cannot be closed by @@ -8558,17 +8698,15 @@ ClientImpl::open_stream(const std::string &method, const std::string &path, return handle; } -#ifdef CPPHTTPLIB_SSL_ENABLED - if (is_ssl()) { - auto &scli = static_cast(*this); - if (!proxy_host_.empty() && proxy_port_ != -1) { - if (!scli.initialize_ssl(socket_, handle.error)) { - handle.response.reset(); - return handle; - } + { + auto success = true; + auto start_time = std::chrono::steady_clock::now(); + if (!setup_proxy_connection(socket_, start_time, *handle.response, + success, handle.error)) { + if (!success) { handle.response.reset(); } + return handle; } } -#endif } transfer_socket_ownership_to_handle(handle); @@ -8847,7 +8985,7 @@ bool ClientImpl::handle_request(Stream &strm, Request &req, if (res.get_header_value("Connection") == "close" || (res.version == "HTTP/1.0" && res.reason != "Connection established")) { - // TODO this requires a not-entirely-obvious chain of calls to be correct + // NOTE: this requires a not-entirely-obvious chain of calls to be correct // for this to be safe. // This is safe to call because handle_request is only called by send_ @@ -9086,14 +9224,9 @@ bool ClientImpl::write_content_with_provider(Stream &strm, auto is_shutting_down = []() { return false; }; if (req.is_chunked_content_provider_) { - // TODO: Brotli support - std::unique_ptr compressor; -#ifdef CPPHTTPLIB_ZLIB_SUPPORT - if (compress_) { - compressor = detail::make_unique(); - } else -#endif - { + auto compressor = compress_ ? detail::create_compressor().first + : std::unique_ptr(); + if (!compressor) { compressor = detail::make_unique(); } @@ -9324,14 +9457,15 @@ ClientImpl::send_with_content_provider_and_receiver( Error &error) { if (!content_type.empty()) { req.set_header("Content-Type", content_type); } -#ifdef CPPHTTPLIB_ZLIB_SUPPORT - if (compress_) { req.set_header("Content-Encoding", "gzip"); } -#endif + auto enc = compress_ + ? detail::create_compressor() + : std::pair, const char *>( + nullptr, nullptr); -#ifdef CPPHTTPLIB_ZLIB_SUPPORT - if (compress_ && !content_provider_without_length) { - // TODO: Brotli support - detail::gzip_compressor compressor; + if (enc.second) { req.set_header("Content-Encoding", enc.second); } + + if (enc.first && !content_provider_without_length) { + auto &compressor = enc.first; if (content_provider) { auto ok = true; @@ -9342,7 +9476,7 @@ ClientImpl::send_with_content_provider_and_receiver( if (ok) { auto last = offset + data_len == content_length; - auto ret = compressor.compress( + auto ret = compressor->compress( data, data_len, last, [&](const char *compressed_data, size_t compressed_data_len) { req.body.append(compressed_data, compressed_data_len); @@ -9366,19 +9500,17 @@ ClientImpl::send_with_content_provider_and_receiver( } } } else { - if (!compressor.compress(body, content_length, true, - [&](const char *data, size_t data_len) { - req.body.append(data, data_len); - return true; - })) { + if (!compressor->compress(body, content_length, true, + [&](const char *data, size_t data_len) { + req.body.append(data, data_len); + return true; + })) { error = Error::Compression; output_error_log(error, &req); return nullptr; } } - } else -#endif - { + } else { if (content_provider) { req.content_length_ = content_length; req.content_provider_ = std::move(content_provider); @@ -11545,6 +11677,24 @@ bool SSLClient::create_and_connect_socket(Socket &socket, Error &error) { return ClientImpl::create_and_connect_socket(socket, error); } +bool SSLClient::setup_proxy_connection( + Socket &socket, + std::chrono::time_point start_time, + Response &res, bool &success, Error &error) { + if (proxy_host_.empty() || proxy_port_ == -1) { return true; } + + if (!connect_with_proxy(socket, start_time, res, success, error)) { + return false; + } + + if (!initialize_ssl(socket, error)) { + success = false; + return false; + } + + return true; +} + // Assumes that socket_mutex_ is locked and that there are no requests in // flight bool SSLClient::connect_with_proxy( @@ -16061,11 +16211,11 @@ WebSocket::~WebSocket() { } void WebSocket::start_heartbeat() { + if (ping_interval_sec_ == 0) { return; } ping_thread_ = std::thread([this]() { std::unique_lock lock(ping_mutex_); while (!closed_) { - ping_cv_.wait_for(lock, std::chrono::seconds( - CPPHTTPLIB_WEBSOCKET_PING_INTERVAL_SECOND)); + ping_cv_.wait_for(lock, std::chrono::seconds(ping_interval_sec_)); if (closed_) { break; } lock.unlock(); if (!send_frame(Opcode::Ping, nullptr, 0)) { @@ -16203,7 +16353,8 @@ bool WebSocketClient::connect() { Request req; req.method = "GET"; req.path = path_; - ws_ = std::unique_ptr(new WebSocket(std::move(strm), req, false)); + ws_ = std::unique_ptr( + new WebSocket(std::move(strm), req, false, websocket_ping_interval_sec_)); return true; } @@ -16243,6 +16394,10 @@ void WebSocketClient::set_write_timeout(time_t sec, time_t usec) { write_timeout_usec_ = usec; } +void WebSocketClient::set_websocket_ping_interval(time_t sec) { + websocket_ping_interval_sec_ = sec; +} + #ifdef CPPHTTPLIB_SSL_ENABLED void WebSocketClient::set_ca_cert_path(const std::string &path) { diff --git a/vendor/cpp-httplib/httplib.h b/vendor/cpp-httplib/httplib.h index cdde8014d9..6ec949ac51 100644 --- a/vendor/cpp-httplib/httplib.h +++ b/vendor/cpp-httplib/httplib.h @@ -8,8 +8,8 @@ #ifndef CPPHTTPLIB_HTTPLIB_H #define CPPHTTPLIB_HTTPLIB_H -#define CPPHTTPLIB_VERSION "0.37.2" -#define CPPHTTPLIB_VERSION_NUM "0x002502" +#define CPPHTTPLIB_VERSION "0.38.0" +#define CPPHTTPLIB_VERSION_NUM "0x002600" #ifdef _WIN32 #if defined(_WIN32_WINNT) && _WIN32_WINNT < 0x0A00 @@ -1666,6 +1666,11 @@ public: Server &set_payload_max_length(size_t length); + Server &set_websocket_ping_interval(time_t sec); + template + Server &set_websocket_ping_interval( + const std::chrono::duration &duration); + bool bind_to_port(const std::string &host, int port, int socket_flags = 0); int bind_to_any_port(const std::string &host, int socket_flags = 0); bool listen_after_bind(); @@ -1700,6 +1705,8 @@ protected: time_t idle_interval_sec_ = CPPHTTPLIB_IDLE_INTERVAL_SECOND; time_t idle_interval_usec_ = CPPHTTPLIB_IDLE_INTERVAL_USECOND; size_t payload_max_length_ = CPPHTTPLIB_PAYLOAD_MAX_LENGTH; + time_t websocket_ping_interval_sec_ = + CPPHTTPLIB_WEBSOCKET_PING_INTERVAL_SECOND; private: using Handlers = @@ -1769,6 +1776,7 @@ private: struct MountPointEntry { std::string mount_point; std::string base_dir; + std::string resolved_base_dir; Headers headers; }; std::vector base_dirs_; @@ -2186,6 +2194,10 @@ protected: virtual bool create_and_connect_socket(Socket &socket, Error &error); virtual bool ensure_socket_connection(Socket &socket, Error &error); + virtual bool setup_proxy_connection( + Socket &socket, + std::chrono::time_point start_time, + Response &res, bool &success, Error &error); // All of: // shutdown_ssl @@ -2712,6 +2724,10 @@ private: std::function callback) override; bool is_ssl() const override; + bool setup_proxy_connection( + Socket &socket, + std::chrono::time_point start_time, + Response &res, bool &success, Error &error) override; bool connect_with_proxy( Socket &sock, std::chrono::time_point start_time, @@ -2911,6 +2927,8 @@ std::string encode_query_component(const std::string &component, std::string decode_query_component(const std::string &component, bool plus_as_space = true); +std::string sanitize_filename(const std::string &filename); + std::string append_query_params(const std::string &path, const Params ¶ms); std::pair make_range_header(const Ranges &ranges); @@ -3714,15 +3732,19 @@ private: friend class httplib::Server; friend class WebSocketClient; - WebSocket(Stream &strm, const Request &req, bool is_server) - : strm_(strm), req_(req), is_server_(is_server) { + WebSocket( + Stream &strm, const Request &req, bool is_server, + time_t ping_interval_sec = CPPHTTPLIB_WEBSOCKET_PING_INTERVAL_SECOND) + : strm_(strm), req_(req), is_server_(is_server), + ping_interval_sec_(ping_interval_sec) { start_heartbeat(); } - WebSocket(std::unique_ptr &&owned_strm, const Request &req, - bool is_server) + WebSocket( + std::unique_ptr &&owned_strm, const Request &req, bool is_server, + time_t ping_interval_sec = CPPHTTPLIB_WEBSOCKET_PING_INTERVAL_SECOND) : strm_(*owned_strm), owned_strm_(std::move(owned_strm)), req_(req), - is_server_(is_server) { + is_server_(is_server), ping_interval_sec_(ping_interval_sec) { start_heartbeat(); } @@ -3733,6 +3755,7 @@ private: std::unique_ptr owned_strm_; Request req_; bool is_server_; + time_t ping_interval_sec_; std::atomic closed_{false}; std::mutex write_mutex_; std::thread ping_thread_; @@ -3761,6 +3784,7 @@ public: const std::string &subprotocol() const; void set_read_timeout(time_t sec, time_t usec = 0); void set_write_timeout(time_t sec, time_t usec = 0); + void set_websocket_ping_interval(time_t sec); #ifdef CPPHTTPLIB_SSL_ENABLED void set_ca_cert_path(const std::string &path); @@ -3784,6 +3808,8 @@ private: time_t read_timeout_usec_ = 0; time_t write_timeout_sec_ = CPPHTTPLIB_CLIENT_WRITE_TIMEOUT_SECOND; time_t write_timeout_usec_ = CPPHTTPLIB_CLIENT_WRITE_TIMEOUT_USECOND; + time_t websocket_ping_interval_sec_ = + CPPHTTPLIB_WEBSOCKET_PING_INTERVAL_SECOND; #ifdef CPPHTTPLIB_SSL_ENABLED bool is_ssl_ = false;