From 9cd4ebcfb1ceb3b4af6276089252dc27690e98ba Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Sun, 15 Mar 2026 15:11:17 +0200 Subject: [PATCH] ci : split build.yml + server.yml (#20546) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * ci : split build.yml * cont : split server.yml * cont : reduce paths * cont : split build-android.yml + update paths * ci : make msys workflows manual (#20588) * ci : make cross-build workflows manual (#20585) * cont : fix release paths Co-authored-by: Sigbjørn Skjæret --------- Co-authored-by: Sigbjørn Skjæret --- .github/workflows/build-3rd-party.yml | 57 + .github/workflows/build-android.yml | 140 +++ .github/workflows/build-apple.yml | 214 ++++ .github/workflows/build-cache.yml | 42 +- .github/workflows/build-cann.yml | 102 ++ ...{build-linux-cross.yml => build-cross.yml} | 35 +- .github/workflows/build-msys.yml | 72 ++ .github/workflows/build-riscv.yml | 136 +++ .github/workflows/build-sanitize.yml | 87 ++ .github/workflows/build-vulkan.yml | 96 ++ .github/workflows/build.yml | 997 ++---------------- .github/workflows/python-lint.yml | 10 +- .github/workflows/release.yml | 37 +- .github/workflows/server-sanitize.yml | 105 ++ .github/workflows/server-self-hosted.yml | 14 +- .github/workflows/server-webui.yml | 15 +- .github/workflows/server.yml | 46 +- 17 files changed, 1248 insertions(+), 957 deletions(-) create mode 100644 .github/workflows/build-3rd-party.yml create mode 100644 .github/workflows/build-android.yml create mode 100644 .github/workflows/build-apple.yml create mode 100644 .github/workflows/build-cann.yml rename .github/workflows/{build-linux-cross.yml => build-cross.yml} (93%) create mode 100644 .github/workflows/build-msys.yml create mode 100644 .github/workflows/build-riscv.yml create mode 100644 .github/workflows/build-sanitize.yml create mode 100644 .github/workflows/build-vulkan.yml create mode 100644 .github/workflows/server-sanitize.yml diff --git a/.github/workflows/build-3rd-party.yml b/.github/workflows/build-3rd-party.yml new file mode 100644 index 0000000000..642d978644 --- /dev/null +++ b/.github/workflows/build-3rd-party.yml @@ -0,0 +1,57 @@ +name: CI (3rd-party) + +on: + workflow_dispatch: # allows manual triggering + push: + branches: + - master + paths: [ + '.github/workflows/build-3rd-party.yml', + '**/CMakeLists.txt', + '**/.cmake', + '**/*.h', + '**/*.hpp', + '**/*.c', + '**/*.cpp' + ] + +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }} + cancel-in-progress: true + +env: + GGML_NLOOP: 3 + GGML_N_THREADS: 1 + LLAMA_LOG_COLORS: 1 + LLAMA_LOG_PREFIX: 1 + LLAMA_LOG_TIMESTAMPS: 1 + +jobs: + ubuntu-24-llguidance: + runs-on: ${{ 'ubuntu-24.04-arm' || 'ubuntu-24.04' }} + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v6 + + - name: Dependencies + id: depends + run: | + sudo apt-get update + sudo apt-get install build-essential libssl-dev + + - name: Build + id: cmake_build + run: | + cmake -B build \ + -DLLAMA_FATAL_WARNINGS=ON \ + -DLLAMA_LLGUIDANCE=ON + cmake --build build --config Release -j $(nproc) + + - name: Test + id: cmake_test + run: | + cd build + ctest -L main --verbose --timeout 900 + diff --git a/.github/workflows/build-android.yml b/.github/workflows/build-android.yml new file mode 100644 index 0000000000..cd9d99ffab --- /dev/null +++ b/.github/workflows/build-android.yml @@ -0,0 +1,140 @@ +name: CI (android) + +on: + workflow_dispatch: # allows manual triggering + push: + branches: + - master + paths: [ + '.github/workflows/build-android.yml', + '**/CMakeLists.txt', + '**/.cmake', + '**/*.h', + '**/*.hpp', + '**/*.c', + '**/*.cpp' + ] + + pull_request: + types: [opened, synchronize, reopened] + paths: [ + '.github/workflows/build-android.yml', + 'examples/llama.android/**' + ] + +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }} + cancel-in-progress: true + +env: + GGML_NLOOP: 3 + GGML_N_THREADS: 1 + LLAMA_LOG_COLORS: 1 + LLAMA_LOG_PREFIX: 1 + LLAMA_LOG_TIMESTAMPS: 1 + +jobs: + android: + runs-on: ubuntu-latest + + steps: + - name: Clone + uses: actions/checkout@v6 + + # Disabled due to size (400MB) and always 0 cache hits + # - name: ccache + # uses: ggml-org/ccache-action@v1.2.16 + # with: + # key: android-build + # evict-old-files: 1d + + - name: Set up JDK + uses: actions/setup-java@v5 + with: + java-version: 17 + distribution: zulu + + - name: Setup Android SDK + uses: android-actions/setup-android@v3 + with: + log-accepted-android-sdk-licenses: false + + - name: Build + run: | + cd examples/llama.android + ./gradlew build --no-daemon + + android-ndk: + runs-on: ubuntu-latest + + env: + OPENCL_VERSION: 2025.07.22 + + strategy: + matrix: + include: + - build: 'arm64-cpu' + defines: '-D ANDROID_ABI=arm64-v8a -D ANDROID_PLATFORM=android-31 -D CMAKE_TOOLCHAIN_FILE=${ANDROID_NDK_ROOT}/build/cmake/android.toolchain.cmake -D GGML_NATIVE=OFF -DGGML_CPU_ARM_ARCH=armv8.5-a+fp16+i8mm -G Ninja -D LLAMA_OPENSSL=OFF -D GGML_OPENMP=OFF' + - build: 'arm64-snapdragon' + defines: '--preset arm64-android-snapdragon-release' + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v6 + + - name: Install OpenCL Headers and Libs + id: install_opencl + if: ${{ matrix.build == 'arm64-snapdragon' }} + run: | + mkdir opencl + curl -L -o opencl/clhpp.tar.gz https://github.com/KhronosGroup/OpenCL-CLHPP/archive/refs/tags/v${OPENCL_VERSION}.tar.gz + curl -L -o opencl/headers.tar.gz https://github.com/KhronosGroup/OpenCL-Headers/archive/refs/tags/v${OPENCL_VERSION}.tar.gz + curl -L -o opencl/icd-loader.tar.gz https://github.com/KhronosGroup/OpenCL-ICD-Loader/archive/refs/tags/v${OPENCL_VERSION}.tar.gz + tar -xaf opencl/headers.tar.gz -C opencl + tar -xaf opencl/clhpp.tar.gz -C opencl + tar -xaf opencl/icd-loader.tar.gz -C opencl + sudo cp -r opencl/OpenCL-Headers-${OPENCL_VERSION}/CL ${ANDROID_NDK_ROOT}/toolchains/llvm/prebuilt/linux-x86_64/sysroot/usr/include + sudo cp -r opencl/OpenCL-CLHPP-${OPENCL_VERSION}/include/CL/* ${ANDROID_NDK_ROOT}/toolchains/llvm/prebuilt/linux-x86_64/sysroot/usr/include/CL + cd opencl/OpenCL-ICD-Loader-${OPENCL_VERSION} + cmake -B build -G Ninja -DCMAKE_BUILD_TYPE=Release -DCMAKE_TOOLCHAIN_FILE=${ANDROID_NDK_ROOT}/build/cmake/android.toolchain.cmake -DOPENCL_ICD_LOADER_HEADERS_DIR=${ANDROID_NDK_ROOT}/toolchains/llvm/prebuilt/linux-x86_64/sysroot/usr/include -DANDROID_ABI=arm64-v8a -DANDROID_PLATFORM=31 -DANDROID_STL=c++_shared + cmake --build build + sudo cp build/libOpenCL.so ${ANDROID_NDK_ROOT}/toolchains/llvm/prebuilt/linux-x86_64/sysroot/usr/lib/aarch64-linux-android + rm -rf opencl + + - name: Install Hexagon SDK + id: install_hexsdk + if: ${{ matrix.build == 'arm64-snapdragon' }} + env: + HEXSDK_VER: 6.4.0.2 + HEXTLS_VER: 19.0.04 + run: | + curl -L -o hex-sdk.tar.gz https://github.com/snapdragon-toolchain/hexagon-sdk/releases/download/v$HEXSDK_VER/hexagon-sdk-v$HEXSDK_VER-amd64-lnx.tar.xz + mkdir hex-sdk + tar -xaf hex-sdk.tar.gz -C hex-sdk + ls -l hex-sdk + sudo mv hex-sdk /opt/hexagon + echo "HEXAGON_SDK_ROOT=/opt/hexagon/$HEXSDK_VER" >> "$GITHUB_ENV" + echo "HEXAGON_TOOLS_ROOT=/opt/hexagon/$HEXSDK_VER/tools/HEXAGON_Tools/$HEXTLS_VER" >> "$GITHUB_ENV" + echo "DEFAULT_HLOS_ARCH=64" >> "$GITHUB_ENV" + echo "DEFAULT_TOOLS_VARIANT=toolv19" >> "$GITHUB_ENV" + echo "DEFAULT_NO_QURT_INC=0" >> "$GITHUB_ENV" + echo "DEFAULT_DSP_ARCH=v73" >> "$GITHUB_ENV" + + - name: Update CMake presets + id: update_presets + if: ${{ matrix.build == 'arm64-snapdragon' }} + run: | + cp docs/backend/snapdragon/CMakeUserPresets.json . + + - name: Build + id: ndk_build + run: | + cmake ${{ matrix.defines }} -B build + cmake --build build + cmake --install build --prefix pkg-adb/llama.cpp + + - name: Test + id: cmake_test + run: | + echo "FIXME: test on devices" diff --git a/.github/workflows/build-apple.yml b/.github/workflows/build-apple.yml new file mode 100644 index 0000000000..51f0ef2302 --- /dev/null +++ b/.github/workflows/build-apple.yml @@ -0,0 +1,214 @@ +name: CI (apple) + +on: + workflow_dispatch: # allows manual triggering + push: + branches: + - master + paths: [ + '.github/workflows/build-apple.yml', + '**/CMakeLists.txt', + '**/.cmake', + '**/*.h', + '**/*.hpp', + '**/*.c', + '**/*.cpp', + '**/*.swift', + '**/*.m', + '**/*.metal' + ] + + pull_request: + types: [opened, synchronize, reopened] + paths: [ + '.github/workflows/build-apple.yml', + 'ggml/src/ggml-metal/**' + ] + +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }} + cancel-in-progress: true + +env: + GGML_NLOOP: 3 + GGML_N_THREADS: 1 + LLAMA_LOG_COLORS: 1 + LLAMA_LOG_PREFIX: 1 + LLAMA_LOG_TIMESTAMPS: 1 + +jobs: + macOS-latest-ios: + runs-on: macos-latest + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v6 + + - name: ccache + uses: ggml-org/ccache-action@v1.2.16 + with: + key: macOS-latest-ios + evict-old-files: 1d + save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} + + - name: Build + id: cmake_build + run: | + sysctl -a + cmake -B build -G Xcode \ + -DGGML_METAL_USE_BF16=ON \ + -DGGML_METAL_EMBED_LIBRARY=ON \ + -DLLAMA_BUILD_COMMON=OFF \ + -DLLAMA_BUILD_EXAMPLES=OFF \ + -DLLAMA_BUILD_TOOLS=OFF \ + -DLLAMA_BUILD_TESTS=OFF \ + -DLLAMA_BUILD_SERVER=OFF \ + -DCMAKE_SYSTEM_NAME=iOS \ + -DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 \ + -DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml + cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO + + macos-latest-ios-xcode: + runs-on: macos-latest + + steps: + - name: Checkout code + uses: actions/checkout@v6 + + - name: Setup Xcode + uses: ggml-org/setup-xcode@v1 + with: + xcode-version: latest-stable + + - name: Build + id: cmake_build + run: | + sysctl -a + cmake -B build -G Xcode \ + -DGGML_METAL_USE_BF16=ON \ + -DGGML_METAL_EMBED_LIBRARY=ON \ + -DLLAMA_OPENSSL=OFF \ + -DLLAMA_BUILD_EXAMPLES=OFF \ + -DLLAMA_BUILD_TOOLS=OFF \ + -DLLAMA_BUILD_TESTS=OFF \ + -DLLAMA_BUILD_SERVER=OFF \ + -DCMAKE_SYSTEM_NAME=iOS \ + -DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 \ + -DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml + cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO + + - name: xcodebuild for swift package + id: xcodebuild + run: | + ./build-xcframework.sh + + - name: Upload xcframework artifact + uses: actions/upload-artifact@v6 + with: + name: llama-xcframework + path: build-apple/llama.xcframework/ + retention-days: 1 + + - name: Build Xcode project + run: | + xcodebuild -downloadPlatform iOS + xcodebuild -project examples/llama.swiftui/llama.swiftui.xcodeproj -scheme llama.swiftui -sdk iphoneos CODE_SIGNING_REQUIRED=NO CODE_SIGN_IDENTITY= -destination 'generic/platform=iOS' FRAMEWORK_FOLDER_PATH=./build-ios build + + macOS-latest-tvos: + runs-on: macos-latest + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v6 + + - name: ccache + uses: ggml-org/ccache-action@v1.2.16 + with: + key: macOS-latest-tvos + evict-old-files: 1d + save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} + + - name: Build + id: cmake_build + run: | + sysctl -a + cmake -B build -G Xcode \ + -DGGML_METAL_USE_BF16=ON \ + -DGGML_METAL_EMBED_LIBRARY=ON \ + -DLLAMA_BUILD_COMMON=OFF \ + -DLLAMA_BUILD_EXAMPLES=OFF \ + -DLLAMA_BUILD_TOOLS=OFF \ + -DLLAMA_BUILD_TESTS=OFF \ + -DLLAMA_BUILD_SERVER=OFF \ + -DCMAKE_SYSTEM_NAME=tvOS \ + -DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 \ + -DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml + cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO + + macOS-latest-visionos: + runs-on: macos-latest + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v6 + + - name: Build + id: cmake_build + run: | + sysctl -a + cmake -B build -G Xcode \ + -DGGML_METAL_USE_BF16=ON \ + -DGGML_METAL_EMBED_LIBRARY=ON \ + -DLLAMA_BUILD_COMMON=OFF \ + -DLLAMA_BUILD_EXAMPLES=OFF \ + -DLLAMA_BUILD_TOOLS=OFF \ + -DLLAMA_BUILD_TESTS=OFF \ + -DLLAMA_BUILD_SERVER=OFF \ + -DCMAKE_SYSTEM_NAME=visionOS \ + -DCMAKE_OSX_DEPLOYMENT_TARGET=1.0 \ + -DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml + cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO + + macOS-latest-swift: + runs-on: macos-latest + needs: macos-latest-ios-xcode + + strategy: + matrix: + destination: ['generic/platform=macOS', 'generic/platform=iOS', 'generic/platform=tvOS'] + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v6 + + - name: ccache + uses: ggml-org/ccache-action@v1.2.16 + with: + key: macOS-latest-swift + evict-old-files: 1d + save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} + + - name: Download xcframework artifact + uses: actions/download-artifact@v7 + with: + name: llama-xcframework + path: build-apple/llama.xcframework/ + + - name: Build llama.cpp with CMake + id: cmake_build + run: | + sysctl -a + cmake -B build -G Xcode \ + -DGGML_METAL_USE_BF16=ON \ + -DGGML_METAL_EMBED_LIBRARY=ON \ + -DLLAMA_OPENSSL=OFF \ + -DLLAMA_BUILD_EXAMPLES=OFF \ + -DLLAMA_BUILD_TOOLS=OFF \ + -DLLAMA_BUILD_TESTS=OFF \ + -DLLAMA_BUILD_SERVER=OFF \ + -DCMAKE_OSX_ARCHITECTURES="arm64;x86_64" + cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) diff --git a/.github/workflows/build-cache.yml b/.github/workflows/build-cache.yml index dffbf2b4ab..bc0a92c7fc 100644 --- a/.github/workflows/build-cache.yml +++ b/.github/workflows/build-cache.yml @@ -37,31 +37,31 @@ jobs: path: ./vulkan_sdk version: ${{ env.VULKAN_SDK_VERSION }} - ubuntu-24-spacemit-cache: - runs-on: ubuntu-24.04 + #ubuntu-24-spacemit-cache: + # runs-on: ubuntu-24.04 - env: - # Make sure this is in sync with build-linux-cross.yml - SPACEMIT_IME_TOOLCHAIN_VERSION: "1.1.2" + # env: + # # Make sure this is in sync with build-linux-cross.yml + # SPACEMIT_IME_TOOLCHAIN_VERSION: "1.1.2" - steps: - - name: Clone - id: checkout - uses: actions/checkout@v6 + # steps: + # - name: Clone + # id: checkout + # uses: actions/checkout@v6 - - name: Setup Cache - uses: actions/cache@v5 - id: cache-toolchain - with: - path: ./spacemit_toolchain - key: spacemit-ime-toolchain-v${{ env.SPACEMIT_IME_TOOLCHAIN_VERSION }}-${{ runner.os }} + # - name: Setup Cache + # uses: actions/cache@v5 + # id: cache-toolchain + # with: + # path: ./spacemit_toolchain + # key: spacemit-ime-toolchain-v${{ env.SPACEMIT_IME_TOOLCHAIN_VERSION }}-${{ runner.os }} - - name: Setup SpacemiT Toolchain - if: steps.cache-toolchain.outputs.cache-hit != 'true' - uses: ./.github/actions/linux-setup-spacemit - with: - path: ./spacemit_toolchain - version: ${{ env.SPACEMIT_IME_TOOLCHAIN_VERSION }} + # - name: Setup SpacemiT Toolchain + # if: steps.cache-toolchain.outputs.cache-hit != 'true' + # uses: ./.github/actions/linux-setup-spacemit + # with: + # path: ./spacemit_toolchain + # version: ${{ env.SPACEMIT_IME_TOOLCHAIN_VERSION }} ubuntu-24-openvino-cache: runs-on: ubuntu-24.04 diff --git a/.github/workflows/build-cann.yml b/.github/workflows/build-cann.yml new file mode 100644 index 0000000000..de641ca148 --- /dev/null +++ b/.github/workflows/build-cann.yml @@ -0,0 +1,102 @@ +name: CI (cann) + +on: + workflow_dispatch: # allows manual triggering + push: + branches: + - master + paths: [ + '.github/workflows/build-cann.yml', + '**/CMakeLists.txt', + '**/.cmake', + '**/*.h', + '**/*.hpp', + '**/*.c', + '**/*.cpp' + ] + + pull_request: + types: [opened, synchronize, reopened] + paths: [ + '.github/workflows/build-cann.yml', + 'ggml/src/ggml-cann/**' + ] + +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }} + cancel-in-progress: true + +env: + GGML_NLOOP: 3 + GGML_N_THREADS: 1 + LLAMA_LOG_COLORS: 1 + LLAMA_LOG_PREFIX: 1 + LLAMA_LOG_TIMESTAMPS: 1 + +jobs: + openEuler-latest-cann: + defaults: + run: + shell: bash -el {0} + strategy: + matrix: + arch: [x86, aarch64] + chip_type: ['910b', '310p'] + build: ['Release'] + use_acl_graph: ['on', 'off'] + exclude: + # 310P does not support USE_ACL_GRAPH=on + - chip_type: '310p' + use_acl_graph: 'on' + runs-on: ${{ matrix.arch == 'aarch64' && 'ubuntu-24.04-arm' || 'ubuntu-24.04' }} + steps: + - name: Checkout + uses: actions/checkout@v6 + with: + fetch-depth: 0 + + - name: Free up disk space + uses: ggml-org/free-disk-space@v1.3.1 + with: + tool-cache: true + + - name: Set container image + id: cann-image + run: | + image="ascendai/cann:${{ matrix.chip_type == '910b' && '8.3.rc2-910b-openeuler24.03-py3.11' || '8.3.rc2-310p-openeuler24.03-py3.11' }}" + echo "image=${image}" >> "${GITHUB_OUTPUT}" + + - name: Pull container image + run: docker pull "${{ steps.cann-image.outputs.image }}" + + - name: Build + env: + BUILD_TYPE: ${{ matrix.build }} + SOC_TYPE: ascend${{ matrix.chip_type }} + USE_ACL_GRAPH: ${{ matrix.use_acl_graph }} + run: | + HOST_UID=$(id -u) + HOST_GID=$(id -g) + + docker run --rm \ + -v "${PWD}:/workspace" \ + -w /workspace \ + -e SOC_TYPE=${SOC_TYPE} \ + -e BUILD_TYPE=${BUILD_TYPE} \ + -e USE_ACL_GRAPH=${USE_ACL_GRAPH} \ + "${{ steps.cann-image.outputs.image }}" \ + bash -lc ' + set -e + yum install -y --setopt=install_weak_deps=False --setopt=tsflags=nodocs git gcc gcc-c++ make cmake openssl-devel + yum clean all && rm -rf /var/cache/yum + git config --global --add safe.directory "/workspace" + export LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:${ASCEND_TOOLKIT_HOME}/$(uname -m)-linux/devlib/:${LD_LIBRARY_PATH} + cmake -S . -B build \ + -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \ + -DGGML_CANN=on \ + -DSOC_TYPE=${SOC_TYPE} \ + -DUSE_ACL_GRAPH=${USE_ACL_GRAPH} + cmake --build build -j $(nproc) + + chown -R '"${HOST_UID}"':'"${HOST_GID}"' /workspace/build + ' diff --git a/.github/workflows/build-linux-cross.yml b/.github/workflows/build-cross.yml similarity index 93% rename from .github/workflows/build-linux-cross.yml rename to .github/workflows/build-cross.yml index dbcc1ee2ae..74508129ac 100644 --- a/.github/workflows/build-linux-cross.yml +++ b/.github/workflows/build-cross.yml @@ -1,7 +1,24 @@ -name: Build on Linux using cross-compiler +name: CI (cross) on: + # only manual triggers due to low-importance of the workflows + # TODO: for regular runs, provision dedicated self-hosted runners workflow_dispatch: - workflow_call: + push: + branches: + - master + paths: [ + '.github/workflows/build-cross.yml', + 'ggml/src/spacemit/*', + 'ggml/src/arch/loongarch/*' + ] + # run once every week + schedule: + - cron: '0 0 * * 0' + +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }} + cancel-in-progress: true + jobs: # ubuntu-24-riscv64-cpu-cross: @@ -264,15 +281,15 @@ jobs: steps: - uses: actions/checkout@v6 - - name: Use SpacemiT Toolchain Cache - uses: actions/cache@v5 - id: cache-toolchain - with: - path: ./spacemit_toolchain - key: spacemit-ime-toolchain-v${{ env.SPACEMIT_IME_TOOLCHAIN_VERSION }}-${{ runner.os }} + #- name: Use SpacemiT Toolchain Cache + # uses: actions/cache@v5 + # id: cache-toolchain + # with: + # path: ./spacemit_toolchain + # key: spacemit-ime-toolchain-v${{ env.SPACEMIT_IME_TOOLCHAIN_VERSION }}-${{ runner.os }} - name: Setup SpacemiT Toolchain - if: steps.cache-toolchain.outputs.cache-hit != 'true' + #if: steps.cache-toolchain.outputs.cache-hit != 'true' uses: ./.github/actions/linux-setup-spacemit with: path: ./spacemit_toolchain diff --git a/.github/workflows/build-msys.yml b/.github/workflows/build-msys.yml new file mode 100644 index 0000000000..431d9b6a53 --- /dev/null +++ b/.github/workflows/build-msys.yml @@ -0,0 +1,72 @@ +name: CI (msys) + +on: + # only manual triggers due to low-importance of the workflows + # TODO: for regular runs, provision dedicated self-hosted runners + workflow_dispatch: + # run once every week + schedule: + - cron: '0 0 * * 0' + +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }} + cancel-in-progress: true + +env: + GGML_NLOOP: 3 + GGML_N_THREADS: 1 + LLAMA_LOG_COLORS: 1 + LLAMA_LOG_PREFIX: 1 + LLAMA_LOG_TIMESTAMPS: 1 + +jobs: + windows-msys2: + runs-on: windows-2025 + + strategy: + fail-fast: false + matrix: + include: + - { sys: UCRT64, env: ucrt-x86_64, build: Release } + - { sys: CLANG64, env: clang-x86_64, build: Release } + + steps: + - name: Clone + uses: actions/checkout@v6 + + #- name: ccache + # uses: ggml-org/ccache-action@v1.2.16 + # with: + # key: windows-msys2 + # variant: ccache + # evict-old-files: 1d + # save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} + + - name: Setup ${{ matrix.sys }} + uses: msys2/setup-msys2@v2 + with: + update: true + msystem: ${{matrix.sys}} + install: >- + base-devel + git + mingw-w64-${{matrix.env}}-toolchain + mingw-w64-${{matrix.env}}-cmake + mingw-w64-${{matrix.env}}-openblas + + - name: Build using CMake + shell: msys2 {0} + run: | + cmake -B build + cmake --build build --config ${{ matrix.build }} -j $(nproc) + + - name: Clean after building using CMake + shell: msys2 {0} + run: | + rm -rf build + + - name: Build using CMake w/ OpenBLAS + shell: msys2 {0} + run: | + cmake -B build -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS + cmake --build build --config ${{ matrix.build }} -j $(nproc) diff --git a/.github/workflows/build-riscv.yml b/.github/workflows/build-riscv.yml new file mode 100644 index 0000000000..36a3a1155a --- /dev/null +++ b/.github/workflows/build-riscv.yml @@ -0,0 +1,136 @@ +name: CI (riscv) + +on: + workflow_dispatch: # allows manual triggering + push: + branches: + - master + paths: [ + '.github/workflows/build-riscv.yml', + '**/CMakeLists.txt', + '**/.cmake', + '**/*.h', + '**/*.hpp', + '**/*.c', + '**/*.cpp' + ] + + pull_request: + types: [opened, synchronize, reopened] + paths: [ + '.github/workflows/build-riscv.yml', + 'ggml/src/ggml-cpu/arch/riscv/**' + ] + +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }} + cancel-in-progress: true + +env: + GGML_NLOOP: 3 + GGML_N_THREADS: 1 + LLAMA_LOG_COLORS: 1 + LLAMA_LOG_PREFIX: 1 + LLAMA_LOG_TIMESTAMPS: 1 + +jobs: + ubuntu-riscv64-native-sanitizer: + runs-on: RISCV64 + + continue-on-error: true + + strategy: + matrix: + sanitizer: [ADDRESS, THREAD, UNDEFINED] + build_type: [Debug] + + steps: + - name: Install dependencies + run: | + sudo apt-get update + + # Install necessary packages + sudo apt-get install -y libatomic1 libtsan2 gcc-14 g++-14 rustup cmake build-essential wget ccache git-lfs + + # Set gcc-14 and g++-14 as the default compilers + sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-14 100 + sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-14 100 + sudo ln -sf /usr/bin/gcc-14 /usr/bin/gcc + sudo ln -sf /usr/bin/g++-14 /usr/bin/g++ + + # Install Rust stable version + rustup install stable + rustup default stable + + git lfs install + + - name: GCC version check + run: | + gcc --version + g++ --version + + - name: Clone + id: checkout + uses: actions/checkout@v6 + + - name: Setup ccache + run: | + # Unique cache directory per matrix combination + export CCACHE_DIR="$HOME/.ccache/sanitizer-${{ matrix.sanitizer }}-${{ matrix.build_type }}" + mkdir -p "$CCACHE_DIR" + + # Configure ccache + ccache --set-config=max_size=5G + ccache --set-config=compression=true + ccache --set-config=compression_level=6 + ccache --set-config=cache_dir="$CCACHE_DIR" + ccache --set-config=sloppiness=file_macro,time_macros,include_file_mtime,include_file_ctime + ccache --set-config=hash_dir=false + + # Export for subsequent steps + echo "CCACHE_DIR=$CCACHE_DIR" >> $GITHUB_ENV + echo "PATH=/usr/lib/ccache:$PATH" >> $GITHUB_ENV + + - name: Build + id: cmake_build + if: ${{ matrix.sanitizer != 'THREAD' }} + run: | + cmake -B build \ + -DLLAMA_OPENSSL=OFF \ + -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \ + -DGGML_OPENMP=ON \ + -DLLAMA_BUILD_EXAMPLES=ON \ + -DLLAMA_BUILD_TOOLS=ON \ + -DLLAMA_BUILD_TESTS=OFF \ + -DCMAKE_C_COMPILER_LAUNCHER=ccache \ + -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ + -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON \ + -DCMAKE_C_COMPILER=riscv64-linux-gnu-gcc-14 \ + -DCMAKE_CXX_COMPILER=riscv64-linux-gnu-g++-14 + + cmake --build build --config ${{ matrix.build_type }} -j $(nproc) + + - name: Build (no OpenMP) + id: cmake_build_no_openmp + if: ${{ matrix.sanitizer == 'THREAD' }} + run: | + cmake -B build \ + -DLLAMA_OPENSSL=OFF \ + -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \ + -DGGML_OPENMP=OFF \ + -DLLAMA_BUILD_EXAMPLES=ON \ + -DLLAMA_BUILD_TOOLS=ON \ + -DLLAMA_BUILD_TESTS=OFF \ + -DCMAKE_C_COMPILER_LAUNCHER=ccache \ + -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ + -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON \ + -DCMAKE_C_COMPILER=riscv64-linux-gnu-gcc-14 \ + -DCMAKE_CXX_COMPILER=riscv64-linux-gnu-g++-14 + + cmake --build build --config ${{ matrix.build_type }} -j $(nproc) + + - name: Test + id: cmake_test + run: | + cd build + ctest -L main --verbose --timeout 900 diff --git a/.github/workflows/build-sanitize.yml b/.github/workflows/build-sanitize.yml new file mode 100644 index 0000000000..0b17857504 --- /dev/null +++ b/.github/workflows/build-sanitize.yml @@ -0,0 +1,87 @@ +name: CI (sanitize) + +on: + workflow_dispatch: # allows manual triggering + push: + branches: + - master + paths: [ + '.github/workflows/build-sanitize.yml', + '**/CMakeLists.txt', + '**/.cmake', + '**/*.h', + '**/*.hpp', + '**/*.c', + '**/*.cpp' + ] + +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }} + cancel-in-progress: true + +env: + GGML_NLOOP: 3 + GGML_N_THREADS: 1 + LLAMA_LOG_COLORS: 1 + LLAMA_LOG_PREFIX: 1 + LLAMA_LOG_TIMESTAMPS: 1 + +jobs: + ubuntu-latest-sanitizer: + runs-on: ubuntu-latest + + continue-on-error: true + + strategy: + matrix: + sanitizer: [ADDRESS, THREAD, UNDEFINED] + build_type: [Debug] + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v6 + + - name: ccache + uses: ggml-org/ccache-action@v1.2.16 + with: + key: ubuntu-latest-sanitizer-${{ matrix.sanitizer }} + evict-old-files: 1d + save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} + + - name: Dependencies + id: depends + run: | + sudo apt-get update + sudo apt-get install build-essential libssl-dev + + - name: Build + id: cmake_build + if: ${{ matrix.sanitizer != 'THREAD' }} + run: | + cmake -B build \ + -DLLAMA_FATAL_WARNINGS=ON \ + -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON \ + -DGGML_SANITIZE_${{ matrix.sanitizer }}=ON \ + -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} + + cmake --build build --config ${{ matrix.build_type }} -j $(nproc) + + - name: Build (no OpenMP) + id: cmake_build_no_openmp + if: ${{ matrix.sanitizer == 'THREAD' }} + run: | + cmake -B build \ + -DLLAMA_FATAL_WARNINGS=ON \ + -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON \ + -DGGML_SANITIZE_${{ matrix.sanitizer }}=ON \ + -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \ + -DGGML_OPENMP=OFF + + cmake --build build --config ${{ matrix.build_type }} -j $(nproc) + + - name: Test + id: cmake_test + run: | + cd build + ctest -L main --verbose --timeout 900 diff --git a/.github/workflows/build-vulkan.yml b/.github/workflows/build-vulkan.yml new file mode 100644 index 0000000000..b25ec51471 --- /dev/null +++ b/.github/workflows/build-vulkan.yml @@ -0,0 +1,96 @@ +name: CI (vulkan) + +on: + workflow_dispatch: # allows manual triggering + push: + branches: + - master + paths: [ + '.github/workflows/build-vulkan.yml', + '**/CMakeLists.txt', + '**/.cmake', + '**/*.h', + '**/*.hpp', + '**/*.c', + '**/*.cpp', + '**/*.comp', + '**/*.glsl' + ] + + pull_request: + types: [opened, synchronize, reopened] + paths: [ + '.github/workflows/build-vulkan.yml', + 'ggml/src/ggml-vulkan/**' + ] + +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }} + cancel-in-progress: true + +env: + GGML_NLOOP: 3 + GGML_N_THREADS: 1 + LLAMA_LOG_COLORS: 1 + LLAMA_LOG_PREFIX: 1 + LLAMA_LOG_TIMESTAMPS: 1 + +jobs: + ubuntu-24-vulkan-llvmpipe: + runs-on: ubuntu-24.04 + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v6 + + - name: ccache + uses: ggml-org/ccache-action@v1.2.16 + with: + key: ubuntu-24-vulkan-llvmpipe + evict-old-files: 1d + save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} + + - name: Dependencies + id: depends + run: | + sudo add-apt-repository -y ppa:kisak/kisak-mesa + sudo apt-get update -y + sudo apt-get install -y build-essential mesa-vulkan-drivers libxcb-xinput0 libxcb-xinerama0 libxcb-cursor-dev libssl-dev + + - name: Get latest Vulkan SDK version + id: vulkan_sdk_version + run: | + echo "VULKAN_SDK_VERSION=$(curl https://vulkan.lunarg.com/sdk/latest/linux.txt)" >> "$GITHUB_ENV" + + - name: Use Vulkan SDK Cache + uses: actions/cache@v5 + id: cache-sdk + with: + path: ./vulkan_sdk + key: vulkan-sdk-${{ env.VULKAN_SDK_VERSION }}-${{ runner.os }} + + - name: Setup Vulkan SDK + if: steps.cache-sdk.outputs.cache-hit != 'true' + uses: ./.github/actions/linux-setup-vulkan-llvmpipe + with: + path: ./vulkan_sdk + version: ${{ env.VULKAN_SDK_VERSION }} + + - name: Build + id: cmake_build + run: | + source ./vulkan_sdk/setup-env.sh + cmake -B build \ + -DGGML_VULKAN=ON + cmake --build build --config Release -j $(nproc) + + - name: Test + id: cmake_test + run: | + cd build + export GGML_VK_VISIBLE_DEVICES=0 + export GGML_VK_DISABLE_F16=1 + export GGML_VK_DISABLE_COOPMAT=1 + # This is using llvmpipe and runs slower than other backends + ctest -L main --verbose --timeout 4800 diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 460a770122..b60ad993b8 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -7,7 +7,6 @@ on: - master paths: [ '.github/workflows/build.yml', - '.github/workflows/build-linux-cross.yml', '.github/workflows/build-cmake-pkg.yml', '**/CMakeLists.txt', '**/.cmake', @@ -29,7 +28,6 @@ on: types: [opened, synchronize, reopened] paths: [ '.github/workflows/build.yml', - '.github/workflows/build-linux-cross.yml', '.github/workflows/build-cmake-pkg.yml', '**/CMakeLists.txt', '**/.cmake', @@ -59,7 +57,10 @@ env: LLAMA_LOG_TIMESTAMPS: 1 jobs: - macOS-latest-cmake-arm64: + build-cmake-pkg: + uses: ./.github/workflows/build-cmake-pkg.yml + + macOS-latest-arm64: runs-on: macos-latest steps: @@ -70,7 +71,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.16 with: - key: macOS-latest-cmake-arm64 + key: macOS-latest-arm64 evict-old-files: 1d save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} @@ -95,7 +96,7 @@ jobs: cd build ctest -L main -E "test-llama-archs" --verbose --timeout 900 - macOS-latest-cmake-x64: + macOS-latest-x64: runs-on: macos-15-intel steps: @@ -106,7 +107,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.16 with: - key: macOS-latest-cmake-x64 + key: macOS-latest-x64 evict-old-files: 1d save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} @@ -131,7 +132,7 @@ jobs: cd build ctest -L main --verbose --timeout 900 - macOS-latest-cmake-arm64-webgpu: + macOS-latest-arm64-webgpu: runs-on: macos-latest steps: @@ -142,7 +143,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.16 with: - key: macOS-latest-cmake-arm64-webgpu + key: macOS-latest-arm64-webgpu evict-old-files: 1d save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} @@ -173,7 +174,7 @@ jobs: cd build ctest -L main --verbose --timeout 900 - ubuntu-cpu-cmake: + ubuntu-cpu: strategy: matrix: include: @@ -196,7 +197,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.16 with: - key: ubuntu-cpu-cmake-${{ matrix.build }} + key: ubuntu-cpu-${{ matrix.build }} evict-old-files: 1d save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} @@ -258,98 +259,11 @@ jobs: wget https://huggingface.co/ggml-org/models/resolve/main/tinyllamas/stories260K-be.gguf ./bin/llama-completion -m stories260K-be.gguf -p "One day, Lily met a Shoggoth" -n 500 -c 256 - ubuntu-latest-cmake-sanitizer: + ubuntu-latest-rpc: runs-on: ubuntu-latest continue-on-error: true - strategy: - matrix: - sanitizer: [ADDRESS, THREAD, UNDEFINED] - build_type: [Debug] - - steps: - - name: Clone - id: checkout - uses: actions/checkout@v6 - - - name: ccache - uses: ggml-org/ccache-action@v1.2.16 - with: - key: ubuntu-latest-cmake-sanitizer-${{ matrix.sanitizer }} - evict-old-files: 1d - save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} - - - name: Dependencies - id: depends - run: | - sudo apt-get update - sudo apt-get install build-essential libssl-dev - - - name: Build - id: cmake_build - if: ${{ matrix.sanitizer != 'THREAD' }} - run: | - cmake -B build \ - -DLLAMA_FATAL_WARNINGS=ON \ - -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON \ - -DGGML_SANITIZE_${{ matrix.sanitizer }}=ON \ - -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} - - cmake --build build --config ${{ matrix.build_type }} -j $(nproc) - - - name: Build (no OpenMP) - id: cmake_build_no_openmp - if: ${{ matrix.sanitizer == 'THREAD' }} - run: | - cmake -B build \ - -DLLAMA_FATAL_WARNINGS=ON \ - -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON \ - -DGGML_SANITIZE_${{ matrix.sanitizer }}=ON \ - -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \ - -DGGML_OPENMP=OFF - - cmake --build build --config ${{ matrix.build_type }} -j $(nproc) - - - name: Test - id: cmake_test - run: | - cd build - ctest -L main --verbose --timeout 900 - - ubuntu-24-llguidance: - runs-on: ${{ 'ubuntu-24.04-arm' || 'ubuntu-24.04' }} - - steps: - - name: Clone - id: checkout - uses: actions/checkout@v6 - - - name: Dependencies - id: depends - run: | - sudo apt-get update - sudo apt-get install build-essential libssl-dev - - - name: Build - id: cmake_build - run: | - cmake -B build \ - -DLLAMA_FATAL_WARNINGS=ON \ - -DLLAMA_LLGUIDANCE=ON - cmake --build build --config Release -j $(nproc) - - - name: Test - id: cmake_test - run: | - cd build - ctest -L main --verbose --timeout 900 - - ubuntu-24-cmake-rpc: - runs-on: ${{ 'ubuntu-24.04-arm' || 'ubuntu-24.04' }} - - continue-on-error: true - steps: - name: Clone id: checkout @@ -374,7 +288,7 @@ jobs: cd build ctest -L main --verbose - ubuntu-24-cmake-vulkan-deb: + ubuntu-24-vulkan: runs-on: ${{ 'ubuntu-24.04-arm' || 'ubuntu-24.04' }} steps: @@ -401,7 +315,7 @@ jobs: run: | cmake --build build -j $(nproc) - ubuntu-24-cmake-vulkan: + ubuntu-24-webgpu: runs-on: ubuntu-24.04 steps: @@ -412,66 +326,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.16 with: - key: ubuntu-24-cmake-vulkan - evict-old-files: 1d - save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} - - - name: Dependencies - id: depends - run: | - sudo add-apt-repository -y ppa:kisak/kisak-mesa - sudo apt-get update -y - sudo apt-get install -y build-essential mesa-vulkan-drivers libxcb-xinput0 libxcb-xinerama0 libxcb-cursor-dev libssl-dev - - - name: Get latest Vulkan SDK version - id: vulkan_sdk_version - run: | - echo "VULKAN_SDK_VERSION=$(curl https://vulkan.lunarg.com/sdk/latest/linux.txt)" >> "$GITHUB_ENV" - - - name: Use Vulkan SDK Cache - uses: actions/cache@v5 - id: cache-sdk - with: - path: ./vulkan_sdk - key: vulkan-sdk-${{ env.VULKAN_SDK_VERSION }}-${{ runner.os }} - - - name: Setup Vulkan SDK - if: steps.cache-sdk.outputs.cache-hit != 'true' - uses: ./.github/actions/linux-setup-vulkan - with: - path: ./vulkan_sdk - version: ${{ env.VULKAN_SDK_VERSION }} - - - name: Build - id: cmake_build - run: | - source ./vulkan_sdk/setup-env.sh - cmake -B build \ - -DGGML_VULKAN=ON - cmake --build build --config Release -j $(nproc) - - - name: Test - id: cmake_test - run: | - cd build - export GGML_VK_VISIBLE_DEVICES=0 - export GGML_VK_DISABLE_F16=1 - export GGML_VK_DISABLE_COOPMAT=1 - # This is using llvmpipe and runs slower than other backends - ctest -L main --verbose --timeout 4800 - - ubuntu-24-cmake-webgpu: - runs-on: ubuntu-24.04 - - steps: - - name: Clone - id: checkout - uses: actions/checkout@v6 - - - name: ccache - uses: ggml-org/ccache-action@v1.2.16 - with: - key: ubuntu-24-cmake-webgpu + key: ubuntu-24-webgpu evict-old-files: 1d save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} @@ -531,7 +386,7 @@ jobs: # This is using llvmpipe and runs slower than other backends ctest -L main --verbose --timeout 900 - ubuntu-24-wasm-webgpu: + ubuntu-24-webgpu-wasm: runs-on: ${{ 'ubuntu-24.04-arm' || 'ubuntu-24.04' }} steps: @@ -565,7 +420,7 @@ jobs: cmake --build build-wasm --target test-backend-ops -j $(nproc) - ubuntu-22-cmake-hip: + ubuntu-22-hip: runs-on: ubuntu-22.04 container: rocm/dev-ubuntu-22.04:6.1.2 @@ -583,7 +438,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.16 with: - key: ubuntu-22-cmake-hip + key: ubuntu-22-hip evict-old-files: 1d save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} @@ -596,7 +451,7 @@ jobs: -DGGML_HIP=ON cmake --build build --config Release -j $(nproc) - ubuntu-22-cmake-musa: + ubuntu-22-musa: runs-on: ubuntu-22.04 container: mthreads/musa:rc4.3.0-devel-ubuntu22.04-amd64 @@ -614,7 +469,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.16 with: - key: ubuntu-22-cmake-musa + key: ubuntu-22-musa evict-old-files: 1d save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} @@ -625,7 +480,7 @@ jobs: -DGGML_MUSA=ON cmake --build build --config Release -j $(nproc) - ubuntu-22-cmake-sycl: + ubuntu-22-sycl: runs-on: ubuntu-22.04 continue-on-error: true @@ -660,7 +515,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.16 with: - key: ubuntu-22-cmake-sycl + key: ubuntu-22-sycl evict-old-files: 1d save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} @@ -674,7 +529,7 @@ jobs: -DCMAKE_CXX_COMPILER=icpx cmake --build build --config Release -j $(nproc) - ubuntu-22-cmake-sycl-fp16: + ubuntu-22-sycl-fp16: runs-on: ubuntu-22.04 continue-on-error: true @@ -709,7 +564,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.16 with: - key: ubuntu-22-cmake-sycl-fp16 + key: ubuntu-22-sycl-fp16 evict-old-files: 1d save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} @@ -724,8 +579,8 @@ jobs: -DGGML_SYCL_F16=ON cmake --build build --config Release -j $(nproc) - ubuntu-24-cmake-openvino: - name: ubuntu-24-cmake-openvino-${{ matrix.openvino_device }} + ubuntu-24-openvino: + name: ubuntu-24-openvino-${{ matrix.openvino_device }} strategy: matrix: include: @@ -751,7 +606,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.16 with: - key: ubuntu-24-cmake-openvino-${{ matrix.variant }}-no-preset-v1 + key: ubuntu-24-openvino-${{ matrix.variant }}-no-preset-v1 evict-old-files: 1d - name: Dependencies @@ -801,194 +656,7 @@ jobs: fi ctest --test-dir build/ReleaseOV -L main -E "test-llama-archs" --verbose --timeout 2000 - build-linux-cross: - uses: ./.github/workflows/build-linux-cross.yml - - build-cmake-pkg: - uses: ./.github/workflows/build-cmake-pkg.yml - - macOS-latest-cmake-ios: - runs-on: macos-latest - - steps: - - name: Clone - id: checkout - uses: actions/checkout@v6 - - - name: ccache - uses: ggml-org/ccache-action@v1.2.16 - with: - key: macOS-latest-cmake-ios - evict-old-files: 1d - save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} - - - name: Build - id: cmake_build - run: | - sysctl -a - cmake -B build -G Xcode \ - -DGGML_METAL_USE_BF16=ON \ - -DGGML_METAL_EMBED_LIBRARY=ON \ - -DLLAMA_BUILD_COMMON=OFF \ - -DLLAMA_BUILD_EXAMPLES=OFF \ - -DLLAMA_BUILD_TOOLS=OFF \ - -DLLAMA_BUILD_TESTS=OFF \ - -DLLAMA_BUILD_SERVER=OFF \ - -DCMAKE_SYSTEM_NAME=iOS \ - -DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 \ - -DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml - cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO - - macOS-latest-cmake-tvos: - runs-on: macos-latest - - steps: - - name: Clone - id: checkout - uses: actions/checkout@v6 - - - name: ccache - uses: ggml-org/ccache-action@v1.2.16 - with: - key: macOS-latest-cmake-tvos - evict-old-files: 1d - save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} - - - name: Build - id: cmake_build - run: | - sysctl -a - cmake -B build -G Xcode \ - -DGGML_METAL_USE_BF16=ON \ - -DGGML_METAL_EMBED_LIBRARY=ON \ - -DLLAMA_BUILD_COMMON=OFF \ - -DLLAMA_BUILD_EXAMPLES=OFF \ - -DLLAMA_BUILD_TOOLS=OFF \ - -DLLAMA_BUILD_TESTS=OFF \ - -DLLAMA_BUILD_SERVER=OFF \ - -DCMAKE_SYSTEM_NAME=tvOS \ - -DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 \ - -DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml - cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO - - macOS-latest-cmake-visionos: - runs-on: macos-latest - - steps: - - name: Clone - id: checkout - uses: actions/checkout@v6 - - - name: Build - id: cmake_build - run: | - sysctl -a - cmake -B build -G Xcode \ - -DGGML_METAL_USE_BF16=ON \ - -DGGML_METAL_EMBED_LIBRARY=ON \ - -DLLAMA_BUILD_COMMON=OFF \ - -DLLAMA_BUILD_EXAMPLES=OFF \ - -DLLAMA_BUILD_TOOLS=OFF \ - -DLLAMA_BUILD_TESTS=OFF \ - -DLLAMA_BUILD_SERVER=OFF \ - -DCMAKE_SYSTEM_NAME=visionOS \ - -DCMAKE_OSX_DEPLOYMENT_TARGET=1.0 \ - -DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml - cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO - - macOS-latest-swift: - runs-on: macos-latest - needs: ios-xcode-build - - strategy: - matrix: - destination: ['generic/platform=macOS', 'generic/platform=iOS', 'generic/platform=tvOS'] - - steps: - - name: Clone - id: checkout - uses: actions/checkout@v6 - - - name: ccache - uses: ggml-org/ccache-action@v1.2.16 - with: - key: macOS-latest-swift - evict-old-files: 1d - save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} - - - name: Download xcframework artifact - uses: actions/download-artifact@v7 - with: - name: llama-xcframework - path: build-apple/llama.xcframework/ - - - name: Build llama.cpp with CMake - id: cmake_build - run: | - sysctl -a - cmake -B build -G Xcode \ - -DGGML_METAL_USE_BF16=ON \ - -DGGML_METAL_EMBED_LIBRARY=ON \ - -DLLAMA_OPENSSL=OFF \ - -DLLAMA_BUILD_EXAMPLES=OFF \ - -DLLAMA_BUILD_TOOLS=OFF \ - -DLLAMA_BUILD_TESTS=OFF \ - -DLLAMA_BUILD_SERVER=OFF \ - -DCMAKE_OSX_ARCHITECTURES="arm64;x86_64" - cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) - - windows-msys2: - runs-on: windows-2025 - - strategy: - fail-fast: false - matrix: - include: - - { sys: UCRT64, env: ucrt-x86_64, build: Release } - - { sys: CLANG64, env: clang-x86_64, build: Release } - - steps: - - name: Clone - uses: actions/checkout@v6 - - - name: ccache - uses: ggml-org/ccache-action@v1.2.16 - with: - key: windows-msys2 - variant: ccache - evict-old-files: 1d - save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} - - - name: Setup ${{ matrix.sys }} - uses: msys2/setup-msys2@v2 - with: - update: true - msystem: ${{matrix.sys}} - install: >- - base-devel - git - mingw-w64-${{matrix.env}}-toolchain - mingw-w64-${{matrix.env}}-cmake - mingw-w64-${{matrix.env}}-openblas - - - name: Build using CMake - shell: msys2 {0} - run: | - cmake -B build - cmake --build build --config ${{ matrix.build }} -j $(nproc) - - - name: Clean after building using CMake - shell: msys2 {0} - run: | - rm -rf build - - - name: Build using CMake w/ OpenBLAS - shell: msys2 {0} - run: | - cmake -B build -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS - cmake --build build --config ${{ matrix.build }} -j $(nproc) - - windows-latest-cmake: + windows-latest: runs-on: windows-2025 env: @@ -1023,7 +691,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.16 with: - key: windows-latest-cmake-${{ matrix.build }} + key: windows-latest-${{ matrix.build }} variant: ccache evict-old-files: 1d save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} @@ -1110,7 +778,7 @@ jobs: # $env:LLAMA_SKIP_TESTS_SLOW_ON_EMULATOR = 1 # & $sde -future -- ctest -L main -C Release --verbose --timeout 900 - ubuntu-latest-cmake-cuda: + ubuntu-latest-cuda: runs-on: ubuntu-latest container: nvidia/cuda:12.6.2-devel-ubuntu24.04 @@ -1129,7 +797,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.16 with: - key: ubuntu-latest-cmake-cuda + key: ubuntu-latest-cuda evict-old-files: 1d save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} @@ -1146,7 +814,7 @@ jobs: -DGGML_CUDA_CUB_3DOT2=ON cmake --build build - windows-2022-cmake-cuda: + windows-2022-cuda: runs-on: windows-2022 strategy: @@ -1195,7 +863,7 @@ jobs: cmake --build build --config Release -j %NINJA_JOBS% -t ggml cmake --build build --config Release - windows-latest-cmake-sycl: + windows-latest-sycl: runs-on: windows-2022 defaults: @@ -1214,7 +882,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.16 with: - key: windows-latest-cmake-sycl + key: windows-latest-sycl variant: ccache evict-old-files: 1d save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} @@ -1229,7 +897,7 @@ jobs: id: cmake_build run: examples/sycl/win-build-sycl.bat - windows-latest-cmake-hip: + windows-latest-hip: runs-on: windows-2022 env: @@ -1296,223 +964,95 @@ jobs: -DGGML_RPC=ON cmake --build build -j ${env:NUMBER_OF_PROCESSORS} - ios-xcode-build: - runs-on: macos-latest + ubuntu-cpu-riscv64-native: + runs-on: RISCV64 steps: - - name: Checkout code - uses: actions/checkout@v6 - - - name: Setup Xcode - uses: maxim-lobanov/setup-xcode@v1 - with: - xcode-version: latest-stable - - - name: Build - id: cmake_build + - name: Install dependencies run: | - sysctl -a - cmake -B build -G Xcode \ - -DGGML_METAL_USE_BF16=ON \ - -DGGML_METAL_EMBED_LIBRARY=ON \ - -DLLAMA_OPENSSL=OFF \ - -DLLAMA_BUILD_EXAMPLES=OFF \ - -DLLAMA_BUILD_TOOLS=OFF \ - -DLLAMA_BUILD_TESTS=OFF \ - -DLLAMA_BUILD_SERVER=OFF \ - -DCMAKE_SYSTEM_NAME=iOS \ - -DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 \ - -DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml - cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO + sudo apt-get update - - name: xcodebuild for swift package - id: xcodebuild - run: | - ./build-xcframework.sh + # Install necessary packages + sudo apt-get install -y libatomic1 libtsan2 gcc-14 g++-14 rustup cmake build-essential libssl-dev wget ccache git-lfs - - name: Upload xcframework artifact - uses: actions/upload-artifact@v6 - with: - name: llama-xcframework - path: build-apple/llama.xcframework/ - retention-days: 1 + # Set gcc-14 and g++-14 as the default compilers + sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-14 100 + sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-14 100 + sudo ln -sf /usr/bin/gcc-14 /usr/bin/gcc + sudo ln -sf /usr/bin/g++-14 /usr/bin/g++ - - name: Build Xcode project - run: | - xcodebuild -downloadPlatform iOS - xcodebuild -project examples/llama.swiftui/llama.swiftui.xcodeproj -scheme llama.swiftui -sdk iphoneos CODE_SIGNING_REQUIRED=NO CODE_SIGN_IDENTITY= -destination 'generic/platform=iOS' FRAMEWORK_FOLDER_PATH=./build-ios build + # Install Rust stable version + rustup install stable + rustup default stable - android-build: - runs-on: ubuntu-latest + git lfs install - steps: - - name: Clone - uses: actions/checkout@v6 - - # Disabled due to size (400MB) and always 0 cache hits - # - name: ccache - # uses: ggml-org/ccache-action@v1.2.16 - # with: - # key: android-build - # evict-old-files: 1d - - - name: Set up JDK - uses: actions/setup-java@v5 - with: - java-version: 17 - distribution: zulu - - - name: Setup Android SDK - uses: android-actions/setup-android@v3 - with: - log-accepted-android-sdk-licenses: false - - - name: Build - run: | - cd examples/llama.android - ./gradlew build --no-daemon - - android-ndk-build: - runs-on: ubuntu-latest - - env: - OPENCL_VERSION: 2025.07.22 - - strategy: - matrix: - include: - - build: 'arm64-cpu' - defines: '-D ANDROID_ABI=arm64-v8a -D ANDROID_PLATFORM=android-31 -D CMAKE_TOOLCHAIN_FILE=${ANDROID_NDK_ROOT}/build/cmake/android.toolchain.cmake -D GGML_NATIVE=OFF -DGGML_CPU_ARM_ARCH=armv8.5-a+fp16+i8mm -G Ninja -D LLAMA_OPENSSL=OFF -D GGML_OPENMP=OFF' - - build: 'arm64-snapdragon' - defines: '--preset arm64-android-snapdragon-release' - - steps: - name: Clone id: checkout uses: actions/checkout@v6 - - name: Install OpenCL Headers and Libs - id: install_opencl - if: ${{ matrix.build == 'arm64-snapdragon' }} + - name: Check environment run: | - mkdir opencl - curl -L -o opencl/clhpp.tar.gz https://github.com/KhronosGroup/OpenCL-CLHPP/archive/refs/tags/v${OPENCL_VERSION}.tar.gz - curl -L -o opencl/headers.tar.gz https://github.com/KhronosGroup/OpenCL-Headers/archive/refs/tags/v${OPENCL_VERSION}.tar.gz - curl -L -o opencl/icd-loader.tar.gz https://github.com/KhronosGroup/OpenCL-ICD-Loader/archive/refs/tags/v${OPENCL_VERSION}.tar.gz - tar -xaf opencl/headers.tar.gz -C opencl - tar -xaf opencl/clhpp.tar.gz -C opencl - tar -xaf opencl/icd-loader.tar.gz -C opencl - sudo cp -r opencl/OpenCL-Headers-${OPENCL_VERSION}/CL ${ANDROID_NDK_ROOT}/toolchains/llvm/prebuilt/linux-x86_64/sysroot/usr/include - sudo cp -r opencl/OpenCL-CLHPP-${OPENCL_VERSION}/include/CL/* ${ANDROID_NDK_ROOT}/toolchains/llvm/prebuilt/linux-x86_64/sysroot/usr/include/CL - cd opencl/OpenCL-ICD-Loader-${OPENCL_VERSION} - cmake -B build -G Ninja -DCMAKE_BUILD_TYPE=Release -DCMAKE_TOOLCHAIN_FILE=${ANDROID_NDK_ROOT}/build/cmake/android.toolchain.cmake -DOPENCL_ICD_LOADER_HEADERS_DIR=${ANDROID_NDK_ROOT}/toolchains/llvm/prebuilt/linux-x86_64/sysroot/usr/include -DANDROID_ABI=arm64-v8a -DANDROID_PLATFORM=31 -DANDROID_STL=c++_shared - cmake --build build - sudo cp build/libOpenCL.so ${ANDROID_NDK_ROOT}/toolchains/llvm/prebuilt/linux-x86_64/sysroot/usr/lib/aarch64-linux-android - rm -rf opencl + uname -a + gcc --version + g++ --version + ldd --version + cmake --version + rustc --version - - name: Install Hexagon SDK - id: install_hexsdk - if: ${{ matrix.build == 'arm64-snapdragon' }} - env: - HEXSDK_VER: 6.4.0.2 - HEXTLS_VER: 19.0.04 + - name: Setup ccache run: | - curl -L -o hex-sdk.tar.gz https://github.com/snapdragon-toolchain/hexagon-sdk/releases/download/v$HEXSDK_VER/hexagon-sdk-v$HEXSDK_VER-amd64-lnx.tar.xz - mkdir hex-sdk - tar -xaf hex-sdk.tar.gz -C hex-sdk - ls -l hex-sdk - sudo mv hex-sdk /opt/hexagon - echo "HEXAGON_SDK_ROOT=/opt/hexagon/$HEXSDK_VER" >> "$GITHUB_ENV" - echo "HEXAGON_TOOLS_ROOT=/opt/hexagon/$HEXSDK_VER/tools/HEXAGON_Tools/$HEXTLS_VER" >> "$GITHUB_ENV" - echo "DEFAULT_HLOS_ARCH=64" >> "$GITHUB_ENV" - echo "DEFAULT_TOOLS_VARIANT=toolv19" >> "$GITHUB_ENV" - echo "DEFAULT_NO_QURT_INC=0" >> "$GITHUB_ENV" - echo "DEFAULT_DSP_ARCH=v73" >> "$GITHUB_ENV" + # Set unique cache directory for this job + export CCACHE_DIR="$HOME/.ccache/cpu-cmake-rv64-native" + mkdir -p "$CCACHE_DIR" - - name: Update CMake presets - id: update_presets - if: ${{ matrix.build == 'arm64-snapdragon' }} - run: | - cp docs/backend/snapdragon/CMakeUserPresets.json . + # Configure ccache for optimal performance + ccache --set-config=max_size=5G + ccache --set-config=compression=true + ccache --set-config=compression_level=6 + ccache --set-config=cache_dir="$CCACHE_DIR" + + # Enable more aggressive caching + ccache --set-config=sloppiness=file_macro,time_macros,include_file_mtime,include_file_ctime + ccache --set-config=hash_dir=false + + # Export for subsequent steps + echo "CCACHE_DIR=$CCACHE_DIR" >> $GITHUB_ENV + echo "PATH=/usr/lib/ccache:$PATH" >> $GITHUB_ENV - name: Build - id: ndk_build + id: cmake_build run: | - cmake ${{ matrix.defines }} -B build - cmake --build build - cmake --install build --prefix pkg-adb/llama.cpp + cmake -B build \ + -DCMAKE_BUILD_TYPE=Release \ + -DGGML_OPENMP=OFF \ + -DLLAMA_BUILD_EXAMPLES=ON \ + -DLLAMA_BUILD_TOOLS=ON \ + -DLLAMA_BUILD_TESTS=ON \ + -DCMAKE_C_COMPILER_LAUNCHER=ccache \ + -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ + -DGGML_RPC=ON \ + -DCMAKE_C_COMPILER=riscv64-linux-gnu-gcc-14 \ + -DCMAKE_CXX_COMPILER=riscv64-linux-gnu-g++-14 + + cmake --build build --config Release -j $(nproc) - name: Test id: cmake_test run: | - echo "FIXME: test on devices" + cd build + ctest -L main --verbose --timeout 900 - openEuler-latest-cmake-cann: - defaults: - run: - shell: bash -el {0} - strategy: - matrix: - arch: [x86, aarch64] - chip_type: ['910b', '310p'] - build: ['Release'] - use_acl_graph: ['on', 'off'] - exclude: - # 310P does not support USE_ACL_GRAPH=on - - chip_type: '310p' - use_acl_graph: 'on' - runs-on: ${{ matrix.arch == 'aarch64' && 'ubuntu-24.04-arm' || 'ubuntu-24.04' }} - steps: - - name: Checkout - uses: actions/checkout@v6 - with: - fetch-depth: 0 - - - name: Free up disk space - uses: ggml-org/free-disk-space@v1.3.1 - with: - tool-cache: true - - - name: Set container image - id: cann-image + - name: Test llama2c conversion + id: llama2c_test run: | - image="ascendai/cann:${{ matrix.chip_type == '910b' && '8.3.rc2-910b-openeuler24.03-py3.11' || '8.3.rc2-310p-openeuler24.03-py3.11' }}" - echo "image=${image}" >> "${GITHUB_OUTPUT}" - - - name: Pull container image - run: docker pull "${{ steps.cann-image.outputs.image }}" - - - name: Build - env: - BUILD_TYPE: ${{ matrix.build }} - SOC_TYPE: ascend${{ matrix.chip_type }} - USE_ACL_GRAPH: ${{ matrix.use_acl_graph }} - run: | - HOST_UID=$(id -u) - HOST_GID=$(id -g) - - docker run --rm \ - -v "${PWD}:/workspace" \ - -w /workspace \ - -e SOC_TYPE=${SOC_TYPE} \ - -e BUILD_TYPE=${BUILD_TYPE} \ - -e USE_ACL_GRAPH=${USE_ACL_GRAPH} \ - "${{ steps.cann-image.outputs.image }}" \ - bash -lc ' - set -e - yum install -y --setopt=install_weak_deps=False --setopt=tsflags=nodocs git gcc gcc-c++ make cmake openssl-devel - yum clean all && rm -rf /var/cache/yum - git config --global --add safe.directory "/workspace" - export LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:${ASCEND_TOOLKIT_HOME}/$(uname -m)-linux/devlib/:${LD_LIBRARY_PATH} - cmake -S . -B build \ - -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \ - -DGGML_CANN=on \ - -DSOC_TYPE=${SOC_TYPE} \ - -DUSE_ACL_GRAPH=${USE_ACL_GRAPH} - cmake --build build -j $(nproc) - - chown -R '"${HOST_UID}"':'"${HOST_GID}"' /workspace/build - ' + cd build + echo "Fetch tokenizer" + wget https://huggingface.co/karpathy/tinyllamas/resolve/main/stories260K/tok512.bin + echo "Fetch llama2c model" + wget https://huggingface.co/karpathy/tinyllamas/resolve/main/stories260K/stories260K.bin + ./bin/llama-convert-llama2c-to-ggml --copy-vocab-from-model ./tok512.bin --llama2c-model stories260K.bin --llama2c-output-model stories260K.gguf + ./bin/llama-completion -m stories260K.gguf -p "One day, Lily met a Shoggoth" -n 500 -c 256 # TODO: simplify the following workflows using a matrix # TODO: run lighter CI on PRs and the full CI only on master (if needed) @@ -1672,342 +1212,7 @@ jobs: run: | GG_BUILD_KLEIDIAI=1 GG_BUILD_EXTRA_TESTS_0=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt - ubuntu-cpu-cmake-riscv64-native: - runs-on: RISCV64 - - steps: - - name: Install dependencies - run: | - sudo apt-get update - - # Install necessary packages - sudo apt-get install -y libatomic1 libtsan2 gcc-14 g++-14 rustup cmake build-essential libssl-dev wget ccache git-lfs - - # Set gcc-14 and g++-14 as the default compilers - sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-14 100 - sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-14 100 - sudo ln -sf /usr/bin/gcc-14 /usr/bin/gcc - sudo ln -sf /usr/bin/g++-14 /usr/bin/g++ - - # Install Rust stable version - rustup install stable - rustup default stable - - git lfs install - - - name: Clone - id: checkout - uses: actions/checkout@v6 - - - name: Check environment - run: | - uname -a - gcc --version - g++ --version - ldd --version - cmake --version - rustc --version - - - name: Setup ccache - run: | - # Set unique cache directory for this job - export CCACHE_DIR="$HOME/.ccache/cpu-cmake-rv64-native" - mkdir -p "$CCACHE_DIR" - - # Configure ccache for optimal performance - ccache --set-config=max_size=5G - ccache --set-config=compression=true - ccache --set-config=compression_level=6 - ccache --set-config=cache_dir="$CCACHE_DIR" - - # Enable more aggressive caching - ccache --set-config=sloppiness=file_macro,time_macros,include_file_mtime,include_file_ctime - ccache --set-config=hash_dir=false - - # Export for subsequent steps - echo "CCACHE_DIR=$CCACHE_DIR" >> $GITHUB_ENV - echo "PATH=/usr/lib/ccache:$PATH" >> $GITHUB_ENV - - - name: Build - id: cmake_build - run: | - cmake -B build \ - -DCMAKE_BUILD_TYPE=Release \ - -DGGML_OPENMP=OFF \ - -DLLAMA_BUILD_EXAMPLES=ON \ - -DLLAMA_BUILD_TOOLS=ON \ - -DLLAMA_BUILD_TESTS=ON \ - -DCMAKE_C_COMPILER_LAUNCHER=ccache \ - -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ - -DGGML_RPC=ON \ - -DCMAKE_C_COMPILER=riscv64-linux-gnu-gcc-14 \ - -DCMAKE_CXX_COMPILER=riscv64-linux-gnu-g++-14 - - cmake --build build --config Release -j $(nproc) - - - name: Test - id: cmake_test - run: | - cd build - ctest -L main --verbose --timeout 900 - - - name: Test llama2c conversion - id: llama2c_test - run: | - cd build - echo "Fetch tokenizer" - wget https://huggingface.co/karpathy/tinyllamas/resolve/main/stories260K/tok512.bin - echo "Fetch llama2c model" - wget https://huggingface.co/karpathy/tinyllamas/resolve/main/stories260K/stories260K.bin - ./bin/llama-convert-llama2c-to-ggml --copy-vocab-from-model ./tok512.bin --llama2c-model stories260K.bin --llama2c-output-model stories260K.gguf - ./bin/llama-completion -m stories260K.gguf -p "One day, Lily met a Shoggoth" -n 500 -c 256 - - ubuntu-cmake-sanitizer-riscv64-native: - runs-on: RISCV64 - - continue-on-error: true - - strategy: - matrix: - sanitizer: [ADDRESS, THREAD, UNDEFINED] - build_type: [Debug] - - steps: - - name: Install dependencies - run: | - sudo apt-get update - - # Install necessary packages - sudo apt-get install -y libatomic1 libtsan2 gcc-14 g++-14 rustup cmake build-essential wget ccache git-lfs - - # Set gcc-14 and g++-14 as the default compilers - sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-14 100 - sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-14 100 - sudo ln -sf /usr/bin/gcc-14 /usr/bin/gcc - sudo ln -sf /usr/bin/g++-14 /usr/bin/g++ - - # Install Rust stable version - rustup install stable - rustup default stable - - git lfs install - - - name: GCC version check - run: | - gcc --version - g++ --version - - - name: Clone - id: checkout - uses: actions/checkout@v6 - - - name: Setup ccache - run: | - # Unique cache directory per matrix combination - export CCACHE_DIR="$HOME/.ccache/sanitizer-${{ matrix.sanitizer }}-${{ matrix.build_type }}" - mkdir -p "$CCACHE_DIR" - - # Configure ccache - ccache --set-config=max_size=5G - ccache --set-config=compression=true - ccache --set-config=compression_level=6 - ccache --set-config=cache_dir="$CCACHE_DIR" - ccache --set-config=sloppiness=file_macro,time_macros,include_file_mtime,include_file_ctime - ccache --set-config=hash_dir=false - - # Export for subsequent steps - echo "CCACHE_DIR=$CCACHE_DIR" >> $GITHUB_ENV - echo "PATH=/usr/lib/ccache:$PATH" >> $GITHUB_ENV - - - name: Build - id: cmake_build - if: ${{ matrix.sanitizer != 'THREAD' }} - run: | - cmake -B build \ - -DLLAMA_OPENSSL=OFF \ - -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \ - -DGGML_OPENMP=ON \ - -DLLAMA_BUILD_EXAMPLES=ON \ - -DLLAMA_BUILD_TOOLS=ON \ - -DLLAMA_BUILD_TESTS=OFF \ - -DCMAKE_C_COMPILER_LAUNCHER=ccache \ - -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ - -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON \ - -DCMAKE_C_COMPILER=riscv64-linux-gnu-gcc-14 \ - -DCMAKE_CXX_COMPILER=riscv64-linux-gnu-g++-14 - - cmake --build build --config ${{ matrix.build_type }} -j $(nproc) - - - name: Build (no OpenMP) - id: cmake_build_no_openmp - if: ${{ matrix.sanitizer == 'THREAD' }} - run: | - cmake -B build \ - -DLLAMA_OPENSSL=OFF \ - -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \ - -DGGML_OPENMP=OFF \ - -DLLAMA_BUILD_EXAMPLES=ON \ - -DLLAMA_BUILD_TOOLS=ON \ - -DLLAMA_BUILD_TESTS=OFF \ - -DCMAKE_C_COMPILER_LAUNCHER=ccache \ - -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ - -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON \ - -DCMAKE_C_COMPILER=riscv64-linux-gnu-gcc-14 \ - -DCMAKE_CXX_COMPILER=riscv64-linux-gnu-g++-14 - - cmake --build build --config ${{ matrix.build_type }} -j $(nproc) - - - name: Test - id: cmake_test - run: | - cd build - ctest -L main --verbose --timeout 900 - - - ubuntu-llguidance-riscv64-native: - runs-on: RISCV64 - steps: - - name: Install dependencies - run: | - sudo apt-get update - - # Install necessary packages - sudo apt-get install -y libatomic1 libtsan2 gcc-14 g++-14 rustup cmake build-essential wget ccache git-lfs - - # Set gcc-14 and g++-14 as the default compilers - sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-14 100 - sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-14 100 - sudo ln -sf /usr/bin/gcc-14 /usr/bin/gcc - sudo ln -sf /usr/bin/g++-14 /usr/bin/g++ - - # Install Rust stable version - rustup install stable - rustup default stable - - git lfs install - - - name: GCC version check - run: | - gcc --version - g++ --version - - - name: Clone - id: checkout - uses: actions/checkout@v6 - - - name: Setup ccache - run: | - export CCACHE_DIR="$HOME/.ccache/llguidance-riscv64" - mkdir -p "$CCACHE_DIR" - - ccache --set-config=max_size=5G - ccache --set-config=compression=true - ccache --set-config=compression_level=6 - ccache --set-config=cache_dir="$CCACHE_DIR" - ccache --set-config=sloppiness=file_macro,time_macros,include_file_mtime,include_file_ctime - ccache --set-config=hash_dir=false - - echo "CCACHE_DIR=$CCACHE_DIR" >> $GITHUB_ENV - echo "PATH=/usr/lib/ccache:$PATH" >> $GITHUB_ENV - - - name: Build - id: cmake_build - run: | - cmake -B build \ - -DLLAMA_OPENSSL=OFF \ - -DCMAKE_BUILD_TYPE=Release \ - -DGGML_OPENMP=OFF \ - -DLLAMA_BUILD_EXAMPLES=ON \ - -DLLAMA_BUILD_TOOLS=ON \ - -DLLAMA_BUILD_TESTS=OFF \ - -DCMAKE_C_COMPILER_LAUNCHER=ccache \ - -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ - -DLLAMA_LLGUIDANCE=ON \ - -DCMAKE_C_COMPILER=riscv64-linux-gnu-gcc-14 \ - -DCMAKE_CXX_COMPILER=riscv64-linux-gnu-g++-14 - - cmake --build build --config Release -j $(nproc) - - - name: Test - id: cmake_test - run: | - cd build - ctest -L main --verbose --timeout 900 - - - ubuntu-cmake-rpc-riscv64-native: - runs-on: RISCV64 - - continue-on-error: true - - steps: - - name: Install dependencies - run: | - sudo apt-get update - - # Install necessary packages - sudo apt-get install -y libatomic1 libtsan2 gcc-14 g++-14 rustup cmake build-essential libssl-dev wget ccache git-lfs - - # Set gcc-14 and g++-14 as the default compilers - sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-14 100 - sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-14 100 - sudo ln -sf /usr/bin/gcc-14 /usr/bin/gcc - sudo ln -sf /usr/bin/g++-14 /usr/bin/g++ - - # Install Rust stable version - rustup install stable - rustup default stable - - git lfs install - - - name: GCC version check - run: | - gcc --version - g++ --version - - - name: Clone - id: checkout - uses: actions/checkout@v6 - - - name: Setup ccache - run: | - export CCACHE_DIR="$HOME/.ccache/rpc-riscv64" - mkdir -p "$CCACHE_DIR" - - ccache --set-config=max_size=5G - ccache --set-config=compression=true - ccache --set-config=compression_level=6 - ccache --set-config=cache_dir="$CCACHE_DIR" - ccache --set-config=sloppiness=file_macro,time_macros,include_file_mtime,include_file_ctime - ccache --set-config=hash_dir=false - - echo "CCACHE_DIR=$CCACHE_DIR" >> $GITHUB_ENV - echo "PATH=/usr/lib/ccache:$PATH" >> $GITHUB_ENV - - - name: Build - id: cmake_build - run: | - cmake -B build \ - -DCMAKE_BUILD_TYPE=Release \ - -DGGML_OPENMP=OFF \ - -DLLAMA_BUILD_EXAMPLES=ON \ - -DLLAMA_BUILD_TOOLS=ON \ - -DLLAMA_BUILD_TESTS=ON \ - -DCMAKE_C_COMPILER_LAUNCHER=ccache \ - -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ - -DCMAKE_C_COMPILER=riscv64-linux-gnu-gcc-14 \ - -DCMAKE_CXX_COMPILER=riscv64-linux-gnu-g++-14 \ - -DGGML_RPC=ON - - cmake --build build --config Release -j $(nproc) - - - name: Test - id: cmake_test - run: | - cd build - ctest -L main --verbose - - ggml-ci-arm64-graviton4-kleidiai: + ggml-ci-arm64-cpu-kleidiai-graviton4: runs-on: ah-ubuntu_22_04-c8g_8x steps: @@ -2044,7 +1249,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.16 with: - key: ggml-ci-arm64-graviton4-kleidiai + key: ggml-ci-arm64-cpu-kleidiai-graviton4 evict-old-files: 1d save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} diff --git a/.github/workflows/python-lint.yml b/.github/workflows/python-lint.yml index 8d1dd7a7d5..e21b3b6568 100644 --- a/.github/workflows/python-lint.yml +++ b/.github/workflows/python-lint.yml @@ -4,10 +4,16 @@ on: push: branches: - master - paths: ['.github/workflows/python-lint.yml', '**/*.py'] + paths: [ + '.github/workflows/python-lint.yml', + '**/*.py' + ] pull_request: types: [opened, synchronize, reopened] - paths: ['.github/workflows/python-lint.yml', '**/*.py'] + paths: [ + '.github/workflows/python-lint.yml', + '**/*.py' + ] concurrency: group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }} diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index f329630071..b0f2714ffd 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -10,7 +10,22 @@ on: push: branches: - master - paths: ['.github/workflows/release.yml', '**/CMakeLists.txt', '**/.cmake', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.cuh', '**/*.swift', '**/*.m', '**/*.metal', '**/*.comp'] + paths: [ + '.github/workflows/release.yml', + '**/CMakeLists.txt', + '**/.cmake', + '**/*.h', + '**/*.hpp', + '**/*.c', + '**/*.cpp', + '**/*.cu', + '**/*.cuh', + '**/*.swift', + '**/*.m', + '**/*.metal', + '**/*.comp', + '**/*.glsl' + ] concurrency: group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }} @@ -34,7 +49,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.16 with: - key: macOS-latest-cmake-arm64 + key: macOS-latest-arm64 evict-old-files: 1d - name: Build @@ -81,7 +96,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.16 with: - key: macOS-latest-cmake-x64 + key: macOS-latest-x64 evict-old-files: 1d - name: Build @@ -140,7 +155,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.16 with: - key: ubuntu-cpu-cmake-${{ matrix.build }} + key: ubuntu-cpu-${{ matrix.build }} evict-old-files: 1d - name: Dependencies @@ -191,7 +206,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.16 with: - key: ubuntu-22-cmake-vulkan + key: ubuntu-22-vulkan evict-old-files: 1d - name: Dependencies @@ -256,7 +271,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.16 with: - key: ubuntu-24-cmake-openvino-release-no-preset-v1 + key: ubuntu-24-openvino-release-no-preset-v1 evict-old-files: 1d - name: Dependencies @@ -329,7 +344,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.16 with: - key: windows-latest-cmake-cpu-${{ matrix.arch }} + key: windows-latest-cpu-${{ matrix.arch }} variant: ccache evict-old-files: 1d @@ -390,7 +405,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.16 with: - key: windows-latest-cmake-${{ matrix.backend }}-${{ matrix.arch }} + key: windows-latest-${{ matrix.backend }}-${{ matrix.arch }} variant: ccache evict-old-files: 1d @@ -536,7 +551,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.16 with: - key: windows-latest-cmake-sycl + key: windows-latest-sycl variant: ccache evict-old-files: 1d @@ -616,7 +631,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.16 with: - key: ubuntu-rocm-cmake-${{ matrix.ROCM_VERSION }}-${{ matrix.build }} + key: ubuntu-rocm-${{ matrix.ROCM_VERSION }}-${{ matrix.build }} evict-old-files: 1d - name: Dependencies @@ -726,7 +741,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.16 with: - key: windows-latest-cmake-hip-${{ env.HIPSDK_INSTALLER_VERSION }}-${{ matrix.name }}-x64 + key: windows-latest-hip-${{ env.HIPSDK_INSTALLER_VERSION }}-${{ matrix.name }}-x64 evict-old-files: 1d - name: Install ROCm diff --git a/.github/workflows/server-sanitize.yml b/.github/workflows/server-sanitize.yml new file mode 100644 index 0000000000..4c9f447cf8 --- /dev/null +++ b/.github/workflows/server-sanitize.yml @@ -0,0 +1,105 @@ +name: Server (sanitize) + +on: + workflow_dispatch: # allows manual triggering + inputs: + sha: + description: 'Commit SHA1 to build' + required: false + type: string + slow_tests: + description: 'Run slow tests' + required: true + type: boolean + push: + branches: + - master + paths: [ + '.github/workflows/server-sanitize.yml', + '**/CMakeLists.txt', + '**/Makefile', + '**/*.h', + '**/*.hpp', + '**/*.c', + '**/*.cpp', + 'tools/server/**.*' + ] + +env: + LLAMA_LOG_COLORS: 1 + LLAMA_LOG_PREFIX: 1 + LLAMA_LOG_TIMESTAMPS: 1 + LLAMA_LOG_VERBOSITY: 10 + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || github.run_id }} + cancel-in-progress: true + +jobs: + server: + runs-on: ubuntu-latest + + strategy: + matrix: + sanitizer: [ADDRESS, UNDEFINED] # THREAD is very slow + build_type: [RelWithDebInfo] + fail-fast: false + + steps: + - name: Dependencies + id: depends + run: | + sudo apt-get update + sudo apt-get -y install \ + build-essential \ + xxd \ + git \ + cmake \ + curl \ + wget \ + language-pack-en \ + libssl-dev + + - name: Clone + id: checkout + uses: actions/checkout@v6 + with: + fetch-depth: 0 + ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }} + + - name: Build + id: cmake_build + run: | + cmake -B build \ + -DLLAMA_BUILD_BORINGSSL=ON \ + -DGGML_SCHED_NO_REALLOC=ON \ + -DGGML_SANITIZE_ADDRESS=${{ matrix.sanitizer == 'ADDRESS' }} \ + -DGGML_SANITIZE_THREAD=${{ matrix.sanitizer == 'THREAD' }} \ + -DGGML_SANITIZE_UNDEFINED=${{ matrix.sanitizer == 'UNDEFINED' }} \ + -DLLAMA_SANITIZE_ADDRESS=${{ matrix.sanitizer == 'ADDRESS' }} \ + -DLLAMA_SANITIZE_THREAD=${{ matrix.sanitizer == 'THREAD' }} \ + -DLLAMA_SANITIZE_UNDEFINED=${{ matrix.sanitizer == 'UNDEFINED' }} + cmake --build build --config ${{ matrix.build_type }} -j $(nproc) --target llama-server + + - name: Python setup + id: setup_python + uses: actions/setup-python@v6 + with: + python-version: '3.11' + pip-install: -r tools/server/tests/requirements.txt + + - name: Tests + id: server_integration_tests + if: ${{ (!matrix.disabled_on_pr || !github.event.pull_request) }} + run: | + cd tools/server/tests + export ${{ matrix.extra_args }} + pytest -v -x -m "not slow" + + - name: Slow tests + id: server_integration_tests_slow + if: ${{ (github.event.schedule || github.event.inputs.slow_tests == 'true') && matrix.build_type == 'Release' }} + run: | + cd tools/server/tests + export ${{ matrix.extra_args }} + SLOW_TESTS=1 pytest -v -x diff --git a/.github/workflows/server-self-hosted.yml b/.github/workflows/server-self-hosted.yml index a11aea7e89..29bd79690a 100644 --- a/.github/workflows/server-self-hosted.yml +++ b/.github/workflows/server-self-hosted.yml @@ -14,7 +14,19 @@ on: push: branches: - master - paths: ['.github/workflows/server-self-hosted.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'tools/server/**.*'] + paths: [ + '.github/workflows/server-self-hosted.yml', + '**/CMakeLists.txt', + '**/Makefile', + '**/*.h', + '**/*.hpp', + '**/*.c', + '**/*.cpp', + '**/*.cu', + '**/*.swift', + '**/*.m', + 'tools/server/**.*' + ] env: LLAMA_LOG_COLORS: 1 diff --git a/.github/workflows/server-webui.yml b/.github/workflows/server-webui.yml index 4d560ff58d..492107ffd8 100644 --- a/.github/workflows/server-webui.yml +++ b/.github/workflows/server-webui.yml @@ -1,4 +1,3 @@ -# Server WebUI build and tests name: Server WebUI on: @@ -11,10 +10,20 @@ on: push: branches: - master - paths: ['.github/workflows/server-webui.yml', 'tools/server/webui/**.*', 'tools/server/tests/**.*', 'tools/server/public/**'] + paths: [ + '.github/workflows/server-webui.yml', + 'tools/server/webui/**.*', + 'tools/server/tests/**.*', + 'tools/server/public/**' + ] pull_request: types: [opened, synchronize, reopened] - paths: ['.github/workflows/server-webui.yml', 'tools/server/webui/**.*', 'tools/server/tests/**.*', 'tools/server/public/**'] + paths: [ + '.github/workflows/server-webui.yml', + 'tools/server/webui/**.*', + 'tools/server/tests/**.*', + 'tools/server/public/**' + ] env: LLAMA_LOG_COLORS: 1 diff --git a/.github/workflows/server.yml b/.github/workflows/server.yml index 99d05226ba..750c29f08e 100644 --- a/.github/workflows/server.yml +++ b/.github/workflows/server.yml @@ -1,4 +1,3 @@ -# Server build and tests name: Server on: @@ -15,10 +14,34 @@ on: push: branches: - master - paths: ['.github/workflows/server.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'tools/server/**.*'] + paths: [ + '.github/workflows/server.yml', + '**/CMakeLists.txt', + '**/Makefile', + '**/*.h', + '**/*.hpp', + '**/*.c', + '**/*.cpp', + '**/*.cu', + '**/*.swift', + '**/*.m', + 'tools/server/**.*' + ] pull_request: types: [opened, synchronize, reopened] - paths: ['.github/workflows/server.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'tools/server/**.*'] + paths: [ + '.github/workflows/server.yml', + '**/CMakeLists.txt', + '**/Makefile', + '**/*.h', + '**/*.hpp', + '**/*.c', + '**/*.cpp', + '**/*.cu', + '**/*.swift', + '**/*.m', + 'tools/server/**.*' + ] env: LLAMA_LOG_COLORS: 1 @@ -34,17 +57,18 @@ jobs: server: runs-on: ubuntu-latest + name: server (${{ matrix.wf_name }}) strategy: matrix: - sanitizer: [ADDRESS, UNDEFINED] # THREAD is very slow - build_type: [RelWithDebInfo] + build_type: [Release] + wf_name: ["default"] include: - build_type: Release - sanitizer: "" extra_args: "" + wf_name: "default" - build_type: Release - sanitizer: "" extra_args: "LLAMA_ARG_BACKEND_SAMPLING=1" + wf_name: "backend-sampling" fail-fast: false steps: @@ -74,13 +98,7 @@ jobs: run: | cmake -B build \ -DLLAMA_BUILD_BORINGSSL=ON \ - -DGGML_SCHED_NO_REALLOC=ON \ - -DGGML_SANITIZE_ADDRESS=${{ matrix.sanitizer == 'ADDRESS' }} \ - -DGGML_SANITIZE_THREAD=${{ matrix.sanitizer == 'THREAD' }} \ - -DGGML_SANITIZE_UNDEFINED=${{ matrix.sanitizer == 'UNDEFINED' }} \ - -DLLAMA_SANITIZE_ADDRESS=${{ matrix.sanitizer == 'ADDRESS' }} \ - -DLLAMA_SANITIZE_THREAD=${{ matrix.sanitizer == 'THREAD' }} \ - -DLLAMA_SANITIZE_UNDEFINED=${{ matrix.sanitizer == 'UNDEFINED' }} + -DGGML_SCHED_NO_REALLOC=ON cmake --build build --config ${{ matrix.build_type }} -j $(nproc) --target llama-server - name: Python setup