# CI: move the self-hosted runner jobs into dedicated workflows.
#
# - Adds .github/workflows/build-self-hosted.yml, which receives the ggml-ci
#   self-hosted jobs deleted from build.yml. The X64 runner label and the
#   "-x64-" job-name infix are dropped in the move.
# - Renames server-metal.yml to server-self-hosted.yml, adds the llama-server
#   runner label to server-metal and adds a server-cuda job.
# - Extends the OpenVINO "Sync versions" comments with build-self-hosted.yml.
#
# Review fixes applied on top of the submitted patch:
# - build-self-hosted.yml: push.paths listed build.yml; it now lists the
#   workflow's own file, matching its pull_request.paths.
# - build-self-hosted.yml: '**/.cmake' only matches files literally named
#   ".cmake"; it is now '**/*.cmake' in both path lists.
# - server-self-hosted.yml: server-cuda runs on a Linux runner but sized the
#   build with the macOS-only "sysctl -n hw.logicalcpu"; it now uses nproc.
#
# NOTE(review): server-cuda configures with -DGGML_SCHED_NO_REALLOC=ON only;
# confirm whether an explicit CUDA backend switch is intended for this job.
# NOTE(review): line breaks, indentation and blank context lines were rebuilt
# from the hunk line counts; run `git apply --check` before merging. The
# "index" blob ids are kept from the original and no longer describe the two
# edited post-images.
diff --git a/.github/workflows/build-cache.yml b/.github/workflows/build-cache.yml
index 7cfdaff605..dffbf2b4ab 100644
--- a/.github/workflows/build-cache.yml
+++ b/.github/workflows/build-cache.yml
@@ -67,7 +67,7 @@ jobs:
     runs-on: ubuntu-24.04
 
     env:
-      # Sync versions in build.yml, release.yml, build-cache.yml, .devops/openvino.Dockerfile
+      # Sync versions in build.yml, build-self-hosted.yml, release.yml, build-cache.yml, .devops/openvino.Dockerfile
       OPENVINO_VERSION_MAJOR: "2026.0"
       OPENVINO_VERSION_FULL: "2026.0.0.20965.c6d6a13a886"
 
diff --git a/.github/workflows/build-self-hosted.yml b/.github/workflows/build-self-hosted.yml
new file mode 100644
index 0000000000..eba06b96bc
--- /dev/null
+++ b/.github/workflows/build-self-hosted.yml
@@ -0,0 +1,250 @@
+name: CI (self-hosted)
+
+on:
+  workflow_dispatch: # allows manual triggering
+  push:
+    branches:
+      - master
+    paths: [
+      '.github/workflows/build-self-hosted.yml',
+      '**/CMakeLists.txt',
+      '**/*.cmake',
+      '**/*.h',
+      '**/*.hpp',
+      '**/*.c',
+      '**/*.cpp',
+      '**/*.cu',
+      '**/*.cuh',
+      '**/*.swift',
+      '**/*.m',
+      '**/*.metal',
+      '**/*.comp',
+      '**/*.glsl',
+      '**/*.wgsl'
+    ]
+
+  pull_request:
+    types: [opened, synchronize, reopened]
+    paths: [
+      '.github/workflows/build-self-hosted.yml',
+      '**/CMakeLists.txt',
+      '**/*.cmake',
+      '**/*.h',
+      '**/*.hpp',
+      '**/*.c',
+      '**/*.cpp',
+      '**/*.cu',
+      '**/*.cuh',
+      '**/*.swift',
+      '**/*.m',
+      '**/*.metal',
+      '**/*.comp',
+      '**/*.glsl',
+      '**/*.wgsl'
+    ]
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
+  cancel-in-progress: true
+
+env:
+  GGML_NLOOP: 3
+  GGML_N_THREADS: 1
+  LLAMA_LOG_COLORS: 1
+  LLAMA_LOG_PREFIX: 1
+  LLAMA_LOG_TIMESTAMPS: 1
+
+jobs:
+  ggml-ci-nvidia-cuda:
+    runs-on: [self-hosted, Linux, NVIDIA]
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v6
+
+      - name: Test
+        id: ggml-ci
+        run: |
+          nvidia-smi
+          GG_BUILD_CUDA=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
+
+  ggml-ci-nvidia-vulkan-cm:
+    runs-on: [self-hosted, Linux, NVIDIA]
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v6
+
+      - name: Test
+        id: ggml-ci
+        run: |
+          vulkaninfo --summary
+          GG_BUILD_VULKAN=1 GGML_VK_DISABLE_COOPMAT2=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
+
+  ggml-ci-nvidia-vulkan-cm2:
+    runs-on: [self-hosted, Linux, NVIDIA, COOPMAT2]
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v6
+
+      - name: Test
+        id: ggml-ci
+        run: |
+          vulkaninfo --summary
+          GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
+
+  ggml-ci-cpu-amx:
+    runs-on: [self-hosted, Linux, CPU, AMX]
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v6
+
+      - name: Test
+        id: ggml-ci
+        run: |
+          bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
+
+  # ggml-ci-amd-vulkan:
+  #   runs-on: [self-hosted, Linux, AMD]
+
+  #   steps:
+  #     - name: Clone
+  #       id: checkout
+  #       uses: actions/checkout@v6
+
+  #     - name: Test
+  #       id: ggml-ci
+  #       run: |
+  #         vulkaninfo --summary
+  #         GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
+
+  # ggml-ci-amd-rocm:
+  #   runs-on: [self-hosted, Linux, AMD]
+
+  #   steps:
+  #     - name: Clone
+  #       id: checkout
+  #       uses: actions/checkout@v6
+
+  #     - name: Test
+  #       id: ggml-ci
+  #       run: |
+  #         amd-smi static
+  #         GG_BUILD_ROCM=1 GG_BUILD_AMDGPU_TARGETS="gfx1101" bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
+
+  ggml-ci-mac-metal:
+    runs-on: [self-hosted, macOS, ARM64]
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v6
+
+      - name: Test
+        id: ggml-ci
+        run: |
+          GG_BUILD_METAL=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp
+
+  ggml-ci-mac-webgpu:
+    runs-on: [self-hosted, macOS, ARM64]
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v6
+
+      - name: Dawn Dependency
+        id: dawn-depends
+        run: |
+          DAWN_VERSION="v2.0.0"
+          DAWN_OWNER="reeselevine"
+          DAWN_REPO="dawn"
+          DAWN_ASSET_NAME="Dawn-5e9a4865b1635796ccc77dd30057f2b4002a1355-macos-latest-Release"
+          echo "Fetching release asset from https://github.com/${DAWN_OWNER}/${DAWN_REPO}/releases/download/${DAWN_VERSION}/${DAWN_ASSET_NAME}.zip"
+          curl -L -o artifact.zip \
+            "https://github.com/${DAWN_OWNER}/${DAWN_REPO}/releases/download/${DAWN_VERSION}/${DAWN_ASSET_NAME}.zip"
+          mkdir dawn
+          unzip artifact.zip
+          tar -xvf ${DAWN_ASSET_NAME}.tar.gz -C dawn --strip-components=1
+
+      - name: Test
+        id: ggml-ci
+        run: |
+          GG_BUILD_WEBGPU=1 GG_BUILD_WEBGPU_DAWN_PREFIX="$GITHUB_WORKSPACE/dawn" \
+            bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp
+
+  ggml-ci-mac-vulkan:
+    runs-on: [self-hosted, macOS, ARM64]
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v6
+
+      - name: Test
+        id: ggml-ci
+        run: |
+          vulkaninfo --summary
+          GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp
+
+  ggml-ci-linux-intel-vulkan:
+    runs-on: [self-hosted, Linux, Intel]
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v6
+        with:
+          persist-credentials: false
+
+      - name: Test
+        id: ggml-ci
+        run: |
+          vulkaninfo --summary
+          GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp
+
+  ggml-ci-intel-openvino-gpu-low-perf:
+    runs-on: [self-hosted, Linux, Intel, OpenVINO]
+
+    env:
+      # Sync versions in build.yml, build-self-hosted.yml, release.yml, build-cache.yml, .devops/openvino.Dockerfile
+      OPENVINO_VERSION_MAJOR: "2026.0"
+      OPENVINO_VERSION_FULL: "2026.0.0.20965.c6d6a13a886"
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v6
+
+      - name: Use OpenVINO Toolkit Cache
+        uses: actions/cache@v5
+        id: cache-openvino
+        with:
+          path: ./openvino_toolkit
+          key: openvino-toolkit-v${{ env.OPENVINO_VERSION_FULL }}-${{ runner.os }}
+
+      - name: Setup OpenVINO Toolkit
+        if: steps.cache-openvino.outputs.cache-hit != 'true'
+        uses: ./.github/actions/linux-setup-openvino
+        with:
+          path: ./openvino_toolkit
+          version_major: ${{ env.OPENVINO_VERSION_MAJOR }}
+          version_full: ${{ env.OPENVINO_VERSION_FULL }}
+
+      - name: Install OpenVINO dependencies
+        run: |
+          cd ./openvino_toolkit
+          chmod +x ./install_dependencies/install_openvino_dependencies.sh
+          echo "Y" | sudo -E ./install_dependencies/install_openvino_dependencies.sh
+
+      - name: Test
+        id: ggml-ci
+        run: |
+          source ./openvino_toolkit/setupvars.sh
+          GG_BUILD_OPENVINO=1 GGML_OPENVINO_DEVICE=GPU GG_BUILD_LOW_PERF=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index cfc78643b0..460a770122 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -739,7 +739,7 @@ jobs:
     runs-on: ${{ fromJSON(matrix.runner) }}
 
     env:
-      # Sync versions in build.yml, release.yml, build-cache.yml, .devops/openvino.Dockerfile
+      # Sync versions in build.yml, build-self-hosted.yml, release.yml, build-cache.yml, .devops/openvino.Dockerfile
       OPENVINO_VERSION_MAJOR: "2026.0"
       OPENVINO_VERSION_FULL: "2026.0.0.20965.c6d6a13a886"
 
@@ -1646,160 +1646,6 @@ jobs:
         run: |
           LLAMA_ARG_THREADS=$(nproc) GG_BUILD_NO_BF16=1 GG_BUILD_EXTRA_TESTS_0=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
 
-  ggml-ci-x64-nvidia-cuda:
-    runs-on: [self-hosted, Linux, X64, NVIDIA]
-
-    steps:
-      - name: Clone
-        id: checkout
-        uses: actions/checkout@v6
-
-      - name: Test
-        id: ggml-ci
-        run: |
-          nvidia-smi
-          GG_BUILD_CUDA=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
-
-  ggml-ci-x64-nvidia-vulkan-cm:
-    runs-on: [self-hosted, Linux, X64, NVIDIA]
-
-    steps:
-      - name: Clone
-        id: checkout
-        uses: actions/checkout@v6
-
-      - name: Test
-        id: ggml-ci
-        run: |
-          vulkaninfo --summary
-          GG_BUILD_VULKAN=1 GGML_VK_DISABLE_COOPMAT2=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
-
-  ggml-ci-x64-nvidia-vulkan-cm2:
-    runs-on: [self-hosted, Linux, X64, NVIDIA, COOPMAT2]
-
-    steps:
-      - name: Clone
-        id: checkout
-        uses: actions/checkout@v6
-
-      - name: Test
-        id: ggml-ci
-        run: |
-          vulkaninfo --summary
-          GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
-
-  ggml-ci-x64-cpu-amx:
-    runs-on: [self-hosted, Linux, X64, CPU, AMX]
-
-    steps:
-      - name: Clone
-        id: checkout
-        uses: actions/checkout@v6
-
-      - name: Test
-        id: ggml-ci
-        run: |
-          bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
-
-  # ggml-ci-x64-amd-vulkan:
-  #   runs-on: [self-hosted, Linux, X64, AMD]
-
-  #   steps:
-  #     - name: Clone
-  #       id: checkout
-  #       uses: actions/checkout@v6
-
-  #     - name: Test
-  #       id: ggml-ci
-  #       run: |
-  #         vulkaninfo --summary
-  #         GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
-
-  # ggml-ci-x64-amd-rocm:
-  #   runs-on: [self-hosted, Linux, X64, AMD]
-
-  #   steps:
-  #     - name: Clone
-  #       id: checkout
-  #       uses: actions/checkout@v6
-
-  #     - name: Test
-  #       id: ggml-ci
-  #       run: |
-  #         amd-smi static
-  #         GG_BUILD_ROCM=1 GG_BUILD_AMDGPU_TARGETS="gfx1101" bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
-
-  ggml-ci-mac-metal:
-    runs-on: [self-hosted, macOS, ARM64]
-
-    steps:
-      - name: Clone
-        id: checkout
-        uses: actions/checkout@v6
-
-      - name: Test
-        id: ggml-ci
-        run: |
-          GG_BUILD_METAL=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp
-
-  ggml-ci-mac-webgpu:
-    runs-on: [self-hosted, macOS, ARM64]
-
-    steps:
-      - name: Clone
-        id: checkout
-        uses: actions/checkout@v6
-
-      - name: Dawn Dependency
-        id: dawn-depends
-        run: |
-          DAWN_VERSION="v2.0.0"
-          DAWN_OWNER="reeselevine"
-          DAWN_REPO="dawn"
-          DAWN_ASSET_NAME="Dawn-5e9a4865b1635796ccc77dd30057f2b4002a1355-macos-latest-Release"
-          echo "Fetching release asset from https://github.com/${DAWN_OWNER}/${DAWN_REPO}/releases/download/${DAWN_VERSION}/${DAWN_ASSET_NAME}.zip"
-          curl -L -o artifact.zip \
-            "https://github.com/${DAWN_OWNER}/${DAWN_REPO}/releases/download/${DAWN_VERSION}/${DAWN_ASSET_NAME}.zip"
-          mkdir dawn
-          unzip artifact.zip
-          tar -xvf ${DAWN_ASSET_NAME}.tar.gz -C dawn --strip-components=1
-
-      - name: Test
-        id: ggml-ci
-        run: |
-          GG_BUILD_WEBGPU=1 GG_BUILD_WEBGPU_DAWN_PREFIX="$GITHUB_WORKSPACE/dawn" \
-            bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp
-
-  ggml-ci-mac-vulkan:
-    runs-on: [self-hosted, macOS, ARM64]
-
-    steps:
-      - name: Clone
-        id: checkout
-        uses: actions/checkout@v6
-
-      - name: Test
-        id: ggml-ci
-        run: |
-          vulkaninfo --summary
-          GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp
-
-  ggml-ci-x64-linux-intel-vulkan:
-    runs-on: [self-hosted, Linux, X64, Intel]
-
-    steps:
-      - name: Clone
-        id: checkout
-        uses: actions/checkout@v6
-        with:
-          persist-credentials: false
-
-      - name: Test
-        id: ggml-ci
-        run: |
-          vulkaninfo --summary
-          GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp
-
   ggml-ci-arm64-cpu-kleidiai:
     runs-on: ubuntu-22.04-arm
 
@@ -1826,46 +1672,6 @@ jobs:
         run: |
           GG_BUILD_KLEIDIAI=1 GG_BUILD_EXTRA_TESTS_0=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
 
-  ggml-ci-x64-intel-openvino-gpu-low-perf:
-    runs-on: [self-hosted, Linux, X64, Intel, OpenVINO]
-
-    env:
-      # Sync versions in build.yml, release.yml, build-cache.yml, .devops/openvino.Dockerfile
-      OPENVINO_VERSION_MAJOR: "2026.0"
-      OPENVINO_VERSION_FULL: "2026.0.0.20965.c6d6a13a886"
-
-    steps:
-      - name: Clone
-        id: checkout
-        uses: actions/checkout@v6
-
-      - name: Use OpenVINO Toolkit Cache
-        uses: actions/cache@v5
-        id: cache-openvino
-        with:
-          path: ./openvino_toolkit
-          key: openvino-toolkit-v${{ env.OPENVINO_VERSION_FULL }}-${{ runner.os }}
-
-      - name: Setup OpenVINO Toolkit
-        if: steps.cache-openvino.outputs.cache-hit != 'true'
-        uses: ./.github/actions/linux-setup-openvino
-        with:
-          path: ./openvino_toolkit
-          version_major: ${{ env.OPENVINO_VERSION_MAJOR }}
-          version_full: ${{ env.OPENVINO_VERSION_FULL }}
-
-      - name: Install OpenVINO dependencies
-        run: |
-          cd ./openvino_toolkit
-          chmod +x ./install_dependencies/install_openvino_dependencies.sh
-          echo "Y" | sudo -E ./install_dependencies/install_openvino_dependencies.sh
-
-      - name: Test
-        id: ggml-ci
-        run: |
-          source ./openvino_toolkit/setupvars.sh
-          GG_BUILD_OPENVINO=1 GGML_OPENVINO_DEVICE=GPU GG_BUILD_LOW_PERF=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
-
   ubuntu-cpu-cmake-riscv64-native:
     runs-on: RISCV64
 
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 1620d9a1bc..f329630071 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -238,7 +238,7 @@ jobs:
       openvino_version: ${{ steps.openvino_version.outputs.value }}
 
     env:
-      # Sync versions in build.yml, release.yml, build-cache.yml, .devops/openvino.Dockerfile
+      # Sync versions in build.yml, build-self-hosted.yml, release.yml, build-cache.yml, .devops/openvino.Dockerfile
       OPENVINO_VERSION_MAJOR: "2026.0"
       OPENVINO_VERSION_FULL: "2026.0.0.20965.c6d6a13a886"
 
diff --git a/.github/workflows/server-metal.yml b/.github/workflows/server-self-hosted.yml
similarity index 56%
rename from .github/workflows/server-metal.yml
rename to .github/workflows/server-self-hosted.yml
index 1d707bef44..a11aea7e89 100644
--- a/.github/workflows/server-metal.yml
+++ b/.github/workflows/server-self-hosted.yml
@@ -1,4 +1,4 @@
-name: Server-Metal
+name: Server (self-hosted)
 
 on:
   workflow_dispatch: # allows manual triggering
@@ -14,7 +14,7 @@ on:
   push:
     branches:
       - master
-    paths: ['.github/workflows/server-metal.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'tools/server/**.*']
+    paths: ['.github/workflows/server-self-hosted.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'tools/server/**.*']
 
 env:
   LLAMA_LOG_COLORS: 1
@@ -28,7 +28,7 @@ concurrency:
 
 jobs:
   server-metal:
-    runs-on: [self-hosted, macOS, ARM64]
+    runs-on: [self-hosted, llama-server, macOS, ARM64]
 
     name: server-metal (${{ matrix.wf_name }})
     strategy:
@@ -71,3 +71,42 @@ jobs:
           pip install -r requirements.txt
           export ${{ matrix.extra_args }}
           pytest -v -x -m "not slow"
+
+  server-cuda:
+    runs-on: [self-hosted, llama-server, Linux, NVIDIA]
+
+    name: server-cuda (${{ matrix.wf_name }})
+    strategy:
+      matrix:
+        build_type: [Release]
+        wf_name: ["GPUx1"]
+        include:
+          - build_type: Release
+            extra_args: "LLAMA_ARG_BACKEND_SAMPLING=1"
+            wf_name: "GPUx1, backend-sampling"
+      fail-fast: false
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v6
+        with:
+          fetch-depth: 0
+          ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }}
+
+      - name: Build
+        id: cmake_build
+        run: |
+          cmake -B build -DGGML_SCHED_NO_REALLOC=ON
+          cmake --build build --config ${{ matrix.build_type }} -j $(nproc) --target llama-server
+
+      - name: Tests
+        id: server_integration_tests
+        if: ${{ (!matrix.disabled_on_pr || !github.event.pull_request) }}
+        run: |
+          cd tools/server/tests
+          python3 -m venv venv
+          source venv/bin/activate
+          pip install -r requirements.txt
+          export ${{ matrix.extra_args }}
+          pytest -v -x -m "not slow"