From f4b5bf2f329a1c5aa9af5380344f81fc3e1e24df Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Tue, 14 Apr 2026 15:58:09 +0300 Subject: [PATCH] ci : re-enable mac workflows (#21894) * ci : re-enable mac workflows * webgpu : fix compile warning --- .github/workflows/build-self-hosted.yml | 108 +++++++++++------------ .github/workflows/server-self-hosted.yml | 77 ++++++++-------- ggml/src/ggml-webgpu/ggml-webgpu.cpp | 2 +- 3 files changed, 93 insertions(+), 94 deletions(-) diff --git a/.github/workflows/build-self-hosted.yml b/.github/workflows/build-self-hosted.yml index eeea820ba1..0efe877162 100644 --- a/.github/workflows/build-self-hosted.yml +++ b/.github/workflows/build-self-hosted.yml @@ -141,61 +141,59 @@ jobs: # amd-smi static # GG_BUILD_ROCM=1 GG_BUILD_AMDGPU_TARGETS="gfx1101" bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp - # TODO: sandbox Mac runners - # ggml-ci-mac-metal: - # runs-on: [self-hosted, macOS, ARM64] - # - # steps: - # - name: Clone - # id: checkout - # uses: actions/checkout@v6 - # - # - name: Test - # id: ggml-ci - # run: | - # GG_BUILD_METAL=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp - # - # ggml-ci-mac-webgpu: - # runs-on: [self-hosted, macOS, ARM64] - # - # steps: - # - name: Clone - # id: checkout - # uses: actions/checkout@v6 - # - # - name: Dawn Dependency - # id: dawn-depends - # run: | - # DAWN_VERSION="v2.0.0" - # DAWN_OWNER="reeselevine" - # DAWN_REPO="dawn" - # DAWN_ASSET_NAME="Dawn-5e9a4865b1635796ccc77dd30057f2b4002a1355-macos-latest-Release" - # echo "Fetching release asset from https://github.com/${DAWN_OWNER}/${DAWN_REPO}/releases/download/${DAWN_VERSION}/${DAWN_ASSET_NAME}.zip" - # curl -L -o artifact.zip \ - # "https://github.com/${DAWN_OWNER}/${DAWN_REPO}/releases/download/${DAWN_VERSION}/${DAWN_ASSET_NAME}.zip" - # mkdir dawn - # unzip artifact.zip - # tar -xvf ${DAWN_ASSET_NAME}.tar.gz -C dawn --strip-components=1 - # - # - name: Test - # id: ggml-ci - # run: | - # GG_BUILD_WEBGPU=1 
GG_BUILD_WEBGPU_DAWN_PREFIX="$GITHUB_WORKSPACE/dawn" \ - # bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp - # - # ggml-ci-mac-vulkan: - # runs-on: [self-hosted, macOS, ARM64] - # - # steps: - # - name: Clone - # id: checkout - # uses: actions/checkout@v6 - # - # - name: Test - # id: ggml-ci - # run: | - # vulkaninfo --summary - # GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp + ggml-ci-mac-metal: + runs-on: [self-hosted, macOS, ARM64] + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v6 + + - name: Test + id: ggml-ci + run: | + GG_BUILD_METAL=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp + + ggml-ci-mac-webgpu: + runs-on: [self-hosted, macOS, ARM64] + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v6 + + - name: Dawn Dependency + id: dawn-depends + run: | + DAWN_VERSION="v20260317.182325" + DAWN_OWNER="google" + DAWN_REPO="dawn" + DAWN_ASSET_NAME="Dawn-18eb229ef5f707c1464cc581252e7603c73a3ef0-macos-latest-Release" + echo "Fetching release asset from https://github.com/google/dawn/releases/download/${DAWN_VERSION}/${DAWN_ASSET_NAME}.tar.gz" + curl -L -o artifact.tar.gz \ + "https://github.com/google/dawn/releases/download/${DAWN_VERSION}/${DAWN_ASSET_NAME}.tar.gz" + mkdir dawn + tar -xvf artifact.tar.gz -C dawn --strip-components=1 + + - name: Test + id: ggml-ci + run: | + GG_BUILD_WEBGPU=1 GG_BUILD_WEBGPU_DAWN_PREFIX="$GITHUB_WORKSPACE/dawn" \ + bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp + + ggml-ci-mac-vulkan: + runs-on: [self-hosted, macOS, ARM64] + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v6 + + - name: Test + id: ggml-ci + run: | + vulkaninfo --summary + GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp ggml-ci-linux-intel-vulkan: runs-on: [self-hosted, Linux, Intel] diff --git a/.github/workflows/server-self-hosted.yml b/.github/workflows/server-self-hosted.yml index 29bd79690a..4b9f4b631a 100644 --- 
a/.github/workflows/server-self-hosted.yml +++ b/.github/workflows/server-self-hosted.yml @@ -84,41 +84,42 @@ jobs: export ${{ matrix.extra_args }} pytest -v -x -m "not slow" - server-cuda: - runs-on: [self-hosted, llama-server, Linux, NVIDIA] - - name: server-cuda (${{ matrix.wf_name }}) - strategy: - matrix: - build_type: [Release] - wf_name: ["GPUx1"] - include: - - build_type: Release - extra_args: "LLAMA_ARG_BACKEND_SAMPLING=1" - wf_name: "GPUx1, backend-sampling" - fail-fast: false - - steps: - - name: Clone - id: checkout - uses: actions/checkout@v6 - with: - fetch-depth: 0 - ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }} - - - name: Build - id: cmake_build - run: | - cmake -B build -DGGML_SCHED_NO_REALLOC=ON - cmake --build build --config ${{ matrix.build_type }} -j $(sysctl -n hw.logicalcpu) --target llama-server - - - name: Tests - id: server_integration_tests - if: ${{ (!matrix.disabled_on_pr || !github.event.pull_request) }} - run: | - cd tools/server/tests - python3 -m venv venv - source venv/bin/activate - pip install -r requirements.txt - export ${{ matrix.extra_args }} - pytest -v -x -m "not slow" + # TODO: provision CUDA runner + # server-cuda: + # runs-on: [self-hosted, llama-server, Linux, NVIDIA] + # + # name: server-cuda (${{ matrix.wf_name }}) + # strategy: + # matrix: + # build_type: [Release] + # wf_name: ["GPUx1"] + # include: + # - build_type: Release + # extra_args: "LLAMA_ARG_BACKEND_SAMPLING=1" + # wf_name: "GPUx1, backend-sampling" + # fail-fast: false + # + # steps: + # - name: Clone + # id: checkout + # uses: actions/checkout@v6 + # with: + # fetch-depth: 0 + # ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }} + # + # - name: Build + # id: cmake_build + # run: | + # cmake -B build -DGGML_SCHED_NO_REALLOC=ON + # cmake --build build --config ${{ matrix.build_type }} -j $(sysctl -n 
hw.logicalcpu) --target llama-server + # + # - name: Tests + # id: server_integration_tests + # if: ${{ (!matrix.disabled_on_pr || !github.event.pull_request) }} + # run: | + # cd tools/server/tests + # python3 -m venv venv + # source venv/bin/activate + # pip install -r requirements.txt + # export ${{ matrix.extra_args }} + # pytest -v -x -m "not slow" diff --git a/ggml/src/ggml-webgpu/ggml-webgpu.cpp b/ggml/src/ggml-webgpu/ggml-webgpu.cpp index 8d0e109365..aa3fe06d5a 100644 --- a/ggml/src/ggml-webgpu/ggml-webgpu.cpp +++ b/ggml/src/ggml-webgpu/ggml-webgpu.cpp @@ -3485,7 +3485,7 @@ static bool create_webgpu_device(ggml_backend_webgpu_reg_context * ctx) { dev_desc.requiredFeatureCount = required_features.size(); dev_desc.SetDeviceLostCallback( wgpu::CallbackMode::AllowSpontaneous, - [ctx](const wgpu::Device & device, wgpu::DeviceLostReason reason, wgpu::StringView message) { + [](const wgpu::Device & device, wgpu::DeviceLostReason reason, wgpu::StringView message) { if (reason == wgpu::DeviceLostReason::Destroyed) { return; }