diff --git a/.devops/cpu.Dockerfile b/.devops/cpu.Dockerfile index e5a21eeeb2..d6579ecf1a 100644 --- a/.devops/cpu.Dockerfile +++ b/.devops/cpu.Dockerfile @@ -36,7 +36,7 @@ RUN mkdir -p /app/full \ FROM ubuntu:$UBUNTU_VERSION AS base RUN apt-get update \ - && apt-get install -y libgomp1 curl\ + && apt-get install -y libgomp1 curl \ && apt autoremove -y \ && apt clean -y \ && rm -rf /tmp/* /var/tmp/* \ diff --git a/.devops/cuda-new.Dockerfile b/.devops/cuda-new.Dockerfile index 98dc147d7e..890230cd26 100644 --- a/.devops/cuda-new.Dockerfile +++ b/.devops/cuda-new.Dockerfile @@ -1,6 +1,6 @@ ARG UBUNTU_VERSION=24.04 # This needs to generally match the container host's environment. -ARG CUDA_VERSION=13.1.0 +ARG CUDA_VERSION=13.1.1 # Target the CUDA build image ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION} @@ -12,7 +12,9 @@ FROM ${BASE_CUDA_DEV_CONTAINER} AS build ARG CUDA_DOCKER_ARCH=default RUN apt-get update && \ - apt-get install -y build-essential cmake python3 python3-pip git libssl-dev libgomp1 + apt-get install -y gcc-14 g++-14 build-essential cmake python3 python3-pip git libssl-dev libgomp1 + +ENV CC=gcc-14 CXX=g++-14 CUDAHOSTCXX=g++-14 WORKDIR /app @@ -39,7 +41,7 @@ RUN mkdir -p /app/full \ FROM ${BASE_CUDA_RUN_CONTAINER} AS base RUN apt-get update \ - && apt-get install -y libgomp1 curl\ + && apt-get install -y libgomp1 curl \ && apt autoremove -y \ && apt clean -y \ && rm -rf /tmp/* /var/tmp/* \ diff --git a/.devops/cuda.Dockerfile b/.devops/cuda.Dockerfile index 52f103bc31..b3f6ccfc98 100644 --- a/.devops/cuda.Dockerfile +++ b/.devops/cuda.Dockerfile @@ -1,6 +1,6 @@ -ARG UBUNTU_VERSION=22.04 +ARG UBUNTU_VERSION=24.04 # This needs to generally match the container host's environment. -ARG CUDA_VERSION=12.4.0 +ARG CUDA_VERSION=12.8.1 # Target the CUDA build image ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION} @@ -12,7 +12,9 @@ FROM ${BASE_CUDA_DEV_CONTAINER} AS build ARG CUDA_DOCKER_ARCH=default RUN apt-get update && \ - apt-get install -y build-essential cmake python3 python3-pip git libssl-dev libgomp1 + apt-get install -y gcc-14 g++-14 build-essential cmake python3 python3-pip git libssl-dev libgomp1 + +ENV CC=gcc-14 CXX=g++-14 CUDAHOSTCXX=g++-14 WORKDIR /app @@ -39,7 +41,7 @@ RUN mkdir -p /app/full \ FROM ${BASE_CUDA_RUN_CONTAINER} AS base RUN apt-get update \ - && apt-get install -y libgomp1 curl\ + && apt-get install -y libgomp1 curl \ && apt autoremove -y \ && apt clean -y \ && rm -rf /tmp/* /var/tmp/* \ @@ -60,7 +62,8 @@ RUN apt-get update \ git \ python3 \ python3-pip \ - && pip install --upgrade pip setuptools wheel \ + python3-wheel \ + && pip install --break-system-packages --upgrade setuptools \ && pip install --break-system-packages -r requirements.txt \ && apt autoremove -y \ && apt clean -y \ diff --git a/.devops/intel.Dockerfile b/.devops/intel.Dockerfile index 4568cc08a4..955a2962ff 100644 --- a/.devops/intel.Dockerfile +++ b/.devops/intel.Dockerfile @@ -51,7 +51,7 @@ RUN mkdir /tmp/neo/ && cd /tmp/neo/ \ && dpkg --install *.deb RUN apt-get update \ - && apt-get install -y libgomp1 curl\ + && apt-get install -y libgomp1 curl \ && apt autoremove -y \ && apt clean -y \ && rm -rf /tmp/* /var/tmp/* \ diff --git a/.devops/musa.Dockerfile b/.devops/musa.Dockerfile index 9eb4985204..665a76f58c 100644 --- a/.devops/musa.Dockerfile +++ b/.devops/musa.Dockerfile @@ -46,7 +46,7 @@ RUN mkdir -p /app/full \ FROM ${BASE_MUSA_RUN_CONTAINER} AS base RUN apt-get update \ - && apt-get install -y libgomp1 curl\ + && apt-get install -y libgomp1 curl \ && apt autoremove -y \ && apt clean -y \ && rm -rf /tmp/* /var/tmp/* \ diff --git a/.devops/openvino.Dockerfile b/.devops/openvino.Dockerfile index e22ef16c7f..3ee4dd2018 100644 --- a/.devops/openvino.Dockerfile +++ b/.devops/openvino.Dockerfile @@ -78,7 +78,7 @@ ARG http_proxy ARG https_proxy RUN apt-get update \ - && apt-get install -y libgomp1 libtbb12 curl\ + && apt-get install -y libgomp1 libtbb12 curl \ && apt autoremove -y \ && apt clean -y \ && rm -rf /tmp/* /var/tmp/* \ diff --git a/.devops/rocm.Dockerfile b/.devops/rocm.Dockerfile index 830fe19e3e..96aa9dfd40 100644 --- a/.devops/rocm.Dockerfile +++ b/.devops/rocm.Dockerfile @@ -58,7 +58,7 @@ RUN mkdir -p /app/full \ FROM ${BASE_ROCM_DEV_CONTAINER} AS base RUN apt-get update \ - && apt-get install -y libgomp1 curl\ + && apt-get install -y libgomp1 curl \ && apt autoremove -y \ && apt clean -y \ && rm -rf /tmp/* /var/tmp/* \ @@ -79,7 +79,7 @@ RUN apt-get update \ git \ python3-pip \ python3 \ - python3-wheel\ + python3-wheel \ && pip install --break-system-packages --upgrade setuptools \ && pip install --break-system-packages -r requirements.txt \ && apt autoremove -y \ diff --git a/.devops/vulkan.Dockerfile b/.devops/vulkan.Dockerfile index 3112ec85ef..98036c5fd5 100644 --- a/.devops/vulkan.Dockerfile +++ b/.devops/vulkan.Dockerfile @@ -49,17 +49,20 @@ COPY --from=build /app/full /app WORKDIR /app +ENV PATH="/root/.venv/bin:/root/.local/bin:${PATH}" + +# Flag for compatibility with pip +ARG UV_INDEX_STRATEGY="unsafe-best-match" RUN apt-get update \ && apt-get install -y \ build-essential \ + curl \ git \ - python3.13 \ - python3.13-dev \ - python3-pip \ - python3-wheel \ - && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.13 100 \ - && pip install --break-system-packages --upgrade setuptools \ - && pip install --break-system-packages -r requirements.txt \ + ca-certificates \ + && curl -LsSf https://astral.sh/uv/install.sh | sh \ + && uv python install 3.13 \ + && uv venv --python 3.13 /root/.venv \ + && uv pip install --python /root/.venv/bin/python -r requirements.txt \ && apt autoremove -y \ && apt clean -y \ && rm -rf /tmp/* /var/tmp/* \ diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml index 75df76a4ed..569039c421 100644 --- a/.github/workflows/docker.yml +++ b/.github/workflows/docker.yml @@ -25,184 +25,13 @@ permissions: packages: write jobs: - push_to_registry: - name: Push Docker image to Docker Hub - - runs-on: ${{ matrix.config.runs_on }} - env: - COMMIT_SHA: ${{ github.sha }} - strategy: - fail-fast: false - matrix: - config: - # Multi-stage build - - { tag: "cpu", dockerfile: ".devops/cpu.Dockerfile", platforms: "linux/arm64", full: true, light: true, server: true, free_disk_space: false, runs_on: "ubuntu-24.04" } - - { tag: "cpu", dockerfile: ".devops/cpu.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: false, runs_on: "ubuntu-24.04" } - - { tag: "cuda cuda12", dockerfile: ".devops/cuda.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: true, runs_on: "ubuntu-24.04", cuda_version: "12.4.0", ubuntu_version: "22.04" } - - { tag: "cuda13", dockerfile: ".devops/cuda-new.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: true, runs_on: "ubuntu-24.04", cuda_version: "13.1.0", ubuntu_version: "24.04" } - - { tag: "musa", dockerfile: ".devops/musa.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: true, runs_on: "ubuntu-24.04" } - - { tag: "intel", dockerfile: ".devops/intel.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: true, runs_on: "ubuntu-24.04" } - - { tag: "vulkan", dockerfile: ".devops/vulkan.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: false, runs_on: "ubuntu-24.04" } - - { tag: "s390x", dockerfile: ".devops/s390x.Dockerfile", platforms: "linux/s390x", full: true, light: true, server: true, free_disk_space: false, runs_on: "ubuntu-24.04-s390x" } - - { tag: "rocm", dockerfile: ".devops/rocm.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: true, runs_on: "ubuntu-24.04" } - - { tag: "openvino", dockerfile: ".devops/openvino.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: false, runs_on: "ubuntu-24.04" } - steps: - - name: Check out the repo - uses: actions/checkout@v6 - with: - fetch-depth: 0 # preserve git history, so we can determine the build number - - - name: Set up QEMU - if: ${{ matrix.config.tag != 's390x' }} - uses: docker/setup-qemu-action@c7c53464625b32c7a7e944ae62b3e17d2b600130 # v3 - with: - image: tonistiigi/binfmt:qemu-v10.2.1 - - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3 - - - name: Log in to Docker Hub - uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # v3 - with: - registry: ghcr.io - username: ${{ github.repository_owner }} - password: ${{ secrets.GITHUB_TOKEN }} - - - name: Determine source tag name - id: srctag - uses: ./.github/actions/get-tag-name - env: - BRANCH_NAME: ${{ github.head_ref || github.ref_name }} - - - name: Determine image tag name - id: tag - shell: bash - run: | - REPO_OWNER="${GITHUB_REPOSITORY_OWNER@L}" # to lower case - REPO_NAME="${{ github.event.repository.name }}" - PREFIX="ghcr.io/${REPO_OWNER}/${REPO_NAME}:" - - # list all tags possible - tags="${{ matrix.config.tag }}" - for tag in $tags; do - if [[ "$tag" == "cpu" ]]; then - TYPE="" - else - TYPE="-$tag" - fi - CACHETAGS="${PREFIX}buildcache${TYPE}" - FULLTAGS="${FULLTAGS:+$FULLTAGS,}${PREFIX}full${TYPE},${PREFIX}full${TYPE}-${{ steps.srctag.outputs.name }}" - LIGHTTAGS="${LIGHTTAGS:+$LIGHTTAGS,}${PREFIX}light${TYPE},${PREFIX}light${TYPE}-${{ steps.srctag.outputs.name }}" - SERVERTAGS="${SERVERTAGS:+$SERVERTAGS,}${PREFIX}server${TYPE},${PREFIX}server${TYPE}-${{ steps.srctag.outputs.name }}" - done - echo "cache_output_tags=$CACHETAGS" >> $GITHUB_OUTPUT - echo "full_output_tags=$FULLTAGS" >> $GITHUB_OUTPUT - echo "light_output_tags=$LIGHTTAGS" >> $GITHUB_OUTPUT - echo "server_output_tags=$SERVERTAGS" >> $GITHUB_OUTPUT - echo "cache_output_tags=$CACHETAGS" # print out for debugging - echo "full_output_tags=$FULLTAGS" # print out for debugging - echo "light_output_tags=$LIGHTTAGS" # print out for debugging - echo "server_output_tags=$SERVERTAGS" # print out for debugging - env: - GITHUB_REPOSITORY_OWNER: '${{ github.repository_owner }}' - - - name: Free Disk Space (Ubuntu) - if: ${{ matrix.config.free_disk_space == true }} - uses: ggml-org/free-disk-space@v1.3.1 - with: - # this might remove tools that are actually needed, - # if set to "true" but frees about 6 GB - tool-cache: false - - # all of these default to true, but feel free to set to - # "false" if necessary for your workflow - android: true - dotnet: true - haskell: true - large-packages: true - docker-images: true - swap-storage: true - - - name: Build and push Full Docker image (tagged + versioned) - if: ${{ (github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') && matrix.config.full == true }} - uses: docker/build-push-action@10e90e3645eae34f1e60eeb005ba3a3d33f178e8 # v6 - with: - context: . - push: true - platforms: ${{ matrix.config.platforms }} - # tag list is generated from step above - tags: ${{ steps.tag.outputs.full_output_tags }} - file: ${{ matrix.config.dockerfile }} - target: full - provenance: false - build-args: | - ${{ matrix.config.ubuntu_version && format('UBUNTU_VERSION={0}', matrix.config.ubuntu_version) || '' }} - ${{ matrix.config.cuda_version && format('CUDA_VERSION={0}', matrix.config.cuda_version) || '' }} - # using github experimental cache - #cache-from: type=gha - #cache-to: type=gha,mode=max - # return to this if the experimental github cache is having issues - #cache-to: type=local,dest=/tmp/.buildx-cache - #cache-from: type=local,src=/tmp/.buildx-cache - # using registry cache (no storage limit) - cache-from: type=registry,ref=${{ steps.tag.outputs.cache_output_tags }} - cache-to: type=registry,ref=${{ steps.tag.outputs.cache_output_tags }},mode=max - - - name: Build and push Light Docker image (tagged + versioned) - if: ${{ (github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') && matrix.config.light == true }} - uses: docker/build-push-action@10e90e3645eae34f1e60eeb005ba3a3d33f178e8 # v6 - with: - context: . - push: true - platforms: ${{ matrix.config.platforms }} - # tag list is generated from step above - tags: ${{ steps.tag.outputs.light_output_tags }} - file: ${{ matrix.config.dockerfile }} - target: light - provenance: false - build-args: | - ${{ matrix.config.ubuntu_version && format('UBUNTU_VERSION={0}', matrix.config.ubuntu_version) || '' }} - ${{ matrix.config.cuda_version && format('CUDA_VERSION={0}', matrix.config.cuda_version) || '' }} - # using github experimental cache - #cache-from: type=gha - #cache-to: type=gha,mode=max - # return to this if the experimental github cache is having issues - #cache-to: type=local,dest=/tmp/.buildx-cache - #cache-from: type=local,src=/tmp/.buildx-cache - # using registry cache (no storage limit) - cache-from: type=registry,ref=${{ steps.tag.outputs.cache_output_tags }} - cache-to: type=registry,ref=${{ steps.tag.outputs.cache_output_tags }},mode=max - - - name: Build and push Server Docker image (tagged + versioned) - if: ${{ (github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') && matrix.config.server == true }} - uses: docker/build-push-action@10e90e3645eae34f1e60eeb005ba3a3d33f178e8 # v6 - with: - context: . - push: true - platforms: ${{ matrix.config.platforms }} - # tag list is generated from step above - tags: ${{ steps.tag.outputs.server_output_tags }} - file: ${{ matrix.config.dockerfile }} - target: server - provenance: false - build-args: | - ${{ matrix.config.ubuntu_version && format('UBUNTU_VERSION={0}', matrix.config.ubuntu_version) || '' }} - ${{ matrix.config.cuda_version && format('CUDA_VERSION={0}', matrix.config.cuda_version) || '' }} - # using github experimental cache - #cache-from: type=gha - #cache-to: type=gha,mode=max - # return to this if the experimental github cache is having issues - #cache-to: type=local,dest=/tmp/.buildx-cache - #cache-from: type=local,src=/tmp/.buildx-cache - # using registry cache (no storage limit) - cache-from: type=registry,ref=${{ steps.tag.outputs.cache_output_tags }} - cache-to: type=registry,ref=${{ steps.tag.outputs.cache_output_tags }},mode=max - create_tag: name: Create and push git tag - runs-on: ubuntu-22.04 + runs-on: ubuntu-slim permissions: contents: write + outputs: + source_tag: ${{ steps.srctag.outputs.name }} steps: - name: Clone @@ -223,3 +52,391 @@ jobs: run: | git tag ${{ steps.srctag.outputs.name }} || exit 0 git push origin ${{ steps.srctag.outputs.name }} || exit 0 + + prepare_matrices: + name: Prepare Docker matrices + runs-on: ubuntu-24.04 + outputs: + build_matrix: ${{ steps.matrices.outputs.build_matrix }} + merge_matrix: ${{ steps.matrices.outputs.merge_matrix }} + + steps: + - name: Generate build and merge matrices + id: matrices + shell: bash + run: | + set -euo pipefail + + # Keep all build targets in one place and derive merge targets from it. + cat > build-matrix.json <<'JSON' + [ + { "tag": "cpu", "dockerfile": ".devops/cpu.Dockerfile", "platforms": "linux/amd64", "full": true, "light": true, "server": true, "free_disk_space": false, "runs_on": "ubuntu-24.04" }, + { "tag": "cpu", "dockerfile": ".devops/cpu.Dockerfile", "platforms": "linux/arm64", "full": true, "light": true, "server": true, "free_disk_space": false, "runs_on": "ubuntu-24.04-arm" }, + { "tag": "cpu", "dockerfile": ".devops/s390x.Dockerfile", "platforms": "linux/s390x", "full": true, "light": true, "server": true, "free_disk_space": false, "runs_on": "ubuntu-24.04-s390x" }, + { "tag": "cuda cuda12", "dockerfile": ".devops/cuda.Dockerfile", "platforms": "linux/amd64", "full": true, "light": true, "server": true, "free_disk_space": true, "runs_on": "ubuntu-24.04" }, + { "tag": "cuda cuda12", "dockerfile": ".devops/cuda.Dockerfile", "platforms": "linux/arm64", "full": true, "light": true, "server": true, "free_disk_space": true, "runs_on": "ubuntu-24.04-arm" }, + { "tag": "cuda13", "dockerfile": ".devops/cuda-new.Dockerfile", "platforms": "linux/amd64", "full": true, "light": true, "server": true, "free_disk_space": true, "runs_on": "ubuntu-24.04" }, + { "tag": "cuda13", "dockerfile": ".devops/cuda-new.Dockerfile", "platforms": "linux/arm64", "full": true, "light": true, "server": true, "free_disk_space": true, "runs_on": "ubuntu-24.04-arm" }, + { "tag": "musa", "dockerfile": ".devops/musa.Dockerfile", "platforms": "linux/amd64", "full": true, "light": true, "server": true, "free_disk_space": true, "runs_on": "ubuntu-24.04" }, + { "tag": "intel", "dockerfile": ".devops/intel.Dockerfile", "platforms": "linux/amd64", "full": true, "light": true, "server": true, "free_disk_space": true, "runs_on": "ubuntu-24.04" }, + { "tag": "vulkan", "dockerfile": ".devops/vulkan.Dockerfile", "platforms": "linux/amd64", "full": true, "light": true, "server": true, "free_disk_space": false, "runs_on": "ubuntu-24.04" }, + { "tag": "vulkan", "dockerfile": ".devops/vulkan.Dockerfile", "platforms": "linux/arm64", "full": true, "light": true, "server": true, "free_disk_space": false, "runs_on": "ubuntu-24.04-arm" }, + { "tag": "rocm", "dockerfile": ".devops/rocm.Dockerfile", "platforms": "linux/amd64", "full": true, "light": true, "server": true, "free_disk_space": true, "runs_on": "ubuntu-24.04" }, + { "tag": "openvino", "dockerfile": ".devops/openvino.Dockerfile", "platforms": "linux/amd64", "full": true, "light": true, "server": true, "free_disk_space": false, "runs_on": "ubuntu-24.04" } + ] + JSON + + BUILD_MATRIX="$(jq -c . build-matrix.json)" + MERGE_MATRIX="$(jq -c ' + reduce .[] as $entry ({}; .[$entry.tag] |= ( + . // { + tag: $entry.tag, + arches: [], + full: false, + light: false, + server: false + } + | .full = (.full or ($entry.full // false)) + | .light = (.light or ($entry.light // false)) + | .server = (.server or ($entry.server // false)) + | .arches += [($entry.platforms | sub("^linux/"; ""))] + )) + # Backward compatibility: s390x tags are aliases of cpu for the linux/s390x platform. + | if (has("cpu") and (((.cpu.arches // []) | index("s390x")) != null)) then + . + { + s390x: { + tag: "s390x", + arches: ["s390x"], + full: .cpu.full, + light: .cpu.light, + server: .cpu.server + } + } + else + . + end + | [.[] | .arches = (.arches | unique | sort | join(" "))] + ' build-matrix.json)" + + echo "build_matrix=$BUILD_MATRIX" >> "$GITHUB_OUTPUT" + echo "merge_matrix=$MERGE_MATRIX" >> "$GITHUB_OUTPUT" + + push_to_registry: + name: Push Docker image to Docker Registry + needs: [prepare_matrices, create_tag] + + runs-on: ${{ matrix.config.runs_on }} + strategy: + fail-fast: false + matrix: + config: ${{ fromJSON(needs.prepare_matrices.outputs.build_matrix) }} + steps: + - name: Check out the repo + uses: actions/checkout@v6 + with: + fetch-depth: 0 + ref: ${{ needs.create_tag.outputs.source_tag }} + + - name: Set up QEMU + if: ${{ contains(matrix.config.platforms, 'linux/amd64') }} + uses: docker/setup-qemu-action@ce360397dd3f832beb865e1373c09c0e9f86d70a # v4 + with: + image: tonistiigi/binfmt:qemu-v10.2.1 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@4d04d5d9486b7bd6fa91e7baf45bbb4f8b9deedd # v4 + + - name: Log in to Docker Registry + uses: docker/login-action@b45d80f862d83dbcd57f89517bcf500b2ab88fb2 # v4 + with: + registry: ghcr.io + username: ${{ github.repository_owner }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Determine image metadata + id: meta + shell: bash + run: | + set -euo pipefail + + REPO_OWNER="${GITHUB_REPOSITORY_OWNER@L}" # to lower case + REPO_NAME="${{ github.event.repository.name }}" + IMAGE_REPO="ghcr.io/${REPO_OWNER}/${REPO_NAME}" + PREFIX="${IMAGE_REPO}:" + PLATFORM="${{ matrix.config.platforms }}" + ARCH_SUFFIX="${PLATFORM#linux/}" + + # list all tags possible + tags="${{ matrix.config.tag }}" + for tag in $tags; do + if [[ "$tag" == "cpu" ]]; then + TYPE="" + else + TYPE="-$tag" + fi + CACHETAG="${PREFIX}buildcache${TYPE}-${ARCH_SUFFIX}" + done + + SAFE_TAGS="$(echo "$tags" | tr ' ' '_')" + + echo "image_repo=$IMAGE_REPO" >> $GITHUB_OUTPUT + echo "arch_suffix=$ARCH_SUFFIX" >> $GITHUB_OUTPUT + echo "cache_output_tag=$CACHETAG" >> $GITHUB_OUTPUT + echo "digest_artifact_suffix=${SAFE_TAGS}-${ARCH_SUFFIX}" >> $GITHUB_OUTPUT + echo "cache_output_tag=$CACHETAG" # print out for debugging + env: + GITHUB_REPOSITORY_OWNER: '${{ github.repository_owner }}' + + - name: Free Disk Space (Ubuntu) + if: ${{ matrix.config.free_disk_space == true }} + uses: ggml-org/free-disk-space@v1.3.1 + with: + # this might remove tools that are actually needed, + # if set to "true" but frees about 6 GB + tool-cache: false + + # all of these default to true, but feel free to set to + # "false" if necessary for your workflow + android: true + dotnet: true + haskell: true + large-packages: true + docker-images: true + swap-storage: true + + - name: Build and push Full Docker image by digest + id: build_full + if: ${{ (github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') && matrix.config.full == true }} + uses: docker/build-push-action@d08e5c354a6adb9ed34480a06d141179aa583294 # v7 + with: + context: . + platforms: ${{ matrix.config.platforms }} + outputs: type=image,name=${{ steps.meta.outputs.image_repo }},push-by-digest=true,name-canonical=true,push=true + file: ${{ matrix.config.dockerfile }} + target: full + provenance: false + build-args: | + ${{ matrix.config.ubuntu_version && format('UBUNTU_VERSION={0}', matrix.config.ubuntu_version) || '' }} + ${{ matrix.config.cuda_version && format('CUDA_VERSION={0}', matrix.config.cuda_version) || '' }} + # using github experimental cache + #cache-from: type=gha + #cache-to: type=gha,mode=max + # return to this if the experimental github cache is having issues + #cache-to: type=local,dest=/tmp/.buildx-cache + #cache-from: type=local,src=/tmp/.buildx-cache + # using registry cache (no storage limit) + cache-from: type=registry,ref=${{ steps.meta.outputs.cache_output_tag }} + cache-to: type=registry,ref=${{ steps.meta.outputs.cache_output_tag }},mode=max + + - name: Build and push Light Docker image by digest + id: build_light + if: ${{ (github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') && matrix.config.light == true }} + uses: docker/build-push-action@d08e5c354a6adb9ed34480a06d141179aa583294 # v7 + with: + context: . + platforms: ${{ matrix.config.platforms }} + outputs: type=image,name=${{ steps.meta.outputs.image_repo }},push-by-digest=true,name-canonical=true,push=true + file: ${{ matrix.config.dockerfile }} + target: light + provenance: false + build-args: | + ${{ matrix.config.ubuntu_version && format('UBUNTU_VERSION={0}', matrix.config.ubuntu_version) || '' }} + ${{ matrix.config.cuda_version && format('CUDA_VERSION={0}', matrix.config.cuda_version) || '' }} + # using github experimental cache + #cache-from: type=gha + #cache-to: type=gha,mode=max + # return to this if the experimental github cache is having issues + #cache-to: type=local,dest=/tmp/.buildx-cache + #cache-from: type=local,src=/tmp/.buildx-cache + # using registry cache (no storage limit) + cache-from: type=registry,ref=${{ steps.meta.outputs.cache_output_tag }} + cache-to: type=registry,ref=${{ steps.meta.outputs.cache_output_tag }},mode=max + + - name: Build and push Server Docker image by digest + id: build_server + if: ${{ (github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') && matrix.config.server == true }} + uses: docker/build-push-action@d08e5c354a6adb9ed34480a06d141179aa583294 # v7 + with: + context: . + platforms: ${{ matrix.config.platforms }} + outputs: type=image,name=${{ steps.meta.outputs.image_repo }},push-by-digest=true,name-canonical=true,push=true + file: ${{ matrix.config.dockerfile }} + target: server + provenance: false + build-args: | + ${{ matrix.config.ubuntu_version && format('UBUNTU_VERSION={0}', matrix.config.ubuntu_version) || '' }} + ${{ matrix.config.cuda_version && format('CUDA_VERSION={0}', matrix.config.cuda_version) || '' }} + # using github experimental cache + #cache-from: type=gha + #cache-to: type=gha,mode=max + # return to this if the experimental github cache is having issues + #cache-to: type=local,dest=/tmp/.buildx-cache + #cache-from: type=local,src=/tmp/.buildx-cache + # using registry cache (no storage limit) + cache-from: type=registry,ref=${{ steps.meta.outputs.cache_output_tag }} + cache-to: type=registry,ref=${{ steps.meta.outputs.cache_output_tag }},mode=max + + - name: Export digest metadata + shell: bash + run: | + set -euo pipefail + + TAGS="${{ matrix.config.tag }}" + ARCH_SUFFIX="${{ steps.meta.outputs.arch_suffix }}" + DIGEST_FILE="/tmp/digests/${{ steps.meta.outputs.digest_artifact_suffix }}.tsv" + mkdir -p /tmp/digests + + add_digest_rows() { + local image_type="$1" + local digest="$2" + + if [[ -z "$digest" ]]; then + echo "Missing digest for image_type=${image_type}" >&2 + exit 1 + fi + + for tag in $TAGS; do + printf '%s\t%s\t%s\t%s\n' "$tag" "$ARCH_SUFFIX" "$image_type" "$digest" >> "$DIGEST_FILE" + done + } + + if [[ "${{ matrix.config.full }}" == "true" ]]; then + add_digest_rows "full" "${{ steps.build_full.outputs.digest }}" + fi + + if [[ "${{ matrix.config.light }}" == "true" ]]; then + add_digest_rows "light" "${{ steps.build_light.outputs.digest }}" + fi + + if [[ "${{ matrix.config.server }}" == "true" ]]; then + add_digest_rows "server" "${{ steps.build_server.outputs.digest }}" + fi + + - name: Upload digest metadata + uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7 + with: + name: digests-${{ steps.meta.outputs.digest_artifact_suffix }} + path: /tmp/digests/${{ steps.meta.outputs.digest_artifact_suffix }}.tsv + if-no-files-found: error + + merge_arch_tags: + name: Create shared tags from digests + needs: [prepare_matrices, push_to_registry, create_tag] + runs-on: ubuntu-24.04 + strategy: + fail-fast: false + matrix: + config: ${{ fromJSON(needs.prepare_matrices.outputs.merge_matrix) }} + + steps: + - name: Check out the repo + uses: actions/checkout@v6 + with: + fetch-depth: 0 + + - name: Download digest metadata + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8 + with: + pattern: digests-* + path: /tmp/digests + merge-multiple: true + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@4d04d5d9486b7bd6fa91e7baf45bbb4f8b9deedd # v4 + + - name: Log in to Docker Registry + uses: docker/login-action@b45d80f862d83dbcd57f89517bcf500b2ab88fb2 # v4 + with: + registry: ghcr.io + username: ${{ github.repository_owner }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Create tags from digests + shell: bash + run: | + set -euo pipefail + + REPO_OWNER="${GITHUB_REPOSITORY_OWNER@L}" # to lower case + REPO_NAME="${{ github.event.repository.name }}" + IMAGE_REPO="ghcr.io/${REPO_OWNER}/${REPO_NAME}" + PREFIX="${IMAGE_REPO}:" + SRC_TAG="${{ needs.create_tag.outputs.source_tag }}" + TAGS="${{ matrix.config.tag }}" + ARCHES="${{ matrix.config.arches }}" + DIGEST_GLOB="/tmp/digests/*.tsv" + + if ! ls ${DIGEST_GLOB} >/dev/null 2>&1; then + echo "No digest metadata found in /tmp/digests" >&2 + exit 1 + fi + + if [[ -z "$SRC_TAG" ]]; then + echo "Missing source tag from create_tag" >&2 + exit 1 + fi + + find_digest() { + local tag_name="$1" + local arch="$2" + local image_type="$3" + local digest + + digest="$(awk -F '\t' -v t="$tag_name" -v a="$arch" -v i="$image_type" '$1 == t && $2 == a && $3 == i { print $4; exit }' ${DIGEST_GLOB})" + + # Backward compatibility: s390x tags are aliases of cpu for the linux/s390x platform. + if [[ -z "$digest" && "$tag_name" == "s390x" && "$arch" == "s390x" ]]; then + digest="$(awk -F '\t' -v t="cpu" -v a="$arch" -v i="$image_type" '$1 == t && $2 == a && $3 == i { print $4; exit }' ${DIGEST_GLOB})" + fi + + if [[ -z "$digest" ]]; then + echo "Missing digest for tag=${tag_name} arch=${arch} image_type=${image_type}" >&2 + exit 1 + fi + + echo "$digest" + } + + create_manifest_tags() { + local image_type="$1" + local tag_name="$2" + local suffix="$3" + + local merged_tag="${PREFIX}${image_type}${suffix}" + local merged_versioned_tag="${merged_tag}-${SRC_TAG}" + + local refs=() + + for arch in $ARCHES; do + local digest + digest="$(find_digest "$tag_name" "$arch" "$image_type")" + refs+=("${IMAGE_REPO}@${digest}") + done + + echo "Creating ${merged_tag} from ${refs[*]}" + docker buildx imagetools create --tag "${merged_tag}" "${refs[@]}" + + echo "Creating ${merged_versioned_tag} from ${refs[*]}" + docker buildx imagetools create --tag "${merged_versioned_tag}" "${refs[@]}" + } + + for tag in $TAGS; do + if [[ "$tag" == "cpu" ]]; then + TYPE="" + else + TYPE="-$tag" + fi + + if [[ "${{ matrix.config.full }}" == "true" ]]; then + create_manifest_tags "full" "$tag" "$TYPE" + fi + + if [[ "${{ matrix.config.light }}" == "true" ]]; then + create_manifest_tags "light" "$tag" "$TYPE" + fi + + if [[ "${{ matrix.config.server }}" == "true" ]]; then + create_manifest_tags "server" "$tag" "$TYPE" + fi + done + env: + GITHUB_REPOSITORY_OWNER: '${{ github.repository_owner }}' diff --git a/docs/docker.md b/docs/docker.md index 9fb5e65eaf..7f99bfaad6 100644 --- a/docs/docker.md +++ b/docs/docker.md @@ -13,24 +13,30 @@ We have three Docker images available for this project: Additionally, there the following images, similar to the above: -- `ghcr.io/ggml-org/llama.cpp:full-cuda`: Same as `full` but compiled with CUDA support. (platforms: `linux/amd64`) -- `ghcr.io/ggml-org/llama.cpp:light-cuda`: Same as `light` but compiled with CUDA support. (platforms: `linux/amd64`) -- `ghcr.io/ggml-org/llama.cpp:server-cuda`: Same as `server` but compiled with CUDA support. (platforms: `linux/amd64`) -- `ghcr.io/ggml-org/llama.cpp:full-rocm`: Same as `full` but compiled with ROCm support. (platforms: `linux/amd64`, `linux/arm64`) -- `ghcr.io/ggml-org/llama.cpp:light-rocm`: Same as `light` but compiled with ROCm support. (platforms: `linux/amd64`, `linux/arm64`) -- `ghcr.io/ggml-org/llama.cpp:server-rocm`: Same as `server` but compiled with ROCm support. (platforms: `linux/amd64`, `linux/arm64`) +- `ghcr.io/ggml-org/llama.cpp:full-cuda`: Same as `full` but compiled with CUDA 12 support. (platforms: `linux/amd64`, `linux/arm64`) +- `ghcr.io/ggml-org/llama.cpp:full-cuda13`: Same as `full` but compiled with CUDA 13 support. (platforms: `linux/amd64`, `linux/arm64`) +- `ghcr.io/ggml-org/llama.cpp:light-cuda`: Same as `light` but compiled with CUDA 12 support. (platforms: `linux/amd64`, `linux/arm64`) +- `ghcr.io/ggml-org/llama.cpp:light-cuda13`: Same as `light` but compiled with CUDA 13 support. (platforms: `linux/amd64`, `linux/arm64`) +- `ghcr.io/ggml-org/llama.cpp:server-cuda`: Same as `server` but compiled with CUDA 12 support. (platforms: `linux/amd64`, `linux/arm64`) +- `ghcr.io/ggml-org/llama.cpp:server-cuda13`: Same as `server` but compiled with CUDA 13 support. (platforms: `linux/amd64`, `linux/arm64`) +- `ghcr.io/ggml-org/llama.cpp:full-rocm`: Same as `full` but compiled with ROCm support. (platforms: `linux/amd64`) +- `ghcr.io/ggml-org/llama.cpp:light-rocm`: Same as `light` but compiled with ROCm support. (platforms: `linux/amd64`) +- `ghcr.io/ggml-org/llama.cpp:server-rocm`: Same as `server` but compiled with ROCm support. (platforms: `linux/amd64`) - `ghcr.io/ggml-org/llama.cpp:full-musa`: Same as `full` but compiled with MUSA support. (platforms: `linux/amd64`) - `ghcr.io/ggml-org/llama.cpp:light-musa`: Same as `light` but compiled with MUSA support. (platforms: `linux/amd64`) - `ghcr.io/ggml-org/llama.cpp:server-musa`: Same as `server` but compiled with MUSA support. (platforms: `linux/amd64`) - `ghcr.io/ggml-org/llama.cpp:full-intel`: Same as `full` but compiled with SYCL support. (platforms: `linux/amd64`) - `ghcr.io/ggml-org/llama.cpp:light-intel`: Same as `light` but compiled with SYCL support. (platforms: `linux/amd64`) - `ghcr.io/ggml-org/llama.cpp:server-intel`: Same as `server` but compiled with SYCL support. (platforms: `linux/amd64`) -- `ghcr.io/ggml-org/llama.cpp:full-vulkan`: Same as `full` but compiled with Vulkan support. (platforms: `linux/amd64`) -- `ghcr.io/ggml-org/llama.cpp:light-vulkan`: Same as `light` but compiled with Vulkan support. (platforms: `linux/amd64`) -- `ghcr.io/ggml-org/llama.cpp:server-vulkan`: Same as `server` but compiled with Vulkan support. (platforms: `linux/amd64`) +- `ghcr.io/ggml-org/llama.cpp:full-vulkan`: Same as `full` but compiled with Vulkan support. (platforms: `linux/amd64`, `linux/arm64`) +- `ghcr.io/ggml-org/llama.cpp:light-vulkan`: Same as `light` but compiled with Vulkan support. (platforms: `linux/amd64`, `linux/arm64`) +- `ghcr.io/ggml-org/llama.cpp:server-vulkan`: Same as `server` but compiled with Vulkan support. (platforms: `linux/amd64`, `linux/arm64`) - `ghcr.io/ggml-org/llama.cpp:full-openvino`: Same as `full` but compiled with OpenVino support. (platforms: `linux/amd64`) - `ghcr.io/ggml-org/llama.cpp:light-openvino`: Same as `light` but compiled with OpenVino support. (platforms: `linux/amd64`) - `ghcr.io/ggml-org/llama.cpp:server-openvino`: Same as `server` but compiled with OpenVino support. (platforms: `linux/amd64`) +- `ghcr.io/ggml-org/llama.cpp:full-s390x`: Identical to `full`, an alias for the `s390x` platform. (platforms: `linux/s390x`) +- `ghcr.io/ggml-org/llama.cpp:light-s390x`: Identical to `light`, an alias for the `s390x` platform. (platforms: `linux/s390x`) +- `ghcr.io/ggml-org/llama.cpp:server-s390x`: Identical to `server`, an alias for the `s390x` platform. (platforms: `linux/s390x`) The GPU enabled images are not currently tested by CI beyond being built. They are not built with any variation from the ones in the Dockerfiles defined in [.devops/](../.devops/) and the GitHub Action defined in [.github/workflows/docker.yml](../.github/workflows/docker.yml). If you need different settings (for example, a different CUDA, ROCm or MUSA library, you'll need to build the images locally for now). @@ -82,7 +88,7 @@ You may want to pass in some different `ARGS`, depending on the CUDA environment The defaults are: -- `CUDA_VERSION` set to `12.4.0` +- `CUDA_VERSION` set to `12.8.1` - `CUDA_DOCKER_ARCH` set to the cmake build default, which includes all the supported architectures The resulting images, are essentially the same as the non-CUDA images: