From 277ff5fff79d49cc3d2292ddf410ca95dd51c3a9 Mon Sep 17 00:00:00 2001 From: M1DNYT3 <42499082+M1DNYT3@users.noreply.github.com> Date: Fri, 3 Apr 2026 16:06:45 +0300 Subject: [PATCH] docker : bump cuda12 to 12.9.1 (#20920) Co-authored-by: M1DNYT3 Co-authored-by: CISC --- .devops/cuda-new.Dockerfile | 97 ------------------------------------ .github/workflows/docker.yml | 8 +-- 2 files changed, 4 insertions(+), 101 deletions(-) delete mode 100644 .devops/cuda-new.Dockerfile diff --git a/.devops/cuda-new.Dockerfile b/.devops/cuda-new.Dockerfile deleted file mode 100644 index 890230cd26..0000000000 --- a/.devops/cuda-new.Dockerfile +++ /dev/null @@ -1,97 +0,0 @@ -ARG UBUNTU_VERSION=24.04 -# This needs to generally match the container host's environment. -ARG CUDA_VERSION=13.1.1 -# Target the CUDA build image -ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION} - -ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION} - -FROM ${BASE_CUDA_DEV_CONTAINER} AS build - -# CUDA architecture to build for (defaults to all supported archs) -ARG CUDA_DOCKER_ARCH=default - -RUN apt-get update && \ - apt-get install -y gcc-14 g++-14 build-essential cmake python3 python3-pip git libssl-dev libgomp1 - -ENV CC=gcc-14 CXX=g++-14 CUDAHOSTCXX=g++-14 - -WORKDIR /app - -COPY . . - -RUN if [ "${CUDA_DOCKER_ARCH}" != "default" ]; then \ - export CMAKE_ARGS="-DCMAKE_CUDA_ARCHITECTURES=${CUDA_DOCKER_ARCH}"; \ - fi && \ - cmake -B build -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DLLAMA_BUILD_TESTS=OFF ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \ - cmake --build build --config Release -j$(nproc) - -RUN mkdir -p /app/lib && \ - find build -name "*.so*" -exec cp -P {} /app/lib \; - -RUN mkdir -p /app/full \ - && cp build/bin/* /app/full \ - && cp *.py /app/full \ - && cp -r gguf-py /app/full \ - && cp -r requirements /app/full \ - && cp requirements.txt /app/full \ - && cp .devops/tools.sh /app/full/tools.sh - -## Base image -FROM ${BASE_CUDA_RUN_CONTAINER} AS base - -RUN apt-get update \ - && apt-get install -y libgomp1 curl \ - && apt autoremove -y \ - && apt clean -y \ - && rm -rf /tmp/* /var/tmp/* \ - && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \ - && find /var/cache -type f -delete - -COPY --from=build /app/lib/ /app - -### Full -FROM base AS full - -COPY --from=build /app/full /app - -WORKDIR /app - -RUN apt-get update \ - && apt-get install -y \ - git \ - python3 \ - python3-pip \ - python3-wheel \ - && pip install --break-system-packages --upgrade setuptools \ - && pip install --break-system-packages -r requirements.txt \ - && apt autoremove -y \ - && apt clean -y \ - && rm -rf /tmp/* /var/tmp/* \ - && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \ - && find /var/cache -type f -delete - - -ENTRYPOINT ["/app/tools.sh"] - -### Light, CLI only -FROM base AS light - -COPY --from=build /app/full/llama-cli /app/full/llama-completion /app - -WORKDIR /app - -ENTRYPOINT [ "/app/llama-cli" ] - -### Server, Server only -FROM base AS server - -ENV LLAMA_ARG_HOST=0.0.0.0 - -COPY --from=build /app/full/llama-server /app - -WORKDIR /app - -HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ] - -ENTRYPOINT [ "/app/llama-server" ] diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml index 569039c421..1d7d6438c7 100644 --- a/.github/workflows/docker.yml +++ b/.github/workflows/docker.yml @@ -73,10 +73,10 @@ jobs: { "tag": "cpu", "dockerfile": ".devops/cpu.Dockerfile", "platforms": "linux/amd64", "full": true, "light": true, "server": true, "free_disk_space": false, "runs_on": "ubuntu-24.04" }, { "tag": "cpu", "dockerfile": ".devops/cpu.Dockerfile", "platforms": "linux/arm64", "full": true, "light": true, "server": true, "free_disk_space": false, "runs_on": "ubuntu-24.04-arm" }, { "tag": "cpu", "dockerfile": ".devops/s390x.Dockerfile", "platforms": "linux/s390x", "full": true, "light": true, "server": true, "free_disk_space": false, "runs_on": "ubuntu-24.04-s390x" }, - { "tag": "cuda cuda12", "dockerfile": ".devops/cuda.Dockerfile", "platforms": "linux/amd64", "full": true, "light": true, "server": true, "free_disk_space": true, "runs_on": "ubuntu-24.04" }, - { "tag": "cuda cuda12", "dockerfile": ".devops/cuda.Dockerfile", "platforms": "linux/arm64", "full": true, "light": true, "server": true, "free_disk_space": true, "runs_on": "ubuntu-24.04-arm" }, - { "tag": "cuda13", "dockerfile": ".devops/cuda-new.Dockerfile", "platforms": "linux/amd64", "full": true, "light": true, "server": true, "free_disk_space": true, "runs_on": "ubuntu-24.04" }, - { "tag": "cuda13", "dockerfile": ".devops/cuda-new.Dockerfile", "platforms": "linux/arm64", "full": true, "light": true, "server": true, "free_disk_space": true, "runs_on": "ubuntu-24.04-arm" }, + { "tag": "cuda cuda12", "dockerfile": ".devops/cuda.Dockerfile", "cuda_version": "12.9.1", "platforms": "linux/amd64", "full": true, "light": true, "server": true, "free_disk_space": true, "runs_on": "ubuntu-24.04" }, + { "tag": "cuda cuda12", "dockerfile": ".devops/cuda.Dockerfile", "cuda_version": "12.9.1", "platforms": "linux/arm64", "full": true, "light": true, "server": true, "free_disk_space": true, "runs_on": "ubuntu-24.04-arm" }, + { "tag": "cuda13", "dockerfile": ".devops/cuda.Dockerfile", "cuda_version": "13.1.1", "platforms": "linux/amd64", "full": true, "light": true, "server": true, "free_disk_space": true, "runs_on": "ubuntu-24.04" }, + { "tag": "cuda13", "dockerfile": ".devops/cuda.Dockerfile", "cuda_version": "13.1.1", "platforms": "linux/arm64", "full": true, "light": true, "server": true, "free_disk_space": true, "runs_on": "ubuntu-24.04-arm" }, { "tag": "musa", "dockerfile": ".devops/musa.Dockerfile", "platforms": "linux/amd64", "full": true, "light": true, "server": true, "free_disk_space": true, "runs_on": "ubuntu-24.04" }, { "tag": "intel", "dockerfile": ".devops/intel.Dockerfile", "platforms": "linux/amd64", "full": true, "light": true, "server": true, "free_disk_space": true, "runs_on": "ubuntu-24.04" }, { "tag": "vulkan", "dockerfile": ".devops/vulkan.Dockerfile", "platforms": "linux/amd64", "full": true, "light": true, "server": true, "free_disk_space": false, "runs_on": "ubuntu-24.04" },