Added OpenVINO CI/CD. Updated docs

This commit is contained in:
ravi9 2025-07-17 17:51:10 -07:00 committed by Mustafa Cavus
parent d61f83c9b7
commit ea75772e48
6 changed files with 314 additions and 39 deletions

.devops/openvino.Dockerfile (new file, 134 lines)

@ -0,0 +1,134 @@
ARG OPENVINO_VERSION_MAJOR=2025.2
ARG OPENVINO_VERSION_FULL=2025.2.0.19140.c01cd93e24d
ARG UBUNTU_VERSION=24.04
# Optional proxy build arguments - empty by default
ARG http_proxy=
ARG https_proxy=
## Build Image
FROM ubuntu:${UBUNTU_VERSION} AS build
# Pass proxy args to build stage
ARG http_proxy
ARG https_proxy
RUN apt-get update && \
apt-get install -y --no-install-recommends \
ca-certificates \
gnupg \
wget \
git \
cmake \
ninja-build \
build-essential \
libtbb12 \
libcurl4-openssl-dev && \
rm -rf /var/lib/apt/lists/*
# Install OpenVINO for Ubuntu 24.04
ARG OPENVINO_VERSION_MAJOR
ARG OPENVINO_VERSION_FULL
RUN mkdir -p /opt/intel && \
wget https://storage.openvinotoolkit.org/repositories/openvino/packages/${OPENVINO_VERSION_MAJOR}/linux/openvino_toolkit_ubuntu24_${OPENVINO_VERSION_FULL}_x86_64.tgz && \
tar -xf openvino_toolkit_ubuntu24_${OPENVINO_VERSION_FULL}_x86_64.tgz && \
mv openvino_toolkit_ubuntu24_${OPENVINO_VERSION_FULL}_x86_64 /opt/intel/openvino_${OPENVINO_VERSION_MAJOR} && \
cd /opt/intel/openvino_${OPENVINO_VERSION_MAJOR} && \
echo "Y" | ./install_dependencies/install_openvino_dependencies.sh && \
cd - && \
ln -s /opt/intel/openvino_${OPENVINO_VERSION_MAJOR} /opt/intel/openvino
ENV OpenVINO_DIR=/opt/intel/openvino
WORKDIR /app
COPY . .
# Build Stage
RUN bash -c "source ${OpenVINO_DIR}/setupvars.sh && \
cmake -B build/ReleaseOV -G Ninja \
-DCMAKE_BUILD_TYPE=Release \
-DGGML_OPENVINO=ON && \
cmake --build build/ReleaseOV -j$(nproc)"
# Copy all necessary libraries
RUN mkdir -p /app/lib && \
find build/ReleaseOV -name '*.so*' -exec cp {} /app/lib \; && \
find ${OpenVINO_DIR}/runtime/lib/intel64 -name '*.so*' -exec cp -P {} /app/lib \; 2>/dev/null || \
find ${OpenVINO_DIR}/lib/intel64 -name '*.so*' -exec cp -P {} /app/lib \;
# Create runtime directories and copy binaries
RUN mkdir -p /app/full \
&& cp build/ReleaseOV/bin/* /app/full/ \
&& cp *.py /app/full \
&& cp -r gguf-py /app/full \
&& cp -r requirements /app/full \
&& cp requirements.txt /app/full \
&& cp .devops/tools.sh /app/full/tools.sh
## Base Runtime Image
FROM ubuntu:${UBUNTU_VERSION} AS base
# Pass proxy args to runtime stage
ARG http_proxy
ARG https_proxy
RUN apt-get update \
&& apt-get install -y libgomp1 libtbb12 curl \
&& apt autoremove -y \
&& apt clean -y \
&& rm -rf /tmp/* /var/tmp/* \
&& find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
&& find /var/cache -type f -delete
COPY --from=build /app/lib/ /app/
### Full (all binaries)
FROM base AS full
ARG http_proxy
ARG https_proxy
COPY --from=build /app/full /app/
WORKDIR /app
RUN apt-get update && \
apt-get install -y --no-install-recommends \
git \
python3 \
python3-venv \
python3-pip && \
python3 -m venv /ov-venv && \
/ov-venv/bin/pip install --no-cache-dir --upgrade pip setuptools wheel && \
/ov-venv/bin/pip install --no-cache-dir -r requirements.txt && \
apt-get autoremove -y && \
apt-get clean && \
rm -rf /tmp/* /var/tmp/* && \
find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete && \
find /var/cache -type f -delete
ENTRYPOINT ["/bin/bash", "-c", "source /ov-venv/bin/activate && exec /app/tools.sh \"$@\"", "--"]
### Light, CLI only
FROM base AS light
COPY --from=build /app/full/llama-cli /app/
WORKDIR /app
ENTRYPOINT [ "/app/llama-cli" ]
### Server, Server only
FROM base AS server
ENV LLAMA_ARG_HOST=0.0.0.0
COPY --from=build /app/full/llama-server /app/
WORKDIR /app
HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
ENTRYPOINT [ "/app/llama-server" ]
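For local testing outside CI, the stages above can be exercised with plain `docker build` / `docker run`. A minimal sketch, assuming the file lives at `.devops/openvino.Dockerfile` in the repo root, a GGUF model is available under `~/models`, and an Intel GPU is exposed via `/dev/dri`; the image tags are illustrative, not part of this commit:

```bash
# Build the three targets defined in the Dockerfile (proxy build args are optional and default to empty)
docker build -f .devops/openvino.Dockerfile --target full   -t llama-cpp-openvino:full .
docker build -f .devops/openvino.Dockerfile --target light  -t llama-cpp-openvino:light .
docker build -f .devops/openvino.Dockerfile --target server -t llama-cpp-openvino:server \
    --build-arg http_proxy="$http_proxy" --build-arg https_proxy="$https_proxy" .

# Run the server image; port 8080 matches the HEALTHCHECK above.
# --device /dev/dri exposes an Intel GPU to the container (omit it for CPU-only inference).
docker run --rm -p 8080:8080 --device /dev/dri \
    -v "$HOME/models:/models" \
    llama-cpp-openvino:server -m /models/Llama-3.2-1B-Instruct.fp16.gguf
```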

View File

@ -737,6 +737,45 @@ jobs:
-DGGML_SYCL_F16=ON
cmake --build build --config Release -j $(nproc)
ubuntu-24-cmake-openvino:
runs-on: ubuntu-24.04
steps:
- name: Clone
id: checkout
uses: actions/checkout@v4
- name: ccache
uses: hendrikmuhs/ccache-action@v1.2.16
with:
key: ubuntu-24-cmake-openvino-no-preset-v1
evict-old-files: 1d
- name: Dependencies
id: depends
run: |
export OPENVINO_VERSION_MAJOR=2025.2
export OPENVINO_VERSION_FULL=2025.2.0.19140.c01cd93e24d
sudo apt-get update
sudo apt-get install -y build-essential libcurl4-openssl-dev libtbb12 cmake ninja-build python3-pip curl wget tar
sudo mkdir -p /opt/intel
wget -O openvino_${OPENVINO_VERSION_MAJOR}.tgz https://storage.openvinotoolkit.org/repositories/openvino/packages/${OPENVINO_VERSION_MAJOR}/linux/openvino_toolkit_ubuntu24_${OPENVINO_VERSION_FULL}_x86_64.tgz
tar -xf openvino_${OPENVINO_VERSION_MAJOR}.tgz
sudo mv openvino_toolkit_ubuntu24_${OPENVINO_VERSION_FULL}_x86_64 /opt/intel/openvino_${OPENVINO_VERSION_MAJOR}
rm openvino_${OPENVINO_VERSION_MAJOR}.tgz
cd /opt/intel/openvino_${OPENVINO_VERSION_MAJOR}
echo "Y" | sudo -E ./install_dependencies/install_openvino_dependencies.sh && cd -
sudo ln -s /opt/intel/openvino_${OPENVINO_VERSION_MAJOR} /opt/intel/openvino
- name: Build
id: cmake_build
run: |
source /opt/intel/openvino/setupvars.sh
cmake -B build/ReleaseOV -G Ninja \
-DCMAKE_BUILD_TYPE=Release \
-DGGML_OPENVINO=ON
cmake --build build/ReleaseOV --config Release -j $(nproc)
build-linux-cross:
uses: ./.github/workflows/build-linux-cross.yml

View File

@ -47,6 +47,7 @@ jobs:
- { tag: "vulkan", dockerfile: ".devops/vulkan.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: false, runs_on: "ubuntu-22.04" }
- { tag: "s390x", dockerfile: ".devops/s390x.Dockerfile", platforms: "linux/s390x", full: true, light: true, server: true, free_disk_space: false, runs_on: "ubuntu-22.04-s390x" }
- { tag: "rocm", dockerfile: ".devops/rocm.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: true, runs_on: "ubuntu-22.04" }
- { tag: "openvino", dockerfile: ".devops/openvino.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: false }
steps:
- name: Check out the repo
uses: actions/checkout@v4

View File

@ -231,6 +231,63 @@ jobs:
path: llama-${{ steps.tag.outputs.name }}-bin-ubuntu-vulkan-x64.tar.gz
name: llama-bin-ubuntu-vulkan-x64.tar.gz
ubuntu-24-openvino:
runs-on: ubuntu-24.04
steps:
- name: Clone
id: checkout
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: ccache
uses: hendrikmuhs/ccache-action@v1.2.16
with:
key: ubuntu-24-cmake-openvino-release-no-preset-v1
evict-old-files: 1d
- name: Dependencies
id: depends
run: |
export OPENVINO_VERSION_MAJOR=2025.2
export OPENVINO_VERSION_FULL=2025.2.0.19140.c01cd93e24d
sudo apt-get update
sudo apt-get install -y build-essential libcurl4-openssl-dev libtbb12 cmake ninja-build python3-pip curl wget tar
sudo mkdir -p /opt/intel
wget -O openvino_${OPENVINO_VERSION_MAJOR}.tgz https://storage.openvinotoolkit.org/repositories/openvino/packages/${OPENVINO_VERSION_MAJOR}/linux/openvino_toolkit_ubuntu24_${OPENVINO_VERSION_FULL}_x86_64.tgz
tar -xf openvino_${OPENVINO_VERSION_MAJOR}.tgz
sudo mv openvino_toolkit_ubuntu24_${OPENVINO_VERSION_FULL}_x86_64 /opt/intel/openvino_${OPENVINO_VERSION_MAJOR}
rm openvino_${OPENVINO_VERSION_MAJOR}.tgz
cd /opt/intel/openvino_${OPENVINO_VERSION_MAJOR}
echo "Y" | sudo -E ./install_dependencies/install_openvino_dependencies.sh && cd -
sudo ln -s /opt/intel/openvino_${OPENVINO_VERSION_MAJOR} /opt/intel/openvino
- name: Build
id: cmake_build
run: |
source /opt/intel/openvino/setupvars.sh
cmake -B build/ReleaseOV -G Ninja \
-DCMAKE_BUILD_TYPE=Release \
-DGGML_OPENVINO=ON
cmake --build build/ReleaseOV --config Release -j $(nproc)
- name: Determine tag name
id: tag
uses: ./.github/actions/get-tag-name
- name: Pack artifacts
id: pack_artifacts
run: |
cp LICENSE ./build/ReleaseOV/bin/
zip -r llama-${{ steps.tag.outputs.name }}-bin-ubuntu-openvino-x64.zip ./build/ReleaseOV/bin/*
- name: Upload artifacts
uses: actions/upload-artifact@v4
with:
path: llama-${{ steps.tag.outputs.name }}-bin-ubuntu-openvino-x64.zip
name: llama-bin-ubuntu-openvino-x64.zip
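For reference, a hedged sketch of how the uploaded archive could be consumed on another Ubuntu 24.04 machine with OpenVINO already installed; the tag below is a placeholder, and the `build/ReleaseOV/bin/` layout follows from the `zip -r` invocation above:

```bash
# Unpack the release artifact (the archive preserves the build/ReleaseOV/bin/ paths);
# substitute the real release tag for <tag>
unzip llama-<tag>-bin-ubuntu-openvino-x64.zip -d llama-openvino
# The binaries link against OpenVINO, so source the runtime environment first
source /opt/intel/openvino/setupvars.sh
./llama-openvino/build/ReleaseOV/bin/llama-cli --version
```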
windows-cpu:
runs-on: windows-2025

View File

@ -25,6 +25,9 @@
# # with KLEIDIAI support
# GG_BUILD_KLEIDIAI=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
#
# # with OPENVINO support
# GG_BUILD_OPENVINO=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
#
if [ -z "$2" ]; then
echo "usage: $0 <output-dir> <mnt-dir>"
@ -165,6 +168,15 @@ if [ -n "${GG_BUILD_KLEIDIAI}" ]; then
-DBUILD_SHARED_LIBS=OFF"
fi
if [ ! -z ${GG_BUILD_OPENVINO} ]; then
if [ -z ${OpenVINO_DIR} ]; then
echo "OpenVINO_DIR not found, please install OpenVINO via archives and enable it by:"
echo "source /opt/intel/openvino/setupvars.sh"
exit 1
fi
CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_OPENVINO=ON"
fi
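Concretely, this check means the OpenVINO environment must be sourced before invoking the CI script; a minimal sketch, assuming the archive install location used elsewhere in this commit:

```bash
# setupvars.sh exports OpenVINO_DIR, which the check above looks for
source /opt/intel/openvino/setupvars.sh
mkdir -p ./tmp/results ./tmp/mnt
GG_BUILD_OPENVINO=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
```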
## helpers
# download a file if it does not exist or if it is outdated

View File

@ -25,7 +25,7 @@ The following sections describe how to build with different backends and options
* [Arm® KleidiAI™](#arm-kleidiai)
* [OpenCL](#opencl)
* [Android](#android-1)
* [OPENVINO](#openvino)
* [OpenVINO](#openvino)
* [Notes about GPU-accelerated backends](#notes-about-gpu-accelerated-backends)
## CPU Build
@ -696,20 +696,48 @@ Follow the instructions [here](https://dawn.googlesource.com/dawn/+/refs/heads/m
To read documentation for how to build on IBM Z & LinuxONE, [click here](./build-s390x.md)
## OPENVINO
## OpenVINO
[OpenVINO](https://docs.openvino.ai/2025/index.html) is a open-source toolkit for optimizing and deploying performant AI inference, specifically designed for Intel hardware including CPUs, GPUs, and NPUs in the cloud, on-prem, and on the edge alike. The OpenVINO backend enhances performance by leveraging hardware-specific optimizations and can be enabled for use with llama.cpp.
[OpenVINO](https://docs.openvino.ai/2025/index.html) is an open-source toolkit for optimizing and deploying high-performance AI inference, specifically designed for Intel hardware, including CPUs, GPUs, and NPUs, in the cloud, on-premises, and on the edge.
The OpenVINO backend enhances performance by leveraging hardware-specific optimizations and can be enabled for use with llama.cpp.
Follow the instructions below to install OpenVINO runtime and build llama.cpp with OpenVINO support.
### Prerequisites
- Linux or Windows system with Intel hardware (CPU, GPU, or NPU)
- **For Intel GPU or NPU Usage**: Install the appropriate hardware drivers for your Intel GPU or NPU. For detailed instructions, see: [Additional Configurations for Hardware Acceleration](https://docs.openvino.ai/2025/get-started/install-openvino/configurations.html).
- Git, CMake, and Ninja are required for building. Install the build dependencies:
```bash
sudo apt-get update
sudo apt-get install -y build-essential libcurl4-openssl-dev libtbb12 cmake ninja-build python3-pip curl wget tar
```
### 1. Install OpenVINO Runtime
- Follow the guide to install OpenVINO Runtime from an archive file: **[Install OpenVINO™ Runtime on Linux from an Archive File.](https://docs.openvino.ai/2025/get-started/install-openvino/install-openvino-archive-linux.html)**
- After installation, make sure to [source the environment setup script](https://docs.openvino.ai/2025/get-started/install-openvino/install-openvino-archive-linux.html#step-2-configure-the-environment):
<details>
<summary>📦 Click to expand OpenVINO 2025.2 installation commands</summary>
<br>
```bash
source /opt/intel/openvino_2025.1.0/setupvars.sh
export OPENVINO_VERSION_MAJOR=2025.2
export OPENVINO_VERSION_FULL=2025.2.0.19140.c01cd93e24d
sudo apt-get update
sudo apt-get install -y build-essential libcurl4-openssl-dev libtbb12 cmake ninja-build python3-pip curl wget tar
sudo mkdir -p /opt/intel
wget -O openvino_${OPENVINO_VERSION_MAJOR}.tgz https://storage.openvinotoolkit.org/repositories/openvino/packages/${OPENVINO_VERSION_MAJOR}/linux/openvino_toolkit_ubuntu24_${OPENVINO_VERSION_FULL}_x86_64.tgz
tar -xf openvino_${OPENVINO_VERSION_MAJOR}.tgz
sudo mv openvino_toolkit_ubuntu24_${OPENVINO_VERSION_FULL}_x86_64 /opt/intel/openvino_${OPENVINO_VERSION_MAJOR}
rm openvino_${OPENVINO_VERSION_MAJOR}.tgz
cd /opt/intel/openvino_${OPENVINO_VERSION_MAJOR}
echo "Y" | sudo -E ./install_dependencies/install_openvino_dependencies.sh && cd -
sudo ln -s /opt/intel/openvino_${OPENVINO_VERSION_MAJOR} /opt/intel/openvino
source /opt/intel/openvino/setupvars.sh
```
</details>
- Verify that OpenVINO is initialized properly:
```bash
echo $OpenVINO_DIR
@ -725,23 +753,26 @@ cd llama.cpp
git switch dev_backend_openvino
# Build with OpenVINO support
cmake --preset ReleaseOV
cmake --build build/ReleaseOV --parallel
source /opt/intel/openvino/setupvars.sh
cmake -B build/ReleaseOV -G Ninja -DCMAKE_BUILD_TYPE=Release -DGGML_OPENVINO=ON
cmake --build build/ReleaseOV --config Release -j $(nproc)
```
### 3. Download Sample Model
Download the Phi-3 mini model for testing:
Download models for testing:
```bash
# Create models directory
mkdir -p ~/models/Phi-3-mini-4k-instruct-gguf
mkdir -p ~/models/
# Download model file
# Download model file: Llama-3.2-1B-Instruct.fp16.gguf
wget https://huggingface.co/MaziyarPanahi/Llama-3.2-1B-Instruct-GGUF/resolve/main/Llama-3.2-1B-Instruct.fp16.gguf \
-O ~/models/Llama-3.2-1B-Instruct.fp16.gguf
# Download model file: Phi-3-mini-4k-instruct-fp16.gguf
wget https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-gguf/resolve/main/Phi-3-mini-4k-instruct-fp16.gguf \
-O ~/models/Phi-3-mini-4k-instruct-gguf/Phi-3-mini-4k-instruct-fp16.gguf
-O ~/models/Phi-3-mini-4k-instruct-fp16.gguf
```
### 4. Run inference with OpenVINO backend:
@ -750,28 +781,19 @@ When using the OpenVINO backend, the first inference token may have slightly hig
```bash
export GGML_OPENVINO_CACHE_DIR=/tmp/ov_cache
# Default device is GPU.
# If not set, automatically selects the first available device in priority order: GPU, CPU, NPU.
export GGML_OPENVINO_DEVICE=GPU
./build/ReleaseOV/bin/llama-simple \
-m ~/models/Phi-3-mini-4k-instruct-gguf/Phi-3-mini-4k-instruct-fp16.gguf \
-n 50 \
"Hello, my name is "
./build/ReleaseOV/bin/llama-simple -m ~/models/Llama-3.2-1B-Instruct.fp16.gguf -n 50 "The story of AI is "
```
### Using Llama.cpp's Built-in CPU Backend (for Comparison)
To compare performance with the default CPU backend:
To run in chat mode:
```bash
# Build CPU-only version
cmake --preset ReleaseCPU
cmake --build build/ReleaseCPU --parallel
export GGML_OPENVINO_CACHE_DIR=/tmp/ov_cache
# Run with Default CPU backend
./build/ReleaseCPU/bin/llama-simple \
-m ~/models/Phi-3-mini-4k-instruct-gguf/Phi-3-mini-4k-instruct-fp16.gguf \
-n 50 \
"Hello, my name is "
./build/ReleaseOV/bin/llama-cli -m ~/models/Llama-3.2-1B-Instruct.fp16.gguf -n 50 "The story of AI is "
```
@ -779,13 +801,14 @@ cmake --build build/ReleaseCPU --parallel
Control OpenVINO behavior using these environment variables:
- **`GGML_OPENVINO_CACHE_DIR`**: Directory for model caching (recommended: `/tmp/ov_cache`). If set, enables model caching in OpenVINO.
- **`GGML_OPENVINO_DEVICE`**: Specify the target device for OpenVINO inference. If not set, automatically selects the first available device in priority order: GPU, CPU, NPU. When set to `NPU` to use Intel NPUs, it enables static compilation mode for optimal performance.
- **`GGML_OPENVINO_CACHE_DIR`**: Directory for model caching (recommended: `/tmp/ov_cache`). If set, enables model caching in OpenVINO. Note: Not supported when using NPU devices yet.
- **`GGML_OPENVINO_WEIGHT_AS_INPUT`**: Pass the weights as input to the OpenVINO model instead of creating Constant nodes for them.
- **`GGML_OPENVINO_PROFILING`**: Enable execution time profiling
- **`GGML_OPENVINO_DUMP_CGRAPH`**: Save compute graph to `cgraph.txt`
- **`GGML_OPENVINO_DUMP_IR`**: Export OpenVINO IR files with timestamps
- **`GGML_OPENVINO_DEBUG_INPUT`**: Enable input debugging
- **`GGML_OPENVINO_DEBUG_OUTPUT`**: Enable output debugging
- **`GGML_OPENVINO_PROFILING`**: Enable execution time profiling.
- **`GGML_OPENVINO_DUMP_CGRAPH`**: Save compute graph to `cgraph.txt`.
- **`GGML_OPENVINO_DUMP_IR`**: Export OpenVINO IR files with timestamps.
- **`GGML_OPENVINO_DEBUG_INPUT`**: Enable input debugging.
- **`GGML_OPENVINO_DEBUG_OUTPUT`**: Enable output debugging.
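For example, to target an Intel NPU, a minimal sketch based on the variables above (per the cache note, `GGML_OPENVINO_CACHE_DIR` is left unset for NPU):

```bash
# Select the NPU explicitly; this also enables static compilation mode
export GGML_OPENVINO_DEVICE=NPU
./build/ReleaseOV/bin/llama-simple -m ~/models/Llama-3.2-1B-Instruct.fp16.gguf -n 50 "The story of AI is "
```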
### Example with Profiling
@ -793,11 +816,20 @@ Control OpenVINO behavior using these environment variables:
export GGML_OPENVINO_CACHE_DIR=/tmp/ov_cache
export GGML_OPENVINO_PROFILING=1
./build/ReleaseOV/bin/llama-simple \
-m ~/models/Phi-3-mini-4k-instruct-gguf/Phi-3-mini-4k-instruct-fp16.gguf \
-n 50 \
"Hello, my name is "
./build/ReleaseOV/bin/llama-simple -m ~/models/Llama-3.2-1B-Instruct.fp16.gguf -n 50 "The story of AI is "
```
### Using Llama.cpp's Built-in CPU Backend (for Comparison)
To compare performance with the default CPU backend:
```bash
# Build CPU-only version
cmake --preset ReleaseCPU
cmake --build build/ReleaseCPU --parallel
# Run with the default CPU backend
./build/ReleaseCPU/bin/llama-simple -m ~/models/Llama-3.2-1B-Instruct.fp16.gguf -n 50 "The story of AI is "
```
## Notes about GPU-accelerated backends