Added OpenVINO CI/CD. Updated docs

This commit is contained in:
ravi9 2025-07-17 17:51:10 -07:00 committed by Mustafa Cavus
parent d61f83c9b7
commit ea75772e48
6 changed files with 314 additions and 39 deletions

134
.devops/openvino.Dockerfile Normal file
View File

@ -0,0 +1,134 @@
# Pinned OpenVINO release. These are global defaults; a stage must redeclare
# an ARG (without a value) before it can use it.
ARG OPENVINO_VERSION_MAJOR=2025.2
ARG OPENVINO_VERSION_FULL=2025.2.0.19140.c01cd93e24d
# Ubuntu release used by the FROM lines of the build and base stages.
ARG UBUNTU_VERSION=24.04
# Proxy settings are optional and default to empty.
ARG http_proxy=
ARG https_proxy=

## Build Image
FROM ubuntu:${UBUNTU_VERSION} AS build

# Redeclare the proxy args so they are visible inside this stage.
ARG http_proxy
ARG https_proxy

# Toolchain and libraries needed to download OpenVINO and compile the project.
# The apt package lists are removed in the same layer that created them.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        cmake \
        git \
        gnupg \
        libcurl4-openssl-dev \
        libtbb12 \
        ninja-build \
        wget \
    && rm -rf /var/lib/apt/lists/*
# Install OpenVINO for Ubuntu 24.04
# The version ARGs are redeclared (without values) so this stage picks up the
# global defaults declared before the first FROM.
ARG OPENVINO_VERSION_MAJOR
ARG OPENVINO_VERSION_FULL
# Steps, all in one layer:
#   1. download the pinned archive and unpack it,
#   2. delete the archive right away so it is not kept in the layer
#      (the CI workflows that install OpenVINO do the same),
#   3. move the unpacked tree to /opt/intel/openvino_<major>,
#   4. run the bundled dependency installer ("Y" is piped to its stdin,
#      presumably to answer its confirmation prompt),
#   5. publish the install under the version-independent /opt/intel/openvino symlink.
RUN mkdir -p /opt/intel && \
    wget https://storage.openvinotoolkit.org/repositories/openvino/packages/${OPENVINO_VERSION_MAJOR}/linux/openvino_toolkit_ubuntu24_${OPENVINO_VERSION_FULL}_x86_64.tgz && \
    tar -xf openvino_toolkit_ubuntu24_${OPENVINO_VERSION_FULL}_x86_64.tgz && \
    rm openvino_toolkit_ubuntu24_${OPENVINO_VERSION_FULL}_x86_64.tgz && \
    mv openvino_toolkit_ubuntu24_${OPENVINO_VERSION_FULL}_x86_64 /opt/intel/openvino_${OPENVINO_VERSION_MAJOR} && \
    cd /opt/intel/openvino_${OPENVINO_VERSION_MAJOR} && \
    echo "Y" | ./install_dependencies/install_openvino_dependencies.sh && \
    cd - && \
    ln -s /opt/intel/openvino_${OPENVINO_VERSION_MAJOR} /opt/intel/openvino
# Location later steps use to find setupvars.sh and the OpenVINO libraries.
ENV OpenVINO_DIR=/opt/intel/openvino
# All following relative paths (sources, build tree) resolve under /app.
WORKDIR /app
# Copy the entire build context into the image.
COPY . .
# Build Stage
# setupvars.sh is sourced inside the same bash process that runs cmake, so the
# environment it sets up is visible to both the configure and the build step.
# The build tree is build/ReleaseOV (Ninja generator, Release, GGML_OPENVINO=ON).
RUN bash -c "source ${OpenVINO_DIR}/setupvars.sh && \
cmake -B build/ReleaseOV -G Ninja \
-DCMAKE_BUILD_TYPE=Release \
-DGGML_OPENVINO=ON && \
cmake --build build/ReleaseOV -j$(nproc)"
# Copy all necessary libraries
# Gathers into /app/lib:
#   - every shared object produced by the build, and
#   - the OpenVINO shared objects (cp -P keeps symlinks as symlinks).
# The OpenVINO lookup tries runtime/lib/intel64 first and falls back to
# lib/intel64. The fallback is grouped in a subshell so it only applies to the
# OpenVINO lookup; a failure of mkdir or of the first find now fails the RUN
# instead of silently triggering the fallback.
RUN mkdir -p /app/lib && \
    find build/ReleaseOV -name '*.so*' -exec cp {} /app/lib \; && \
    ( find ${OpenVINO_DIR}/runtime/lib/intel64 -name '*.so*' -exec cp -P {} /app/lib \; 2>/dev/null || \
      find ${OpenVINO_DIR}/lib/intel64 -name '*.so*' -exec cp -P {} /app/lib \; )
# Assemble /app/full, the directory later stages copy from: the built
# binaries, the top-level Python scripts, gguf-py, the requirements files and
# the tools.sh script.
RUN mkdir -p /app/full && \
    cp build/ReleaseOV/bin/* /app/full/ && \
    cp *.py /app/full && \
    cp -r gguf-py /app/full && \
    cp -r requirements /app/full && \
    cp requirements.txt /app/full && \
    cp .devops/tools.sh /app/full/tools.sh
## Base Runtime Image
FROM ubuntu:${UBUNTU_VERSION} AS base
# Pass proxy args to runtime stage
ARG http_proxy
ARG https_proxy
# Runtime libraries plus curl (the server image's HEALTHCHECK invokes curl).
# Only apt-get is used: the `apt` front end warns that its CLI is not stable
# for scripted use. Caches and package lists are removed in the same layer.
# NOTE(review): --no-install-recommends is intentionally not added here, since
# it could drop packages pulled in as recommendations of curl — confirm before
# tightening.
RUN apt-get update \
    && apt-get install -y libgomp1 libtbb12 curl \
    && apt-get autoremove -y \
    && apt-get clean -y \
    && rm -rf /tmp/* /var/tmp/* \
    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
    && find /var/cache -type f -delete
# Shared libraries go directly into /app, the same directory the derived
# stages copy their binaries into.
COPY --from=build /app/lib/ /app/
### Full (all binaries)
FROM base AS full

# Proxy args must be redeclared to be visible in this stage.
ARG http_proxy
ARG https_proxy

WORKDIR /app
# Everything the build stage staged under /app/full.
COPY --from=build /app/full /app/

# Install Python, create a virtual environment at /ov-venv, install the
# project's requirements.txt into it, then clean apt caches in the same layer.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        git \
        python3 \
        python3-pip \
        python3-venv \
    && python3 -m venv /ov-venv \
    && /ov-venv/bin/pip install --no-cache-dir --upgrade pip setuptools wheel \
    && /ov-venv/bin/pip install --no-cache-dir -r requirements.txt \
    && apt-get autoremove -y \
    && apt-get clean \
    && rm -rf /tmp/* /var/tmp/* \
    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
    && find /var/cache -type f -delete

# Activate the virtual environment, then exec tools.sh with the container
# arguments. The trailing "--" becomes $0 of the bash -c script so that "$@"
# holds only the user-supplied arguments.
ENTRYPOINT ["/bin/bash", "-c", "source /ov-venv/bin/activate && exec /app/tools.sh \"$@\"", "--"]
### Light, CLI only
FROM base AS light
WORKDIR /app
# Only the llama-cli binary is added on top of the base image.
COPY --from=build /app/full/llama-cli /app/
ENTRYPOINT ["/app/llama-cli"]
### Server, Server only
FROM base AS server
WORKDIR /app
# Only the llama-server binary is added on top of the base image.
COPY --from=build /app/full/llama-server /app/
# NOTE(review): presumably read by llama-server as its listen address, so the
# server is reachable from outside the container — confirm.
ENV LLAMA_ARG_HOST=0.0.0.0
# Probe the /health endpoint on port 8080; curl -f fails on HTTP error codes.
HEALTHCHECK CMD ["curl", "-f", "http://localhost:8080/health"]
ENTRYPOINT ["/app/llama-server"]

View File

@ -737,6 +737,45 @@ jobs:
-DGGML_SYCL_F16=ON -DGGML_SYCL_F16=ON
cmake --build build --config Release -j $(nproc) cmake --build build --config Release -j $(nproc)
# CI job: configure and build with -DGGML_OPENVINO=ON on an ubuntu-24.04 runner.
ubuntu-24-cmake-openvino:
runs-on: ubuntu-24.04
steps:
# Check out the repository.
- name: Clone
id: checkout
uses: actions/checkout@v4
# Set up ccache under a job-specific cache key.
- name: ccache
uses: hendrikmuhs/ccache-action@v1.2.16
with:
key: ubuntu-24-cmake-openvino-no-preset-v1
evict-old-files: 1d
# Install build tools, then download the pinned OpenVINO archive, unpack it to
# /opt/intel/openvino_<major>, run its dependency installer and symlink it to
# /opt/intel/openvino.
- name: Dependencies
id: depends
run: |
export OPENVINO_VERSION_MAJOR=2025.2
export OPENVINO_VERSION_FULL=2025.2.0.19140.c01cd93e24d
sudo apt-get update
sudo apt-get install -y build-essential libcurl4-openssl-dev libtbb12 cmake ninja-build python3-pip curl wget tar
sudo mkdir -p /opt/intel
wget -O openvino_${OPENVINO_VERSION_MAJOR}.tgz https://storage.openvinotoolkit.org/repositories/openvino/packages/${OPENVINO_VERSION_MAJOR}/linux/openvino_toolkit_ubuntu24_${OPENVINO_VERSION_FULL}_x86_64.tgz
tar -xf openvino_${OPENVINO_VERSION_MAJOR}.tgz
sudo mv openvino_toolkit_ubuntu24_${OPENVINO_VERSION_FULL}_x86_64 /opt/intel/openvino_${OPENVINO_VERSION_MAJOR}
rm openvino_${OPENVINO_VERSION_MAJOR}.tgz
cd /opt/intel/openvino_${OPENVINO_VERSION_MAJOR}
echo "Y" | sudo -E ./install_dependencies/install_openvino_dependencies.sh && cd -
sudo ln -s /opt/intel/openvino_${OPENVINO_VERSION_MAJOR} /opt/intel/openvino
# Source the OpenVINO environment, then configure and build into build/ReleaseOV with Ninja.
- name: Build
id: cmake_build
run: |
source /opt/intel/openvino/setupvars.sh
cmake -B build/ReleaseOV -G Ninja \
-DCMAKE_BUILD_TYPE=Release \
-DGGML_OPENVINO=ON
cmake --build build/ReleaseOV --config Release -j $(nproc)
build-linux-cross: build-linux-cross:
uses: ./.github/workflows/build-linux-cross.yml uses: ./.github/workflows/build-linux-cross.yml

View File

@ -47,6 +47,7 @@ jobs:
- { tag: "vulkan", dockerfile: ".devops/vulkan.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: false, runs_on: "ubuntu-22.04" } - { tag: "vulkan", dockerfile: ".devops/vulkan.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: false, runs_on: "ubuntu-22.04" }
- { tag: "s390x", dockerfile: ".devops/s390x.Dockerfile", platforms: "linux/s390x", full: true, light: true, server: true, free_disk_space: false, runs_on: "ubuntu-22.04-s390x" } - { tag: "s390x", dockerfile: ".devops/s390x.Dockerfile", platforms: "linux/s390x", full: true, light: true, server: true, free_disk_space: false, runs_on: "ubuntu-22.04-s390x" }
- { tag: "rocm", dockerfile: ".devops/rocm.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: true, runs_on: "ubuntu-22.04" } - { tag: "rocm", dockerfile: ".devops/rocm.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: true, runs_on: "ubuntu-22.04" }
# OpenVINO images; runs_on is set explicitly, like every other matrix entry.
- { tag: "openvino", dockerfile: ".devops/openvino.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: false, runs_on: "ubuntu-22.04" }
steps: steps:
- name: Check out the repo - name: Check out the repo
uses: actions/checkout@v4 uses: actions/checkout@v4

View File

@ -231,6 +231,63 @@ jobs:
path: llama-${{ steps.tag.outputs.name }}-bin-ubuntu-vulkan-x64.tar.gz path: llama-${{ steps.tag.outputs.name }}-bin-ubuntu-vulkan-x64.tar.gz
name: llama-bin-ubuntu-vulkan-x64.tar.gz name: llama-bin-ubuntu-vulkan-x64.tar.gz
# Release job: build with -DGGML_OPENVINO=ON on an ubuntu-24.04 runner and
# upload the resulting binaries as a zip artifact.
ubuntu-24-openvino:
runs-on: ubuntu-24.04
steps:
# Check out the repository with full history (fetch-depth: 0).
- name: Clone
id: checkout
uses: actions/checkout@v4
with:
fetch-depth: 0
# Set up ccache under a job-specific cache key.
- name: ccache
uses: hendrikmuhs/ccache-action@v1.2.16
with:
key: ubuntu-24-cmake-openvino-release-no-preset-v1
evict-old-files: 1d
# Install build tools, then download the pinned OpenVINO archive, unpack it to
# /opt/intel/openvino_<major>, run its dependency installer and symlink it to
# /opt/intel/openvino.
- name: Dependencies
id: depends
run: |
export OPENVINO_VERSION_MAJOR=2025.2
export OPENVINO_VERSION_FULL=2025.2.0.19140.c01cd93e24d
sudo apt-get update
sudo apt-get install -y build-essential libcurl4-openssl-dev libtbb12 cmake ninja-build python3-pip curl wget tar
sudo mkdir -p /opt/intel
wget -O openvino_${OPENVINO_VERSION_MAJOR}.tgz https://storage.openvinotoolkit.org/repositories/openvino/packages/${OPENVINO_VERSION_MAJOR}/linux/openvino_toolkit_ubuntu24_${OPENVINO_VERSION_FULL}_x86_64.tgz
tar -xf openvino_${OPENVINO_VERSION_MAJOR}.tgz
sudo mv openvino_toolkit_ubuntu24_${OPENVINO_VERSION_FULL}_x86_64 /opt/intel/openvino_${OPENVINO_VERSION_MAJOR}
rm openvino_${OPENVINO_VERSION_MAJOR}.tgz
cd /opt/intel/openvino_${OPENVINO_VERSION_MAJOR}
echo "Y" | sudo -E ./install_dependencies/install_openvino_dependencies.sh && cd -
sudo ln -s /opt/intel/openvino_${OPENVINO_VERSION_MAJOR} /opt/intel/openvino
# Source the OpenVINO environment, then configure and build into build/ReleaseOV with Ninja.
- name: Build
id: cmake_build
run: |
source /opt/intel/openvino/setupvars.sh
cmake -B build/ReleaseOV -G Ninja \
-DCMAKE_BUILD_TYPE=Release \
-DGGML_OPENVINO=ON
cmake --build build/ReleaseOV --config Release -j $(nproc)
# Resolve the tag name used in the artifact file name (local composite action).
- name: Determine tag name
id: tag
uses: ./.github/actions/get-tag-name
# Add the LICENSE file next to the binaries and zip the bin directory.
- name: Pack artifacts
id: pack_artifacts
run: |
cp LICENSE ./build/ReleaseOV/bin/
zip -r llama-${{ steps.tag.outputs.name }}-bin-ubuntu-openvino-x64.zip ./build/ReleaseOV/bin/*
# Publish the zip produced by the previous step.
- name: Upload artifacts
uses: actions/upload-artifact@v4
with:
path: llama-${{ steps.tag.outputs.name }}-bin-ubuntu-openvino-x64.zip
name: llama-bin-ubuntu-openvino-x64.zip
windows-cpu: windows-cpu:
runs-on: windows-2025 runs-on: windows-2025

View File

@ -25,6 +25,9 @@
# # with KLEIDIAI support # # with KLEIDIAI support
# GG_BUILD_KLEIDIAI=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt # GG_BUILD_KLEIDIAI=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
# #
# # with OPENVINO support
# GG_BUILD_OPENVINO=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
#
if [ -z "$2" ]; then if [ -z "$2" ]; then
echo "usage: $0 <output-dir> <mnt-dir>" echo "usage: $0 <output-dir> <mnt-dir>"
@ -165,6 +168,15 @@ if [ -n "${GG_BUILD_KLEIDIAI}" ]; then
-DBUILD_SHARED_LIBS=OFF" -DBUILD_SHARED_LIBS=OFF"
fi fi
# Enable the OpenVINO backend when GG_BUILD_OPENVINO is set to a non-empty value.
# OpenVINO_DIR must already be set in the environment; otherwise the script
# prints how to set it up and exits with status 1.
# Both variables are quoted so the tests stay well-formed when a value is
# empty or contains whitespace.
if [ -n "${GG_BUILD_OPENVINO}" ]; then
    if [ -z "${OpenVINO_DIR}" ]; then
        echo "OpenVINO_DIR not found, please install OpenVINO via archives and enable it by:"
        echo "source /opt/intel/openvino/setupvars.sh"
        exit 1
    fi
    CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_OPENVINO=ON"
fi
## helpers ## helpers
# download a file if it does not exist or if it is outdated # download a file if it does not exist or if it is outdated

View File

@ -25,7 +25,7 @@ The following sections describe how to build with different backends and options
* [Arm® KleidiAI™](#arm-kleidiai) * [Arm® KleidiAI™](#arm-kleidiai)
* [OpenCL](#opencl) * [OpenCL](#opencl)
* [Android](#android-1) * [Android](#android-1)
* [OPENVINO](#openvino) * [OpenVINO](#openvino)
* [Notes about GPU-accelerated backends](#notes-about-gpu-accelerated-backends) * [Notes about GPU-accelerated backends](#notes-about-gpu-accelerated-backends)
## CPU Build ## CPU Build
@ -696,20 +696,48 @@ Follow the instructions [here](https://dawn.googlesource.com/dawn/+/refs/heads/m
To read documentation for how to build on IBM Z & LinuxONE, [click here](./build-s390x.md) To read documentation for how to build on IBM Z & LinuxONE, [click here](./build-s390x.md)
## OPENVINO ## OpenVINO
[OpenVINO](https://docs.openvino.ai/2025/index.html) is a open-source toolkit for optimizing and deploying performant AI inference, specifically designed for Intel hardware including CPUs, GPUs, and NPUs in the cloud, on-prem, and on the edge alike. The OpenVINO backend enhances performance by leveraging hardware-specific optimizations and can be enabled for use with llama.cpp. [OpenVINO](https://docs.openvino.ai/2025/index.html) is an open-source toolkit for optimizing and deploying high-performance AI inference, specifically designed for Intel hardware, including CPUs, GPUs, and NPUs, in the cloud, on-premises, and on the edge.
The OpenVINO backend enhances performance by leveraging hardware-specific optimizations and can be enabled for use with llama.cpp.
Follow the instructions below to install OpenVINO runtime and build llama.cpp with OpenVINO support. Follow the instructions below to install OpenVINO runtime and build llama.cpp with OpenVINO support.
### Prerequisites
- Linux or Windows system with Intel hardware (CPU, GPU, or NPU)
- **For Intel GPU or NPU Usage**: Install the appropriate hardware drivers for your Intel GPU or NPU. For detailed instructions, see: [Additional Configurations for Hardware Acceleration](https://docs.openvino.ai/2025/get-started/install-openvino/configurations.html).
- Git, CMake, and Ninja are required for building.
```bash
sudo apt-get update
sudo apt-get install -y build-essential libcurl4-openssl-dev libtbb12 cmake ninja-build python3-pip curl wget tar
```
### 1. Install OpenVINO Runtime ### 1. Install OpenVINO Runtime
- Follow the guide to install OpenVINO Runtime from an archive file: **[Install OpenVINO™ Runtime on Linux from an Archive File.](https://docs.openvino.ai/2025/get-started/install-openvino/install-openvino-archive-linux.html)** - Follow the guide to install OpenVINO Runtime from an archive file: **[Install OpenVINO™ Runtime on Linux from an Archive File.](https://docs.openvino.ai/2025/get-started/install-openvino/install-openvino-archive-linux.html)**
- After installation, make sure to [source the environment setup script](https://docs.openvino.ai/2025/get-started/install-openvino/install-openvino-archive-linux.html#step-2-configure-the-environment): <details>
<summary>📦 Click to expand OpenVINO 2025.2 installation commands</summary>
<br>
```bash ```bash
source /opt/intel/openvino_2025.1.0/setupvars.sh export OPENVINO_VERSION_MAJOR=2025.2
export OPENVINO_VERSION_FULL=2025.2.0.19140.c01cd93e24d
sudo apt-get update
sudo apt-get install -y build-essential libcurl4-openssl-dev libtbb12 cmake ninja-build python3-pip curl wget tar
sudo mkdir -p /opt/intel
wget -O openvino_${OPENVINO_VERSION_MAJOR}.tgz https://storage.openvinotoolkit.org/repositories/openvino/packages/${OPENVINO_VERSION_MAJOR}/linux/openvino_toolkit_ubuntu24_${OPENVINO_VERSION_FULL}_x86_64.tgz
tar -xf openvino_${OPENVINO_VERSION_MAJOR}.tgz
sudo mv openvino_toolkit_ubuntu24_${OPENVINO_VERSION_FULL}_x86_64 /opt/intel/openvino_${OPENVINO_VERSION_MAJOR}
rm openvino_${OPENVINO_VERSION_MAJOR}.tgz
cd /opt/intel/openvino_${OPENVINO_VERSION_MAJOR}
echo "Y" | sudo -E ./install_dependencies/install_openvino_dependencies.sh && cd -
sudo ln -s /opt/intel/openvino_${OPENVINO_VERSION_MAJOR} /opt/intel/openvino
source /opt/intel/openvino/setupvars.sh
``` ```
</details>
- Verify OpenVINO is initialized properly - Verify OpenVINO is initialized properly
```bash ```bash
echo $OpenVINO_DIR echo $OpenVINO_DIR
@ -725,23 +753,26 @@ cd llama.cpp
git switch dev_backend_openvino git switch dev_backend_openvino
# Build with OpenVINO support # Build with OpenVINO support
cmake --preset ReleaseOV source /opt/intel/openvino/setupvars.sh
cmake --build build/ReleaseOV --parallel cmake -B build/ReleaseOV -G Ninja -DCMAKE_BUILD_TYPE=Release -DGGML_OPENVINO=ON
cmake --build build/ReleaseOV --config Release -j $(nproc)
``` ```
### 3. Download Sample Model ### 3. Download Sample Model
Download the Phi-3 mini model for testing: Download models for testing:
```bash ```bash
# Create models directory # Create models directory
mkdir -p ~/models/Phi-3-mini-4k-instruct-gguf mkdir -p ~/models/
# Download model file # Download model file: Llama-3.2-1B-Instruct.fp16.gguf
wget https://huggingface.co/MaziyarPanahi/Llama-3.2-1B-Instruct-GGUF/resolve/main/Llama-3.2-1B-Instruct.fp16.gguf \
-O ~/models/Llama-3.2-1B-Instruct.fp16.gguf
# Download model file: Phi-3-mini-4k-instruct-fp16.gguf
wget https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-gguf/resolve/main/Phi-3-mini-4k-instruct-fp16.gguf \ wget https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-gguf/resolve/main/Phi-3-mini-4k-instruct-fp16.gguf \
-O ~/models/Phi-3-mini-4k-instruct-gguf/Phi-3-mini-4k-instruct-fp16.gguf -O ~/models/Phi-3-mini-4k-instruct-fp16.gguf
``` ```
### 4. Run inference with OpenVINO backend: ### 4. Run inference with OpenVINO backend:
@ -750,28 +781,19 @@ When using the OpenVINO backend, the first inference token may have slightly hig
```bash ```bash
export GGML_OPENVINO_CACHE_DIR=/tmp/ov_cache export GGML_OPENVINO_CACHE_DIR=/tmp/ov_cache
# Default device is GPU.
# If not set, automatically selects the first available device in priority order: GPU, CPU, NPU.
export GGML_OPENVINO_DEVICE=GPU
./build/ReleaseOV/bin/llama-simple \ ./build/ReleaseOV/bin/llama-simple -m ~/models/Llama-3.2-1B-Instruct.fp16.gguf -n 50 "The story of AI is "
-m ~/models/Phi-3-mini-4k-instruct-gguf/Phi-3-mini-4k-instruct-fp16.gguf \
-n 50 \
"Hello, my name is "
``` ```
### Using Llama.cpp's Built-in CPU Backend (for Comparison) To run in chat mode:
To compare performance with the deafult CPU backend:
```bash ```bash
# Build CPU-only version export GGML_OPENVINO_CACHE_DIR=/tmp/ov_cache
cmake --preset ReleaseCPU
cmake --build build/ReleaseCPU --parallel
# Run with Default CPU backend ./build/ReleaseOV/bin/llama-cli -m ~/models/Llama-3.2-1B-Instruct.fp16.gguf -n 50 "The story of AI is "
./build/ReleaseCPU/bin/llama-simple \
-m ~/models/Phi-3-mini-4k-instruct-gguf/Phi-3-mini-4k-instruct-fp16.gguf \
-n 50 \
"Hello, my name is "
``` ```
@ -779,13 +801,14 @@ cmake --build build/ReleaseCPU --parallel
Control OpenVINO behavior using these environment variables: Control OpenVINO behavior using these environment variables:
- **`GGML_OPENVINO_CACHE_DIR`**: Directory for model caching (recommended: `/tmp/ov_cache`). If set, enables model caching in OpenVINO. - **`GGML_OPENVINO_DEVICE`**: Specify the target device for OpenVINO inference. If not set, automatically selects the first available device in priority order: GPU, CPU, NPU. When set to `NPU` to use Intel NPUs, it enables static compilation mode for optimal performance.
- **`GGML_OPENVINO_CACHE_DIR`**: Directory for model caching (recommended: `/tmp/ov_cache`). If set, enables model caching in OpenVINO. Note: model caching is not yet supported on NPU devices.
- **`GGML_OPENVINO_WEIGHT_AS_INPUT`**: Pass the weights as input to the OpenVINO model instead of creating Constant nodes for them. - **`GGML_OPENVINO_WEIGHT_AS_INPUT`**: Pass the weights as input to the OpenVINO model instead of creating Constant nodes for them.
- **`GGML_OPENVINO_PROFILING`**: Enable execution time profiling - **`GGML_OPENVINO_PROFILING`**: Enable execution time profiling.
- **`GGML_OPENVINO_DUMP_CGRAPH`**: Save compute graph to `cgraph.txt` - **`GGML_OPENVINO_DUMP_CGRAPH`**: Save compute graph to `cgraph.txt`.
- **`GGML_OPENVINO_DUMP_IR`**: Export OpenVINO IR files with timestamps - **`GGML_OPENVINO_DUMP_IR`**: Export OpenVINO IR files with timestamps.
- **`GGML_OPENVINO_DEBUG_INPUT`**: Enable input debugging - **`GGML_OPENVINO_DEBUG_INPUT`**: Enable input debugging.
- **`GGML_OPENVINO_DEBUG_OUTPUT`**: Enable output debugging - **`GGML_OPENVINO_DEBUG_OUTPUT`**: Enable output debugging.
### Example with Profiling ### Example with Profiling
@ -793,11 +816,20 @@ Control OpenVINO behavior using these environment variables:
export GGML_OPENVINO_CACHE_DIR=/tmp/ov_cache export GGML_OPENVINO_CACHE_DIR=/tmp/ov_cache
export GGML_OPENVINO_PROFILING=1 export GGML_OPENVINO_PROFILING=1
./build/ReleaseOV/bin/llama-simple \ ./build/ReleaseOV/bin/llama-simple -m ~/models/Llama-3.2-1B-Instruct.fp16.gguf -n 50 "The story of AI is "
-m ~/models/Phi-3-mini-4k-instruct-gguf/Phi-3-mini-4k-instruct-fp16.gguf \ ```
-n 50 \
"Hello, my name is "
### Using Llama.cpp's Built-in CPU Backend (for Comparison)
To compare performance with the default CPU backend:
```bash
# Build CPU-only version
cmake --preset ReleaseCPU
cmake --build build/ReleaseCPU --parallel
# Run with the default CPU backend
./build/ReleaseCPU/bin/llama-simple -m ~/models/Llama-3.2-1B-Instruct.fp16.gguf -n 50 "The story of AI is "
``` ```
## Notes about GPU-accelerated backends ## Notes about GPU-accelerated backends