Added OpenVINO CI/CD. Updated docs
This commit is contained in:
parent
d61f83c9b7
commit
ea75772e48
|
|
@ -0,0 +1,134 @@
|
||||||
|
ARG OPENVINO_VERSION_MAJOR=2025.2
|
||||||
|
ARG OPENVINO_VERSION_FULL=2025.2.0.19140.c01cd93e24d
|
||||||
|
ARG UBUNTU_VERSION=24.04
|
||||||
|
|
||||||
|
# Optional proxy build arguments - empty by default
|
||||||
|
ARG http_proxy=
|
||||||
|
ARG https_proxy=
|
||||||
|
|
||||||
|
## Build Image
|
||||||
|
FROM ubuntu:${UBUNTU_VERSION} AS build
|
||||||
|
|
||||||
|
# Pass proxy args to build stage
|
||||||
|
ARG http_proxy
|
||||||
|
ARG https_proxy
|
||||||
|
|
||||||
|
RUN apt-get update && \
|
||||||
|
apt-get install -y --no-install-recommends \
|
||||||
|
ca-certificates \
|
||||||
|
gnupg \
|
||||||
|
wget \
|
||||||
|
git \
|
||||||
|
cmake \
|
||||||
|
ninja-build \
|
||||||
|
build-essential \
|
||||||
|
libtbb12 \
|
||||||
|
libcurl4-openssl-dev && \
|
||||||
|
rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
|
# Install OpenVINO for Ubuntu 24.04
|
||||||
|
ARG OPENVINO_VERSION_MAJOR
|
||||||
|
ARG OPENVINO_VERSION_FULL
|
||||||
|
# Download the OpenVINO archive, install it under /opt/intel, run its
# dependency installer, and expose it at the conventional
# /opt/intel/openvino symlink. The tarball is removed in the same RUN so
# it does not persist in this layer.
RUN mkdir -p /opt/intel && \
    wget https://storage.openvinotoolkit.org/repositories/openvino/packages/${OPENVINO_VERSION_MAJOR}/linux/openvino_toolkit_ubuntu24_${OPENVINO_VERSION_FULL}_x86_64.tgz && \
    tar -xf openvino_toolkit_ubuntu24_${OPENVINO_VERSION_FULL}_x86_64.tgz && \
    rm openvino_toolkit_ubuntu24_${OPENVINO_VERSION_FULL}_x86_64.tgz && \
    mv openvino_toolkit_ubuntu24_${OPENVINO_VERSION_FULL}_x86_64 /opt/intel/openvino_${OPENVINO_VERSION_MAJOR} && \
    cd /opt/intel/openvino_${OPENVINO_VERSION_MAJOR} && \
    echo "Y" | ./install_dependencies/install_openvino_dependencies.sh && \
    cd - && \
    ln -s /opt/intel/openvino_${OPENVINO_VERSION_MAJOR} /opt/intel/openvino
|
||||||
|
|
||||||
|
ENV OpenVINO_DIR=/opt/intel/openvino
|
||||||
|
|
||||||
|
WORKDIR /app
|
||||||
|
|
||||||
|
COPY . .
|
||||||
|
|
||||||
|
# Build Stage
|
||||||
|
RUN bash -c "source ${OpenVINO_DIR}/setupvars.sh && \
|
||||||
|
cmake -B build/ReleaseOV -G Ninja \
|
||||||
|
-DCMAKE_BUILD_TYPE=Release \
|
||||||
|
-DGGML_OPENVINO=ON && \
|
||||||
|
cmake --build build/ReleaseOV -j$(nproc)"
|
||||||
|
|
||||||
|
# Copy all necessary libraries
|
||||||
|
RUN mkdir -p /app/lib && \
|
||||||
|
find build/ReleaseOV -name '*.so*' -exec cp {} /app/lib \; && \
|
||||||
|
find ${OpenVINO_DIR}/runtime/lib/intel64 -name '*.so*' -exec cp -P {} /app/lib \; 2>/dev/null || \
|
||||||
|
find ${OpenVINO_DIR}/lib/intel64 -name '*.so*' -exec cp -P {} /app/lib \;
|
||||||
|
|
||||||
|
# Create runtime directories and copy binaries
|
||||||
|
RUN mkdir -p /app/full \
|
||||||
|
&& cp build/ReleaseOV/bin/* /app/full/ \
|
||||||
|
&& cp *.py /app/full \
|
||||||
|
&& cp -r gguf-py /app/full \
|
||||||
|
&& cp -r requirements /app/full \
|
||||||
|
&& cp requirements.txt /app/full \
|
||||||
|
&& cp .devops/tools.sh /app/full/tools.sh
|
||||||
|
|
||||||
|
## Base Runtime Image
|
||||||
|
FROM ubuntu:${UBUNTU_VERSION} AS base
|
||||||
|
|
||||||
|
# Pass proxy args to runtime stage
|
||||||
|
ARG http_proxy
|
||||||
|
ARG https_proxy
|
||||||
|
|
||||||
|
# Runtime dependencies only: libgomp1 (OpenMP runtime for ggml), libtbb12
# (required by OpenVINO runtime libs copied from the build stage), curl
# (used by the server stage's HEALTHCHECK). Cleanup happens in the same
# layer so the apt cache never persists in the image.
# Fixes: consistent apt-get (not `apt`, hadolint DL3027), missing space
# before the line continuation after `curl`, and --no-install-recommends
# to match the build stage.
RUN apt-get update \
    && apt-get install -y --no-install-recommends libgomp1 libtbb12 curl \
    && apt-get autoremove -y \
    && apt-get clean \
    && rm -rf /tmp/* /var/tmp/* \
    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
    && find /var/cache -type f -delete
|
||||||
|
|
||||||
|
COPY --from=build /app/lib/ /app/
|
||||||
|
|
||||||
|
### Full (all binaries)
|
||||||
|
FROM base AS full
|
||||||
|
|
||||||
|
ARG http_proxy
|
||||||
|
ARG https_proxy
|
||||||
|
|
||||||
|
COPY --from=build /app/full /app/
|
||||||
|
|
||||||
|
WORKDIR /app
|
||||||
|
|
||||||
|
RUN apt-get update && \
|
||||||
|
apt-get install -y --no-install-recommends \
|
||||||
|
git \
|
||||||
|
python3 \
|
||||||
|
python3-venv \
|
||||||
|
python3-pip && \
|
||||||
|
python3 -m venv /ov-venv && \
|
||||||
|
/ov-venv/bin/pip install --no-cache-dir --upgrade pip setuptools wheel && \
|
||||||
|
/ov-venv/bin/pip install --no-cache-dir -r requirements.txt && \
|
||||||
|
apt-get autoremove -y && \
|
||||||
|
apt-get clean && \
|
||||||
|
rm -rf /tmp/* /var/tmp/* && \
|
||||||
|
find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete && \
|
||||||
|
find /var/cache -type f -delete
|
||||||
|
|
||||||
|
ENTRYPOINT ["/bin/bash", "-c", "source /ov-venv/bin/activate && exec /app/tools.sh \"$@\"", "--"]
|
||||||
|
|
||||||
|
|
||||||
|
### Light, CLI only
|
||||||
|
FROM base AS light
|
||||||
|
|
||||||
|
COPY --from=build /app/full/llama-cli /app/
|
||||||
|
|
||||||
|
WORKDIR /app
|
||||||
|
|
||||||
|
ENTRYPOINT [ "/app/llama-cli" ]
|
||||||
|
|
||||||
|
### Server, Server only
|
||||||
|
FROM base AS server
|
||||||
|
|
||||||
|
ENV LLAMA_ARG_HOST=0.0.0.0
|
||||||
|
|
||||||
|
COPY --from=build /app/full/llama-server /app/
|
||||||
|
|
||||||
|
WORKDIR /app
|
||||||
|
|
||||||
|
HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
|
||||||
|
|
||||||
|
ENTRYPOINT [ "/app/llama-server" ]
|
||||||
|
|
@ -737,6 +737,45 @@ jobs:
|
||||||
-DGGML_SYCL_F16=ON
|
-DGGML_SYCL_F16=ON
|
||||||
cmake --build build --config Release -j $(nproc)
|
cmake --build build --config Release -j $(nproc)
|
||||||
|
|
||||||
|
ubuntu-24-cmake-openvino:
|
||||||
|
runs-on: ubuntu-24.04
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- name: Clone
|
||||||
|
id: checkout
|
||||||
|
uses: actions/checkout@v4
|
||||||
|
|
||||||
|
- name: ccache
|
||||||
|
uses: hendrikmuhs/ccache-action@v1.2.16
|
||||||
|
with:
|
||||||
|
key: ubuntu-24-cmake-openvino-no-preset-v1
|
||||||
|
evict-old-files: 1d
|
||||||
|
|
||||||
|
- name: Dependencies
|
||||||
|
id: depends
|
||||||
|
run: |
|
||||||
|
export OPENVINO_VERSION_MAJOR=2025.2
|
||||||
|
export OPENVINO_VERSION_FULL=2025.2.0.19140.c01cd93e24d
|
||||||
|
sudo apt-get update
|
||||||
|
sudo apt-get install -y build-essential libcurl4-openssl-dev libtbb12 cmake ninja-build python3-pip curl wget tar
|
||||||
|
sudo mkdir -p /opt/intel
|
||||||
|
wget -O openvino_${OPENVINO_VERSION_MAJOR}.tgz https://storage.openvinotoolkit.org/repositories/openvino/packages/${OPENVINO_VERSION_MAJOR}/linux/openvino_toolkit_ubuntu24_${OPENVINO_VERSION_FULL}_x86_64.tgz
|
||||||
|
tar -xf openvino_${OPENVINO_VERSION_MAJOR}.tgz
|
||||||
|
sudo mv openvino_toolkit_ubuntu24_${OPENVINO_VERSION_FULL}_x86_64 /opt/intel/openvino_${OPENVINO_VERSION_MAJOR}
|
||||||
|
rm openvino_${OPENVINO_VERSION_MAJOR}.tgz
|
||||||
|
cd /opt/intel/openvino_${OPENVINO_VERSION_MAJOR}
|
||||||
|
echo "Y" | sudo -E ./install_dependencies/install_openvino_dependencies.sh && cd -
|
||||||
|
sudo ln -s /opt/intel/openvino_${OPENVINO_VERSION_MAJOR} /opt/intel/openvino
|
||||||
|
|
||||||
|
- name: Build
|
||||||
|
id: cmake_build
|
||||||
|
run: |
|
||||||
|
source /opt/intel/openvino/setupvars.sh
|
||||||
|
cmake -B build/ReleaseOV -G Ninja \
|
||||||
|
-DCMAKE_BUILD_TYPE=Release \
|
||||||
|
-DGGML_OPENVINO=ON
|
||||||
|
cmake --build build/ReleaseOV --config Release -j $(nproc)
|
||||||
|
|
||||||
build-linux-cross:
|
build-linux-cross:
|
||||||
uses: ./.github/workflows/build-linux-cross.yml
|
uses: ./.github/workflows/build-linux-cross.yml
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -47,6 +47,7 @@ jobs:
|
||||||
- { tag: "vulkan", dockerfile: ".devops/vulkan.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: false, runs_on: "ubuntu-22.04" }
|
- { tag: "vulkan", dockerfile: ".devops/vulkan.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: false, runs_on: "ubuntu-22.04" }
|
||||||
- { tag: "s390x", dockerfile: ".devops/s390x.Dockerfile", platforms: "linux/s390x", full: true, light: true, server: true, free_disk_space: false, runs_on: "ubuntu-22.04-s390x" }
|
- { tag: "s390x", dockerfile: ".devops/s390x.Dockerfile", platforms: "linux/s390x", full: true, light: true, server: true, free_disk_space: false, runs_on: "ubuntu-22.04-s390x" }
|
||||||
- { tag: "rocm", dockerfile: ".devops/rocm.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: true, runs_on: "ubuntu-22.04" }
|
- { tag: "rocm", dockerfile: ".devops/rocm.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: true, runs_on: "ubuntu-22.04" }
|
||||||
|
- { tag: "openvino", dockerfile: ".devops/openvino.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: false }
|
||||||
steps:
|
steps:
|
||||||
- name: Check out the repo
|
- name: Check out the repo
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
|
|
|
||||||
|
|
@ -231,6 +231,63 @@ jobs:
|
||||||
path: llama-${{ steps.tag.outputs.name }}-bin-ubuntu-vulkan-x64.tar.gz
|
path: llama-${{ steps.tag.outputs.name }}-bin-ubuntu-vulkan-x64.tar.gz
|
||||||
name: llama-bin-ubuntu-vulkan-x64.tar.gz
|
name: llama-bin-ubuntu-vulkan-x64.tar.gz
|
||||||
|
|
||||||
|
ubuntu-24-openvino:
|
||||||
|
runs-on: ubuntu-24.04
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- name: Clone
|
||||||
|
id: checkout
|
||||||
|
uses: actions/checkout@v4
|
||||||
|
with:
|
||||||
|
fetch-depth: 0
|
||||||
|
|
||||||
|
- name: ccache
|
||||||
|
uses: hendrikmuhs/ccache-action@v1.2.16
|
||||||
|
with:
|
||||||
|
key: ubuntu-24-cmake-openvino-release-no-preset-v1
|
||||||
|
evict-old-files: 1d
|
||||||
|
|
||||||
|
- name: Dependencies
|
||||||
|
id: depends
|
||||||
|
run: |
|
||||||
|
export OPENVINO_VERSION_MAJOR=2025.2
|
||||||
|
export OPENVINO_VERSION_FULL=2025.2.0.19140.c01cd93e24d
|
||||||
|
sudo apt-get update
|
||||||
|
sudo apt-get install -y build-essential libcurl4-openssl-dev libtbb12 cmake ninja-build python3-pip curl wget tar
|
||||||
|
sudo mkdir -p /opt/intel
|
||||||
|
wget -O openvino_${OPENVINO_VERSION_MAJOR}.tgz https://storage.openvinotoolkit.org/repositories/openvino/packages/${OPENVINO_VERSION_MAJOR}/linux/openvino_toolkit_ubuntu24_${OPENVINO_VERSION_FULL}_x86_64.tgz
|
||||||
|
tar -xf openvino_${OPENVINO_VERSION_MAJOR}.tgz
|
||||||
|
sudo mv openvino_toolkit_ubuntu24_${OPENVINO_VERSION_FULL}_x86_64 /opt/intel/openvino_${OPENVINO_VERSION_MAJOR}
|
||||||
|
rm openvino_${OPENVINO_VERSION_MAJOR}.tgz
|
||||||
|
cd /opt/intel/openvino_${OPENVINO_VERSION_MAJOR}
|
||||||
|
echo "Y" | sudo -E ./install_dependencies/install_openvino_dependencies.sh && cd -
|
||||||
|
sudo ln -s /opt/intel/openvino_${OPENVINO_VERSION_MAJOR} /opt/intel/openvino
|
||||||
|
|
||||||
|
- name: Build
|
||||||
|
id: cmake_build
|
||||||
|
run: |
|
||||||
|
source /opt/intel/openvino/setupvars.sh
|
||||||
|
cmake -B build/ReleaseOV -G Ninja \
|
||||||
|
-DCMAKE_BUILD_TYPE=Release \
|
||||||
|
-DGGML_OPENVINO=ON
|
||||||
|
cmake --build build/ReleaseOV --config Release -j $(nproc)
|
||||||
|
|
||||||
|
- name: Determine tag name
|
||||||
|
id: tag
|
||||||
|
uses: ./.github/actions/get-tag-name
|
||||||
|
|
||||||
|
- name: Pack artifacts
|
||||||
|
id: pack_artifacts
|
||||||
|
run: |
|
||||||
|
cp LICENSE ./build/ReleaseOV/bin/
|
||||||
|
zip -r llama-${{ steps.tag.outputs.name }}-bin-ubuntu-openvino-x64.zip ./build/ReleaseOV/bin/*
|
||||||
|
|
||||||
|
- name: Upload artifacts
|
||||||
|
uses: actions/upload-artifact@v4
|
||||||
|
with:
|
||||||
|
path: llama-${{ steps.tag.outputs.name }}-bin-ubuntu-openvino-x64.zip
|
||||||
|
name: llama-bin-ubuntu-openvino-x64.zip
|
||||||
|
|
||||||
windows-cpu:
|
windows-cpu:
|
||||||
runs-on: windows-2025
|
runs-on: windows-2025
|
||||||
|
|
||||||
|
|
|
||||||
12
ci/run.sh
12
ci/run.sh
|
|
@ -25,6 +25,9 @@
|
||||||
# # with KLEIDIAI support
|
# # with KLEIDIAI support
|
||||||
# GG_BUILD_KLEIDIAI=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
|
# GG_BUILD_KLEIDIAI=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
|
||||||
#
|
#
|
||||||
|
# # with OPENVINO support
|
||||||
|
# GG_BUILD_OPENVINO=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
|
||||||
|
#
|
||||||
|
|
||||||
if [ -z "$2" ]; then
|
if [ -z "$2" ]; then
|
||||||
echo "usage: $0 <output-dir> <mnt-dir>"
|
echo "usage: $0 <output-dir> <mnt-dir>"
|
||||||
|
|
@ -165,6 +168,15 @@ if [ -n "${GG_BUILD_KLEIDIAI}" ]; then
|
||||||
-DBUILD_SHARED_LIBS=OFF"
|
-DBUILD_SHARED_LIBS=OFF"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
# Enable the OpenVINO backend when requested. OpenVINO_DIR is exported by
# OpenVINO's setupvars.sh; fail fast with instructions if it is absent.
# Variables are quoted inside [ ] (ShellCheck SC2086) to match the
# script's existing style, e.g. `[ -z "$2" ]` above.
if [ ! -z "${GG_BUILD_OPENVINO}" ]; then
    if [ -z "${OpenVINO_DIR}" ]; then
        echo "OpenVINO_DIR not found, please install OpenVINO via archives and enable it by:"
        echo "source /opt/intel/openvino/setupvars.sh"
        exit 1
    fi
    CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_OPENVINO=ON"
fi
|
||||||
|
|
||||||
## helpers
|
## helpers
|
||||||
|
|
||||||
# download a file if it does not exist or if it is outdated
|
# download a file if it does not exist or if it is outdated
|
||||||
|
|
|
||||||
110
docs/build.md
110
docs/build.md
|
|
@ -25,7 +25,7 @@ The following sections describe how to build with different backends and options
|
||||||
* [Arm® KleidiAI™](#arm-kleidiai)
|
* [Arm® KleidiAI™](#arm-kleidiai)
|
||||||
* [OpenCL](#opencl)
|
* [OpenCL](#opencl)
|
||||||
* [Android](#android-1)
|
* [Android](#android-1)
|
||||||
* [OPENVINO](#openvino)
|
* [OpenVINO](#openvino)
|
||||||
* [Notes about GPU-accelerated backends](#notes-about-gpu-accelerated-backends)
|
* [Notes about GPU-accelerated backends](#notes-about-gpu-accelerated-backends)
|
||||||
|
|
||||||
## CPU Build
|
## CPU Build
|
||||||
|
|
@ -696,20 +696,48 @@ Follow the instructions [here](https://dawn.googlesource.com/dawn/+/refs/heads/m
|
||||||
|
|
||||||
To read documentation for how to build on IBM Z & LinuxONE, [click here](./build-s390x.md)
|
To read documentation for how to build on IBM Z & LinuxONE, [click here](./build-s390x.md)
|
||||||
|
|
||||||
## OPENVINO
|
## OpenVINO
|
||||||
|
|
||||||
[OpenVINO](https://docs.openvino.ai/2025/index.html) is a open-source toolkit for optimizing and deploying performant AI inference, specifically designed for Intel hardware including CPUs, GPUs, and NPUs in the cloud, on-prem, and on the edge alike. The OpenVINO backend enhances performance by leveraging hardware-specific optimizations and can be enabled for use with llama.cpp.
|
[OpenVINO](https://docs.openvino.ai/2025/index.html) is an open-source toolkit for optimizing and deploying high-performance AI inference, specifically designed for Intel hardware, including CPUs, GPUs, and NPUs, in the cloud, on-premises, and on the edge.
|
||||||
|
The OpenVINO backend enhances performance by leveraging hardware-specific optimizations and can be enabled for use with llama.cpp.
|
||||||
|
|
||||||
Follow the instructions below to install OpenVINO runtime and build llama.cpp with OpenVINO support.
|
Follow the instructions below to install OpenVINO runtime and build llama.cpp with OpenVINO support.
|
||||||
|
|
||||||
|
### Prerequisites
|
||||||
|
|
||||||
|
- Linux or Windows system with Intel hardware (CPU, GPU, or NPU)
|
||||||
|
- **For Intel GPU or NPU Usage**: Install the appropriate hardware drivers for your Intel GPU or NPU. For detailed instructions, see: [Additional Configurations for Hardware Acceleration](https://docs.openvino.ai/2025/get-started/install-openvino/configurations.html).
|
||||||
|
- Git, CMake, and Ninja software tools are needed for building
|
||||||
|
```bash
|
||||||
|
sudo apt-get update
|
||||||
|
sudo apt-get install -y build-essential libcurl4-openssl-dev libtbb12 cmake ninja-build python3-pip curl wget tar
|
||||||
|
```
|
||||||
|
|
||||||
### 1. Install OpenVINO Runtime
|
### 1. Install OpenVINO Runtime
|
||||||
|
|
||||||
- Follow the guide to install OpenVINO Runtime from an archive file: **[Install OpenVINO™ Runtime on Linux from an Archive File.](https://docs.openvino.ai/2025/get-started/install-openvino/install-openvino-archive-linux.html)**
|
- Follow the guide to install OpenVINO Runtime from an archive file: **[Install OpenVINO™ Runtime on Linux from an Archive File.](https://docs.openvino.ai/2025/get-started/install-openvino/install-openvino-archive-linux.html)**
|
||||||
|
|
||||||
- After installation, make sure to [source the environment setup script](https://docs.openvino.ai/2025/get-started/install-openvino/install-openvino-archive-linux.html#step-2-configure-the-environment):
|
<details>
|
||||||
|
<summary>📦 Click to expand OpenVINO 2025.2 installation commands</summary>
|
||||||
|
<br>
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
source /opt/intel/openvino_2025.1.0/setupvars.sh
|
export OPENVINO_VERSION_MAJOR=2025.2
|
||||||
|
export OPENVINO_VERSION_FULL=2025.2.0.19140.c01cd93e24d
|
||||||
|
sudo apt-get update
|
||||||
|
sudo apt-get install -y build-essential libcurl4-openssl-dev libtbb12 cmake ninja-build python3-pip curl wget tar
|
||||||
|
sudo mkdir -p /opt/intel
|
||||||
|
wget -O openvino_${OPENVINO_VERSION_MAJOR}.tgz https://storage.openvinotoolkit.org/repositories/openvino/packages/${OPENVINO_VERSION_MAJOR}/linux/openvino_toolkit_ubuntu24_${OPENVINO_VERSION_FULL}_x86_64.tgz
|
||||||
|
tar -xf openvino_${OPENVINO_VERSION_MAJOR}.tgz
|
||||||
|
sudo mv openvino_toolkit_ubuntu24_${OPENVINO_VERSION_FULL}_x86_64 /opt/intel/openvino_${OPENVINO_VERSION_MAJOR}
|
||||||
|
rm openvino_${OPENVINO_VERSION_MAJOR}.tgz
|
||||||
|
cd /opt/intel/openvino_${OPENVINO_VERSION_MAJOR}
|
||||||
|
echo "Y" | sudo -E ./install_dependencies/install_openvino_dependencies.sh && cd -
|
||||||
|
sudo ln -s /opt/intel/openvino_${OPENVINO_VERSION_MAJOR} /opt/intel/openvino
|
||||||
|
source /opt/intel/openvino/setupvars.sh
|
||||||
```
|
```
|
||||||
|
</details>
|
||||||
|
|
||||||
- Verify OpenVINO is initialized properly
|
- Verify OpenVINO is initialized properly
|
||||||
```bash
|
```bash
|
||||||
echo $OpenVINO_DIR
|
echo $OpenVINO_DIR
|
||||||
|
|
@ -725,23 +753,26 @@ cd llama.cpp
|
||||||
git switch dev_backend_openvino
|
git switch dev_backend_openvino
|
||||||
|
|
||||||
# Build with OpenVINO support
|
# Build with OpenVINO support
|
||||||
cmake --preset ReleaseOV
|
source /opt/intel/openvino/setupvars.sh
|
||||||
cmake --build build/ReleaseOV --parallel
|
cmake -B build/ReleaseOV -G Ninja -DCMAKE_BUILD_TYPE=Release -DGGML_OPENVINO=ON
|
||||||
|
cmake --build build/ReleaseOV --config Release -j $(nproc)
|
||||||
```
|
```
|
||||||
|
|
||||||
### 3. Download Sample Model
|
### 3. Download Sample Model
|
||||||
|
|
||||||
Download the Phi-3 mini model for testing:
|
Download models for testing:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Create models directory
|
# Create models directory
|
||||||
mkdir -p ~/models/Phi-3-mini-4k-instruct-gguf
|
mkdir -p ~/models/
|
||||||
|
|
||||||
# Download model file
|
# Download model file: Llama-3.2-1B-Instruct.fp16.gguf
|
||||||
|
wget https://huggingface.co/MaziyarPanahi/Llama-3.2-1B-Instruct-GGUF/resolve/main/Llama-3.2-1B-Instruct.fp16.gguf \
|
||||||
|
-O ~/models/Llama-3.2-1B-Instruct.fp16.gguf
|
||||||
|
|
||||||
|
# Download model file: Phi-3-mini-4k-instruct-fp16.gguf
|
||||||
wget https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-gguf/resolve/main/Phi-3-mini-4k-instruct-fp16.gguf \
|
wget https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-gguf/resolve/main/Phi-3-mini-4k-instruct-fp16.gguf \
|
||||||
-O ~/models/Phi-3-mini-4k-instruct-gguf/Phi-3-mini-4k-instruct-fp16.gguf
|
-O ~/models/Phi-3-mini-4k-instruct-fp16.gguf
|
||||||
|
|
||||||
```
|
```
|
||||||
|
|
||||||
### 4. Run inference with OpenVINO backend:
|
### 4. Run inference with OpenVINO backend:
|
||||||
|
|
@ -750,28 +781,19 @@ When using the OpenVINO backend, the first inference token may have slightly hig
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
export GGML_OPENVINO_CACHE_DIR=/tmp/ov_cache
|
export GGML_OPENVINO_CACHE_DIR=/tmp/ov_cache
|
||||||
|
# Default device is GPU.
|
||||||
|
# If not set, automatically selects the first available device in priority order: GPU, CPU, NPU.
|
||||||
|
export GGML_OPENVINO_DEVICE=GPU
|
||||||
|
|
||||||
./build/ReleaseOV/bin/llama-simple \
|
./build/ReleaseOV/bin/llama-simple -m ~/models/Llama-3.2-1B-Instruct.fp16.gguf -n 50 "The story of AI is "
|
||||||
-m ~/models/Phi-3-mini-4k-instruct-gguf/Phi-3-mini-4k-instruct-fp16.gguf \
|
|
||||||
-n 50 \
|
|
||||||
"Hello, my name is "
|
|
||||||
|
|
||||||
```
|
```
|
||||||
|
|
||||||
### Using Llama.cpp's Built-in CPU Backend (for Comparison)
|
To run in chat mode:
|
||||||
|
|
||||||
To compare performance with the default CPU backend:
|
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Build CPU-only version
|
export GGML_OPENVINO_CACHE_DIR=/tmp/ov_cache
|
||||||
cmake --preset ReleaseCPU
|
|
||||||
cmake --build build/ReleaseCPU --parallel
|
|
||||||
|
|
||||||
# Run with Default CPU backend
|
./build/ReleaseOV/bin/llama-cli -m ~/models/Llama-3.2-1B-Instruct.fp16.gguf -n 50 "The story of AI is "
|
||||||
./build/ReleaseCPU/bin/llama-simple \
|
|
||||||
-m ~/models/Phi-3-mini-4k-instruct-gguf/Phi-3-mini-4k-instruct-fp16.gguf \
|
|
||||||
-n 50 \
|
|
||||||
"Hello, my name is "
|
|
||||||
|
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
@ -779,13 +801,14 @@ cmake --build build/ReleaseCPU --parallel
|
||||||
|
|
||||||
Control OpenVINO behavior using these environment variables:
|
Control OpenVINO behavior using these environment variables:
|
||||||
|
|
||||||
- **`GGML_OPENVINO_CACHE_DIR`**: Directory for model caching (recommended: `/tmp/ov_cache`). If set, enables model caching in OpenVINO.
|
- **`GGML_OPENVINO_DEVICE`**: Specify the target device for OpenVINO inference. If not set, automatically selects the first available device in priority order: GPU, CPU, NPU. When set to `NPU` to use Intel NPUs, it enables static compilation mode for optimal performance.
|
||||||
|
- **`GGML_OPENVINO_CACHE_DIR`**: Directory for model caching (recommended: `/tmp/ov_cache`). If set, enables model caching in OpenVINO. Note: Not supported when using NPU devices yet.
|
||||||
- **`GGML_OPENVINO_WEIGHT_AS_INPUT`**: Pass the weights as input to the OpenVINO model instead of creating Constant nodes for them.
|
- **`GGML_OPENVINO_WEIGHT_AS_INPUT`**: Pass the weights as input to the OpenVINO model instead of creating Constant nodes for them.
|
||||||
- **`GGML_OPENVINO_PROFILING`**: Enable execution time profiling
|
- **`GGML_OPENVINO_PROFILING`**: Enable execution time profiling.
|
||||||
- **`GGML_OPENVINO_DUMP_CGRAPH`**: Save compute graph to `cgraph.txt`
|
- **`GGML_OPENVINO_DUMP_CGRAPH`**: Save compute graph to `cgraph.txt`.
|
||||||
- **`GGML_OPENVINO_DUMP_IR`**: Export OpenVINO IR files with timestamps
|
- **`GGML_OPENVINO_DUMP_IR`**: Export OpenVINO IR files with timestamps.
|
||||||
- **`GGML_OPENVINO_DEBUG_INPUT`**: Enable input debugging
|
- **`GGML_OPENVINO_DEBUG_INPUT`**: Enable input debugging.
|
||||||
- **`GGML_OPENVINO_DEBUG_OUTPUT`**: Enable output debugging
|
- **`GGML_OPENVINO_DEBUG_OUTPUT`**: Enable output debugging.
|
||||||
|
|
||||||
### Example with Profiling
|
### Example with Profiling
|
||||||
|
|
||||||
|
|
@ -793,11 +816,20 @@ Control OpenVINO behavior using these environment variables:
|
||||||
export GGML_OPENVINO_CACHE_DIR=/tmp/ov_cache
|
export GGML_OPENVINO_CACHE_DIR=/tmp/ov_cache
|
||||||
export GGML_OPENVINO_PROFILING=1
|
export GGML_OPENVINO_PROFILING=1
|
||||||
|
|
||||||
./build/ReleaseOV/bin/llama-simple \
|
./build/ReleaseOV/bin/llama-simple -m ~/models/Llama-3.2-1B-Instruct.fp16.gguf -n 50 "The story of AI is "
|
||||||
-m ~/models/Phi-3-mini-4k-instruct-gguf/Phi-3-mini-4k-instruct-fp16.gguf \
|
```
|
||||||
-n 50 \
|
|
||||||
"Hello, my name is "
|
|
||||||
|
|
||||||
|
### Using Llama.cpp's Built-in CPU Backend (for Comparison)
|
||||||
|
|
||||||
|
To compare performance with the default CPU backend:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Build CPU-only version
|
||||||
|
cmake --preset ReleaseCPU
|
||||||
|
cmake --build build/ReleaseCPU --parallel
|
||||||
|
|
||||||
|
# Run with the default CPU backend
|
||||||
|
./build/ReleaseCPU/bin/llama-simple -m ~/models/Llama-3.2-1B-Instruct.fp16.gguf -n 50 "The story of AI is "
|
||||||
```
|
```
|
||||||
|
|
||||||
## Notes about GPU-accelerated backends
|
## Notes about GPU-accelerated backends
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue