Added OpenVINO CI/CD. Updated docs
parent d61f83c9b7
commit ea75772e48

.devops/openvino.Dockerfile
@ -0,0 +1,134 @@
ARG OPENVINO_VERSION_MAJOR=2025.2
ARG OPENVINO_VERSION_FULL=2025.2.0.19140.c01cd93e24d
ARG UBUNTU_VERSION=24.04

# Optional proxy build arguments - empty by default
ARG http_proxy=
ARG https_proxy=

## Build Image
FROM ubuntu:${UBUNTU_VERSION} AS build

# Pass proxy args to build stage
ARG http_proxy
ARG https_proxy

RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        ca-certificates \
        gnupg \
        wget \
        git \
        cmake \
        ninja-build \
        build-essential \
        libtbb12 \
        libcurl4-openssl-dev && \
    rm -rf /var/lib/apt/lists/*

# Install OpenVINO for Ubuntu 24.04
ARG OPENVINO_VERSION_MAJOR
ARG OPENVINO_VERSION_FULL
RUN mkdir -p /opt/intel && \
    wget https://storage.openvinotoolkit.org/repositories/openvino/packages/${OPENVINO_VERSION_MAJOR}/linux/openvino_toolkit_ubuntu24_${OPENVINO_VERSION_FULL}_x86_64.tgz && \
    tar -xf openvino_toolkit_ubuntu24_${OPENVINO_VERSION_FULL}_x86_64.tgz && \
    mv openvino_toolkit_ubuntu24_${OPENVINO_VERSION_FULL}_x86_64 /opt/intel/openvino_${OPENVINO_VERSION_MAJOR} && \
    cd /opt/intel/openvino_${OPENVINO_VERSION_MAJOR} && \
    echo "Y" | ./install_dependencies/install_openvino_dependencies.sh && \
    cd - && \
    ln -s /opt/intel/openvino_${OPENVINO_VERSION_MAJOR} /opt/intel/openvino

ENV OpenVINO_DIR=/opt/intel/openvino

WORKDIR /app

COPY . .

# Build Stage
RUN bash -c "source ${OpenVINO_DIR}/setupvars.sh && \
    cmake -B build/ReleaseOV -G Ninja \
        -DCMAKE_BUILD_TYPE=Release \
        -DGGML_OPENVINO=ON && \
    cmake --build build/ReleaseOV -j$(nproc)"

# Copy all necessary libraries
RUN mkdir -p /app/lib && \
    find build/ReleaseOV -name '*.so*' -exec cp {} /app/lib \; && \
    find ${OpenVINO_DIR}/runtime/lib/intel64 -name '*.so*' -exec cp -P {} /app/lib \; 2>/dev/null || \
    find ${OpenVINO_DIR}/lib/intel64 -name '*.so*' -exec cp -P {} /app/lib \;

# Create runtime directories and copy binaries
RUN mkdir -p /app/full \
    && cp build/ReleaseOV/bin/* /app/full/ \
    && cp *.py /app/full \
    && cp -r gguf-py /app/full \
    && cp -r requirements /app/full \
    && cp requirements.txt /app/full \
    && cp .devops/tools.sh /app/full/tools.sh

## Base Runtime Image
FROM ubuntu:${UBUNTU_VERSION} AS base

# Pass proxy args to runtime stage
ARG http_proxy
ARG https_proxy

RUN apt-get update \
    && apt-get install -y libgomp1 libtbb12 curl \
    && apt-get autoremove -y \
    && apt-get clean -y \
    && rm -rf /tmp/* /var/tmp/* \
    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
    && find /var/cache -type f -delete

COPY --from=build /app/lib/ /app/

### Full (all binaries)
FROM base AS full

ARG http_proxy
ARG https_proxy

COPY --from=build /app/full /app/

WORKDIR /app

RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        git \
        python3 \
        python3-venv \
        python3-pip && \
    python3 -m venv /ov-venv && \
    /ov-venv/bin/pip install --no-cache-dir --upgrade pip setuptools wheel && \
    /ov-venv/bin/pip install --no-cache-dir -r requirements.txt && \
    apt-get autoremove -y && \
    apt-get clean && \
    rm -rf /tmp/* /var/tmp/* && \
    find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete && \
    find /var/cache -type f -delete

ENTRYPOINT ["/bin/bash", "-c", "source /ov-venv/bin/activate && exec /app/tools.sh \"$@\"", "--"]

### Light, CLI only
FROM base AS light

COPY --from=build /app/full/llama-cli /app/

WORKDIR /app

ENTRYPOINT [ "/app/llama-cli" ]

### Server, Server only
FROM base AS server

ENV LLAMA_ARG_HOST=0.0.0.0

COPY --from=build /app/full/llama-server /app/

WORKDIR /app

HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]

ENTRYPOINT [ "/app/llama-server" ]

.github/workflows/build.yml
@ -737,6 +737,45 @@ jobs:
            -DGGML_SYCL_F16=ON
          cmake --build build --config Release -j $(nproc)

  ubuntu-24-cmake-openvino:
    runs-on: ubuntu-24.04

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v4

      - name: ccache
        uses: hendrikmuhs/ccache-action@v1.2.16
        with:
          key: ubuntu-24-cmake-openvino-no-preset-v1
          evict-old-files: 1d

      - name: Dependencies
        id: depends
        run: |
          export OPENVINO_VERSION_MAJOR=2025.2
          export OPENVINO_VERSION_FULL=2025.2.0.19140.c01cd93e24d
          sudo apt-get update
          sudo apt-get install -y build-essential libcurl4-openssl-dev libtbb12 cmake ninja-build python3-pip curl wget tar
          sudo mkdir -p /opt/intel
          wget -O openvino_${OPENVINO_VERSION_MAJOR}.tgz https://storage.openvinotoolkit.org/repositories/openvino/packages/${OPENVINO_VERSION_MAJOR}/linux/openvino_toolkit_ubuntu24_${OPENVINO_VERSION_FULL}_x86_64.tgz
          tar -xf openvino_${OPENVINO_VERSION_MAJOR}.tgz
          sudo mv openvino_toolkit_ubuntu24_${OPENVINO_VERSION_FULL}_x86_64 /opt/intel/openvino_${OPENVINO_VERSION_MAJOR}
          rm openvino_${OPENVINO_VERSION_MAJOR}.tgz
          cd /opt/intel/openvino_${OPENVINO_VERSION_MAJOR}
          echo "Y" | sudo -E ./install_dependencies/install_openvino_dependencies.sh && cd -
          sudo ln -s /opt/intel/openvino_${OPENVINO_VERSION_MAJOR} /opt/intel/openvino

      - name: Build
        id: cmake_build
        run: |
          source /opt/intel/openvino/setupvars.sh
          cmake -B build/ReleaseOV -G Ninja \
            -DCMAKE_BUILD_TYPE=Release \
            -DGGML_OPENVINO=ON
          cmake --build build/ReleaseOV --config Release -j $(nproc)

  build-linux-cross:
    uses: ./.github/workflows/build-linux-cross.yml

.github/workflows/docker.yml
@ -47,6 +47,7 @@ jobs:
          - { tag: "vulkan", dockerfile: ".devops/vulkan.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: false, runs_on: "ubuntu-22.04" }
          - { tag: "s390x", dockerfile: ".devops/s390x.Dockerfile", platforms: "linux/s390x", full: true, light: true, server: true, free_disk_space: false, runs_on: "ubuntu-22.04-s390x" }
          - { tag: "rocm", dockerfile: ".devops/rocm.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: true, runs_on: "ubuntu-22.04" }
          - { tag: "openvino", dockerfile: ".devops/openvino.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: false }
    steps:
      - name: Check out the repo
        uses: actions/checkout@v4
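Assuming the Docker workflow publishes the OpenVINO variants under the same tag scheme as the existing backends (e.g. `ghcr.io/ggml-org/llama.cpp:server-openvino`; this tag is an assumption, not confirmed by the diff), running the server image might look like:

```bash
# Hypothetical published tag; substitute whatever the registry actually lists
docker run --rm -p 8080:8080 -v ~/models:/models \
    ghcr.io/ggml-org/llama.cpp:server-openvino \
    -m /models/Llama-3.2-1B-Instruct.fp16.gguf
```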

.github/workflows/release.yml
@ -231,6 +231,63 @@ jobs:
          path: llama-${{ steps.tag.outputs.name }}-bin-ubuntu-vulkan-x64.tar.gz
          name: llama-bin-ubuntu-vulkan-x64.tar.gz

  ubuntu-24-openvino:
    runs-on: ubuntu-24.04

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: ccache
        uses: hendrikmuhs/ccache-action@v1.2.16
        with:
          key: ubuntu-24-cmake-openvino-release-no-preset-v1
          evict-old-files: 1d

      - name: Dependencies
        id: depends
        run: |
          export OPENVINO_VERSION_MAJOR=2025.2
          export OPENVINO_VERSION_FULL=2025.2.0.19140.c01cd93e24d
          sudo apt-get update
          sudo apt-get install -y build-essential libcurl4-openssl-dev libtbb12 cmake ninja-build python3-pip curl wget tar
          sudo mkdir -p /opt/intel
          wget -O openvino_${OPENVINO_VERSION_MAJOR}.tgz https://storage.openvinotoolkit.org/repositories/openvino/packages/${OPENVINO_VERSION_MAJOR}/linux/openvino_toolkit_ubuntu24_${OPENVINO_VERSION_FULL}_x86_64.tgz
          tar -xf openvino_${OPENVINO_VERSION_MAJOR}.tgz
          sudo mv openvino_toolkit_ubuntu24_${OPENVINO_VERSION_FULL}_x86_64 /opt/intel/openvino_${OPENVINO_VERSION_MAJOR}
          rm openvino_${OPENVINO_VERSION_MAJOR}.tgz
          cd /opt/intel/openvino_${OPENVINO_VERSION_MAJOR}
          echo "Y" | sudo -E ./install_dependencies/install_openvino_dependencies.sh && cd -
          sudo ln -s /opt/intel/openvino_${OPENVINO_VERSION_MAJOR} /opt/intel/openvino

      - name: Build
        id: cmake_build
        run: |
          source /opt/intel/openvino/setupvars.sh
          cmake -B build/ReleaseOV -G Ninja \
            -DCMAKE_BUILD_TYPE=Release \
            -DGGML_OPENVINO=ON
          cmake --build build/ReleaseOV --config Release -j $(nproc)

      - name: Determine tag name
        id: tag
        uses: ./.github/actions/get-tag-name

      - name: Pack artifacts
        id: pack_artifacts
        run: |
          cp LICENSE ./build/ReleaseOV/bin/
          zip -r llama-${{ steps.tag.outputs.name }}-bin-ubuntu-openvino-x64.zip ./build/ReleaseOV/bin/*

      - name: Upload artifacts
        uses: actions/upload-artifact@v4
        with:
          path: llama-${{ steps.tag.outputs.name }}-bin-ubuntu-openvino-x64.zip
          name: llama-bin-ubuntu-openvino-x64.zip

  windows-cpu:
    runs-on: windows-2025

ci/run.sh
@ -25,6 +25,9 @@
# # with KLEIDIAI support
# GG_BUILD_KLEIDIAI=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
#
# # with OPENVINO support
# GG_BUILD_OPENVINO=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
#

if [ -z "$2" ]; then
    echo "usage: $0 <output-dir> <mnt-dir>"

@ -165,6 +168,15 @@ if [ -n "${GG_BUILD_KLEIDIAI}" ]; then
        -DBUILD_SHARED_LIBS=OFF"
fi

if [ -n "${GG_BUILD_OPENVINO}" ]; then
    if [ -z "${OpenVINO_DIR}" ]; then
        echo "OpenVINO_DIR not found, please install OpenVINO from an archive and enable it with:"
        echo "source /opt/intel/openvino/setupvars.sh"
        exit 1
    fi
    CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_OPENVINO=ON"
fi

## helpers

# download a file if it does not exist or if it is outdated
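Putting the pieces together, a minimal local CI run with the OpenVINO path enabled (the commands come from the script's header comment and the error message above):

```bash
# Requires an archive install of OpenVINO first; see docs/build.md
source /opt/intel/openvino/setupvars.sh
GG_BUILD_OPENVINO=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
```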

docs/build.md
@ -25,7 +25,7 @@ The following sections describe how to build with different backends and options
* [Arm® KleidiAI™](#arm-kleidiai)
* [OpenCL](#opencl)
* [Android](#android-1)
* [OpenVINO](#openvino)
* [Notes about GPU-accelerated backends](#notes-about-gpu-accelerated-backends)

## CPU Build

@ -696,20 +696,48 @@ Follow the instructions [here](https://dawn.googlesource.com/dawn/+/refs/heads/m

To read documentation for how to build on IBM Z & LinuxONE, [click here](./build-s390x.md)

## OpenVINO

[OpenVINO](https://docs.openvino.ai/2025/index.html) is an open-source toolkit for optimizing and deploying high-performance AI inference, specifically designed for Intel hardware, including CPUs, GPUs, and NPUs, in the cloud, on-premises, and at the edge.
The OpenVINO backend enhances performance by leveraging hardware-specific optimizations and can be enabled for use with llama.cpp.

Follow the instructions below to install the OpenVINO Runtime and build llama.cpp with OpenVINO support.

### Prerequisites

- Linux or Windows system with Intel hardware (CPU, GPU, or NPU)
- **For Intel GPU or NPU usage**: Install the appropriate hardware drivers for your Intel GPU or NPU. For detailed instructions, see: [Additional Configurations for Hardware Acceleration](https://docs.openvino.ai/2025/get-started/install-openvino/configurations.html).
- Git, CMake, and Ninja are required for building:

  ```bash
  sudo apt-get update
  sudo apt-get install -y build-essential libcurl4-openssl-dev libtbb12 cmake ninja-build python3-pip curl wget tar
  ```

### 1. Install OpenVINO Runtime

- Follow the guide to install OpenVINO Runtime from an archive file: **[Install OpenVINO™ Runtime on Linux from an Archive File](https://docs.openvino.ai/2025/get-started/install-openvino/install-openvino-archive-linux.html)**.

- After installation, make sure to [source the environment setup script](https://docs.openvino.ai/2025/get-started/install-openvino/install-openvino-archive-linux.html#step-2-configure-the-environment):

  <details>
  <summary>📦 Click to expand OpenVINO 2025.2 installation commands</summary>
  <br>

  ```bash
  export OPENVINO_VERSION_MAJOR=2025.2
  export OPENVINO_VERSION_FULL=2025.2.0.19140.c01cd93e24d
  sudo apt-get update
  sudo apt-get install -y build-essential libcurl4-openssl-dev libtbb12 cmake ninja-build python3-pip curl wget tar
  sudo mkdir -p /opt/intel
  wget -O openvino_${OPENVINO_VERSION_MAJOR}.tgz https://storage.openvinotoolkit.org/repositories/openvino/packages/${OPENVINO_VERSION_MAJOR}/linux/openvino_toolkit_ubuntu24_${OPENVINO_VERSION_FULL}_x86_64.tgz
  tar -xf openvino_${OPENVINO_VERSION_MAJOR}.tgz
  sudo mv openvino_toolkit_ubuntu24_${OPENVINO_VERSION_FULL}_x86_64 /opt/intel/openvino_${OPENVINO_VERSION_MAJOR}
  rm openvino_${OPENVINO_VERSION_MAJOR}.tgz
  cd /opt/intel/openvino_${OPENVINO_VERSION_MAJOR}
  echo "Y" | sudo -E ./install_dependencies/install_openvino_dependencies.sh && cd -
  sudo ln -s /opt/intel/openvino_${OPENVINO_VERSION_MAJOR} /opt/intel/openvino
  source /opt/intel/openvino/setupvars.sh
  ```
  </details>

- Verify that OpenVINO is initialized properly:

  ```bash
  echo $OpenVINO_DIR
  ```

@ -725,23 +753,26 @@ cd llama.cpp
git switch dev_backend_openvino

# Build with OpenVINO support
source /opt/intel/openvino/setupvars.sh
cmake -B build/ReleaseOV -G Ninja -DCMAKE_BUILD_TYPE=Release -DGGML_OPENVINO=ON
cmake --build build/ReleaseOV --config Release -j $(nproc)
```
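To sanity-check that the backend was compiled in, listing the devices llama.cpp can see is a quick test. A sketch: `--list-devices` is a standard llama.cpp option, but exactly how OpenVINO devices are reported here is an assumption.

```bash
source /opt/intel/openvino/setupvars.sh
# Should report the OpenVINO-backed device(s), e.g. GPU/CPU/NPU
./build/ReleaseOV/bin/llama-cli --list-devices
```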

### 3. Download Sample Model

Download models for testing:

```bash
# Create models directory
mkdir -p ~/models/

# Download model file: Llama-3.2-1B-Instruct.fp16.gguf
wget https://huggingface.co/MaziyarPanahi/Llama-3.2-1B-Instruct-GGUF/resolve/main/Llama-3.2-1B-Instruct.fp16.gguf \
    -O ~/models/Llama-3.2-1B-Instruct.fp16.gguf

# Download model file: Phi-3-mini-4k-instruct-fp16.gguf
wget https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-gguf/resolve/main/Phi-3-mini-4k-instruct-fp16.gguf \
    -O ~/models/Phi-3-mini-4k-instruct-fp16.gguf
```

### 4. Run Inference with the OpenVINO Backend

@ -750,28 +781,19 @@ When using the OpenVINO backend, the first inference token may have slightly hig

```bash
export GGML_OPENVINO_CACHE_DIR=/tmp/ov_cache
# Default device is GPU.
# If not set, automatically selects the first available device in priority order: GPU, CPU, NPU.
export GGML_OPENVINO_DEVICE=GPU

./build/ReleaseOV/bin/llama-simple -m ~/models/Llama-3.2-1B-Instruct.fp16.gguf -n 50 "The story of AI is "
```

To run in chat mode:

```bash
export GGML_OPENVINO_CACHE_DIR=/tmp/ov_cache
./build/ReleaseOV/bin/llama-cli -m ~/models/Llama-3.2-1B-Instruct.fp16.gguf -n 50 "The story of AI is "
```
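
The server binary works the same way; a sketch using standard `llama-server` flags (the `/health` endpoint matches the HEALTHCHECK in the Dockerfile above):

```bash
export GGML_OPENVINO_CACHE_DIR=/tmp/ov_cache
./build/ReleaseOV/bin/llama-server -m ~/models/Llama-3.2-1B-Instruct.fp16.gguf --host 0.0.0.0 --port 8080

# In another shell: returns HTTP 200 once the model is loaded
curl -f http://localhost:8080/health
```
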
@ -779,13 +801,14 @@ cmake --build build/ReleaseCPU --parallel

Control OpenVINO behavior using these environment variables:

- **`GGML_OPENVINO_DEVICE`**: Specify the target device for OpenVINO inference (see the sketch after this list). If not set, the backend automatically selects the first available device in priority order: GPU, CPU, NPU. When set to `NPU`, it enables static compilation mode for optimal performance on Intel NPUs.
- **`GGML_OPENVINO_CACHE_DIR`**: Directory for model caching (recommended: `/tmp/ov_cache`). If set, enables model caching in OpenVINO. Note: not yet supported on NPU devices.
- **`GGML_OPENVINO_WEIGHT_AS_INPUT`**: Pass the weights as input to the OpenVINO model instead of creating Constant nodes for them.
- **`GGML_OPENVINO_PROFILING`**: Enable execution time profiling.
- **`GGML_OPENVINO_DUMP_CGRAPH`**: Save the compute graph to `cgraph.txt`.
- **`GGML_OPENVINO_DUMP_IR`**: Export OpenVINO IR files with timestamps.
- **`GGML_OPENVINO_DEBUG_INPUT`**: Enable input debugging.
- **`GGML_OPENVINO_DEBUG_OUTPUT`**: Enable output debugging.
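
For example, to target an Intel NPU (a sketch based on the variable descriptions above; note the `unset`, since model caching is not yet supported on NPU):

```bash
export GGML_OPENVINO_DEVICE=NPU   # enables static compilation mode
unset GGML_OPENVINO_CACHE_DIR     # caching not yet supported on NPU

./build/ReleaseOV/bin/llama-simple -m ~/models/Llama-3.2-1B-Instruct.fp16.gguf -n 50 "The story of AI is "
```
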
### Example with Profiling
@ -793,11 +816,20 @@ Control OpenVINO behavior using these environment variables:
export GGML_OPENVINO_CACHE_DIR=/tmp/ov_cache
export GGML_OPENVINO_PROFILING=1

./build/ReleaseOV/bin/llama-simple -m ~/models/Llama-3.2-1B-Instruct.fp16.gguf -n 50 "The story of AI is "
```

### Using Llama.cpp's Built-in CPU Backend (for Comparison)

To compare performance with the default CPU backend:

```bash
# Build CPU-only version
cmake --preset ReleaseCPU
cmake --build build/ReleaseCPU --parallel

# Run with the default CPU backend
./build/ReleaseCPU/bin/llama-simple -m ~/models/Llama-3.2-1B-Instruct.fp16.gguf -n 50 "The story of AI is "
```

## Notes about GPU-accelerated backends