diff --git a/.devops/cann.Dockerfile b/.devops/cann.Dockerfile
index db221b0b81..6de22215e4 100644
--- a/.devops/cann.Dockerfile
+++ b/.devops/cann.Dockerfile
@@ -13,7 +13,7 @@ ARG CANN_BASE_IMAGE=quay.io/ascend/cann:8.3.rc2-${CHIP_TYPE}-openeuler24.03-py3.
FROM ${CANN_BASE_IMAGE} AS build
# -- Install build dependencies --
-RUN yum install -y gcc g++ cmake make git libcurl-devel python3 python3-pip && \
+RUN yum install -y gcc g++ cmake make git openssl-devel python3 python3-pip && \
yum clean all && \
rm -rf /var/cache/yum
@@ -42,6 +42,7 @@ RUN source /usr/local/Ascend/ascend-toolkit/set_env.sh --force \
-DGGML_CANN=ON \
-DCMAKE_BUILD_TYPE=Release \
-DSOC_TYPE=ascend${CHIP_TYPE} \
+ -DUSE_ACL_GRAPH=ON \
. && \
cmake --build build --config Release -j$(nproc)
diff --git a/.devops/cpu.Dockerfile b/.devops/cpu.Dockerfile
index b9e84ab986..c70a2de562 100644
--- a/.devops/cpu.Dockerfile
+++ b/.devops/cpu.Dockerfile
@@ -5,7 +5,7 @@ FROM ubuntu:$UBUNTU_VERSION AS build
ARG TARGETARCH
RUN apt-get update && \
- apt-get install -y build-essential git cmake libcurl4-openssl-dev
+ apt-get install -y build-essential git cmake libssl-dev
WORKDIR /app
diff --git a/.devops/cuda-new.Dockerfile b/.devops/cuda-new.Dockerfile
index 62443e17f2..98dc147d7e 100644
--- a/.devops/cuda-new.Dockerfile
+++ b/.devops/cuda-new.Dockerfile
@@ -12,7 +12,7 @@ FROM ${BASE_CUDA_DEV_CONTAINER} AS build
ARG CUDA_DOCKER_ARCH=default
RUN apt-get update && \
- apt-get install -y build-essential cmake python3 python3-pip git libcurl4-openssl-dev libgomp1
+ apt-get install -y build-essential cmake python3 python3-pip git libssl-dev libgomp1
WORKDIR /app
diff --git a/.devops/cuda.Dockerfile b/.devops/cuda.Dockerfile
index fed5863157..52f103bc31 100644
--- a/.devops/cuda.Dockerfile
+++ b/.devops/cuda.Dockerfile
@@ -12,7 +12,7 @@ FROM ${BASE_CUDA_DEV_CONTAINER} AS build
ARG CUDA_DOCKER_ARCH=default
RUN apt-get update && \
- apt-get install -y build-essential cmake python3 python3-pip git libcurl4-openssl-dev libgomp1
+ apt-get install -y build-essential cmake python3 python3-pip git libssl-dev libgomp1
WORKDIR /app
diff --git a/.devops/intel.Dockerfile b/.devops/intel.Dockerfile
index adebf08229..35ea4ade8e 100644
--- a/.devops/intel.Dockerfile
+++ b/.devops/intel.Dockerfile
@@ -6,7 +6,7 @@ FROM intel/deep-learning-essentials:$ONEAPI_VERSION AS build
ARG GGML_SYCL_F16=OFF
RUN apt-get update && \
- apt-get install -y git libcurl4-openssl-dev
+ apt-get install -y git libssl-dev
WORKDIR /app
diff --git a/.devops/llama-cli-cann.Dockerfile b/.devops/llama-cli-cann.Dockerfile
index 6581187f32..5bbc9ee43b 100644
--- a/.devops/llama-cli-cann.Dockerfile
+++ b/.devops/llama-cli-cann.Dockerfile
@@ -6,7 +6,7 @@ WORKDIR /app
COPY . .
-RUN yum install -y gcc g++ cmake make libcurl-devel
+RUN yum install -y gcc g++ cmake make openssl-devel
ENV ASCEND_TOOLKIT_HOME=/usr/local/Ascend/ascend-toolkit/latest
ENV LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:$LIBRARY_PATH
ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:${ASCEND_TOOLKIT_HOME}/lib64/plugin/opskernel:${ASCEND_TOOLKIT_HOME}/lib64/plugin/nnengine:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe/op_tiling:${LD_LIBRARY_PATH}
diff --git a/.devops/musa.Dockerfile b/.devops/musa.Dockerfile
index 34d6ad9f40..9eb4985204 100644
--- a/.devops/musa.Dockerfile
+++ b/.devops/musa.Dockerfile
@@ -18,7 +18,7 @@ RUN apt-get update && \
python3 \
python3-pip \
git \
- libcurl4-openssl-dev \
+ libssl-dev \
libgomp1
WORKDIR /app
diff --git a/.devops/nix/package.nix b/.devops/nix/package.nix
index a13996bd68..79a7270e5d 100644
--- a/.devops/nix/package.nix
+++ b/.devops/nix/package.nix
@@ -32,7 +32,6 @@
useMpi ? false,
useRocm ? config.rocmSupport,
rocmGpuTargets ? builtins.concatStringsSep ";" rocmPackages.clr.gpuTargets,
- enableCurl ? true,
useVulkan ? false,
useRpc ? false,
llamaVersion ? "0.0.0", # Arbitrary version, substituted by the flake
@@ -160,15 +159,13 @@ effectiveStdenv.mkDerivation (finalAttrs: {
++ optionals useMpi [ mpi ]
++ optionals useRocm rocmBuildInputs
++ optionals useBlas [ blas ]
- ++ optionals useVulkan vulkanBuildInputs
- ++ optionals enableCurl [ curl ];
+ ++ optionals useVulkan vulkanBuildInputs;
cmakeFlags =
[
(cmakeBool "LLAMA_BUILD_SERVER" true)
(cmakeBool "BUILD_SHARED_LIBS" (!enableStatic))
(cmakeBool "CMAKE_SKIP_BUILD_RPATH" true)
- (cmakeBool "LLAMA_CURL" enableCurl)
(cmakeBool "GGML_NATIVE" false)
(cmakeBool "GGML_BLAS" useBlas)
(cmakeBool "GGML_CUDA" useCuda)
diff --git a/.devops/rocm.Dockerfile b/.devops/rocm.Dockerfile
index 53c3ed8d88..14936f8e9c 100644
--- a/.devops/rocm.Dockerfile
+++ b/.devops/rocm.Dockerfile
@@ -27,7 +27,7 @@ RUN apt-get update \
build-essential \
cmake \
git \
- libcurl4-openssl-dev \
+ libssl-dev \
curl \
libgomp1
diff --git a/.devops/s390x.Dockerfile b/.devops/s390x.Dockerfile
index 1e66f061d5..757cd97cd4 100644
--- a/.devops/s390x.Dockerfile
+++ b/.devops/s390x.Dockerfile
@@ -11,7 +11,7 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
apt install -y --no-install-recommends \
git cmake ccache ninja-build \
# WARNING: Do not use libopenblas-openmp-dev. libopenblas-dev is faster.
- libopenblas-dev libcurl4-openssl-dev && \
+ libopenblas-dev libssl-dev && \
rm -rf /var/lib/apt/lists/*
WORKDIR /app
diff --git a/.devops/vulkan.Dockerfile b/.devops/vulkan.Dockerfile
index 89831ed5c2..9797c5e0f3 100644
--- a/.devops/vulkan.Dockerfile
+++ b/.devops/vulkan.Dockerfile
@@ -5,8 +5,8 @@ FROM ubuntu:$UBUNTU_VERSION AS build
# Install build tools
RUN apt update && apt install -y git build-essential cmake wget xz-utils
-# Install cURL and Vulkan SDK dependencies
-RUN apt install -y libcurl4-openssl-dev curl \
+# Install OpenSSL and Vulkan SDK dependencies
+RUN apt install -y libssl-dev curl \
libxcb-xinput0 libxcb-xinerama0 libxcb-cursor-dev libvulkan-dev glslc
# Build it
diff --git a/.github/labeler.yml b/.github/labeler.yml
index d8ada150c5..08cfd7e0bc 100644
--- a/.github/labeler.yml
+++ b/.github/labeler.yml
@@ -89,7 +89,10 @@ nix:
embedding:
- changed-files:
- any-glob-to-any-file: examples/embedding/
-
+jinja parser:
+ - changed-files:
+ - any-glob-to-any-file:
+ - common/jinja/**
Ascend NPU:
- changed-files:
- any-glob-to-any-file:
diff --git a/.github/workflows/build-cache.yml b/.github/workflows/build-cache.yml
index 6a22e41c3b..3de0be9fad 100644
--- a/.github/workflows/build-cache.yml
+++ b/.github/workflows/build-cache.yml
@@ -16,7 +16,7 @@ jobs:
steps:
- name: Clone
id: checkout
- uses: actions/checkout@v4
+ uses: actions/checkout@v6
- name: Get latest Vulkan SDK version
id: vulkan_sdk_version
@@ -24,7 +24,7 @@ jobs:
echo "VULKAN_SDK_VERSION=$(curl https://vulkan.lunarg.com/sdk/latest/linux.txt)" >> "$GITHUB_ENV"
- name: Setup Cache
- uses: actions/cache@v4
+ uses: actions/cache@v5
id: cache-sdk
with:
path: ./vulkan_sdk
@@ -47,10 +47,10 @@ jobs:
steps:
- name: Clone
id: checkout
- uses: actions/checkout@v4
+ uses: actions/checkout@v6
- name: Setup Cache
- uses: actions/cache@v4
+ uses: actions/cache@v5
id: cache-toolchain
with:
path: ./spacemit_toolchain
@@ -73,10 +73,10 @@ jobs:
steps:
- name: Clone
id: checkout
- uses: actions/checkout@v4
+ uses: actions/checkout@v6
- name: Setup Cache
- uses: actions/cache@v4
+ uses: actions/cache@v5
id: cache-rocm
with:
path: C:\Program Files\AMD\ROCm
diff --git a/.github/workflows/build-cmake-pkg.yml b/.github/workflows/build-cmake-pkg.yml
index fee2ab96bd..259efa43c8 100644
--- a/.github/workflows/build-cmake-pkg.yml
+++ b/.github/workflows/build-cmake-pkg.yml
@@ -7,7 +7,7 @@ jobs:
linux:
runs-on: ubuntu-24.04
steps:
- - uses: actions/checkout@v4
+ - uses: actions/checkout@v6
with:
fetch-depth: 0
@@ -20,7 +20,7 @@ jobs:
run: |
PREFIX="$(pwd)"/inst
cmake -S . -B build -DCMAKE_PREFIX_PATH="$PREFIX" \
- -DLLAMA_CURL=OFF -DLLAMA_BUILD_TESTS=OFF -DLLAMA_BUILD_TOOLS=OFF \
+ -DLLAMA_OPENSSL=OFF -DLLAMA_BUILD_TESTS=OFF -DLLAMA_BUILD_TOOLS=OFF \
-DLLAMA_BUILD_EXAMPLES=OFF -DCMAKE_BUILD_TYPE=Release
cmake --build build --config Release
cmake --install build --prefix "$PREFIX" --config Release
diff --git a/.github/workflows/build-linux-cross.yml b/.github/workflows/build-linux-cross.yml
index c2c6ea12ae..8b6ebaf4a3 100644
--- a/.github/workflows/build-linux-cross.yml
+++ b/.github/workflows/build-linux-cross.yml
@@ -8,7 +8,7 @@ jobs:
# runs-on: ubuntu-24.04
# steps:
- # - uses: actions/checkout@v4
+ # - uses: actions/checkout@v6
# - name: Setup Riscv
# run: |
# sudo dpkg --add-architecture riscv64
@@ -30,7 +30,7 @@ jobs:
# - name: Build
# run: |
- # cmake -B build -DLLAMA_CURL=OFF \
+ # cmake -B build -DLLAMA_OPENSSL=OFF \
# -DCMAKE_BUILD_TYPE=Release \
# -DGGML_OPENMP=OFF \
# -DLLAMA_BUILD_EXAMPLES=ON \
@@ -52,7 +52,7 @@ jobs:
# runs-on: ubuntu-24.04
# steps:
- # - uses: actions/checkout@v4
+ # - uses: actions/checkout@v6
# - name: Setup Riscv
# run: |
# sudo dpkg --add-architecture riscv64
@@ -76,7 +76,7 @@ jobs:
# - name: Build
# run: |
- # cmake -B build -DLLAMA_CURL=OFF \
+ # cmake -B build -DLLAMA_OPENSSL=OFF \
# -DCMAKE_BUILD_TYPE=Release \
# -DGGML_VULKAN=ON \
# -DGGML_OPENMP=OFF \
@@ -99,7 +99,7 @@ jobs:
# runs-on: ubuntu-24.04
# steps:
- # - uses: actions/checkout@v4
+ # - uses: actions/checkout@v6
# - name: Setup Arm64
# run: |
# sudo dpkg --add-architecture arm64
@@ -122,7 +122,7 @@ jobs:
# - name: Build
# run: |
- # cmake -B build -DLLAMA_CURL=OFF \
+ # cmake -B build -DLLAMA_OPENSSL=OFF \
# -DCMAKE_BUILD_TYPE=Release \
# -DGGML_VULKAN=ON \
# -DGGML_OPENMP=OFF \
@@ -146,7 +146,7 @@ jobs:
container: debian@sha256:653dfb9f86c3782e8369d5f7d29bb8faba1f4bff9025db46e807fa4c22903671
steps:
- - uses: actions/checkout@v4
+ - uses: actions/checkout@v6
- name: Setup LoongArch
run: |
rm -f /etc/apt/sources.list.d/*
@@ -178,7 +178,7 @@ jobs:
- name: Build
run: |
- cmake -B build -DLLAMA_CURL=OFF \
+ cmake -B build -DLLAMA_OPENSSL=OFF \
-DCMAKE_BUILD_TYPE=Release \
-DGGML_OPENMP=OFF \
-DLLAMA_BUILD_EXAMPLES=ON \
@@ -201,7 +201,7 @@ jobs:
container: debian@sha256:653dfb9f86c3782e8369d5f7d29bb8faba1f4bff9025db46e807fa4c22903671
steps:
- - uses: actions/checkout@v4
+ - uses: actions/checkout@v6
- name: Setup LoongArch
run: |
rm -f /etc/apt/sources.list.d/*
@@ -235,7 +235,7 @@ jobs:
- name: Build
run: |
- cmake -B build -DLLAMA_CURL=OFF \
+ cmake -B build -DLLAMA_OPENSSL=OFF \
-DCMAKE_BUILD_TYPE=Release \
-DGGML_VULKAN=ON \
-DGGML_OPENMP=OFF \
@@ -262,10 +262,10 @@ jobs:
SPACEMIT_IME_TOOLCHAIN_VERSION: "1.1.2"
steps:
- - uses: actions/checkout@v4
+ - uses: actions/checkout@v6
- name: Use SpacemiT Toolchain Cache
- uses: actions/cache@v4
+ uses: actions/cache@v5
id: cache-toolchain
with:
path: ./spacemit_toolchain
@@ -281,7 +281,7 @@ jobs:
- name: Build
run: |
export RISCV_ROOT_PATH=${PWD}/spacemit_toolchain
- cmake -B build -DLLAMA_CURL=OFF \
+ cmake -B build -DLLAMA_OPENSSL=OFF \
-DCMAKE_BUILD_TYPE=Release \
-DGGML_OPENMP=OFF \
-DLLAMA_BUILD_EXAMPLES=ON \
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 3c89b4fab6..551bdd3df0 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -63,7 +63,7 @@ jobs:
steps:
- name: Clone
id: checkout
- uses: actions/checkout@v4
+ uses: actions/checkout@v6
- name: ccache
uses: ggml-org/ccache-action@v1.2.16
@@ -79,7 +79,6 @@ jobs:
cmake -B build \
-DCMAKE_BUILD_RPATH="@loader_path" \
-DLLAMA_FATAL_WARNINGS=ON \
- -DLLAMA_CURL=OFF \
-DLLAMA_BUILD_BORINGSSL=ON \
-DGGML_METAL_USE_BF16=ON \
-DGGML_METAL_EMBED_LIBRARY=OFF \
@@ -92,7 +91,7 @@ jobs:
id: cmake_test
run: |
cd build
- ctest -L 'main|curl' --verbose --timeout 900
+ ctest -L main --verbose --timeout 900
macOS-latest-cmake-x64:
runs-on: macos-15-intel
@@ -100,7 +99,7 @@ jobs:
steps:
- name: Clone
id: checkout
- uses: actions/checkout@v4
+ uses: actions/checkout@v6
- name: ccache
uses: ggml-org/ccache-action@v1.2.16
@@ -118,7 +117,6 @@ jobs:
cmake -B build \
-DCMAKE_BUILD_RPATH="@loader_path" \
-DLLAMA_FATAL_WARNINGS=ON \
- -DLLAMA_CURL=OFF \
-DLLAMA_BUILD_BORINGSSL=ON \
-DGGML_METAL=OFF \
-DGGML_RPC=ON \
@@ -137,7 +135,7 @@ jobs:
steps:
- name: Clone
id: checkout
- uses: actions/checkout@v4
+ uses: actions/checkout@v6
- name: ccache
uses: ggml-org/ccache-action@v1.2.16
@@ -191,7 +189,7 @@ jobs:
steps:
- name: Clone
id: checkout
- uses: actions/checkout@v4
+ uses: actions/checkout@v6
- name: ccache
uses: ggml-org/ccache-action@v1.2.16
@@ -227,8 +225,6 @@ jobs:
id: cmake_build
run: |
cmake -B build \
- -DLLAMA_CURL=OFF \
- -DLLAMA_OPENSSL=ON \
-DLLAMA_FATAL_WARNINGS=ON \
-DGGML_RPC=ON
cmake --build build --config Release -j $(nproc)
@@ -237,7 +233,7 @@ jobs:
id: cmake_test
run: |
cd build
- ctest -L 'main|curl' --verbose --timeout 900
+ ctest -L main --verbose --timeout 900
- name: Test llama2c conversion
id: llama2c_test
@@ -273,7 +269,7 @@ jobs:
steps:
- name: Clone
id: checkout
- uses: actions/checkout@v4
+ uses: actions/checkout@v6
- name: ccache
uses: ggml-org/ccache-action@v1.2.16
@@ -293,8 +289,6 @@ jobs:
if: ${{ matrix.sanitizer != 'THREAD' }}
run: |
cmake -B build \
- -DLLAMA_CURL=OFF \
- -DLLAMA_OPENSSL=ON \
-DLLAMA_FATAL_WARNINGS=ON \
-DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON \
-DCMAKE_BUILD_TYPE=${{ matrix.build_type }}
@@ -305,8 +299,6 @@ jobs:
if: ${{ matrix.sanitizer == 'THREAD' }}
run: |
cmake -B build \
- -DLLAMA_CURL=OFF \
- -DLLAMA_OPENSSL=ON \
-DLLAMA_FATAL_WARNINGS=ON \
-DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON \
-DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \
@@ -325,7 +317,7 @@ jobs:
steps:
- name: Clone
id: checkout
- uses: actions/checkout@v4
+ uses: actions/checkout@v6
- name: Dependencies
id: depends
@@ -336,14 +328,10 @@ jobs:
- name: Build
id: cmake_build
run: |
- mkdir build
- cd build
- cmake .. \
- -DLLAMA_CURL=OFF \
- -DLLAMA_OPENSSL=ON \
+ cmake -B build \
-DLLAMA_FATAL_WARNINGS=ON \
-DLLAMA_LLGUIDANCE=ON
- cmake --build . --config Release -j $(nproc)
+ cmake --build build --config Release -j $(nproc)
- name: Test
id: cmake_test
@@ -359,7 +347,7 @@ jobs:
steps:
- name: Clone
id: checkout
- uses: actions/checkout@v4
+ uses: actions/checkout@v6
# - name: ccache
# uses: ggml-org/ccache-action@v1.2.16
@@ -377,8 +365,6 @@ jobs:
id: cmake_build
run: |
cmake -B build \
- -DLLAMA_CURL=OFF \
- -DLLAMA_OPENSSL=ON \
-DGGML_RPC=ON
cmake --build build --config Release -j $(nproc)
@@ -394,7 +380,7 @@ jobs:
steps:
- name: Clone
id: checkout
- uses: actions/checkout@v4
+ uses: actions/checkout@v6
- name: ccache
uses: ggml-org/ccache-action@v1.2.16
@@ -412,8 +398,6 @@ jobs:
id: cmake_configure
run: |
cmake -B build \
- -DLLAMA_CURL=OFF \
- -DLLAMA_OPENSSL=ON \
-DCMAKE_BUILD_TYPE=RelWithDebInfo \
-DGGML_BACKEND_DL=ON \
-DGGML_CPU_ALL_VARIANTS=ON \
@@ -430,7 +414,7 @@ jobs:
steps:
- name: Clone
id: checkout
- uses: actions/checkout@v4
+ uses: actions/checkout@v6
- name: ccache
uses: ggml-org/ccache-action@v1.2.16
@@ -452,7 +436,7 @@ jobs:
echo "VULKAN_SDK_VERSION=$(curl https://vulkan.lunarg.com/sdk/latest/linux.txt)" >> "$GITHUB_ENV"
- name: Use Vulkan SDK Cache
- uses: actions/cache@v4
+ uses: actions/cache@v5
id: cache-sdk
with:
path: ./vulkan_sdk
@@ -470,8 +454,6 @@ jobs:
run: |
source ./vulkan_sdk/setup-env.sh
cmake -B build \
- -DLLAMA_CURL=OFF \
- -DLLAMA_OPENSSL=ON \
-DGGML_VULKAN=ON
cmake --build build --config Release -j $(nproc)
@@ -490,7 +472,7 @@ jobs:
steps:
- name: Clone
id: checkout
- uses: actions/checkout@v4
+ uses: actions/checkout@v6
- name: ccache
uses: ggml-org/ccache-action@v1.2.16
@@ -512,7 +494,7 @@ jobs:
echo "VULKAN_SDK_VERSION=$(curl https://vulkan.lunarg.com/sdk/latest/linux.txt)" >> "$GITHUB_ENV"
- name: Use Vulkan SDK Cache
- uses: actions/cache@v4
+ uses: actions/cache@v5
id: cache-sdk
with:
path: ./vulkan_sdk
@@ -545,8 +527,6 @@ jobs:
run: |
export Dawn_DIR=dawn/lib64/cmake/Dawn
cmake -B build \
- -DLLAMA_CURL=OFF \
- -DLLAMA_OPENSSL=ON \
-DGGML_WEBGPU=ON
cmake --build build --config Release -j $(nproc)
@@ -563,7 +543,7 @@ jobs:
steps:
- name: Clone
id: checkout
- uses: actions/checkout@v4
+ uses: actions/checkout@v6
- name: ccache
uses: ggml-org/ccache-action@v1.2.16
@@ -593,7 +573,7 @@ jobs:
source emsdk/emsdk_env.sh
emcmake cmake -B build-wasm \
-DGGML_WEBGPU=ON \
- -DLLAMA_CURL=OFF \
+ -DLLAMA_OPENSSL=OFF \
-DEMDAWNWEBGPU_DIR=emdawnwebgpu_pkg
cmake --build build-wasm --target test-backend-ops -j $(nproc)
@@ -605,7 +585,7 @@ jobs:
steps:
- name: Clone
id: checkout
- uses: actions/checkout@v4
+ uses: actions/checkout@v6
- name: Dependencies
id: depends
@@ -624,8 +604,6 @@ jobs:
id: cmake_build
run: |
cmake -B build -S . \
- -DLLAMA_CURL=OFF \
- -DLLAMA_OPENSSL=ON \
-DCMAKE_HIP_COMPILER="$(hipconfig -l)/clang" \
-DGGML_HIP_ROCWMMA_FATTN=ON \
-DGGML_HIP=ON
@@ -638,7 +616,7 @@ jobs:
steps:
- name: Clone
id: checkout
- uses: actions/checkout@v4
+ uses: actions/checkout@v6
- name: Dependencies
id: depends
@@ -657,8 +635,6 @@ jobs:
id: cmake_build
run: |
cmake -B build -S . \
- -DLLAMA_CURL=OFF \
- -DLLAMA_OPENSSL=ON \
-DGGML_MUSA=ON
cmake --build build --config Release -j $(nproc)
@@ -668,7 +644,7 @@ jobs:
continue-on-error: true
steps:
- - uses: actions/checkout@v4
+ - uses: actions/checkout@v6
- name: add oneAPI to apt
shell: bash
@@ -692,7 +668,7 @@ jobs:
- name: Clone
id: checkout
- uses: actions/checkout@v4
+ uses: actions/checkout@v6
- name: ccache
uses: ggml-org/ccache-action@v1.2.16
@@ -706,8 +682,6 @@ jobs:
run: |
source /opt/intel/oneapi/setvars.sh
cmake -B build \
- -DLLAMA_CURL=OFF \
- -DLLAMA_OPENSSL=ON \
-DGGML_SYCL=ON \
-DCMAKE_C_COMPILER=icx \
-DCMAKE_CXX_COMPILER=icpx
@@ -719,7 +693,7 @@ jobs:
continue-on-error: true
steps:
- - uses: actions/checkout@v4
+ - uses: actions/checkout@v6
- name: add oneAPI to apt
shell: bash
@@ -743,7 +717,7 @@ jobs:
- name: Clone
id: checkout
- uses: actions/checkout@v4
+ uses: actions/checkout@v6
- name: ccache
uses: ggml-org/ccache-action@v1.2.16
@@ -757,8 +731,6 @@ jobs:
run: |
source /opt/intel/oneapi/setvars.sh
cmake -B build \
- -DLLAMA_CURL=OFF \
- -DLLAMA_OPENSSL=ON \
-DGGML_SYCL=ON \
-DCMAKE_C_COMPILER=icx \
-DCMAKE_CXX_COMPILER=icpx \
@@ -777,7 +749,7 @@ jobs:
steps:
- name: Clone
id: checkout
- uses: actions/checkout@v4
+ uses: actions/checkout@v6
- name: ccache
uses: ggml-org/ccache-action@v1.2.16
@@ -809,7 +781,7 @@ jobs:
steps:
- name: Clone
id: checkout
- uses: actions/checkout@v4
+ uses: actions/checkout@v6
- name: ccache
uses: ggml-org/ccache-action@v1.2.16
@@ -841,7 +813,7 @@ jobs:
steps:
- name: Clone
id: checkout
- uses: actions/checkout@v4
+ uses: actions/checkout@v6
- name: Build
id: cmake_build
@@ -871,7 +843,7 @@ jobs:
steps:
- name: Clone
id: checkout
- uses: actions/checkout@v4
+ uses: actions/checkout@v6
- name: ccache
uses: ggml-org/ccache-action@v1.2.16
@@ -881,7 +853,7 @@ jobs:
save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}
- name: Download xcframework artifact
- uses: actions/download-artifact@v4
+ uses: actions/download-artifact@v7
with:
name: llama-xcframework
path: build-apple/llama.xcframework/
@@ -893,7 +865,7 @@ jobs:
cmake -B build -G Xcode \
-DGGML_METAL_USE_BF16=ON \
-DGGML_METAL_EMBED_LIBRARY=ON \
- -DLLAMA_CURL=OFF \
+ -DLLAMA_OPENSSL=OFF \
-DLLAMA_BUILD_EXAMPLES=OFF \
-DLLAMA_BUILD_TOOLS=OFF \
-DLLAMA_BUILD_TESTS=OFF \
@@ -913,7 +885,7 @@ jobs:
steps:
- name: Clone
- uses: actions/checkout@v4
+ uses: actions/checkout@v6
- name: ccache
uses: ggml-org/ccache-action@v1.2.16
@@ -982,7 +954,7 @@ jobs:
steps:
- name: Clone
id: checkout
- uses: actions/checkout@v4
+ uses: actions/checkout@v6
- name: ccache
uses: ggml-org/ccache-action@v1.2.16
@@ -1043,7 +1015,7 @@ jobs:
id: cmake_build
run: |
cmake -S . -B build ${{ matrix.defines }} `
- -DLLAMA_CURL=OFF -DLLAMA_BUILD_BORINGSSL=ON
+ -DLLAMA_BUILD_BORINGSSL=ON
cmake --build build --config Release -j ${env:NUMBER_OF_PROCESSORS}
- name: Add libopenblas.dll
@@ -1081,7 +1053,7 @@ jobs:
steps:
- name: Clone
id: checkout
- uses: actions/checkout@v4
+ uses: actions/checkout@v6
- name: Install dependencies
env:
@@ -1101,8 +1073,6 @@ jobs:
# TODO: Remove GGML_CUDA_CUB_3DOT2 flag once CCCL 3.2 is bundled within CTK and that CTK version is used in this project
run: |
cmake -S . -B build -G Ninja \
- -DLLAMA_CURL=OFF \
- -DLLAMA_OPENSSL=ON \
-DLLAMA_FATAL_WARNINGS=ON \
-DCMAKE_BUILD_TYPE=Release \
-DCMAKE_CUDA_ARCHITECTURES=89-real \
@@ -1122,7 +1092,7 @@ jobs:
steps:
- name: Clone
id: checkout
- uses: actions/checkout@v4
+ uses: actions/checkout@v6
- name: Install ccache
uses: ggml-org/ccache-action@v1.2.16
@@ -1150,7 +1120,6 @@ jobs:
call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" x64
cmake -S . -B build -G "Ninja Multi-Config" ^
-DLLAMA_BUILD_SERVER=ON ^
- -DLLAMA_CURL=OFF ^
-DLLAMA_BUILD_BORINGSSL=ON ^
-DGGML_NATIVE=OFF ^
-DGGML_BACKEND_DL=ON ^
@@ -1176,7 +1145,7 @@ jobs:
steps:
- name: Clone
id: checkout
- uses: actions/checkout@v4
+ uses: actions/checkout@v6
- name: ccache
uses: ggml-org/ccache-action@v1.2.16
@@ -1208,7 +1177,7 @@ jobs:
steps:
- name: Clone
id: checkout
- uses: actions/checkout@v4
+ uses: actions/checkout@v6
- name: Grab rocWMMA package
id: grab_rocwmma
@@ -1218,7 +1187,7 @@ jobs:
7z x data.tar
- name: Use ROCm Installation Cache
- uses: actions/cache@v4
+ uses: actions/cache@v5
id: cache-rocm
with:
path: C:\Program Files\AMD\ROCm
@@ -1258,7 +1227,6 @@ jobs:
-DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" `
-DCMAKE_CXX_FLAGS="-I$($PWD.Path.Replace('\', '/'))/opt/rocm-${{ env.ROCM_VERSION }}/include/" `
-DCMAKE_BUILD_TYPE=Release `
- -DLLAMA_CURL=OFF `
-DLLAMA_BUILD_BORINGSSL=ON `
-DROCM_DIR="${env:HIP_PATH}" `
-DGGML_HIP=ON `
@@ -1271,7 +1239,7 @@ jobs:
steps:
- name: Checkout code
- uses: actions/checkout@v4
+ uses: actions/checkout@v6
- name: Setup Xcode
uses: maxim-lobanov/setup-xcode@v1
@@ -1285,7 +1253,7 @@ jobs:
cmake -B build -G Xcode \
-DGGML_METAL_USE_BF16=ON \
-DGGML_METAL_EMBED_LIBRARY=ON \
- -DLLAMA_CURL=OFF \
+ -DLLAMA_OPENSSL=OFF \
-DLLAMA_BUILD_EXAMPLES=OFF \
-DLLAMA_BUILD_TOOLS=OFF \
-DLLAMA_BUILD_TESTS=OFF \
@@ -1301,7 +1269,7 @@ jobs:
./build-xcframework.sh
- name: Upload xcframework artifact
- uses: actions/upload-artifact@v4
+ uses: actions/upload-artifact@v6
with:
name: llama-xcframework
path: build-apple/llama.xcframework/
@@ -1317,7 +1285,7 @@ jobs:
steps:
- name: Clone
- uses: actions/checkout@v4
+ uses: actions/checkout@v6
# Disabled due to size (400MB) and always 0 cache hits
# - name: ccache
@@ -1327,7 +1295,7 @@ jobs:
# evict-old-files: 1d
- name: Set up JDK
- uses: actions/setup-java@v3
+ uses: actions/setup-java@v5
with:
java-version: 17
distribution: zulu
@@ -1352,14 +1320,14 @@ jobs:
matrix:
include:
- build: 'arm64-cpu'
- defines: '-D ANDROID_ABI=arm64-v8a -D ANDROID_PLATFORM=android-31 -D CMAKE_TOOLCHAIN_FILE=${ANDROID_NDK_ROOT}/build/cmake/android.toolchain.cmake -D GGML_NATIVE=OFF -DGGML_CPU_ARM_ARCH=armv8.5-a+fp16+i8mm -G Ninja -D LLAMA_CURL=OFF -D GGML_OPENMP=OFF'
+ defines: '-D ANDROID_ABI=arm64-v8a -D ANDROID_PLATFORM=android-31 -D CMAKE_TOOLCHAIN_FILE=${ANDROID_NDK_ROOT}/build/cmake/android.toolchain.cmake -D GGML_NATIVE=OFF -DGGML_CPU_ARM_ARCH=armv8.5-a+fp16+i8mm -G Ninja -D LLAMA_OPENSSL=OFF -D GGML_OPENMP=OFF'
- build: 'arm64-snapdragon'
defines: '--preset arm64-android-snapdragon-release'
steps:
- name: Clone
id: checkout
- uses: actions/checkout@v4
+ uses: actions/checkout@v6
- name: Install OpenCL Headers and Libs
id: install_opencl
@@ -1426,10 +1394,15 @@ jobs:
arch: [x86, aarch64]
chip_type: ['910b', '310p']
build: ['Release']
+ use_acl_graph: ['on', 'off']
+ exclude:
+ # 310P does not support USE_ACL_GRAPH=on
+ - chip_type: '310p'
+ use_acl_graph: 'on'
runs-on: ${{ matrix.arch == 'aarch64' && 'ubuntu-24.04-arm' || 'ubuntu-24.04' }}
steps:
- name: Checkout
- uses: actions/checkout@v4
+ uses: actions/checkout@v6
with:
fetch-depth: 0
@@ -1451,6 +1424,7 @@ jobs:
env:
BUILD_TYPE: ${{ matrix.build }}
SOC_TYPE: ascend${{ matrix.chip_type }}
+ USE_ACL_GRAPH: ${{ matrix.use_acl_graph }}
run: |
HOST_UID=$(id -u)
HOST_GID=$(id -g)
@@ -1460,6 +1434,7 @@ jobs:
-w /workspace \
-e SOC_TYPE=${SOC_TYPE} \
-e BUILD_TYPE=${BUILD_TYPE} \
+ -e USE_ACL_GRAPH=${USE_ACL_GRAPH} \
"${{ steps.cann-image.outputs.image }}" \
bash -lc '
set -e
@@ -1469,10 +1444,9 @@ jobs:
export LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:${ASCEND_TOOLKIT_HOME}/$(uname -m)-linux/devlib/:${LD_LIBRARY_PATH}
cmake -S . -B build \
-DCMAKE_BUILD_TYPE=${BUILD_TYPE} \
- -DLLAMA_CURL=OFF \
- -DLLAMA_OPENSSL=ON \
-DGGML_CANN=on \
- -DSOC_TYPE=${SOC_TYPE}
+ -DSOC_TYPE=${SOC_TYPE} \
+ -DUSE_ACL_GRAPH=${USE_ACL_GRAPH}
cmake --build build -j $(nproc)
chown -R '"${HOST_UID}"':'"${HOST_GID}"' /workspace/build
@@ -1486,7 +1460,7 @@ jobs:
steps:
- name: Clone
id: checkout
- uses: actions/checkout@v4
+ uses: actions/checkout@v6
- name: ccache
uses: ggml-org/ccache-action@v1.2.16
@@ -1499,7 +1473,7 @@ jobs:
id: depends
run: |
sudo apt-get update
- sudo apt-get install build-essential libcurl4-openssl-dev
+ sudo apt-get install build-essential
- name: Test
id: ggml-ci
@@ -1512,7 +1486,7 @@ jobs:
steps:
- name: Clone
id: checkout
- uses: actions/checkout@v4
+ uses: actions/checkout@v6
- name: ccache
uses: ggml-org/ccache-action@v1.2.16
@@ -1525,7 +1499,7 @@ jobs:
id: depends
run: |
sudo apt-get update
- sudo apt-get install build-essential libcurl4-openssl-dev
+ sudo apt-get install build-essential
- name: Test
id: ggml-ci
@@ -1538,7 +1512,7 @@ jobs:
steps:
- name: Clone
id: checkout
- uses: actions/checkout@v4
+ uses: actions/checkout@v6
- name: ccache
uses: ggml-org/ccache-action@v1.2.16
@@ -1551,7 +1525,7 @@ jobs:
id: depends
run: |
sudo apt-get update
- sudo apt-get install build-essential libcurl4-openssl-dev
+ sudo apt-get install build-essential
- name: Test
id: ggml-ci
@@ -1564,7 +1538,7 @@ jobs:
steps:
- name: Clone
id: checkout
- uses: actions/checkout@v4
+ uses: actions/checkout@v6
- name: ccache
uses: ggml-org/ccache-action@v1.2.16
@@ -1577,7 +1551,7 @@ jobs:
id: depends
run: |
sudo apt-get update
- sudo apt-get install build-essential libcurl4-openssl-dev
+ sudo apt-get install build-essential
- name: Test
id: ggml-ci
@@ -1590,7 +1564,7 @@ jobs:
steps:
- name: Clone
id: checkout
- uses: actions/checkout@v4
+ uses: actions/checkout@v6
- name: ccache
uses: ggml-org/ccache-action@v1.2.16
@@ -1603,7 +1577,7 @@ jobs:
id: depends
run: |
sudo apt-get update
- sudo apt-get install build-essential libcurl4-openssl-dev
+ sudo apt-get install build-essential
- name: Test
id: ggml-ci
@@ -1616,7 +1590,7 @@ jobs:
steps:
- name: Clone
id: checkout
- uses: actions/checkout@v4
+ uses: actions/checkout@v6
- name: Test
id: ggml-ci
@@ -1630,7 +1604,7 @@ jobs:
steps:
- name: Clone
id: checkout
- uses: actions/checkout@v4
+ uses: actions/checkout@v6
- name: Test
id: ggml-ci
@@ -1644,7 +1618,7 @@ jobs:
steps:
- name: Clone
id: checkout
- uses: actions/checkout@v4
+ uses: actions/checkout@v6
- name: Test
id: ggml-ci
@@ -1658,7 +1632,7 @@ jobs:
steps:
- name: Clone
id: checkout
- uses: actions/checkout@v4
+ uses: actions/checkout@v6
- name: Test
id: ggml-ci
@@ -1671,7 +1645,7 @@ jobs:
# steps:
# - name: Clone
# id: checkout
- # uses: actions/checkout@v4
+ # uses: actions/checkout@v6
# - name: Test
# id: ggml-ci
@@ -1685,7 +1659,7 @@ jobs:
# steps:
# - name: Clone
# id: checkout
- # uses: actions/checkout@v4
+ # uses: actions/checkout@v6
# - name: Test
# id: ggml-ci
@@ -1699,7 +1673,7 @@ jobs:
steps:
- name: Clone
id: checkout
- uses: actions/checkout@v4
+ uses: actions/checkout@v6
- name: Test
id: ggml-ci
@@ -1712,7 +1686,7 @@ jobs:
steps:
- name: Clone
id: checkout
- uses: actions/checkout@v4
+ uses: actions/checkout@v6
- name: Dawn Dependency
id: dawn-depends
@@ -1740,7 +1714,7 @@ jobs:
steps:
- name: Clone
id: checkout
- uses: actions/checkout@v4
+ uses: actions/checkout@v6
- name: Test
id: ggml-ci
@@ -1754,7 +1728,7 @@ jobs:
steps:
- name: Clone
id: checkout
- uses: actions/checkout@v4
+ uses: actions/checkout@v6
- name: ccache
uses: ggml-org/ccache-action@v1.2.16
@@ -1767,7 +1741,7 @@ jobs:
id: depends
run: |
sudo apt-get update
- sudo apt-get install -y build-essential libcurl4-openssl-dev
+ sudo apt-get install -y build-essential
- name: Test
id: ggml-ci
@@ -1799,7 +1773,7 @@ jobs:
- name: Clone
id: checkout
- uses: actions/checkout@v4
+ uses: actions/checkout@v6
- name: Check environment
run: |
@@ -1834,8 +1808,6 @@ jobs:
id: cmake_build
run: |
cmake -B build \
- -DLLAMA_CURL=OFF \
- -DLLAMA_OPENSSL=ON \
-DCMAKE_BUILD_TYPE=Release \
-DGGML_OPENMP=OFF \
-DLLAMA_BUILD_EXAMPLES=ON \
@@ -1853,7 +1825,7 @@ jobs:
id: cmake_test
run: |
cd build
- ctest -L 'main|curl' --verbose --timeout 900
+ ctest -L main --verbose --timeout 900
- name: Test llama2c conversion
id: llama2c_test
@@ -1903,7 +1875,7 @@ jobs:
- name: Clone
id: checkout
- uses: actions/checkout@v4
+ uses: actions/checkout@v6
- name: Setup ccache
run: |
@@ -1928,7 +1900,7 @@ jobs:
if: ${{ matrix.sanitizer != 'THREAD' }}
run: |
cmake -B build \
- -DLLAMA_CURL=OFF \
+ -DLLAMA_OPENSSL=OFF \
-DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \
-DGGML_OPENMP=ON \
-DLLAMA_BUILD_EXAMPLES=ON \
@@ -1947,7 +1919,7 @@ jobs:
if: ${{ matrix.sanitizer == 'THREAD' }}
run: |
cmake -B build \
- -DLLAMA_CURL=OFF \
+ -DLLAMA_OPENSSL=OFF \
-DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \
-DGGML_OPENMP=OFF \
-DLLAMA_BUILD_EXAMPLES=ON \
@@ -1997,7 +1969,7 @@ jobs:
- name: Clone
id: checkout
- uses: actions/checkout@v4
+ uses: actions/checkout@v6
- name: Setup ccache
run: |
@@ -2018,7 +1990,7 @@ jobs:
id: cmake_build
run: |
cmake -B build \
- -DLLAMA_CURL=OFF \
+ -DLLAMA_OPENSSL=OFF \
-DCMAKE_BUILD_TYPE=Release \
-DGGML_OPENMP=OFF \
-DLLAMA_BUILD_EXAMPLES=ON \
@@ -2071,7 +2043,7 @@ jobs:
- name: Clone
id: checkout
- uses: actions/checkout@v4
+ uses: actions/checkout@v6
- name: Setup ccache
run: |
@@ -2092,8 +2064,6 @@ jobs:
id: cmake_build
run: |
cmake -B build \
- -DLLAMA_CURL=OFF \
- -DLLAMA_OPENSSL=ON \
-DCMAKE_BUILD_TYPE=Release \
-DGGML_OPENMP=OFF \
-DLLAMA_BUILD_EXAMPLES=ON \
@@ -2119,7 +2089,7 @@ jobs:
steps:
- name: Clone
id: checkout
- uses: actions/checkout@v4
+ uses: actions/checkout@v6
- name: Dependencies
id: depends
@@ -2129,7 +2099,6 @@ jobs:
sudo DEBIAN_FRONTEND=noninteractive NEEDRESTART_MODE=a \
apt-get install -y \
build-essential \
- libcurl4-openssl-dev \
python3-venv \
gpg \
wget \
diff --git a/.github/workflows/check-vendor.yml b/.github/workflows/check-vendor.yml
index 7b3016079c..b9e8ac7658 100644
--- a/.github/workflows/check-vendor.yml
+++ b/.github/workflows/check-vendor.yml
@@ -23,12 +23,12 @@ jobs:
steps:
- name: Checkout
- uses: actions/checkout@v4
+ uses: actions/checkout@v6
with:
fetch-depth: 0
- name: Setup Python
- uses: actions/setup-python@v4
+ uses: actions/setup-python@v6
with:
python-version: '3.x'
diff --git a/.github/workflows/close-issue.yml b/.github/workflows/close-issue.yml
index cbfc4990db..8fb5310d0b 100644
--- a/.github/workflows/close-issue.yml
+++ b/.github/workflows/close-issue.yml
@@ -15,7 +15,7 @@ jobs:
issues: write
pull-requests: write
steps:
- - uses: actions/stale@v5
+ - uses: actions/stale@v10
with:
exempt-issue-labels: "refactoring,help wanted,good first issue,research 🔬,bug,roadmap"
days-before-issue-stale: 30
diff --git a/.github/workflows/copilot-setup-steps.yml b/.github/workflows/copilot-setup-steps.yml
index 3645e30378..fc3cec5ea1 100644
--- a/.github/workflows/copilot-setup-steps.yml
+++ b/.github/workflows/copilot-setup-steps.yml
@@ -26,7 +26,7 @@ jobs:
# If you do not check out your code, Copilot will do this for you.
steps:
- name: Checkout code
- uses: actions/checkout@v4
+ uses: actions/checkout@v6
- name: ccache
uses: ggml-org/ccache-action@v1.2.16
@@ -38,14 +38,14 @@ jobs:
id: depends
run: |
sudo apt-get update
- sudo apt-get install build-essential libcurl4-openssl-dev
+ sudo apt-get install build-essential libssl-dev
# Install git-clang-format script for formatting only changed code
wget -O /tmp/git-clang-format https://raw.githubusercontent.com/llvm/llvm-project/release/18.x/clang/tools/clang-format/git-clang-format
sudo cp /tmp/git-clang-format /usr/local/bin/git-clang-format
sudo chmod +x /usr/local/bin/git-clang-format
- name: Set up Python
- uses: actions/setup-python@v5
+ uses: actions/setup-python@v6
with:
python-version: '3.11'
diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml
index d9fe0686d3..8062177ba5 100644
--- a/.github/workflows/docker.yml
+++ b/.github/workflows/docker.yml
@@ -49,7 +49,7 @@ jobs:
- { tag: "rocm", dockerfile: ".devops/rocm.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: true, runs_on: "ubuntu-22.04" }
steps:
- name: Check out the repo
- uses: actions/checkout@v4
+ uses: actions/checkout@v6
with:
fetch-depth: 0 # preserve git history, so we can determine the build number
@@ -63,7 +63,7 @@ jobs:
uses: docker/setup-buildx-action@v3
- name: Log in to Docker Hub
- uses: docker/login-action@v2
+ uses: docker/login-action@v3
with:
registry: ghcr.io
username: ${{ github.repository_owner }}
@@ -208,7 +208,7 @@ jobs:
steps:
- name: Clone
id: checkout
- uses: actions/checkout@v4
+ uses: actions/checkout@v6
with:
fetch-depth: 0
diff --git a/.github/workflows/editorconfig.yml b/.github/workflows/editorconfig.yml
index f02b7c2194..a5cd590017 100644
--- a/.github/workflows/editorconfig.yml
+++ b/.github/workflows/editorconfig.yml
@@ -22,7 +22,7 @@ jobs:
editorconfig:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v4
+ - uses: actions/checkout@v6
- uses: editorconfig-checker/action-editorconfig-checker@v2
with:
version: v3.0.3
diff --git a/.github/workflows/gguf-publish.yml b/.github/workflows/gguf-publish.yml
index 3ca4d30581..5bdab0f157 100644
--- a/.github/workflows/gguf-publish.yml
+++ b/.github/workflows/gguf-publish.yml
@@ -24,9 +24,9 @@ jobs:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v4
+ - uses: actions/checkout@v6
- name: Set up Python
- uses: actions/setup-python@v5
+ uses: actions/setup-python@v6
with:
python-version: '3.9.x'
- name: Install dependencies
diff --git a/.github/workflows/labeler.yml b/.github/workflows/labeler.yml
index 0b0f300aa4..42f00c0cd8 100644
--- a/.github/workflows/labeler.yml
+++ b/.github/workflows/labeler.yml
@@ -9,9 +9,9 @@ jobs:
pull-requests: write
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v4
+ - uses: actions/checkout@v6
with:
repository: "ggml-org/llama.cpp"
- - uses: actions/labeler@v5
+ - uses: actions/labeler@v6
with:
configuration-path: '.github/labeler.yml'
diff --git a/.github/workflows/pre-tokenizer-hashes.yml b/.github/workflows/pre-tokenizer-hashes.yml
index dff998e239..8120df0e36 100644
--- a/.github/workflows/pre-tokenizer-hashes.yml
+++ b/.github/workflows/pre-tokenizer-hashes.yml
@@ -16,10 +16,10 @@ jobs:
steps:
- name: Checkout repository
- uses: actions/checkout@v4
+ uses: actions/checkout@v6
- name: Set up Python
- uses: actions/setup-python@v5
+ uses: actions/setup-python@v6
with:
python-version: '3.11'
diff --git a/.github/workflows/python-check-requirements.yml b/.github/workflows/python-check-requirements.yml
index 46e80aecd0..08cdcb9d01 100644
--- a/.github/workflows/python-check-requirements.yml
+++ b/.github/workflows/python-check-requirements.yml
@@ -24,9 +24,9 @@ jobs:
name: check-requirements
steps:
- name: Check out source repository
- uses: actions/checkout@v4
+ uses: actions/checkout@v6
- name: Set up Python environment
- uses: actions/setup-python@v5
+ uses: actions/setup-python@v6
with:
python-version: "3.11"
- name: Run check-requirements.sh script
diff --git a/.github/workflows/python-lint.yml b/.github/workflows/python-lint.yml
index ddfdf73b8f..91dc4d78a4 100644
--- a/.github/workflows/python-lint.yml
+++ b/.github/workflows/python-lint.yml
@@ -19,9 +19,9 @@ jobs:
name: Lint
steps:
- name: Check out source repository
- uses: actions/checkout@v4
+ uses: actions/checkout@v6
- name: Set up Python environment
- uses: actions/setup-python@v5
+ uses: actions/setup-python@v6
with:
python-version: "3.11"
- name: flake8 Lint
diff --git a/.github/workflows/python-type-check.yml b/.github/workflows/python-type-check.yml
index 373bb60102..54d5fab5ba 100644
--- a/.github/workflows/python-type-check.yml
+++ b/.github/workflows/python-type-check.yml
@@ -24,9 +24,9 @@ jobs:
name: pyright type-check
steps:
- name: Check out source repository
- uses: actions/checkout@v4
+ uses: actions/checkout@v6
- name: Set up Python environment
- uses: actions/setup-python@v5
+ uses: actions/setup-python@v6
with:
python-version: "3.11"
- name: Install Python dependencies
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 35e1fae697..1914c08489 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -27,7 +27,7 @@ jobs:
steps:
- name: Clone
id: checkout
- uses: actions/checkout@v4
+ uses: actions/checkout@v6
with:
fetch-depth: 0
@@ -45,7 +45,6 @@ jobs:
-DCMAKE_INSTALL_RPATH='@loader_path' \
-DCMAKE_BUILD_WITH_INSTALL_RPATH=ON \
-DLLAMA_FATAL_WARNINGS=ON \
- -DLLAMA_CURL=OFF \
-DLLAMA_BUILD_BORINGSSL=ON \
-DGGML_METAL_USE_BF16=ON \
-DGGML_METAL_EMBED_LIBRARY=ON \
@@ -64,7 +63,7 @@ jobs:
tar -czvf llama-${{ steps.tag.outputs.name }}-bin-macos-arm64.tar.gz -s ",./,llama-${{ steps.tag.outputs.name }}/," -C ./build/bin .
- name: Upload artifacts
- uses: actions/upload-artifact@v4
+ uses: actions/upload-artifact@v6
with:
path: llama-${{ steps.tag.outputs.name }}-bin-macos-arm64.tar.gz
name: llama-bin-macos-arm64.tar.gz
@@ -75,7 +74,7 @@ jobs:
steps:
- name: Clone
id: checkout
- uses: actions/checkout@v4
+ uses: actions/checkout@v6
with:
fetch-depth: 0
@@ -95,7 +94,6 @@ jobs:
-DCMAKE_INSTALL_RPATH='@loader_path' \
-DCMAKE_BUILD_WITH_INSTALL_RPATH=ON \
-DLLAMA_FATAL_WARNINGS=ON \
- -DLLAMA_CURL=OFF \
-DLLAMA_BUILD_BORINGSSL=ON \
-DGGML_METAL=OFF \
-DGGML_RPC=ON \
@@ -113,7 +111,7 @@ jobs:
tar -czvf llama-${{ steps.tag.outputs.name }}-bin-macos-x64.tar.gz -s ",./,llama-${{ steps.tag.outputs.name }}/," -C ./build/bin .
- name: Upload artifacts
- uses: actions/upload-artifact@v4
+ uses: actions/upload-artifact@v6
with:
path: llama-${{ steps.tag.outputs.name }}-bin-macos-x64.tar.gz
name: llama-bin-macos-x64.tar.gz
@@ -135,7 +133,7 @@ jobs:
steps:
- name: Clone
id: checkout
- uses: actions/checkout@v4
+ uses: actions/checkout@v6
with:
fetch-depth: 0
@@ -161,8 +159,6 @@ jobs:
-DGGML_NATIVE=OFF \
-DGGML_CPU_ALL_VARIANTS=ON \
-DLLAMA_FATAL_WARNINGS=ON \
- -DLLAMA_CURL=OFF \
- -DLLAMA_OPENSSL=ON \
${{ env.CMAKE_ARGS }}
cmake --build build --config Release -j $(nproc)
@@ -177,7 +173,7 @@ jobs:
tar -czvf llama-${{ steps.tag.outputs.name }}-bin-ubuntu-${{ matrix.build }}.tar.gz --transform "s,./,llama-${{ steps.tag.outputs.name }}/," -C ./build/bin .
- name: Upload artifacts
- uses: actions/upload-artifact@v4
+ uses: actions/upload-artifact@v6
with:
path: llama-${{ steps.tag.outputs.name }}-bin-ubuntu-${{ matrix.build }}.tar.gz
name: llama-bin-ubuntu-${{ matrix.build }}.tar.gz
@@ -188,7 +184,7 @@ jobs:
steps:
- name: Clone
id: checkout
- uses: actions/checkout@v4
+ uses: actions/checkout@v6
with:
fetch-depth: 0
@@ -212,8 +208,6 @@ jobs:
cmake -B build \
-DCMAKE_INSTALL_RPATH='$ORIGIN' \
-DCMAKE_BUILD_WITH_INSTALL_RPATH=ON \
- -DLLAMA_CURL=OFF \
- -DLLAMA_OPENSSL=ON \
-DGGML_BACKEND_DL=ON \
-DGGML_NATIVE=OFF \
-DGGML_CPU_ALL_VARIANTS=ON \
@@ -232,7 +226,7 @@ jobs:
tar -czvf llama-${{ steps.tag.outputs.name }}-bin-ubuntu-vulkan-x64.tar.gz --transform "s,./,llama-${{ steps.tag.outputs.name }}/," -C ./build/bin .
- name: Upload artifacts
- uses: actions/upload-artifact@v4
+ uses: actions/upload-artifact@v6
with:
path: llama-${{ steps.tag.outputs.name }}-bin-ubuntu-vulkan-x64.tar.gz
name: llama-bin-ubuntu-vulkan-x64.tar.gz
@@ -248,7 +242,7 @@ jobs:
steps:
- name: Clone
- uses: actions/checkout@v4
+ uses: actions/checkout@v6
with:
fetch-depth: 0
@@ -269,7 +263,6 @@ jobs:
call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" ${{ matrix.arch == 'x64' && 'x64' || 'amd64_arm64' }}
cmake -S . -B build -G "Ninja Multi-Config" ^
-D CMAKE_TOOLCHAIN_FILE=cmake/${{ matrix.arch }}-windows-llvm.cmake ^
- -DLLAMA_CURL=OFF ^
-DLLAMA_BUILD_BORINGSSL=ON ^
-DGGML_NATIVE=OFF ^
-DGGML_BACKEND_DL=ON ^
@@ -285,7 +278,7 @@ jobs:
7z a -snl llama-bin-win-cpu-${{ matrix.arch }}.zip .\build\bin\Release\*
- name: Upload artifacts
- uses: actions/upload-artifact@v4
+ uses: actions/upload-artifact@v6
with:
path: llama-bin-win-cpu-${{ matrix.arch }}.zip
name: llama-bin-win-cpu-${{ matrix.arch }}.zip
@@ -312,7 +305,7 @@ jobs:
steps:
- name: Clone
id: checkout
- uses: actions/checkout@v4
+ uses: actions/checkout@v6
- name: ccache
uses: ggml-org/ccache-action@v1.2.16
@@ -358,7 +351,7 @@ jobs:
- name: Build
id: cmake_build
run: |
- cmake -S . -B build ${{ matrix.defines }} -DGGML_NATIVE=OFF -DGGML_CPU=OFF -DGGML_BACKEND_DL=ON -DLLAMA_CURL=OFF
+ cmake -S . -B build ${{ matrix.defines }} -DGGML_NATIVE=OFF -DGGML_CPU=OFF -DGGML_BACKEND_DL=ON -DLLAMA_BUILD_BORINGSSL=ON
cmake --build build --config Release --target ${{ matrix.target }}
- name: Pack artifacts
@@ -367,7 +360,7 @@ jobs:
7z a -snl llama-bin-win-${{ matrix.backend }}-${{ matrix.arch }}.zip .\build\bin\Release\${{ matrix.target }}.dll
- name: Upload artifacts
- uses: actions/upload-artifact@v4
+ uses: actions/upload-artifact@v6
with:
path: llama-bin-win-${{ matrix.backend }}-${{ matrix.arch }}.zip
name: llama-bin-win-${{ matrix.backend }}-${{ matrix.arch }}.zip
@@ -382,7 +375,7 @@ jobs:
steps:
- name: Clone
id: checkout
- uses: actions/checkout@v4
+ uses: actions/checkout@v6
- name: Install ccache
uses: ggml-org/ccache-action@v1.2.16
@@ -412,7 +405,7 @@ jobs:
-DGGML_NATIVE=OFF ^
-DGGML_CPU=OFF ^
-DGGML_CUDA=ON ^
- -DLLAMA_CURL=OFF ^
+ -DLLAMA_BUILD_BORINGSSL=ON ^
-DGGML_CUDA_CUB_3DOT2=ON
set /A NINJA_JOBS=%NUMBER_OF_PROCESSORS%-1
cmake --build build --config Release -j %NINJA_JOBS% --target ggml-cuda
@@ -423,7 +416,7 @@ jobs:
7z a -snl llama-bin-win-cuda-${{ matrix.cuda }}-x64.zip .\build\bin\Release\ggml-cuda.dll
- name: Upload artifacts
- uses: actions/upload-artifact@v4
+ uses: actions/upload-artifact@v6
with:
path: llama-bin-win-cuda-${{ matrix.cuda }}-x64.zip
name: llama-bin-win-cuda-${{ matrix.cuda }}-x64.zip
@@ -438,7 +431,7 @@ jobs:
7z a cudart-llama-bin-win-cuda-${{ matrix.cuda }}-x64.zip $dst\*
- name: Upload Cuda runtime
- uses: actions/upload-artifact@v4
+ uses: actions/upload-artifact@v6
with:
path: cudart-llama-bin-win-cuda-${{ matrix.cuda }}-x64.zip
name: cudart-llama-bin-win-cuda-${{ matrix.cuda }}-x64.zip
@@ -458,7 +451,7 @@ jobs:
steps:
- name: Clone
id: checkout
- uses: actions/checkout@v4
+ uses: actions/checkout@v6
- name: ccache
uses: ggml-org/ccache-action@v1.2.16
@@ -481,7 +474,7 @@ jobs:
-DCMAKE_BUILD_TYPE=Release ^
-DGGML_BACKEND_DL=ON -DBUILD_SHARED_LIBS=ON ^
-DGGML_CPU=OFF -DGGML_SYCL=ON ^
- -DLLAMA_CURL=OFF
+ -DLLAMA_BUILD_BORINGSSL=ON
cmake --build build --target ggml-sycl -j
- name: Build the release package
@@ -518,7 +511,7 @@ jobs:
7z a -snl llama-bin-win-sycl-x64.zip ./build/bin/*
- name: Upload the release package
- uses: actions/upload-artifact@v4
+ uses: actions/upload-artifact@v6
with:
path: llama-bin-win-sycl-x64.zip
name: llama-bin-win-sycl-x64.zip
@@ -538,7 +531,7 @@ jobs:
steps:
- name: Clone
id: checkout
- uses: actions/checkout@v4
+ uses: actions/checkout@v6
- name: Grab rocWMMA package
id: grab_rocwmma
@@ -549,7 +542,7 @@ jobs:
- name: Cache ROCm Installation
id: cache-rocm
- uses: actions/cache@v4
+ uses: actions/cache@v5
with:
path: C:\Program Files\AMD\ROCm
key: rocm-${{ env.HIPSDK_INSTALLER_VERSION }}-${{ runner.os }}
@@ -608,7 +601,7 @@ jobs:
-DAMDGPU_TARGETS="${{ matrix.gpu_targets }}" `
-DGGML_HIP_ROCWMMA_FATTN=ON `
-DGGML_HIP=ON `
- -DLLAMA_CURL=OFF
+ -DLLAMA_BUILD_BORINGSSL=ON
cmake --build build --target ggml-hip -j ${env:NUMBER_OF_PROCESSORS}
md "build\bin\rocblas\library\"
md "build\bin\hipblaslt\library"
@@ -624,7 +617,7 @@ jobs:
7z a -snl llama-bin-win-hip-${{ matrix.name }}-x64.zip .\build\bin\*
- name: Upload artifacts
- uses: actions/upload-artifact@v4
+ uses: actions/upload-artifact@v6
with:
path: llama-bin-win-hip-${{ matrix.name }}-x64.zip
name: llama-bin-win-hip-${{ matrix.name }}-x64.zip
@@ -634,7 +627,7 @@ jobs:
steps:
- name: Checkout code
- uses: actions/checkout@v4
+ uses: actions/checkout@v6
with:
fetch-depth: 0
@@ -649,7 +642,7 @@ jobs:
cmake -B build -G Xcode \
-DGGML_METAL_USE_BF16=ON \
-DGGML_METAL_EMBED_LIBRARY=ON \
- -DLLAMA_CURL=OFF \
+ -DLLAMA_OPENSSL=OFF \
-DLLAMA_BUILD_EXAMPLES=OFF \
-DLLAMA_BUILD_TOOLS=OFF \
-DLLAMA_BUILD_TESTS=OFF \
@@ -679,7 +672,7 @@ jobs:
zip -r -y llama-${{ steps.tag.outputs.name }}-xcframework.zip build-apple/llama.xcframework
- name: Upload artifacts
- uses: actions/upload-artifact@v4
+ uses: actions/upload-artifact@v6
with:
path: llama-${{ steps.tag.outputs.name }}-xcframework.zip
name: llama-${{ steps.tag.outputs.name }}-xcframework.zip
@@ -688,13 +681,29 @@ jobs:
openEuler-cann:
strategy:
matrix:
- arch: [x86, aarch64]
- chip_type: ['910b', '310p']
- build: ['Release']
+ include:
+ # 910b with aclgraph (both architectures)
+ - arch: x86
+ chip_type: '910b'
+ build: 'Release'
+ use_acl_graph: 'on'
+ - arch: aarch64
+ chip_type: '910b'
+ build: 'Release'
+ use_acl_graph: 'on'
+ # 310p without aclgraph (both architectures)
+ - arch: x86
+ chip_type: '310p'
+ build: 'Release'
+ use_acl_graph: 'off'
+ - arch: aarch64
+ chip_type: '310p'
+ build: 'Release'
+ use_acl_graph: 'off'
runs-on: ${{ matrix.arch == 'aarch64' && 'ubuntu-24.04-arm' || 'ubuntu-24.04' }}
steps:
- name: Checkout
- uses: actions/checkout@v4
+ uses: actions/checkout@v6
with:
fetch-depth: 0
@@ -716,6 +725,7 @@ jobs:
env:
BUILD_TYPE: ${{ matrix.build }}
SOC_TYPE: ascend${{ matrix.chip_type }}
+ USE_ACL_GRAPH: ${{ matrix.use_acl_graph }}
run: |
HOST_UID=$(id -u)
HOST_GID=$(id -g)
@@ -725,6 +735,7 @@ jobs:
-w /workspace \
-e SOC_TYPE=${SOC_TYPE} \
-e BUILD_TYPE=${BUILD_TYPE} \
+ -e USE_ACL_GRAPH=${USE_ACL_GRAPH} \
"${{ steps.cann-image.outputs.image }}" \
bash -lc '
set -e
@@ -734,10 +745,9 @@ jobs:
export LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:${ASCEND_TOOLKIT_HOME}/$(uname -m)-linux/devlib/:${LD_LIBRARY_PATH}
cmake -S . -B build \
-DCMAKE_BUILD_TYPE=${BUILD_TYPE} \
- -DLLAMA_CURL=OFF \
- -DLLAMA_OPENSSL=ON \
-DGGML_CANN=on \
- -DSOC_TYPE=${SOC_TYPE}
+ -DSOC_TYPE=${SOC_TYPE} \
+ -DUSE_ACL_GRAPH=${USE_ACL_GRAPH}
cmake --build build -j $(nproc)
chown -R '"${HOST_UID}"':'"${HOST_GID}"' /workspace/build
@@ -750,13 +760,13 @@ jobs:
- name: Pack artifacts
run: |
cp LICENSE ./build/bin/
- tar -czvf llama-${{ steps.tag.outputs.name }}-bin-${{ matrix.chip_type }}-openEuler-${{ matrix.arch }}.tar.gz --transform "s,./,llama-${{ steps.tag.outputs.name }}/," -C ./build/bin .
+ tar -czvf llama-${{ steps.tag.outputs.name }}-bin-${{ matrix.chip_type }}-openEuler-${{ matrix.arch }}${{ matrix.use_acl_graph == 'on' && '-aclgraph' || '' }}.tar.gz --transform "s,./,llama-${{ steps.tag.outputs.name }}/," -C ./build/bin .
- name: Upload artifacts
- uses: actions/upload-artifact@v4
+ uses: actions/upload-artifact@v6
with:
- path: llama-${{ steps.tag.outputs.name }}-bin-${{ matrix.chip_type }}-openEuler-${{ matrix.arch }}.tar.gz
- name: llama-bin-${{ matrix.chip_type }}-openEuler-${{ matrix.arch }}.tar.gz
+ path: llama-${{ steps.tag.outputs.name }}-bin-${{ matrix.chip_type }}-openEuler-${{ matrix.arch }}${{ matrix.use_acl_graph == 'on' && '-aclgraph' || '' }}.tar.gz
+ name: llama-bin-${{ matrix.chip_type }}-openEuler-${{ matrix.arch }}${{ matrix.use_acl_graph == 'on' && '-aclgraph' || '' }}.tar.gz
release:
if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
@@ -784,7 +794,7 @@ jobs:
steps:
- name: Clone
id: checkout
- uses: actions/checkout@v4
+ uses: actions/checkout@v6
with:
fetch-depth: 0
@@ -794,7 +804,7 @@ jobs:
- name: Download artifacts
id: download-artifact
- uses: actions/download-artifact@v4
+ uses: actions/download-artifact@v7
with:
path: ./artifact
merge-multiple: true
@@ -871,13 +881,13 @@ jobs:
**openEuler:**
- [openEuler x86 (310p)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-310p-openEuler-x86.tar.gz)
- - [openEuler x86 (910b)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-910b-openEuler-x86.tar.gz)
+ - [openEuler x86 (910b, ACL Graph)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-910b-openEuler-x86-aclgraph.tar.gz)
- [openEuler aarch64 (310p)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-310p-openEuler-aarch64.tar.gz)
- - [openEuler aarch64 (910b)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-910b-openEuler-aarch64.tar.gz)
+ - [openEuler aarch64 (910b, ACL Graph)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-910b-openEuler-aarch64-aclgraph.tar.gz)
- name: Upload release
id: upload_release
- uses: actions/github-script@v3
+ uses: actions/github-script@v8
with:
github-token: ${{secrets.GITHUB_TOKEN}}
script: |
@@ -887,7 +897,7 @@ jobs:
for (let file of await fs.readdirSync('./release')) {
if (path.extname(file) === '.zip' || file.endsWith('.tar.gz')) {
console.log('uploadReleaseAsset', file);
- await github.repos.uploadReleaseAsset({
+ await github.rest.repos.uploadReleaseAsset({
owner: context.repo.owner,
repo: context.repo.repo,
release_id: release_id,
diff --git a/.github/workflows/server-webui.yml b/.github/workflows/server-webui.yml
index 544c4ad408..6d1b617371 100644
--- a/.github/workflows/server-webui.yml
+++ b/.github/workflows/server-webui.yml
@@ -37,14 +37,14 @@ jobs:
continue-on-error: true
steps:
- name: Checkout code
- uses: actions/checkout@v4
+ uses: actions/checkout@v6
with:
fetch-depth: 0
ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }}
- name: Setup Node.js
id: node
- uses: actions/setup-node@v4
+ uses: actions/setup-node@v6
with:
node-version: "22"
cache: "npm"
@@ -131,14 +131,14 @@ jobs:
- name: Clone
id: checkout
- uses: actions/checkout@v4
+ uses: actions/checkout@v6
with:
fetch-depth: 0
ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }}
- name: Python setup
id: setup_python
- uses: actions/setup-python@v5
+ uses: actions/setup-python@v6
with:
python-version: '3.11'
@@ -148,7 +148,7 @@ jobs:
pip install -r tools/server/tests/requirements.txt
- name: Setup Node.js for WebUI
- uses: actions/setup-node@v4
+ uses: actions/setup-node@v6
with:
node-version: "22"
cache: "npm"
@@ -168,8 +168,6 @@ jobs:
run: |
cmake -B build \
-DGGML_NATIVE=OFF \
- -DLLAMA_CURL=OFF \
- -DLLAMA_OPENSSL=ON \
-DLLAMA_BUILD_SERVER=ON \
-DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \
-DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON \
@@ -182,8 +180,6 @@ jobs:
run: |
cmake -B build \
-DGGML_NATIVE=OFF \
- -DLLAMA_CURL=OFF \
- -DLLAMA_OPENSSL=ON \
-DLLAMA_BUILD_SERVER=ON \
-DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \
-DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON ;
@@ -195,8 +191,6 @@ jobs:
run: |
cmake -B build \
-DGGML_NATIVE=OFF \
- -DLLAMA_CURL=OFF \
- -DLLAMA_OPENSSL=ON \
-DLLAMA_BUILD_SERVER=ON \
-DCMAKE_BUILD_TYPE=${{ matrix.build_type }} ;
cmake --build build --config ${{ matrix.build_type }} -j $(nproc) --target llama-server
diff --git a/.github/workflows/server.yml b/.github/workflows/server.yml
index 5694feb2c9..9f1ef48c82 100644
--- a/.github/workflows/server.yml
+++ b/.github/workflows/server.yml
@@ -64,7 +64,7 @@ jobs:
- name: Clone
id: checkout
- uses: actions/checkout@v4
+ uses: actions/checkout@v6
with:
fetch-depth: 0
ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }}
@@ -72,12 +72,12 @@ jobs:
- name: Build
id: cmake_build
run: |
- cmake -B build -DLLAMA_CURL=OFF -DLLAMA_BUILD_BORINGSSL=ON
+ cmake -B build -DLLAMA_BUILD_BORINGSSL=ON -DGGML_SCHED_NO_REALLOC=ON
cmake --build build --config ${{ matrix.build_type }} -j ${env:NUMBER_OF_PROCESSORS} --target llama-server
- name: Python setup
id: setup_python
- uses: actions/setup-python@v5
+ uses: actions/setup-python@v6
with:
python-version: '3.11'
@@ -100,7 +100,7 @@ jobs:
steps:
- name: Clone
id: checkout
- uses: actions/checkout@v4
+ uses: actions/checkout@v6
with:
fetch-depth: 0
ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }}
@@ -108,12 +108,12 @@ jobs:
- name: Build
id: cmake_build
run: |
- cmake -B build -DLLAMA_CURL=OFF -DLLAMA_BUILD_BORINGSSL=ON
+ cmake -B build -DLLAMA_BUILD_BORINGSSL=ON -DGGML_SCHED_NO_REALLOC=ON
cmake --build build --config Release -j ${env:NUMBER_OF_PROCESSORS} --target llama-server
- name: Python setup
id: setup_python
- uses: actions/setup-python@v5
+ uses: actions/setup-python@v6
with:
python-version: '3.11'
diff --git a/.github/workflows/update-ops-docs.yml b/.github/workflows/update-ops-docs.yml
index d5e264b34f..40447db4e4 100644
--- a/.github/workflows/update-ops-docs.yml
+++ b/.github/workflows/update-ops-docs.yml
@@ -18,10 +18,10 @@ jobs:
steps:
- name: Checkout repository
- uses: actions/checkout@v4
+ uses: actions/checkout@v6
- name: Set up Python
- uses: actions/setup-python@v5
+ uses: actions/setup-python@v6
with:
python-version: '3.x'
diff --git a/.github/workflows/winget.yml b/.github/workflows/winget.yml
index d3d9be23ce..7506091647 100644
--- a/.github/workflows/winget.yml
+++ b/.github/workflows/winget.yml
@@ -21,7 +21,7 @@ jobs:
- name: Find latest release
id: find_latest_release
- uses: actions/github-script@v6
+ uses: actions/github-script@v8
with:
script: |
const { data: releases } = await github.rest.repos.listReleases({
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 44c2166210..d24fa080ae 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -111,11 +111,16 @@ option(LLAMA_BUILD_SERVER "llama: build server example" ${LLAMA_STANDALONE})
option(LLAMA_TOOLS_INSTALL "llama: install tools" ${LLAMA_TOOLS_INSTALL_DEFAULT})
# 3rd party libs
-option(LLAMA_CURL "llama: use libcurl to download model from an URL" ON)
-option(LLAMA_HTTPLIB "llama: if libcurl is disabled, use httplib to download model from an URL" ON)
-option(LLAMA_OPENSSL "llama: use openssl to support HTTPS" OFF)
+option(LLAMA_HTTPLIB "llama: use httplib for model downloading" ON)
+option(LLAMA_OPENSSL "llama: use openssl to support HTTPS" ON)
option(LLAMA_LLGUIDANCE "llama-common: include LLGuidance library for structured output in common utils" OFF)
+# deprecated options
+option(LLAMA_CURL "llama: use libcurl to download model from an URL" OFF)
+if (LLAMA_CURL)
+ message(WARNING "LLAMA_CURL option is deprecated and will be ignored")
+endif()
+
# Required for relocatable CMake package
include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/build-info.cmake)
include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/common.cmake)
@@ -212,11 +217,6 @@ add_subdirectory(src)
# utils, programs, examples and tests
#
-if (NOT LLAMA_BUILD_COMMON)
- message(STATUS "LLAMA_BUILD_COMMON is OFF, disabling LLAMA_CURL")
- set(LLAMA_CURL OFF)
-endif()
-
if (LLAMA_BUILD_COMMON)
add_subdirectory(common)
if (LLAMA_HTTPLIB)
diff --git a/CODEOWNERS b/CODEOWNERS
index 750096d9a1..55f5011dfa 100644
--- a/CODEOWNERS
+++ b/CODEOWNERS
@@ -15,6 +15,7 @@
/common/common.* @ggerganov
/common/console.* @ggerganov
/common/http.* @angt
+/common/jinja/ @ngxson @CISC @aldehir
/common/llguidance.* @ggerganov
/common/log.* @ggerganov
/common/peg-parser.* @aldehir
diff --git a/README.md b/README.md
index 0d9d1ef6b4..91a8f25d1c 100644
--- a/README.md
+++ b/README.md
@@ -585,7 +585,5 @@ $ echo "source ~/.llama-completion.bash" >> ~/.bashrc
- [yhirose/cpp-httplib](https://github.com/yhirose/cpp-httplib) - Single-header HTTP server, used by `llama-server` - MIT license
- [stb-image](https://github.com/nothings/stb) - Single-header image format decoder, used by multimodal subsystem - Public domain
- [nlohmann/json](https://github.com/nlohmann/json) - Single-header JSON library, used by various tools/examples - MIT License
-- [minja](https://github.com/google/minja) - Minimal Jinja parser in C++, used by various tools/examples - MIT License
-- [curl](https://curl.se/) - Client-side URL transfer library, used by various tools/examples - [CURL License](https://curl.se/docs/copyright.html)
- [miniaudio.h](https://github.com/mackron/miniaudio) - Single-header audio format decoder, used by multimodal subsystem - Public domain
- [subprocess.h](https://github.com/sheredom/subprocess.h) - Single-header process launching solution for C and C++ - Public domain
diff --git a/build-xcframework.sh b/build-xcframework.sh
index 81280f7497..0eec871139 100755
--- a/build-xcframework.sh
+++ b/build-xcframework.sh
@@ -414,7 +414,7 @@ cmake -B build-ios-sim -G Xcode \
-DCMAKE_XCODE_ATTRIBUTE_SUPPORTED_PLATFORMS=iphonesimulator \
-DCMAKE_C_FLAGS="${COMMON_C_FLAGS}" \
-DCMAKE_CXX_FLAGS="${COMMON_CXX_FLAGS}" \
- -DLLAMA_CURL=OFF \
+ -DLLAMA_OPENSSL=OFF \
-S .
cmake --build build-ios-sim --config Release -- -quiet
@@ -428,7 +428,7 @@ cmake -B build-ios-device -G Xcode \
-DCMAKE_XCODE_ATTRIBUTE_SUPPORTED_PLATFORMS=iphoneos \
-DCMAKE_C_FLAGS="${COMMON_C_FLAGS}" \
-DCMAKE_CXX_FLAGS="${COMMON_CXX_FLAGS}" \
- -DLLAMA_CURL=OFF \
+ -DLLAMA_OPENSSL=OFF \
-S .
cmake --build build-ios-device --config Release -- -quiet
@@ -439,7 +439,7 @@ cmake -B build-macos -G Xcode \
-DCMAKE_OSX_ARCHITECTURES="arm64;x86_64" \
-DCMAKE_C_FLAGS="${COMMON_C_FLAGS}" \
-DCMAKE_CXX_FLAGS="${COMMON_CXX_FLAGS}" \
- -DLLAMA_CURL=OFF \
+ -DLLAMA_OPENSSL=OFF \
-S .
cmake --build build-macos --config Release -- -quiet
@@ -453,7 +453,7 @@ cmake -B build-visionos -G Xcode \
-DCMAKE_XCODE_ATTRIBUTE_SUPPORTED_PLATFORMS=xros \
-DCMAKE_C_FLAGS="-D_XOPEN_SOURCE=700 ${COMMON_C_FLAGS}" \
-DCMAKE_CXX_FLAGS="-D_XOPEN_SOURCE=700 ${COMMON_CXX_FLAGS}" \
- -DLLAMA_CURL=OFF \
+ -DLLAMA_OPENSSL=OFF \
-DLLAMA_HTTPLIB=OFF \
-DLLAMA_BUILD_SERVER=OFF \
-S .
@@ -469,7 +469,7 @@ cmake -B build-visionos-sim -G Xcode \
-DCMAKE_XCODE_ATTRIBUTE_SUPPORTED_PLATFORMS=xrsimulator \
-DCMAKE_C_FLAGS="-D_XOPEN_SOURCE=700 ${COMMON_C_FLAGS}" \
-DCMAKE_CXX_FLAGS="-D_XOPEN_SOURCE=700 ${COMMON_CXX_FLAGS}" \
- -DLLAMA_CURL=OFF \
+ -DLLAMA_OPENSSL=OFF \
-DLLAMA_HTTPLIB=OFF \
-DLLAMA_BUILD_SERVER=OFF \
-S .
@@ -487,7 +487,7 @@ cmake -B build-tvos-sim -G Xcode \
-DCMAKE_XCODE_ATTRIBUTE_SUPPORTED_PLATFORMS=appletvsimulator \
-DCMAKE_C_FLAGS="${COMMON_C_FLAGS}" \
-DCMAKE_CXX_FLAGS="${COMMON_CXX_FLAGS}" \
- -DLLAMA_CURL=OFF \
+ -DLLAMA_OPENSSL=OFF \
-S .
cmake --build build-tvos-sim --config Release -- -quiet
@@ -502,7 +502,7 @@ cmake -B build-tvos-device -G Xcode \
-DCMAKE_XCODE_ATTRIBUTE_SUPPORTED_PLATFORMS=appletvos \
-DCMAKE_C_FLAGS="${COMMON_C_FLAGS}" \
-DCMAKE_CXX_FLAGS="${COMMON_CXX_FLAGS}" \
- -DLLAMA_CURL=OFF \
+ -DLLAMA_OPENSSL=OFF \
-S .
cmake --build build-tvos-device --config Release -- -quiet
diff --git a/ci/run.sh b/ci/run.sh
index 67b9784ef4..dfcf959661 100755
--- a/ci/run.sh
+++ b/ci/run.sh
@@ -45,7 +45,7 @@ sd=`dirname $0`
cd $sd/../
SRC=`pwd`
-CMAKE_EXTRA="-DLLAMA_FATAL_WARNINGS=${LLAMA_FATAL_WARNINGS:-ON} -DLLAMA_CURL=ON -DGGML_SCHED_NO_REALLOC=ON"
+CMAKE_EXTRA="-DLLAMA_FATAL_WARNINGS=${LLAMA_FATAL_WARNINGS:-ON} -DLLAMA_OPENSSL=OFF -DGGML_SCHED_NO_REALLOC=ON"
if [ ! -z ${GG_BUILD_METAL} ]; then
CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_METAL=ON"
@@ -254,7 +254,7 @@ function gg_run_ctest_release {
(time make -j$(nproc) ) 2>&1 | tee -a $OUT/${ci}-make.log
if [ -z ${GG_BUILD_LOW_PERF} ]; then
- (time ctest --output-on-failure -L main ) 2>&1 | tee -a $OUT/${ci}-ctest.log
+ (time ctest --output-on-failure -L 'main|python' ) 2>&1 | tee -a $OUT/${ci}-ctest.log
else
(time ctest --output-on-failure -L main -E test-opt ) 2>&1 | tee -a $OUT/${ci}-ctest.log
fi
diff --git a/cmake/download-models.cmake b/cmake/download-models.cmake
new file mode 100644
index 0000000000..de252906a0
--- /dev/null
+++ b/cmake/download-models.cmake
@@ -0,0 +1,21 @@
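+# Helper script for downloading model files. The caller is expected to define
+# NAME, DEST and HASH (e.g. `cmake -DNAME=... -DDEST=... -DHASH=... -P cmake/download-models.cmake`);
+# the file is fetched from the ggml-org/models repository on Hugging Face and verified against HASH.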
+get_filename_component(DEST_DIR "${DEST}" DIRECTORY)
+file(MAKE_DIRECTORY "${DEST_DIR}")
+
+if(NOT EXISTS "${DEST}")
+ message(STATUS "Downloading ${NAME} from ggml-org/models...")
+endif()
+
+file(DOWNLOAD
+ "https://huggingface.co/ggml-org/models/resolve/main/${NAME}?download=true"
+ "${DEST}"
+ TLS_VERIFY ON
+ EXPECTED_HASH ${HASH}
+ STATUS status
+)
+
+list(GET status 0 code)
+
+if(NOT code EQUAL 0)
+ list(GET status 1 msg)
+ message(FATAL_ERROR "Failed to download ${NAME}: ${msg}")
+endif()
diff --git a/common/CMakeLists.txt b/common/CMakeLists.txt
index 55222bdf61..ae02c0bd77 100644
--- a/common/CMakeLists.txt
+++ b/common/CMakeLists.txt
@@ -60,6 +60,8 @@ add_library(${TARGET} STATIC
common.h
console.cpp
console.h
+ debug.cpp
+ debug.h
download.cpp
download.h
http.h
@@ -83,6 +85,18 @@ add_library(${TARGET} STATIC
speculative.h
unicode.cpp
unicode.h
+ jinja/lexer.cpp
+ jinja/lexer.h
+ jinja/parser.cpp
+ jinja/parser.h
+ jinja/runtime.cpp
+ jinja/runtime.h
+ jinja/value.cpp
+ jinja/value.h
+ jinja/string.cpp
+ jinja/string.h
+ jinja/caps.cpp
+ jinja/caps.h
)
target_include_directories(${TARGET} PUBLIC . ../vendor)
@@ -95,17 +109,7 @@ endif()
# TODO: use list(APPEND LLAMA_COMMON_EXTRA_LIBS ...)
set(LLAMA_COMMON_EXTRA_LIBS build_info)
-if (LLAMA_CURL)
- # Use curl to download model url
- find_package(CURL)
- if (NOT CURL_FOUND)
- message(FATAL_ERROR "Could NOT find CURL. Hint: to disable this feature, set -DLLAMA_CURL=OFF")
- endif()
- target_compile_definitions(${TARGET} PUBLIC LLAMA_USE_CURL)
- include_directories(${CURL_INCLUDE_DIRS})
- set(LLAMA_COMMON_EXTRA_LIBS ${LLAMA_COMMON_EXTRA_LIBS} ${CURL_LIBRARIES})
-elseif (LLAMA_HTTPLIB)
- # otherwise, use cpp-httplib
+if (LLAMA_HTTPLIB)
target_compile_definitions(${TARGET} PUBLIC LLAMA_USE_HTTPLIB)
set(LLAMA_COMMON_EXTRA_LIBS ${LLAMA_COMMON_EXTRA_LIBS} cpp-httplib)
endif()
diff --git a/common/arg.cpp b/common/arg.cpp
index 4b96c312f3..163c9b71b0 100644
--- a/common/arg.cpp
+++ b/common/arg.cpp
@@ -341,7 +341,7 @@ static handle_model_result common_params_handle_model(
if (model.path.empty()) {
auto auto_detected = common_get_hf_file(model.hf_repo, bearer_token, offline);
if (auto_detected.repo.empty() || auto_detected.ggufFile.empty()) {
- exit(1); // built without CURL, error message already printed
+ exit(1); // error message already printed
}
model.name = model.hf_repo; // repo name with tag
model.hf_repo = auto_detected.repo; // repo name without tag
@@ -1729,6 +1729,26 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
}
}
).set_sparam());
+ add_opt(common_arg(
+ {"--adaptive-target"}, "N",
+ string_format("adaptive-p: select tokens near this probability (valid range 0.0 "
+ "to 1.0; negative = disabled) (default: %.2f)\n"
+ "[(more info)](https://github.com/ggml-org/llama.cpp/pull/17927)",
+ (double)params.sampling.adaptive_target),
+ [](common_params & params, const std::string & value) {
+ params.sampling.adaptive_target = std::stof(value);
+ }
+ ).set_sparam());
+ add_opt(common_arg(
+ {"--adaptive-decay"}, "N",
+ string_format("adaptive-p: decay rate for target adaptation over time. lower values "
+ "are more reactive, higher values are more stable.\n"
+ "(valid range 0.0 to 0.99) (default: %.2f)",
+ (double)params.sampling.adaptive_decay),
+ [](common_params & params, const std::string & value) {
+ params.sampling.adaptive_decay = std::stof(value);
+ }
+ ).set_sparam());
add_opt(common_arg(
{"--dynatemp-range"}, "N",
string_format("dynamic temperature range (default: %.1f, 0.0 = disabled)", (double)params.sampling.dynatemp_range),
diff --git a/common/chat-parser.cpp b/common/chat-parser.cpp
index 23e23ca8c7..29819e48d3 100644
--- a/common/chat-parser.cpp
+++ b/common/chat-parser.cpp
@@ -129,7 +129,7 @@ static void parse_json_tool_calls(
}
}
-common_chat_msg_parser::common_chat_msg_parser(const std::string & input, bool is_partial, const common_chat_syntax & syntax)
+common_chat_msg_parser::common_chat_msg_parser(const std::string & input, bool is_partial, const common_chat_parser_params & syntax)
: input_(input), is_partial_(is_partial), syntax_(syntax)
{
result_.role = "assistant";
@@ -1403,6 +1403,118 @@ static void common_chat_parse_solar_open(common_chat_msg_parser & builder) {
builder.add_content(builder.consume_rest());
}
+static void common_chat_parse_exaone_moe_content(common_chat_msg_parser & builder) {
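+ // The model emits tool calls as JSON objects in either of these shapes: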
+ // 1) { "name": "...", "arguments": {...} }
+ // 2) { "id": "...", "type": "function", "function": { "name": "...", "arguments": {...} } }
+ static const common_regex tool_call_open(R"(<tool_call[^>]*>)");
+
+ if (!builder.syntax().parse_tool_calls) {
+ LOG_DBG("%s: not parse_tool_calls\n", __func__);
+ builder.add_content(builder.consume_rest());
+ return;
+ }
+
+ LOG_DBG("%s: parse_tool_calls\n", __func__);
+
+ // Find all <tool_call> blocks
+ while (auto first = builder.try_find_regex(tool_call_open, std::string::npos, /* add_prelude_to_content= */ true)) {
+ builder.move_to(first->groups[0].end);
+ builder.consume_spaces();
+
+ builder.try_consume_literal("```json");
+ builder.try_consume_literal("```");
+ builder.consume_spaces();
+
+ // Consume JSON object
+ auto data = builder.consume_json();
+
+ builder.consume_spaces();
+ builder.try_consume_literal("```");
+ builder.consume_spaces();
+
+ if (!builder.try_consume_literal("</tool_call>")) {
+ throw common_chat_msg_partial_exception("incomplete tool call");
+ }
+ builder.consume_spaces();
+
+ // Extract name and arguments
+ std::string name;
+ std::string id;
+ nlohmann::ordered_json arguments;
+
+ const auto extract_args = [&](const nlohmann::ordered_json & obj) -> bool {
+ if (!obj.contains("name") || !obj.contains("arguments")) {
+ return false;
+ }
+ name = obj.at("name").get();
+ arguments = obj.at("arguments");
+ if (obj.contains("id") && obj.at("id").is_string()) {
+ id = obj.at("id").get();
+ }
+ return true;
+ };
+
+ if (!extract_args(data.json)) {
+ if (data.json.contains("function") && data.json.at("function").is_object()) {
+ auto fn = data.json.at("function");
+ extract_args(fn);
+ if (id.empty() && data.json.contains("id") && data.json.at("id").is_string()) {
+ id = data.json.at("id").get();
+ }
+ }
+ }
+
+ // If name is empty, treat the JSON object as content
+ if (name.empty()) {
+ LOG_DBG("%s: tool call missing name, treating as content\n", __func__);
+ builder.add_content(data.json.dump());
+ continue;
+ }
+
+ std::string args_str = arguments.dump();
+ if (!builder.add_tool_call(name, id, args_str)) {
+ throw common_chat_msg_partial_exception("incomplete tool call");
+ }
+ }
+
+ builder.add_content(builder.consume_rest());
+}
+
+static void common_chat_parse_exaone_moe(common_chat_msg_parser & builder) {
+ LOG_DBG("%s: parsing exaone_moe\n", __func__);
+ // EXAONE MoE outputs reasoning content between "<think>" and "</think>" tags, followed by regular content
+ // First try to parse using the standard reasoning parsing method
+ LOG_DBG("%s: thinking_forced_open: %s\n", __func__, std::to_string(builder.syntax().thinking_forced_open).c_str());
+
+ auto start_pos = builder.pos();
+ auto found_end_think = builder.try_find_literal("</think>");
+ builder.move_to(start_pos);
+
+ if (builder.syntax().thinking_forced_open && !builder.is_partial() && !found_end_think) {
+ LOG_DBG("%s: no end_think, not partial, adding content\n", __func__);
+ common_chat_parse_exaone_moe_content(builder);
+ } else if (builder.try_parse_reasoning("<think>", "</think>")) {
+ // If reasoning was parsed successfully, the remaining content is regular content
+ LOG_DBG("%s: parsed reasoning, adding content\n", __func__);
+ common_chat_parse_exaone_moe_content(builder);
+ } else {
+ if (builder.syntax().reasoning_format == COMMON_REASONING_FORMAT_NONE) {
+ LOG_DBG("%s: reasoning_format none, adding content\n", __func__);
+ common_chat_parse_exaone_moe_content(builder);
+ return;
+ }
+ // If no reasoning tags found, check if we should treat everything as reasoning
+ if (builder.syntax().thinking_forced_open) {
+ // If thinking is forced open but no tags found, treat everything as reasoning
+ LOG_DBG("%s: thinking_forced_open, adding reasoning content\n", __func__);
+ builder.add_reasoning_content(builder.consume_rest());
+ } else {
+ LOG_DBG("%s: no thinking_forced_open, adding content\n", __func__);
+ common_chat_parse_exaone_moe_content(builder);
+ }
+ }
+}
+
static void common_chat_parse_content_only(common_chat_msg_parser & builder) {
builder.try_parse_reasoning("<think>", "</think>");
builder.add_content(builder.consume_rest());
@@ -1490,13 +1602,16 @@ static void common_chat_parse(common_chat_msg_parser & builder) {
case COMMON_CHAT_FORMAT_SOLAR_OPEN:
common_chat_parse_solar_open(builder);
break;
+ case COMMON_CHAT_FORMAT_EXAONE_MOE:
+ common_chat_parse_exaone_moe(builder);
+ break;
default:
throw std::runtime_error(std::string("Unsupported format: ") + common_chat_format_name(builder.syntax().format));
}
builder.finish();
}
-common_chat_msg common_chat_parse(const std::string & input, bool is_partial, const common_chat_syntax & syntax) {
+common_chat_msg common_chat_parse(const std::string & input, bool is_partial, const common_chat_parser_params & syntax) {
if (syntax.format == COMMON_CHAT_FORMAT_PEG_SIMPLE ||
syntax.format == COMMON_CHAT_FORMAT_PEG_NATIVE ||
syntax.format == COMMON_CHAT_FORMAT_PEG_CONSTRUCTED) {
@@ -1515,12 +1630,12 @@ common_chat_msg common_chat_parse(const std::string & input, bool is_partial, co
}
auto msg = builder.result();
if (!is_partial) {
- LOG_DBG("Parsed message: %s\n", common_chat_msgs_to_json_oaicompat({msg}).at(0).dump().c_str());
+ LOG_DBG("Parsed message: %s\n", common_chat_msgs_to_json_oaicompat({msg}).at(0).dump().c_str());
}
return msg;
}
-common_chat_msg common_chat_peg_parse(const common_peg_arena & parser, const std::string & input, bool is_partial, const common_chat_syntax & syntax) {
+common_chat_msg common_chat_peg_parse(const common_peg_arena & parser, const std::string & input, bool is_partial, const common_chat_parser_params & syntax) {
if (parser.empty()) {
throw std::runtime_error("Failed to parse due to missing parser definition.");
}
@@ -1548,7 +1663,7 @@ common_chat_msg common_chat_peg_parse(const common_peg_arena & parser, const std
mapper.from_ast(ctx.ast, result);
}
if (!is_partial) {
- LOG_DBG("Parsed message: %s\n", common_chat_msgs_to_json_oaicompat({msg}).at(0).dump().c_str());
+ LOG_DBG("Parsed message: %s\n", common_chat_msgs_to_json_oaicompat({msg}).at(0).dump().c_str());
}
return msg;
}
diff --git a/common/chat-parser.h b/common/chat-parser.h
index 78c4b74c2d..3ed9c30a2b 100644
--- a/common/chat-parser.h
+++ b/common/chat-parser.h
@@ -5,7 +5,7 @@
#include "json-partial.h"
#include "regex-partial.h"
-#include
+#include
#include
#include
@@ -19,20 +19,20 @@ class common_chat_msg_partial_exception : public std::runtime_error {
class common_chat_msg_parser {
std::string input_;
bool is_partial_;
- common_chat_syntax syntax_;
+ common_chat_parser_params syntax_; // TODO: rename to params
std::string healing_marker_;
size_t pos_ = 0;
common_chat_msg result_;
public:
- common_chat_msg_parser(const std::string & input, bool is_partial, const common_chat_syntax & syntax);
+ common_chat_msg_parser(const std::string & input, bool is_partial, const common_chat_parser_params & syntax);
const std::string & input() const { return input_; }
size_t pos() const { return pos_; }
const std::string & healing_marker() const { return healing_marker_; }
const bool & is_partial() const { return is_partial_; }
const common_chat_msg & result() const { return result_; }
- const common_chat_syntax & syntax() const { return syntax_; }
+ const common_chat_parser_params & syntax() const { return syntax_; }
void move_to(size_t pos) {
if (pos > input_.size()) {
diff --git a/common/chat.cpp b/common/chat.cpp
index 22e527bab8..aba26e97a1 100644
--- a/common/chat.cpp
+++ b/common/chat.cpp
@@ -7,8 +7,10 @@
#include "log.h"
#include "regex-partial.h"
-#include <minja/chat-template.hpp>
-#include <minja/minja.hpp>
+#include "jinja/parser.h"
+#include "jinja/value.h"
+#include "jinja/runtime.h"
+#include "jinja/caps.h"
#include
#include
@@ -51,39 +53,73 @@ static bool has_content_or_tool_calls(const common_chat_msg & msg) {
return !msg.content.empty() || !msg.tool_calls.empty();
}
-template <>
-json common_chat_msg::to_json_oaicompat() const
-{
- json message {
- {"role", "assistant"},
- };
- if (!reasoning_content.empty()) {
- message["reasoning_content"] = reasoning_content;
+json common_chat_msg::to_json_oaicompat(bool concat_typed_text) const {
+ if (!content.empty() && !content_parts.empty()) {
+ throw std::runtime_error("Cannot specify both content and content_parts");
}
- if (content.empty() && !tool_calls.empty()) {
- message["content"] = json();
+ json jmsg {
+ {"role", role},
+ };
+ if (!content.empty()) {
+ jmsg["content"] = content;
+ } else if (!content_parts.empty()) {
+ if (concat_typed_text) {
+ std::string text;
+ for (const auto & part : content_parts) {
+ if (part.type != "text") {
+ LOG_WRN("Ignoring content part type: %s\n", part.type.c_str());
+ continue;
+ }
+ if (!text.empty()) {
+ text += '\n';
+ }
+ text += part.text;
+ }
+ jmsg["content"] = text;
+ } else {
+ auto & parts = jmsg["content"] = json::array();
+ for (const auto & part : content_parts) {
+ parts.push_back({
+ {"type", part.type},
+ {"text", part.text},
+ });
+ }
+ }
} else {
- message["content"] = content;
+ jmsg["content"] = "";
+ }
+ if (!reasoning_content.empty()) {
+ jmsg["reasoning_content"] = reasoning_content;
+ }
+ if (!tool_name.empty()) {
+ jmsg["name"] = tool_name;
+ }
+ if (!tool_call_id.empty()) {
+ jmsg["tool_call_id"] = tool_call_id;
}
if (!tool_calls.empty()) {
- auto arr = json::array();
- for (const auto & tc : tool_calls) {
- arr.push_back({
+ jmsg["tool_calls"] = json::array();
+ auto & jtool_calls = jmsg["tool_calls"];
+ for (const auto & tool_call : tool_calls) {
+ json tc {
{"type", "function"},
{"function", {
- {"name", tc.name},
- {"arguments", tc.arguments},
+ {"name", tool_call.name},
+ {"arguments", tool_call.arguments},
}},
- {"id", tc.id},
- // // Some templates generate and require an id (sometimes in a very specific format, e.g. Mistral Nemo).
- // // We only generate a random id for the ones that don't generate one by themselves
- // // (they also won't get to see it as their template likely doesn't use it, so it's all for the client)
- // {"id", tc.id.empty() ? gen_tool_call_id() : tc.id},
- });
+ };
+ if (!tool_call.id.empty()) {
+ tc["id"] = tool_call.id;
+ }
+ // Some templates generate and require an id (sometimes in a very specific format, e.g. Mistral Nemo).
+ // We only generate a random id for the ones that don't generate one by themselves
+ // (they also won't get to see it as their template likely doesn't use it, so it's all for the client)
+ // {"id", tc.id.empty() ? gen_tool_call_id() : tc.id},
+ jtool_calls.push_back(tc);
}
- message["tool_calls"] = arr;
}
- return message;
+
+ return jmsg;
}
std::vector<common_chat_msg_diff> common_chat_msg_diff::compute_diffs(const common_chat_msg & msg_prv, const common_chat_msg & msg_new) {
@@ -135,7 +171,68 @@ std::vector<common_chat_msg_diff> common_chat_msg_diff::compute_diffs(const comm
return diffs;
}
-typedef minja::chat_template common_chat_template;
+using chat_template_caps = jinja::caps;
+
+struct common_chat_template {
+ jinja::program prog;
+ std::string bos_tok;
+ std::string eos_tok;
+ std::string src;
+ chat_template_caps caps;
+
+ common_chat_template(const std::string & src, const std::string & bos_token, const std::string & eos_token) {
+ jinja::lexer lexer;
+ auto lexer_res = lexer.tokenize(src);
+ this->prog = jinja::parse_from_tokens(lexer_res);
+
+ this->src = lexer_res.source;
+ this->bos_tok = bos_token;
+ this->eos_tok = eos_token;
+
+ this->caps = jinja::caps_get(prog);
+ // LOG_INF("%s: caps:\n%s\n", __func__, this->caps.to_string().c_str());
+ }
+
+ const std::string & source() const { return src; }
+ const std::string & bos_token() const { return bos_tok; }
+ const std::string & eos_token() const { return eos_tok; }
+
+ // TODO: this is ugly, refactor it somehow
+ json add_system(const json & messages, const std::string & system_prompt) const {
+ GGML_ASSERT(messages.is_array());
+ auto msgs_copy = messages;
+ if (!caps.supports_system_role) {
+ if (msgs_copy.empty()) {
+ msgs_copy.insert(msgs_copy.begin(), json{
+ {"role", "user"},
+ {"content", system_prompt}
+ });
+ } else {
+ auto & first_msg = msgs_copy[0];
+ if (!first_msg.contains("content")) {
+ first_msg["content"] = "";
+ }
+ first_msg["content"] = system_prompt + "\n\n"
+ + first_msg["content"].get();
+ }
+ } else {
+ if (msgs_copy.empty() || msgs_copy[0].at("role") != "system") {
+ msgs_copy.insert(msgs_copy.begin(), json{
+ {"role", "system"},
+ {"content", system_prompt}
+ });
+ } else if (msgs_copy[0].at("role") == "system") {
+ msgs_copy[0]["content"] = system_prompt;
+ }
+ }
+ return msgs_copy;
+ }
+
+ chat_template_caps original_caps() const {
+ return caps;
+ }
+
+};
struct common_chat_templates {
bool add_bos;
@@ -161,6 +258,7 @@ struct templates_params {
bool add_bos;
bool add_eos;
bool is_inference = true;
+ bool mark_input = true; // whether to mark input strings in the jinja context
};
common_chat_tool_choice common_chat_tool_choice_parse_oaicompat(const std::string & tool_choice) {
@@ -189,7 +287,6 @@ bool common_chat_templates_support_enable_thinking(const common_chat_templates *
return rendered_no_thinking.prompt != rendered_with_thinking.prompt;
}
-template <>
std::vector<common_chat_msg> common_chat_msgs_parse_oaicompat(const json & messages) {
std::vector<common_chat_msg> msgs;
@@ -283,80 +380,15 @@ std::vector<common_chat_msg> common_chat_msgs_parse_oaicompat(const json & messa
return msgs;
}
-template <>
json common_chat_msgs_to_json_oaicompat(const std::vector<common_chat_msg> & msgs, bool concat_typed_text) {
json messages = json::array();
for (const auto & msg : msgs) {
- if (!msg.content.empty() && !msg.content_parts.empty()) {
- throw std::runtime_error("Cannot specify both content and content_parts");
- }
- json jmsg {
- {"role", msg.role},
- };
- if (!msg.content.empty()) {
- jmsg["content"] = msg.content;
- } else if (!msg.content_parts.empty()) {
- if (concat_typed_text) {
- std::string text;
- for (const auto & part : msg.content_parts) {
- if (part.type != "text") {
- LOG_WRN("Ignoring content part type: %s\n", part.type.c_str());
- continue;
- }
- if (!text.empty()) {
- text += '\n';
- }
- text += part.text;
- }
- jmsg["content"] = text;
- } else {
- auto & parts = jmsg["content"] = json::array();
- for (const auto & part : msg.content_parts) {
- parts.push_back({
- {"type", part.type},
- {"text", part.text},
- });
- }
- }
- } else {
- jmsg["content"] = "";
- }
- if (!msg.reasoning_content.empty()) {
- jmsg["reasoning_content"] = msg.reasoning_content;
- }
- if (!msg.tool_name.empty()) {
- jmsg["name"] = msg.tool_name;
- }
- if (!msg.tool_call_id.empty()) {
- jmsg["tool_call_id"] = msg.tool_call_id;
- }
- if (!msg.tool_calls.empty()) {
- auto & tool_calls = jmsg["tool_calls"] = json::array();
- for (const auto & tool_call : msg.tool_calls) {
- json tc {
- {"type", "function"},
- {"function", {
- {"name", tool_call.name},
- {"arguments", tool_call.arguments},
- }},
- };
- if (!tool_call.id.empty()) {
- tc["id"] = tool_call.id;
- }
- tool_calls.push_back(tc);
- }
- }
+ json jmsg = msg.to_json_oaicompat(concat_typed_text);
messages.push_back(jmsg);
}
return messages;
}
-template <>
-std::vector<common_chat_msg> common_chat_msgs_parse_oaicompat(const std::string & messages) {
- return common_chat_msgs_parse_oaicompat(json::parse(messages));
-}
-
-template <>
std::vector<common_chat_tool> common_chat_tools_parse_oaicompat(const json & tools) {
std::vector<common_chat_tool> result;
@@ -392,12 +424,6 @@ std::vector<common_chat_tool> common_chat_tools_parse_oaicompat(const json & too
return result;
}
-template <>
-std::vector<common_chat_tool> common_chat_tools_parse_oaicompat(const std::string & tools) {
- return common_chat_tools_parse_oaicompat(json::parse(tools));
-}
-
-template <>
json common_chat_tools_to_json_oaicompat(const std::vector<common_chat_tool> & tools) {
if (tools.empty()) {
return json();
@@ -417,7 +443,7 @@ json common_chat_tools_to_json_oaicompat(const std::vector<common_chat_tool> & t
return result;
}
-template <> json common_chat_msg_diff_to_json_oaicompat(const common_chat_msg_diff & diff) {
+json common_chat_msg_diff_to_json_oaicompat(const common_chat_msg_diff & diff) {
json delta = json::object();
if (!diff.reasoning_content_delta.empty()) {
delta["reasoning_content"] = diff.reasoning_content_delta;
@@ -534,18 +560,18 @@ bool common_chat_templates_was_explicit(const struct common_chat_templates * tmp
return tmpls->has_explicit_template;
}
-const char * common_chat_templates_source(const struct common_chat_templates * tmpls, const char * variant) {
- if (variant != nullptr) {
- if (strcmp(variant, "tool_use") == 0) {
+std::string common_chat_templates_source(const struct common_chat_templates * tmpls, const std::string & variant) {
+ if (!variant.empty()) {
+ if (variant == "tool_use") {
if (tmpls->template_tool_use) {
- return tmpls->template_tool_use->source().c_str();
+ return tmpls->template_tool_use->source();
}
- return nullptr;
+ return "";
} else {
- LOG_DBG("%s: unknown template variant: %s\n", __func__, variant);
+ LOG_DBG("%s: unknown template variant: %s\n", __func__, variant.c_str());
}
}
- return tmpls->template_default->source().c_str();
+ return tmpls->template_default->source();
}
common_chat_templates_ptr common_chat_templates_init(
@@ -627,14 +653,16 @@ common_chat_templates_ptr common_chat_templates_init(
tmpls->add_bos = add_bos;
tmpls->add_eos = add_eos;
try {
- tmpls->template_default = std::make_unique(default_template_src, token_bos, token_eos);
+ tmpls->template_default = std::make_unique<common_chat_template>(default_template_src, token_bos, token_eos);
} catch (const std::exception & e) {
- LOG_ERR("%s: failed to parse chat template (defaulting to chatml): %s \n", __func__, e.what());
- tmpls->template_default = std::make_unique(CHATML_TEMPLATE_SRC, token_bos, token_eos);
+ LOG_ERR("%s: error: %s\n", __func__, e.what());
+ LOG_ERR("%s: failed to initialize chat template\n", __func__);
+ LOG_ERR("%s: please consider disabling jinja via --no-jinja, or using another chat template\n", __func__);
+ throw e;
}
if (!template_tool_use_src.empty()) {
try {
- tmpls->template_tool_use = std::make_unique(template_tool_use_src, token_bos, token_eos);
+ tmpls->template_tool_use = std::make_unique<common_chat_template>(template_tool_use_src, token_bos, token_eos);
} catch (const std::exception & e) {
LOG_ERR("%s: failed to parse tool use chat template (ignoring it): %s\n", __func__, e.what());
}
@@ -670,6 +698,7 @@ const char * common_chat_format_name(common_chat_format format) {
case COMMON_CHAT_FORMAT_APRIEL_1_5: return "Apriel 1.5";
case COMMON_CHAT_FORMAT_XIAOMI_MIMO: return "Xiaomi MiMo";
case COMMON_CHAT_FORMAT_SOLAR_OPEN: return "Solar Open";
+ case COMMON_CHAT_FORMAT_EXAONE_MOE: return "EXAONE MoE";
case COMMON_CHAT_FORMAT_PEG_SIMPLE: return "peg-simple";
case COMMON_CHAT_FORMAT_PEG_NATIVE: return "peg-native";
case COMMON_CHAT_FORMAT_PEG_CONSTRUCTED: return "peg-constructed";
@@ -738,27 +767,43 @@ static std::string apply(
const std::optional & tools_override = std::nullopt,
const std::optional & additional_context = std::nullopt)
{
- minja::chat_template_inputs tmpl_inputs;
- tmpl_inputs.messages = messages_override ? *messages_override : inputs.messages;
- if (tools_override) {
- tmpl_inputs.tools = *tools_override;
- } else {
- tmpl_inputs.tools = inputs.tools.empty() ? json() : inputs.tools;
- }
- tmpl_inputs.add_generation_prompt = inputs.add_generation_prompt;
- tmpl_inputs.extra_context = inputs.extra_context;
- tmpl_inputs.extra_context["enable_thinking"] = inputs.enable_thinking;
- if (additional_context) {
- tmpl_inputs.extra_context.merge_patch(*additional_context);
- }
- // TODO: add flag to control date/time, if only for testing purposes.
- // tmpl_inputs.now = std::chrono::system_clock::now();
+ jinja::context ctx(tmpl.source());
- minja::chat_template_options tmpl_opts;
- // To avoid double BOS / EOS tokens, we're manually removing begining / trailing tokens
- // instead of using `chat_template_options.use_bos_token = false`, since these tokens
- // may be needed inside the template / between messages too.
- auto result = tmpl.apply(tmpl_inputs, tmpl_opts);
+ nlohmann::ordered_json inp = nlohmann::ordered_json{
+ {"messages", messages_override.has_value() ? *messages_override : inputs.messages},
+ {"tools", tools_override.has_value() ? *tools_override : inputs.tools},
+ {"bos_token", tmpl.bos_token()},
+ {"eos_token", tmpl.eos_token()},
+ };
+ if (inputs.extra_context.is_object()) {
+ // TODO: do we need to merge, or replacing is fine?
+ for (const auto & [k, v] : inputs.extra_context.items()) {
+ inp[k] = v;
+ }
+ }
+ if (additional_context.has_value()) {
+ // TODO: merge properly instead of overwriting (matching old behavior)
+ for (const auto & [k, v] : additional_context->items()) {
+ inp[k] = v;
+ }
+ }
+ if (inputs.add_generation_prompt) {
+ inp["add_generation_prompt"] = true;
+ }
+ if (inp["tools"].is_null()) {
+ inp["tools"] = json::array();
+ }
+
+ jinja::global_from_json(ctx, inp, inputs.mark_input);
+
+ // render
+ jinja::runtime runtime(ctx);
+ const jinja::value results = runtime.execute(tmpl.prog);
+ auto parts = runtime.gather_string_parts(results);
+
+ std::string result = parts->as_string().str();
+
+ // TODO: improve this later
if (inputs.add_bos && string_starts_with(result, tmpl.bos_token())) {
result = result.substr(tmpl.bos_token().size());
}
@@ -845,10 +890,17 @@ static common_chat_params common_chat_params_init_generic(const common_chat_temp
builder.add_schema("root", schema);
});
- auto tweaked_messages = common_chat_template::add_system(
+ auto tweaked_messages = tmpl.add_system(
inputs.messages,
"Respond in JSON format, either with `tool_call` (a request to call tools) or with `response` reply to the user's request");
+ // ensure all messages have a "content" field
+ for (auto & message : tweaked_messages) {
+ if (!message.contains("content") || message["content"].is_null()) {
+ message["content"] = "";
+ }
+ }
+
data.prompt = apply(tmpl, inputs, /* messages_override= */ tweaked_messages);
data.format = COMMON_CHAT_FORMAT_GENERIC;
return data;
@@ -1363,7 +1415,7 @@ static common_chat_params common_chat_params_init_llama_3_x(const common_chat_te
data.prompt = apply(tmpl, inputs, /* messages_override =*/ std::nullopt, /* tools_override= */ std::nullopt, json {
{"date_string", format_time(inputs.now, "%d %b %Y")},
{"tools_in_user_message", false},
- {"builtin_tools", builtin_tools.empty() ? json() : builtin_tools},
+ {"builtin_tools", builtin_tools},
});
return data;
}
@@ -2539,6 +2591,104 @@ static common_chat_params common_chat_params_init_solar_open(const common_chat_t
return data;
}
+static common_chat_params common_chat_params_init_exaone_moe(const common_chat_template & tmpl, const struct templates_params & inputs) {
+ common_chat_params data;
+
+ data.prompt = apply(tmpl, inputs);
+ data.format = COMMON_CHAT_FORMAT_EXAONE_MOE;
+ if (string_ends_with(data.prompt, "<think>\n")) {
+ if (!inputs.enable_thinking) {
+ data.prompt += "</think>\n\n";
+ } else {
+ data.thinking_forced_open = true;
+ }
+ }
+
+ if (inputs.tools.is_array() && !inputs.tools.empty()) {
+ data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED && inputs.json_schema.is_null();
+ data.grammar = build_grammar([&](const common_grammar_builder & builder) {
+ std::vector<std::string> tool_rules;
+ foreach_function(inputs.tools, [&](const json & tool) {
+ const auto & function = tool.at("function");
+ std::string name = function.at("name");
+ auto parameters = function.at("parameters");
+ builder.resolve_refs(parameters);
+ // Expect: {"name": "<tool name>", "arguments": {...}}
+ tool_rules.push_back(builder.add_rule(
+ name + "-call",
+ "\"\" space " +
+ builder.add_schema(name + "-obj", json{
+ {"type", "object"},
+ {"properties", {
+ {"name", json{{"const", name}}},
+ {"arguments", parameters},
+ }},
+ {"required", json::array({"name", "arguments"})},
+ }) +
+ " space \"\" space"));
+ });
+
+ auto tool_call = builder.add_rule("tool_call", string_join(tool_rules, " | "));
+ builder.add_rule("root",
+ std::string(data.thinking_forced_open ? "( \"\" space )? " : "") +
+ (inputs.parallel_tool_calls ? "(" + tool_call + ")+" : tool_call));
+
+ data.grammar_triggers.push_back({
+ COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
+ std::string(data.thinking_forced_open ? "[\\s\\S]*?(\\s*)?" : "") +
+ "()[\\s\\S]*"
+ });
+ data.preserved_tokens = {
+ "",
+ "",
+ "",
+ "",
+ };
+ });
+ }
+
+ return data;
+}
+
+static common_chat_params common_chat_params_init_translate_gemma(const common_chat_template & tmpl, const struct templates_params & inputs) {
+ common_chat_params data;
+
+ // This template does not support tools or reasoning
+ // we just need to transform the messages into the correct schema
+
+ templates_params inputs_new = inputs;
+ json & messages = inputs_new.messages;
+
+ GGML_ASSERT(messages.is_array());
+ for (auto & message : messages) {
+ if (message.contains("role") && message["role"].get() != "user") {
+ continue;
+ }
+ if (!message.contains("content")) {
+ message["content"] = json::array();
+ }
+ if (message.contains("content") && !message["content"].is_array()) {
+ auto content_str = message["content"].get<std::string>();
+ // default to en-GB if not specified (so that common_chat_format_example works)
+ auto src_lang = message.contains("source_lang_code") ? message["source_lang_code"].get<std::string>() : "en-GB";
+ auto tgt_lang = message.contains("target_lang_code") ? message["target_lang_code"].get<std::string>() : "en-GB";
+ message["content"] = json::array({
+ json{
+ {"type", "text"},
+ {"text", content_str},
+ {"source_lang_code", src_lang},
+ {"target_lang_code", tgt_lang},
+ }
+ });
+ }
+ }
+
+ data.prompt = apply(tmpl, inputs_new, std::nullopt, std::nullopt);
+ data.format = COMMON_CHAT_FORMAT_GENERIC;
+
+ return data;
+}
+
static common_chat_params common_chat_params_init_without_tools(const common_chat_template & tmpl, const struct templates_params & inputs) {
common_chat_params data;
data.prompt = apply(tmpl, inputs);
@@ -2609,18 +2759,119 @@ static common_chat_params common_chat_params_init_seed_oss(
return data;
}
+// various workarounds for known issues with certain templates or model behaviors
+// TODO @ngxson : improve this (how?)
+namespace workaround {
+
+// if first message is system and template does not support it, merge it with next message
+static void system_message_not_supported(json & messages) {
+ if (!messages.empty() && messages.front().at("role") == "system") {
+ if (messages.size() > 1) {
+ LOG_DBG("Merging system prompt into next message\n");
+ auto & first_msg = messages.front();
+ auto & second_msg = messages[1];
+ second_msg["content"] = first_msg.at("content").get()
+ + "\n" + second_msg.at("content").get();
+ messages.erase(messages.begin());
+ } else {
+ LOG_WRN("Removing system prompt due to template not supporting system role\n");
+ messages.erase(messages.begin());
+ }
+ }
+}
+
+static void func_args_not_string(json & messages) {
+ GGML_ASSERT(messages.is_array());
+ for (auto & message : messages) {
+ if (message.contains("tool_calls")) {
+ for (auto & tool_call : message["tool_calls"]) {
+ if (tool_call.contains("function") && tool_call["function"].contains("arguments")) {
+ auto & args = tool_call["function"]["arguments"];
+ if (args.is_string()) {
+ try {
+ args = json::parse(args.get<std::string>());
+ } catch (const std::exception & e) {
+ throw std::runtime_error("Failed to parse tool call arguments as JSON: " + std::string(e.what()));
+ }
+ }
+ }
+ }
+ }
+ }
+}
+
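+// Append each message's tool_calls to its content as a pretty-printed JSON object
+// and remove the separate "tool_calls" field.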
+static void move_tool_calls_to_content(json & messages, int indent_spaces = 2) {
+ GGML_ASSERT(messages.is_array());
+ for (auto & message : messages) {
+ if (message.contains("tool_calls")) {
+ auto tool_calls_new = json{
+ {"tool_calls", message.at("tool_calls")}
+ };
+ message.erase("tool_calls");
+ auto content = message.at("content");
+ std::string content_new = content.is_null() ? "" : content.get<std::string>();
+ message["content"] = content_new + tool_calls_new.dump(indent_spaces, ' ', false, json::error_handler_t::replace);
+ }
+ }
+}
+
+// TODO @ngxson : we may remove support for generic schema in the future
+static void use_generic_schema(json & messages) {
+ GGML_ASSERT(messages.is_array());
+ for (auto & message : messages) {
+ if (message.contains("tool_calls") && message.at("tool_calls").is_array()) {
+ auto & tool_calls = message.at("tool_calls");
+ for (auto & tool_call : tool_calls) {
+ if (tool_call.contains("type") && tool_call.at("type") == "function" &&
+ tool_call.contains("function") && tool_call.at("function").is_object()) {
+ // Copy values before erasing to avoid use-after-free
+ json name_value;
+ json arguments_value;
+ json id_value;
+ const auto & function = tool_call.at("function");
+ if (function.contains("name")) {
+ name_value = function.at("name");
+ }
+ if (function.contains("arguments")) {
+ arguments_value = function.at("arguments");
+ }
+ if (tool_call.contains("id")) {
+ id_value = tool_call.at("id");
+ }
+ // Now safely erase and assign in the correct order
+ tool_call.erase("type");
+ tool_call.erase("function");
+ tool_call.erase("id");
+ // Reassign in desired order: name, arguments, id
+ if (!name_value.is_null()) {
+ tool_call["name"] = name_value;
+ }
+ if (!arguments_value.is_null()) {
+ tool_call["arguments"] = arguments_value;
+ }
+ if (!id_value.is_null()) {
+ tool_call["id"] = id_value;
+ }
+ }
+ }
+ }
+ }
+}
+
+} // namespace workaround
+
static common_chat_params common_chat_templates_apply_jinja(
const struct common_chat_templates * tmpls,
const struct common_chat_templates_inputs & inputs)
{
templates_params params;
- params.tools = common_chat_tools_to_json_oaicompat<json>(inputs.tools);
+ params.tools = common_chat_tools_to_json_oaicompat(inputs.tools);
const auto & tmpl = params.tools.is_array() && tmpls->template_tool_use
? *tmpls->template_tool_use
: *tmpls->template_default;
const auto & src = tmpl.source();
const auto & caps = tmpl.original_caps();
- params.messages = common_chat_msgs_to_json_oaicompat<json>(inputs.messages, /* concat_text= */ !tmpl.original_caps().requires_typed_content);
+ params.messages = common_chat_msgs_to_json_oaicompat(inputs.messages, /* concat_text= */ !tmpl.original_caps().requires_typed_content);
params.add_generation_prompt = inputs.add_generation_prompt;
params.tool_choice = inputs.tool_choice;
params.reasoning_format = inputs.reasoning_format;
@@ -2630,6 +2881,10 @@ static common_chat_params common_chat_templates_apply_jinja(
params.add_bos = tmpls->add_bos;
params.add_eos = tmpls->add_eos;
+ if (!tmpl.original_caps().supports_system_role) {
+ workaround::system_message_not_supported(params.messages);
+ }
+
params.extra_context = json::object();
for (auto el : inputs.chat_template_kwargs) {
params.extra_context[el.first] = json::parse(el.second);
@@ -2668,11 +2923,15 @@ static common_chat_params common_chat_templates_apply_jinja(
// Command R7B: : use handler in all cases except json schema (thinking / tools).
if (src.find("<|END_THINKING|><|START_ACTION|>") != std::string::npos && params.json_schema.is_null()) {
+ workaround::func_args_not_string(params.messages);
return common_chat_params_init_command_r7b(tmpl, params);
}
// Granite (IBM) - detects thinking / tools support
if (src.find("elif thinking") != std::string::npos && src.find("<|tool_call|>") != std::string::npos) {
+ workaround::func_args_not_string(params.messages);
+ workaround::use_generic_schema(params.messages);
+ workaround::move_tool_calls_to_content(params.messages);
return common_chat_params_init_granite(tmpl, params);
}
@@ -2681,6 +2940,11 @@ static common_chat_params common_chat_templates_apply_jinja(
src.find("") != std::string::npos &&
src.find("") != std::string::npos &&
params.json_schema.is_null()) {
+ workaround::func_args_not_string(params.messages);
+ if (!params.extra_context.contains("clear_thinking")) {
+ // by default, do not clear reasoning_content (added since GLM-4.7)
+ params.extra_context["clear_thinking"] = false;
+ }
return common_chat_params_init_glm_4_5(tmpl, params);
}
@@ -2692,6 +2956,7 @@ static common_chat_params common_chat_templates_apply_jinja(
src.find("") != std::string::npos &&
src.find("") != std::string::npos) {
return common_chat_params_init_nemotron_v3(tmpl, params);
@@ -2709,6 +2974,13 @@ static common_chat_params common_chat_templates_apply_jinja(
return common_chat_params_init_xiaomi_mimo(tmpl, params);
}
+ // EXAONE MoE format detection
+ if (src.find("") != std::string::npos &&
+ src.find("") != std::string::npos &&
+ src.find("<|tool_declare|>") != std::string::npos) {
+ return common_chat_params_init_exaone_moe(tmpl, params);
+ }
+
// Hermes 2/3 Pro, Qwen 2.5 Instruct (w/ tools)
if (src.find("") != std::string::npos && params.json_schema.is_null()) {
return common_chat_params_init_hermes_2_pro(tmpl, params);
@@ -2721,6 +2993,7 @@ static common_chat_params common_chat_templates_apply_jinja(
// Seed-OSS
if (src.find("") != std::string::npos) {
+ workaround::func_args_not_string(params.messages);
return common_chat_params_init_seed_oss(tmpl, params, inputs);
}
@@ -2742,6 +3015,7 @@ static common_chat_params common_chat_templates_apply_jinja(
// MiniMax-M2 format detection
if (src.find("]~!b[") != std::string::npos && src.find("]~b]") != std::string::npos) {
+ workaround::func_args_not_string(params.messages);
return common_chat_params_init_minimax_m2(tmpl, params);
}
@@ -2788,6 +3062,7 @@ static common_chat_params common_chat_templates_apply_jinja(
// Llama 3.1, 3.2, 3.3 (also requires date_string so using it even w/o tools)
if (src.find("<|start_header_id|>ipython<|end_header_id|>") != std::string::npos) {
auto allow_python_tag_builtin_tools = src.find("<|python_tag|>") != std::string::npos;
+ workaround::func_args_not_string(params.messages);
return common_chat_params_init_llama_3_x(tmpl, params, allow_python_tag_builtin_tools);
}
@@ -2809,6 +3084,12 @@ static common_chat_params common_chat_templates_apply_jinja(
return common_chat_params_init_solar_open(tmpl, params);
}
+ // TranslateGemma
+ if (src.find("[source_lang_code]") != std::string::npos &&
+ src.find("[target_lang_code]") != std::string::npos) {
+ return common_chat_params_init_translate_gemma(tmpl, params);
+ }
+
// Plain handler (no tools)
if (params.tools.is_null() || inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_NONE) {
return common_chat_params_init_without_tools(tmpl, params);
@@ -2816,10 +3097,14 @@ static common_chat_params common_chat_templates_apply_jinja(
// Mistral Nemo (w/ tools)
if (src.find("[TOOL_CALLS]") != std::string::npos) {
+ workaround::func_args_not_string(params.messages);
return common_chat_params_init_mistral_nemo(tmpl, params);
}
// Generic fallback
+ workaround::func_args_not_string(params.messages);
+ workaround::use_generic_schema(params.messages);
+ workaround::move_tool_calls_to_content(params.messages);
return common_chat_params_init_generic(tmpl, params);
}
@@ -2897,3 +3182,9 @@ common_chat_params common_chat_templates_apply(
? common_chat_templates_apply_jinja(tmpls, inputs)
: common_chat_templates_apply_legacy(tmpls, inputs);
}
+
+std::map common_chat_templates_get_caps(const common_chat_templates * chat_templates) {
+ GGML_ASSERT(chat_templates != nullptr);
+ GGML_ASSERT(chat_templates->template_default != nullptr);
+ return chat_templates->template_default->caps.to_map();
+}
diff --git a/common/chat.h b/common/chat.h
index 8bd4a325ff..24aa4aab5c 100644
--- a/common/chat.h
+++ b/common/chat.h
@@ -10,6 +10,8 @@
#include
#include