diff --git a/.devops/vulkan.Dockerfile b/.devops/vulkan.Dockerfile
index ebf23ba5cf..fd7195c5be 100644
--- a/.devops/vulkan.Dockerfile
+++ b/.devops/vulkan.Dockerfile
@@ -50,6 +50,7 @@ WORKDIR /app
RUN apt-get update \
&& apt-get install -y \
+ build-essential \
git \
python3 \
python3-pip \
diff --git a/.github/workflows/build-riscv-native.yml b/.github/workflows/build-riscv-native.yml
deleted file mode 100644
index a3a0b0d663..0000000000
--- a/.github/workflows/build-riscv-native.yml
+++ /dev/null
@@ -1,120 +0,0 @@
-name: Build on RISCV Linux Machine by Cloud-V
-on:
- pull_request:
- workflow_dispatch:
- workflow_call:
-
-jobs:
- debian-13-riscv64-native: # Bianbu 2.2
- runs-on: [self-hosted, RISCV64]
-
- steps:
- - name: Install prerequisites
- run: |
- sudo apt-get update || true
- sudo apt-get install -y libatomic1
- - uses: actions/checkout@v4
- - name: Setup Riscv
- run: |
- sudo apt-get update || true
- sudo apt-get install -y --no-install-recommends \
- build-essential \
- gcc-14-riscv64-linux-gnu \
- g++-14-riscv64-linux-gnu \
- ccache \
- cmake
-
- - name: Setup ccache
- run: |
- mkdir -p $HOME/.ccache
- ccache -M 5G -d $HOME/.ccache
- export CCACHE_LOGFILE=/home/runneruser/ccache_debug/ccache.log
- export CCACHE_DEBUGDIR="/home/runneruser/ccache_debug"
- echo "$GITHUB_WORKSPACE"
- echo "CCACHE_LOGFILE=$CCACHE_LOGFILE" >> $GITHUB_ENV
- echo "CCACHE_DEBUGDIR=$CCACHE_DEBUGDIR" >> $GITHUB_ENV
- echo "CCACHE_BASEDIR=$GITHUB_WORKSPACE" >> $GITHUB_ENV
- echo "CCACHE_DIR=$HOME/.ccache" >> $GITHUB_ENV
-
- - name: Build
- run: |
- cmake -B build \
- -DLLAMA_CURL=OFF \
- -DCMAKE_BUILD_TYPE=Release \
- -DGGML_OPENMP=OFF \
- -DLLAMA_BUILD_EXAMPLES=ON \
- -DLLAMA_BUILD_TOOLS=ON \
- -DLLAMA_BUILD_TESTS=OFF \
- -DCMAKE_SYSTEM_NAME=Linux \
- -DCMAKE_SYSTEM_PROCESSOR=riscv64 \
- -DCMAKE_C_COMPILER=riscv64-linux-gnu-gcc-14 \
- -DCMAKE_CXX_COMPILER=riscv64-linux-gnu-g++-14 \
- -DCMAKE_C_COMPILER_LAUNCHER=ccache \
- -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
- -DCMAKE_POSITION_INDEPENDENT_CODE=ON \
- -DCMAKE_FIND_ROOT_PATH=/usr/lib/riscv64-linux-gnu \
- -DCMAKE_FIND_ROOT_PATH_MODE_PROGRAM=NEVER \
- -DCMAKE_FIND_ROOT_PATH_MODE_LIBRARY=ONLY \
- -DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=BOTH
-
- cmake --build build --config Release -j $(nproc)
-
- # debian-13-riscv64-spacemit-ime-native: # Bianbu 2.2
- # runs-on: [self-hosted, RISCV64]
-
- # steps:
- # - name: Install prerequisites
- # run: |
- # sudo apt-get update || true
- # sudo apt-get install -y libatomic1
- # - uses: actions/checkout@v4
- # - name: Setup Riscv
- # run: |
- # sudo apt-get update || true
- # sudo apt-get install -y --no-install-recommends \
- # build-essential \
- # gcc-14-riscv64-linux-gnu \
- # g++-14-riscv64-linux-gnu \
- # ccache \
- # cmake
- # sudo apt-get upgrade binutils -y
-
- # - name: Setup ccache
- # run: |
- # mkdir -p $HOME/.ccache
- # ccache -M 5G -d $HOME/.ccache
- # export CCACHE_LOGFILE=/home/runneruser/ccache_debug/ccache.log
- # export CCACHE_DEBUGDIR="/home/runneruser/ccache_debug"
- # echo "$GITHUB_WORKSPACE"
- # echo "CCACHE_LOGFILE=$CCACHE_LOGFILE" >> $GITHUB_ENV
- # echo "CCACHE_DEBUGDIR=$CCACHE_DEBUGDIR" >> $GITHUB_ENV
- # echo "CCACHE_BASEDIR=$GITHUB_WORKSPACE" >> $GITHUB_ENV
- # echo "CCACHE_DIR=$HOME/.ccache" >> $GITHUB_ENV
-
- # - name: Build
- # run: |
- # cmake -B build \
- # -DLLAMA_CURL=OFF \
- # -DCMAKE_BUILD_TYPE=Release \
- # -DGGML_OPENMP=OFF \
- # -DLLAMA_BUILD_EXAMPLES=ON \
- # -DLLAMA_BUILD_TOOLS=ON \
- # -DLLAMA_BUILD_TESTS=OFF \
- # -DCMAKE_SYSTEM_NAME=Linux \
- # -DCMAKE_SYSTEM_PROCESSOR=riscv64 \
- # -DCMAKE_C_COMPILER=riscv64-linux-gnu-gcc-14 \
- # -DCMAKE_CXX_COMPILER=riscv64-linux-gnu-g++-14 \
- # -DCMAKE_C_COMPILER_LAUNCHER=ccache \
- # -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
- # -DCMAKE_POSITION_INDEPENDENT_CODE=ON \
- # -DCMAKE_FIND_ROOT_PATH=/usr/lib/riscv64-linux-gnu \
- # -DCMAKE_FIND_ROOT_PATH_MODE_PROGRAM=NEVER \
- # -DCMAKE_FIND_ROOT_PATH_MODE_LIBRARY=ONLY \
- # -DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=BOTH \
- # -DGGML_RVV=ON \
- # -DGGML_RV_ZFH=ON \
- # -DGGML_RV_ZICBOP=ON \
- # -DGGML_CPU_RISCV64_SPACEMIT=ON \
- # -DRISCV64_SPACEMIT_IME_SPEC=RISCV64_SPACEMIT_IME1
-
- # cmake --build build --config Release -j $(nproc)
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index eee42759fc..49e836d9b2 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -547,6 +547,46 @@ jobs:
# This is using llvmpipe and runs slower than other backends
ctest -L main --verbose --timeout 3600
+ ubuntu-24-wasm-webgpu:
+ runs-on: ubuntu-24.04
+
+ steps:
+ - name: Clone
+ id: checkout
+ uses: actions/checkout@v4
+
+ - name: ccache
+ uses: ggml-org/ccache-action@v1.2.16
+ with:
+ key: ubuntu-latest-wasm-webgpu
+ evict-old-files: 1d
+
+ - name: Install Emscripten
+ run: |
+ git clone https://github.com/emscripten-core/emsdk.git
+ cd emsdk
+ ./emsdk install latest
+ ./emsdk activate latest
+
+ - name: Fetch emdawnwebgpu
+ run: |
+ DAWN_TAG="v20251027.212519"
+ EMDAWN_PKG="emdawnwebgpu_pkg-${DAWN_TAG}.zip"
+ echo "Downloading ${EMDAWN_PKG}"
+ curl -L -o emdawn.zip \
+ "https://github.com/google/dawn/releases/download/${DAWN_TAG}/${EMDAWN_PKG}"
+ unzip emdawn.zip
+
+ - name: Build WASM WebGPU
+ run: |
+ source emsdk/emsdk_env.sh
+ emcmake cmake -B build-wasm \
+ -DGGML_WEBGPU=ON \
+ -DLLAMA_CURL=OFF \
+ -DEMDAWNWEBGPU_DIR=emdawnwebgpu_pkg
+
+ cmake --build build-wasm --target test-backend-ops -j $(nproc)
+
ubuntu-22-cmake-hip:
runs-on: ubuntu-22.04
container: rocm/dev-ubuntu-22.04:6.1.2
@@ -1642,6 +1682,337 @@ jobs:
run: |
GG_BUILD_KLEIDIAI=1 GG_BUILD_EXTRA_TESTS_0=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
+ ubuntu-cpu-cmake-riscv64-native:
+ runs-on: RISCV64
+
+ steps:
+ - name: Install dependencies
+ run: |
+ sudo apt-get update
+
+ # Install necessary packages
+ sudo apt-get install -y libatomic1 libtsan2 gcc-14 g++-14 rustup cmake build-essential libssl-dev wget ccache
+
+ # Set gcc-14 and g++-14 as the default compilers
+ sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-14 100
+ sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-14 100
+ sudo ln -sf /usr/bin/gcc-14 /usr/bin/gcc
+ sudo ln -sf /usr/bin/g++-14 /usr/bin/g++
+
+ # Install Rust stable version
+ rustup install stable
+ rustup default stable
+
+ - name: Clone
+ id: checkout
+ uses: actions/checkout@v4
+
+ - name: Check environment
+ run: |
+ uname -a
+ gcc --version
+ g++ --version
+ ldd --version
+ cmake --version
+ rustc --version
+
+ - name: Setup ccache
+ run: |
+ # Set unique cache directory for this job
+ export CCACHE_DIR="$HOME/.ccache/cpu-cmake-rv64-native"
+ mkdir -p "$CCACHE_DIR"
+
+ # Configure ccache for optimal performance
+ ccache --set-config=max_size=5G
+ ccache --set-config=compression=true
+ ccache --set-config=compression_level=6
+ ccache --set-config=cache_dir="$CCACHE_DIR"
+
+ # Enable more aggressive caching
+ ccache --set-config=sloppiness=file_macro,time_macros,include_file_mtime,include_file_ctime
+ ccache --set-config=hash_dir=false
+
+ # Export for subsequent steps
+ echo "CCACHE_DIR=$CCACHE_DIR" >> $GITHUB_ENV
+ echo "PATH=/usr/lib/ccache:$PATH" >> $GITHUB_ENV
+
+ - name: Build
+ id: cmake_build
+ run: |
+ cmake -B build \
+ -DLLAMA_CURL=OFF \
+ -DLLAMA_OPENSSL=ON \
+ -DCMAKE_BUILD_TYPE=Release \
+ -DGGML_OPENMP=OFF \
+ -DLLAMA_BUILD_EXAMPLES=ON \
+ -DLLAMA_BUILD_TOOLS=ON \
+ -DLLAMA_BUILD_TESTS=ON \
+ -DCMAKE_C_COMPILER_LAUNCHER=ccache \
+ -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
+ -DGGML_RPC=ON \
+ -DCMAKE_C_COMPILER=riscv64-linux-gnu-gcc-14 \
+ -DCMAKE_CXX_COMPILER=riscv64-linux-gnu-g++-14
+
+ cmake --build build --config Release -j $(nproc)
+
+ - name: Test
+ id: cmake_test
+ run: |
+ cd build
+ ctest -L 'main|curl' --verbose --timeout 900
+
+ - name: Test llama2c conversion
+ id: llama2c_test
+ run: |
+ cd build
+ echo "Fetch tokenizer"
+ wget https://huggingface.co/karpathy/tinyllamas/resolve/main/stories260K/tok512.bin
+ echo "Fetch llama2c model"
+ wget https://huggingface.co/karpathy/tinyllamas/resolve/main/stories260K/stories260K.bin
+ ./bin/llama-convert-llama2c-to-ggml --copy-vocab-from-model ./tok512.bin --llama2c-model stories260K.bin --llama2c-output-model stories260K.gguf
+ ./bin/llama-cli -m stories260K.gguf -p "One day, Lily met a Shoggoth" -n 500 -c 256
+
+ ubuntu-cmake-sanitizer-riscv64-native:
+ runs-on: RISCV64
+
+ continue-on-error: true
+
+ strategy:
+ matrix:
+ sanitizer: [ADDRESS, THREAD, UNDEFINED]
+ build_type: [Debug]
+
+ steps:
+ - name: Install dependencies
+ run: |
+ sudo apt-get update
+
+ # Install necessary packages
+ sudo apt-get install -y libatomic1 libtsan2 gcc-14 g++-14 rustup cmake build-essential wget ccache
+
+ # Set gcc-14 and g++-14 as the default compilers
+ sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-14 100
+ sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-14 100
+ sudo ln -sf /usr/bin/gcc-14 /usr/bin/gcc
+ sudo ln -sf /usr/bin/g++-14 /usr/bin/g++
+
+ # Install Rust stable version
+ rustup install stable
+ rustup default stable
+
+ - name: GCC version check
+ run: |
+ gcc --version
+ g++ --version
+
+ - name: Clone
+ id: checkout
+ uses: actions/checkout@v4
+
+ - name: Setup ccache
+ run: |
+ # Unique cache directory per matrix combination
+ export CCACHE_DIR="$HOME/.ccache/sanitizer-${{ matrix.sanitizer }}-${{ matrix.build_type }}"
+ mkdir -p "$CCACHE_DIR"
+
+ # Configure ccache
+ ccache --set-config=max_size=5G
+ ccache --set-config=compression=true
+ ccache --set-config=compression_level=6
+ ccache --set-config=cache_dir="$CCACHE_DIR"
+ ccache --set-config=sloppiness=file_macro,time_macros,include_file_mtime,include_file_ctime
+ ccache --set-config=hash_dir=false
+
+ # Export for subsequent steps
+ echo "CCACHE_DIR=$CCACHE_DIR" >> $GITHUB_ENV
+ echo "PATH=/usr/lib/ccache:$PATH" >> $GITHUB_ENV
+
+ - name: Build
+ id: cmake_build
+ if: ${{ matrix.sanitizer != 'THREAD' }}
+ run: |
+ cmake -B build \
+ -DLLAMA_CURL=OFF \
+ -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \
+ -DGGML_OPENMP=ON \
+ -DLLAMA_BUILD_EXAMPLES=ON \
+ -DLLAMA_BUILD_TOOLS=ON \
+          -DLLAMA_BUILD_TESTS=ON \
+ -DCMAKE_C_COMPILER_LAUNCHER=ccache \
+ -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
+ -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON \
+ -DCMAKE_C_COMPILER=riscv64-linux-gnu-gcc-14 \
+ -DCMAKE_CXX_COMPILER=riscv64-linux-gnu-g++-14
+
+ cmake --build build --config ${{ matrix.build_type }} -j $(nproc)
+
+ - name: Build (no OpenMP)
+ id: cmake_build_no_openmp
+ if: ${{ matrix.sanitizer == 'THREAD' }}
+ run: |
+ cmake -B build \
+ -DLLAMA_CURL=OFF \
+ -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \
+ -DGGML_OPENMP=OFF \
+ -DLLAMA_BUILD_EXAMPLES=ON \
+ -DLLAMA_BUILD_TOOLS=ON \
+          -DLLAMA_BUILD_TESTS=ON \
+ -DCMAKE_C_COMPILER_LAUNCHER=ccache \
+ -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
+ -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON \
+ -DCMAKE_C_COMPILER=riscv64-linux-gnu-gcc-14 \
+ -DCMAKE_CXX_COMPILER=riscv64-linux-gnu-g++-14
+
+ cmake --build build --config ${{ matrix.build_type }} -j $(nproc)
+
+ - name: Test
+ id: cmake_test
+ run: |
+ cd build
+ ctest -L main --verbose --timeout 900
+
+
+ ubuntu-llguidance-riscv64-native:
+ runs-on: RISCV64
+ steps:
+ - name: Install dependencies
+ run: |
+ sudo apt-get update
+
+ # Install necessary packages
+ sudo apt-get install -y libatomic1 libtsan2 gcc-14 g++-14 rustup cmake build-essential wget ccache
+
+ # Set gcc-14 and g++-14 as the default compilers
+ sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-14 100
+ sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-14 100
+ sudo ln -sf /usr/bin/gcc-14 /usr/bin/gcc
+ sudo ln -sf /usr/bin/g++-14 /usr/bin/g++
+
+ # Install Rust stable version
+ rustup install stable
+ rustup default stable
+
+ - name: GCC version check
+ run: |
+ gcc --version
+ g++ --version
+
+ - name: Clone
+ id: checkout
+ uses: actions/checkout@v4
+
+ - name: Setup ccache
+ run: |
+ export CCACHE_DIR="$HOME/.ccache/llguidance-riscv64"
+ mkdir -p "$CCACHE_DIR"
+
+ ccache --set-config=max_size=5G
+ ccache --set-config=compression=true
+ ccache --set-config=compression_level=6
+ ccache --set-config=cache_dir="$CCACHE_DIR"
+ ccache --set-config=sloppiness=file_macro,time_macros,include_file_mtime,include_file_ctime
+ ccache --set-config=hash_dir=false
+
+ echo "CCACHE_DIR=$CCACHE_DIR" >> $GITHUB_ENV
+ echo "PATH=/usr/lib/ccache:$PATH" >> $GITHUB_ENV
+
+ - name: Build
+ id: cmake_build
+ run: |
+ cmake -B build \
+ -DLLAMA_CURL=OFF \
+ -DCMAKE_BUILD_TYPE=Release \
+ -DGGML_OPENMP=OFF \
+ -DLLAMA_BUILD_EXAMPLES=ON \
+ -DLLAMA_BUILD_TOOLS=ON \
+          -DLLAMA_BUILD_TESTS=ON \
+ -DCMAKE_C_COMPILER_LAUNCHER=ccache \
+ -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
+ -DLLAMA_LLGUIDANCE=ON \
+ -DCMAKE_C_COMPILER=riscv64-linux-gnu-gcc-14 \
+ -DCMAKE_CXX_COMPILER=riscv64-linux-gnu-g++-14
+
+ cmake --build build --config Release -j $(nproc)
+
+ - name: Test
+ id: cmake_test
+ run: |
+ cd build
+ ctest -L main --verbose --timeout 900
+
+
+ ubuntu-cmake-rpc-riscv64-native:
+ runs-on: RISCV64
+
+ continue-on-error: true
+
+ steps:
+ - name: Install dependencies
+ run: |
+ sudo apt-get update
+
+ # Install necessary packages
+ sudo apt-get install -y libatomic1 libtsan2 gcc-14 g++-14 rustup cmake build-essential libssl-dev wget ccache
+
+ # Set gcc-14 and g++-14 as the default compilers
+ sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-14 100
+ sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-14 100
+ sudo ln -sf /usr/bin/gcc-14 /usr/bin/gcc
+ sudo ln -sf /usr/bin/g++-14 /usr/bin/g++
+
+ # Install Rust stable version
+ rustup install stable
+ rustup default stable
+
+ - name: GCC version check
+ run: |
+ gcc --version
+ g++ --version
+
+ - name: Clone
+ id: checkout
+ uses: actions/checkout@v4
+
+ - name: Setup ccache
+ run: |
+ export CCACHE_DIR="$HOME/.ccache/rpc-riscv64"
+ mkdir -p "$CCACHE_DIR"
+
+ ccache --set-config=max_size=5G
+ ccache --set-config=compression=true
+ ccache --set-config=compression_level=6
+ ccache --set-config=cache_dir="$CCACHE_DIR"
+ ccache --set-config=sloppiness=file_macro,time_macros,include_file_mtime,include_file_ctime
+ ccache --set-config=hash_dir=false
+
+ echo "CCACHE_DIR=$CCACHE_DIR" >> $GITHUB_ENV
+ echo "PATH=/usr/lib/ccache:$PATH" >> $GITHUB_ENV
+
+ - name: Build
+ id: cmake_build
+ run: |
+ cmake -B build \
+ -DLLAMA_CURL=OFF \
+ -DLLAMA_OPENSSL=ON \
+ -DCMAKE_BUILD_TYPE=Release \
+ -DGGML_OPENMP=OFF \
+ -DLLAMA_BUILD_EXAMPLES=ON \
+ -DLLAMA_BUILD_TOOLS=ON \
+ -DLLAMA_BUILD_TESTS=ON \
+ -DCMAKE_C_COMPILER_LAUNCHER=ccache \
+ -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
+ -DCMAKE_C_COMPILER=riscv64-linux-gnu-gcc-14 \
+ -DCMAKE_CXX_COMPILER=riscv64-linux-gnu-g++-14 \
+ -DGGML_RPC=ON
+
+ cmake --build build --config Release -j $(nproc)
+
+ - name: Test
+ id: cmake_test
+ run: |
+ cd build
+ ctest -L main --verbose
+
ggml-ci-arm64-graviton4-kleidiai:
runs-on: ah-ubuntu_22_04-c8g_8x
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 0d5739c24b..da1363a798 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -66,14 +66,21 @@ jobs:
id: pack_artifacts
run: |
cp LICENSE ./build/bin/
- zip -r llama-${{ steps.tag.outputs.name }}-bin-macos-arm64.zip ./build/bin/*
+ zip -y -r llama-${{ steps.tag.outputs.name }}-bin-macos-arm64.zip ./build/bin/*
+ tar -czvf llama-${{ steps.tag.outputs.name }}-bin-macos-arm64.tar.gz -C ./build/bin .
- - name: Upload artifacts
+ - name: Upload artifacts (zip)
uses: actions/upload-artifact@v4
with:
path: llama-${{ steps.tag.outputs.name }}-bin-macos-arm64.zip
name: llama-bin-macos-arm64.zip
+ - name: Upload artifacts (tar)
+ uses: actions/upload-artifact@v4
+ with:
+ path: llama-${{ steps.tag.outputs.name }}-bin-macos-arm64.tar.gz
+ name: llama-bin-macos-arm64.tar.gz
+
macOS-x64:
runs-on: macos-15-intel
@@ -120,14 +127,21 @@ jobs:
id: pack_artifacts
run: |
cp LICENSE ./build/bin/
- zip -r llama-${{ steps.tag.outputs.name }}-bin-macos-x64.zip ./build/bin/*
+ zip -y -r llama-${{ steps.tag.outputs.name }}-bin-macos-x64.zip ./build/bin/*
+ tar -czvf llama-${{ steps.tag.outputs.name }}-bin-macos-x64.tar.gz -C ./build/bin .
- - name: Upload artifacts
+ - name: Upload artifacts (zip)
uses: actions/upload-artifact@v4
with:
path: llama-${{ steps.tag.outputs.name }}-bin-macos-x64.zip
name: llama-bin-macos-x64.zip
+ - name: Upload artifacts (tar)
+ uses: actions/upload-artifact@v4
+ with:
+ path: llama-${{ steps.tag.outputs.name }}-bin-macos-x64.tar.gz
+ name: llama-bin-macos-x64.tar.gz
+
ubuntu-22-cpu:
strategy:
matrix:
@@ -182,14 +196,21 @@ jobs:
id: pack_artifacts
run: |
cp LICENSE ./build/bin/
- zip -r llama-${{ steps.tag.outputs.name }}-bin-ubuntu-${{ matrix.build }}.zip ./build/bin/*
+ zip -y -r llama-${{ steps.tag.outputs.name }}-bin-ubuntu-${{ matrix.build }}.zip ./build/bin/*
+ tar -czvf llama-${{ steps.tag.outputs.name }}-bin-ubuntu-${{ matrix.build }}.tar.gz -C ./build/bin .
- - name: Upload artifacts
+ - name: Upload artifacts (zip)
uses: actions/upload-artifact@v4
with:
path: llama-${{ steps.tag.outputs.name }}-bin-ubuntu-${{ matrix.build }}.zip
name: llama-bin-ubuntu-${{ matrix.build }}.zip
+ - name: Upload artifacts (tar)
+ uses: actions/upload-artifact@v4
+ with:
+ path: llama-${{ steps.tag.outputs.name }}-bin-ubuntu-${{ matrix.build }}.tar.gz
+ name: llama-bin-ubuntu-${{ matrix.build }}.tar.gz
+
ubuntu-22-vulkan:
runs-on: ubuntu-22.04
@@ -235,14 +256,21 @@ jobs:
id: pack_artifacts
run: |
cp LICENSE ./build/bin/
- zip -r llama-${{ steps.tag.outputs.name }}-bin-ubuntu-vulkan-x64.zip ./build/bin/*
+ zip -y -r llama-${{ steps.tag.outputs.name }}-bin-ubuntu-vulkan-x64.zip ./build/bin/*
+ tar -czvf llama-${{ steps.tag.outputs.name }}-bin-ubuntu-vulkan-x64.tar.gz -C ./build/bin .
- - name: Upload artifacts
+ - name: Upload artifacts (zip)
uses: actions/upload-artifact@v4
with:
path: llama-${{ steps.tag.outputs.name }}-bin-ubuntu-vulkan-x64.zip
name: llama-bin-ubuntu-vulkan-x64.zip
+ - name: Upload artifacts (tar)
+ uses: actions/upload-artifact@v4
+ with:
+ path: llama-${{ steps.tag.outputs.name }}-bin-ubuntu-vulkan-x64.tar.gz
+ name: llama-bin-ubuntu-vulkan-x64.tar.gz
+
windows-cpu:
runs-on: windows-2025
@@ -298,7 +326,7 @@ jobs:
run: |
Copy-Item $env:CURL_PATH\bin\libcurl-${{ matrix.arch }}.dll .\build\bin\Release\
Copy-Item "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Redist\MSVC\14.44.35112\debug_nonredist\${{ matrix.arch }}\Microsoft.VC143.OpenMP.LLVM\libomp140.${{ matrix.arch == 'x64' && 'x86_64' || 'aarch64' }}.dll" .\build\bin\Release\
- 7z a llama-bin-win-cpu-${{ matrix.arch }}.zip .\build\bin\Release\*
+ 7z a -snl llama-bin-win-cpu-${{ matrix.arch }}.zip .\build\bin\Release\*
- name: Upload artifacts
uses: actions/upload-artifact@v4
@@ -380,7 +408,7 @@ jobs:
- name: Pack artifacts
id: pack_artifacts
run: |
- 7z a llama-bin-win-${{ matrix.backend }}-${{ matrix.arch }}.zip .\build\bin\Release\${{ matrix.target }}.dll
+ 7z a -snl llama-bin-win-${{ matrix.backend }}-${{ matrix.arch }}.zip .\build\bin\Release\${{ matrix.target }}.dll
- name: Upload artifacts
uses: actions/upload-artifact@v4
@@ -434,7 +462,7 @@ jobs:
- name: Pack artifacts
id: pack_artifacts
run: |
- 7z a llama-bin-win-cuda-${{ matrix.cuda }}-x64.zip .\build\bin\Release\ggml-cuda.dll
+ 7z a -snl llama-bin-win-cuda-${{ matrix.cuda }}-x64.zip .\build\bin\Release\ggml-cuda.dll
- name: Upload artifacts
uses: actions/upload-artifact@v4
@@ -526,7 +554,7 @@ jobs:
cp "${{ env.ONEAPI_ROOT }}/umf/latest/bin/umf.dll" ./build/bin
echo "cp oneAPI running time dll files to ./build/bin done"
- 7z a llama-bin-win-sycl-x64.zip ./build/bin/*
+ 7z a -snl llama-bin-win-sycl-x64.zip ./build/bin/*
- name: Upload the release package
uses: actions/upload-artifact@v4
@@ -632,7 +660,7 @@ jobs:
- name: Pack artifacts
id: pack_artifacts
run: |
- 7z a llama-bin-win-hip-${{ matrix.name }}-x64.zip .\build\bin\*
+ 7z a -snl llama-bin-win-hip-${{ matrix.name }}-x64.zip .\build\bin\*
- name: Upload artifacts
uses: actions/upload-artifact@v4
@@ -685,58 +713,20 @@ jobs:
- name: Pack artifacts
id: pack_artifacts
run: |
- zip --symlinks -r llama-${{ steps.tag.outputs.name }}-xcframework.zip build-apple/llama.xcframework
+ zip -y -r llama-${{ steps.tag.outputs.name }}-xcframework.zip build-apple/llama.xcframework
+ tar -czvf llama-${{ steps.tag.outputs.name }}-xcframework.tar.gz -C build-apple llama.xcframework
- - name: Upload artifacts
+ - name: Upload artifacts (zip)
uses: actions/upload-artifact@v4
with:
path: llama-${{ steps.tag.outputs.name }}-xcframework.zip
- name: llama-${{ steps.tag.outputs.name }}-xcframework
+ name: llama-${{ steps.tag.outputs.name }}-xcframework.zip
- openEuler-cann:
- strategy:
- matrix:
- arch: [x86, aarch64]
- chip_type: ['910b', '310p']
- build: ['Release']
- runs-on: ${{ matrix.arch == 'aarch64' && 'ubuntu-24.04-arm' || 'ubuntu-24.04' }}
- container: ascendai/cann:${{ matrix.chip_type == '910b' && '8.3.rc1.alpha001-910b-openeuler22.03-py3.11' || '8.2.rc1-310p-openeuler22.03-py3.11' }}
- steps:
- - name: Checkout
- uses: actions/checkout@v4
- with:
- fetch-depth: 0
-
- - name: Dependencies
- run: |
- yum update -y
- yum install -y git gcc gcc-c++ make cmake libcurl-devel
- git config --global --add safe.directory "$GITHUB_WORKSPACE"
-
- - name: Build
- run: |
- export LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:${ASCEND_TOOLKIT_HOME}/$(uname -m)-linux/devlib/:${LD_LIBRARY_PATH}
-
- cmake -S . -B build \
- -DCMAKE_BUILD_TYPE=${{ matrix.build }} \
- -DGGML_CANN=on \
- -DSOC_TYPE=ascend${{ matrix.chip_type }}
- cmake --build build -j $(nproc)
-
- - name: Determine tag name
- id: tag
- uses: ./.github/actions/get-tag-name
-
- - name: Pack artifacts
- run: |
- cp LICENSE ./build/bin/
- zip -r llama-${{ steps.tag.outputs.name }}-bin-${{ matrix.chip_type }}-openEuler-${{ matrix.arch }}.zip ./build/bin/*
-
- - name: Upload artifacts
+ - name: Upload artifacts (tar)
uses: actions/upload-artifact@v4
with:
- path: llama-${{ steps.tag.outputs.name }}-bin-${{ matrix.chip_type }}-openEuler-${{ matrix.arch }}.zip
- name: llama-bin-${{ matrix.chip_type }}-openEuler-${{ matrix.arch }}.zip
+ path: llama-${{ steps.tag.outputs.name }}-xcframework.tar.gz
+ name: llama-${{ steps.tag.outputs.name }}-xcframework.tar.gz
release:
if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
@@ -759,7 +749,6 @@ jobs:
- macOS-arm64
- macOS-x64
- ios-xcode-build
- - openEuler-cann
steps:
- name: Clone
@@ -814,6 +803,7 @@ jobs:
echo "Moving other artifacts..."
mv -v artifact/*.zip release
+ mv -v artifact/*.tar.gz release
- name: Create release
id: create_release
@@ -822,6 +812,33 @@ jobs:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
with:
tag_name: ${{ steps.tag.outputs.name }}
+ body: |
+ > [!WARNING]
+ > **Release Format Update**: Linux releases will soon use .tar.gz archives instead of .zip. Please make the necessary changes to your deployment scripts.
+
+
+
+ ${{ github.event.head_commit.message }}
+
+
+
+ **macOS/iOS:**
+ - [macOS Apple Silicon (arm64)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-macos-arm64.tar.gz)
+ - [macOS Intel (x64)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-macos-x64.tar.gz)
+ - [iOS XCFramework](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-xcframework.tar.gz)
+
+ **Linux:**
+ - [Ubuntu x64 (CPU)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-ubuntu-x64.tar.gz)
+ - [Ubuntu x64 (Vulkan)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-ubuntu-vulkan-x64.tar.gz)
+ - [Ubuntu s390x (CPU)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-ubuntu-s390x.tar.gz)
+
+ **Windows:**
+ - [Windows x64 (CPU)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-win-cpu-x64.zip)
+ - [Windows arm64 (CPU)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-win-cpu-arm64.zip)
+ - [Windows x64 (CUDA)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-win-cuda-12.4-x64.zip)
+ - [Windows x64 (Vulkan)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-win-vulkan-x64.zip)
+ - [Windows x64 (SYCL)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-win-sycl-x64.zip)
+ - [Windows x64 (HIP)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-win-hip-radeon-x64.zip)
- name: Upload release
id: upload_release
@@ -833,7 +850,7 @@ jobs:
const fs = require('fs');
const release_id = '${{ steps.create_release.outputs.id }}';
for (let file of await fs.readdirSync('./release')) {
- if (path.extname(file) === '.zip') {
+ if (path.extname(file) === '.zip' || file.endsWith('.tar.gz')) {
console.log('uploadReleaseAsset', file);
await github.repos.uploadReleaseAsset({
owner: context.repo.owner,
diff --git a/.github/workflows/winget.yml b/.github/workflows/winget.yml
index 5c28615595..17b55762a9 100644
--- a/.github/workflows/winget.yml
+++ b/.github/workflows/winget.yml
@@ -9,6 +9,7 @@ jobs:
update:
name: Update Winget Package
runs-on: ubuntu-latest
+    if: ${{ github.repository_owner == 'ggml-org' }}
steps:
- name: Install cargo binstall
diff --git a/.gitignore b/.gitignore
index 8575a141c4..428f084110 100644
--- a/.gitignore
+++ b/.gitignore
@@ -134,3 +134,5 @@ poetry.toml
# IDE
/*.code-workspace
/.windsurf/
+# emscripten
+a.out.*
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 3278c4a72c..c231ec0e3f 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -33,10 +33,24 @@ endif()
option(LLAMA_USE_SYSTEM_GGML "Use system libggml" OFF)
+option(LLAMA_WASM_MEM64 "llama: use 64-bit memory in WASM builds" ON)
+
if (EMSCRIPTEN)
set(BUILD_SHARED_LIBS_DEFAULT OFF)
- option(LLAMA_WASM_SINGLE_FILE "llama: embed WASM inside the generated llama.js" ON)
+ # Use 64-bit memory to support backend_get_memory queries
+ # TODO: analyze performance impact, see https://spidermonkey.dev/blog/2025/01/15/is-memory64-actually-worth-using
+ if (LLAMA_WASM_MEM64)
+ add_compile_options("-sMEMORY64=1")
+ add_link_options("-sMEMORY64=1")
+ endif()
+ add_link_options("-sALLOW_MEMORY_GROWTH=1")
+
+ option(LLAMA_WASM_SINGLE_FILE "llama: embed WASM inside the generated llama.js" OFF)
+ option(LLAMA_BUILD_HTML "llama: build HTML file" ON)
+ if (LLAMA_BUILD_HTML)
+ set(CMAKE_EXECUTABLE_SUFFIX ".html")
+ endif()
else()
if (MINGW)
set(BUILD_SHARED_LIBS_DEFAULT OFF)
@@ -58,6 +72,12 @@ if (MSVC)
add_compile_options("$<$:/bigobj>")
endif()
+if (LLAMA_STANDALONE)
+ # enable parallel builds for msbuild
+ list(APPEND CMAKE_VS_GLOBALS UseMultiToolTask=true)
+ list(APPEND CMAKE_VS_GLOBALS EnforceProcessCountAcrossBuilds=true)
+endif()
+
if (CMAKE_SYSTEM_NAME STREQUAL "iOS")
set(LLAMA_TOOLS_INSTALL_DEFAULT OFF)
else()
@@ -179,11 +199,6 @@ if (NOT TARGET ggml AND NOT LLAMA_USE_SYSTEM_GGML)
# ... otherwise assume ggml is added by a parent CMakeLists.txt
endif()
-if (MINGW)
- # Target Windows 8 for PrefetchVirtualMemory
- add_compile_definitions(_WIN32_WINNT=${GGML_WIN_VER})
-endif()
-
#
# build the library
#
diff --git a/CODEOWNERS b/CODEOWNERS
index 6ef6c0489f..450191b734 100644
--- a/CODEOWNERS
+++ b/CODEOWNERS
@@ -7,16 +7,19 @@
/ci/ @ggerganov
/cmake/ @ggerganov
/common/CMakeLists.txt @ggerganov
-/common/arg.* @ggerganov @ericcurtin
+/common/arg.* @ggerganov
/common/base64.hpp.* @ggerganov
/common/build-info.* @ggerganov
+/common/chat-peg-parser.* @aldehir
/common/common.* @ggerganov
/common/console.* @ggerganov
/common/http.* @angt
/common/llguidance.* @ggerganov
/common/log.* @ggerganov
+/common/peg-parser.* @aldehir
/common/sampling.* @ggerganov
/common/speculative.* @ggerganov
+/common/unicode.* @aldehir
/convert_*.py @CISC
/examples/batched.swift/ @ggerganov
/examples/batched/ @ggerganov
@@ -87,8 +90,7 @@
/tools/perplexity/ @ggerganov
/tools/quantize/ @ggerganov
/tools/rpc/ @rgerganov
-/tools/run/ @ericcurtin
-/tools/server/* @ngxson @ggerganov @ericcurtin # no subdir
+/tools/server/* @ngxson @ggerganov # no subdir
/tools/server/webui/ @allozaur
/tools/tokenize/ @ggerganov
/tools/tts/ @ggerganov
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index b808fa31ea..875eb766f3 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -19,6 +19,7 @@ The project differentiates between 3 levels of contributors:
- If your PR becomes stale, don't hesitate to ping the maintainers in the comments
- Maintainers will rely on your insights and approval when making a final decision to approve and merge a PR
- Consider adding yourself to [CODEOWNERS](CODEOWNERS) to indicate your availability for reviewing related PRs
+- Using AI to generate PRs is permitted. However, you must (1) explicitly disclose how AI was used and (2) conduct a thorough manual review before publishing the PR. Note that trivial tab autocompletions do not require disclosure.
# Pull requests (for maintainers)
diff --git a/README.md b/README.md
index cff3bd4370..2e44ae7d0c 100644
--- a/README.md
+++ b/README.md
@@ -613,3 +613,4 @@ $ echo "source ~/.llama-completion.bash" >> ~/.bashrc
- [linenoise.cpp](./tools/run/linenoise.cpp/linenoise.cpp) - C++ library that provides readline-like line editing capabilities, used by `llama-run` - BSD 2-Clause License
- [curl](https://curl.se/) - Client-side URL transfer library, used by various tools/examples - [CURL License](https://curl.se/docs/copyright.html)
- [miniaudio.h](https://github.com/mackron/miniaudio) - Single-header audio format decoder, used by multimodal subsystem - Public domain
+- [subprocess.h](https://github.com/sheredom/subprocess.h) - Single-header process launching solution for C and C++ - Public domain
diff --git a/SECURITY.md b/SECURITY.md
index 9749e95b71..9c86ae91b5 100644
--- a/SECURITY.md
+++ b/SECURITY.md
@@ -65,4 +65,6 @@ However, If you have discovered a security vulnerability in this project, please
Please disclose it as a private [security advisory](https://github.com/ggml-org/llama.cpp/security/advisories/new).
+Please note that using AI to identify vulnerabilities and generate reports is permitted. However, you must (1) explicitly disclose how AI was used and (2) conduct a thorough manual review before submitting the report.
+
A team of volunteers on a reasonable-effort basis maintains this project. As such, please give us at least 90 days to work on a fix before public exposure.
diff --git a/ci/run.sh b/ci/run.sh
index 3fec8e9110..83b2603e82 100755
--- a/ci/run.sh
+++ b/ci/run.sh
@@ -45,7 +45,7 @@ sd=`dirname $0`
cd $sd/../
SRC=`pwd`
-CMAKE_EXTRA="-DLLAMA_FATAL_WARNINGS=ON -DLLAMA_CURL=ON"
+CMAKE_EXTRA="-DLLAMA_FATAL_WARNINGS=${LLAMA_FATAL_WARNINGS:-ON} -DLLAMA_CURL=ON -DGGML_SCHED_NO_REALLOC=ON"
if [ ! -z ${GG_BUILD_METAL} ]; then
CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_METAL=ON"
@@ -428,10 +428,10 @@ function gg_run_qwen3_0_6b {
(time ./bin/llama-imatrix --model ${model_f16} -f ${wiki_test} -ngl 99 -c 1024 -b 512 --chunks 2 ) 2>&1 | tee -a $OUT/${ci}-imatrix.log
- (time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 10 -c 1024 -fa off ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
- (time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 10 -c 1024 -fa on ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
- (time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 99 -c 1024 -fa off ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
- (time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 99 -c 1024 -fa on ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
+ (time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 10 -c 1024 -fa off --no-op-offload) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
+ (time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 10 -c 1024 -fa on --no-op-offload) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
+ (time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 99 -c 1024 -fa off ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
+ (time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 99 -c 1024 -fa on ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
function check_ppl {
qnt="$1"
@@ -523,8 +523,8 @@ function gg_run_embd_bge_small {
./bin/llama-quantize ${model_f16} ${model_q8_0} q8_0
- (time ./bin/llama-embedding --model ${model_f16} -p "I believe the meaning of life is" -ngl 99 -c 0 ) 2>&1 | tee -a $OUT/${ci}-tg-f16.log
- (time ./bin/llama-embedding --model ${model_q8_0} -p "I believe the meaning of life is" -ngl 99 -c 0 ) 2>&1 | tee -a $OUT/${ci}-tg-q8_0.log
+ (time ./bin/llama-embedding --model ${model_f16} -p "I believe the meaning of life is" -ngl 99 -c 0 --no-op-offload) 2>&1 | tee -a $OUT/${ci}-tg-f16.log
+ (time ./bin/llama-embedding --model ${model_q8_0} -p "I believe the meaning of life is" -ngl 99 -c 0 --no-op-offload) 2>&1 | tee -a $OUT/${ci}-tg-q8_0.log
set +e
}
@@ -564,7 +564,7 @@ function gg_run_rerank_tiny {
model_f16="${path_models}/ggml-model-f16.gguf"
# for this model, the SEP token is ""
- (time ./bin/llama-embedding --model ${model_f16} -p "what is panda?\thi\nwhat is panda?\tit's a bear\nwhat is panda?\tThe giant panda (Ailuropoda melanoleuca), sometimes called a panda bear or simply panda, is a bear species endemic to China." -ngl 99 -c 0 --pooling rank --embd-normalize -1 --verbose-prompt) 2>&1 | tee -a $OUT/${ci}-rk-f16.log
+ (time ./bin/llama-embedding --model ${model_f16} -p "what is panda?\thi\nwhat is panda?\tit's a bear\nwhat is panda?\tThe giant panda (Ailuropoda melanoleuca), sometimes called a panda bear or simply panda, is a bear species endemic to China." -ngl 99 -c 0 --pooling rank --embd-normalize -1 --no-op-offload --verbose-prompt) 2>&1 | tee -a $OUT/${ci}-rk-f16.log
# sample output
# rerank score 0: 0.029
diff --git a/common/CMakeLists.txt b/common/CMakeLists.txt
index bb168e8358..377b26846b 100644
--- a/common/CMakeLists.txt
+++ b/common/CMakeLists.txt
@@ -52,6 +52,8 @@ add_library(${TARGET} STATIC
chat-parser.h
chat-parser-xml-toolcall.h
chat-parser-xml-toolcall.cpp
+ chat-peg-parser.cpp
+ chat-peg-parser.h
chat.cpp
chat.h
common.cpp
@@ -69,12 +71,16 @@ add_library(${TARGET} STATIC
log.h
ngram-cache.cpp
ngram-cache.h
+ peg-parser.cpp
+ peg-parser.h
regex-partial.cpp
regex-partial.h
sampling.cpp
sampling.h
speculative.cpp
speculative.h
+ unicode.cpp
+ unicode.h
)
if (BUILD_SHARED_LIBS)
diff --git a/common/arg.cpp b/common/arg.cpp
index dd787290d2..45c0d1a726 100644
--- a/common/arg.cpp
+++ b/common/arg.cpp
@@ -30,6 +30,7 @@
#include // for hardware_concurrency
#include
+#ifndef __EMSCRIPTEN__
#ifdef __linux__
#include
#elif defined(_WIN32)
@@ -41,6 +42,8 @@
#else
#include
#endif
+#endif
+
#define LLAMA_MAX_URL_LENGTH 2084 // Maximum URL Length in Chrome: 2083
using json = nlohmann::ordered_json;
@@ -212,13 +215,13 @@ struct handle_model_result {
static handle_model_result common_params_handle_model(
struct common_params_model & model,
const std::string & bearer_token,
- const std::string & model_path_default,
bool offline) {
handle_model_result result;
// handle pre-fill default model path and url based on hf_repo and hf_file
{
if (!model.docker_repo.empty()) { // Handle Docker URLs by resolving them to local paths
model.path = common_docker_resolve_model(model.docker_repo);
+ model.name = model.docker_repo; // set name for consistency
} else if (!model.hf_repo.empty()) {
// short-hand to avoid specifying --hf-file -> default it to --model
if (model.hf_file.empty()) {
@@ -227,7 +230,8 @@ static handle_model_result common_params_handle_model(
if (auto_detected.repo.empty() || auto_detected.ggufFile.empty()) {
exit(1); // built without CURL, error message already printed
}
- model.hf_repo = auto_detected.repo;
+ model.name = model.hf_repo; // repo name with tag
+ model.hf_repo = auto_detected.repo; // repo name without tag
model.hf_file = auto_detected.ggufFile;
if (!auto_detected.mmprojFile.empty()) {
result.found_mmproj = true;
@@ -257,8 +261,6 @@ static handle_model_result common_params_handle_model(
model.path = fs_get_cache_file(string_split(f, '/').back());
}
- } else if (model.path.empty()) {
- model.path = model_path_default;
}
}
@@ -405,7 +407,7 @@ static bool common_params_parse_ex(int argc, char ** argv, common_params_context
// handle model and download
{
- auto res = common_params_handle_model(params.model, params.hf_token, DEFAULT_MODEL_PATH, params.offline);
+ auto res = common_params_handle_model(params.model, params.hf_token, params.offline);
if (params.no_mmproj) {
params.mmproj = {};
} else if (res.found_mmproj && params.mmproj.path.empty() && params.mmproj.url.empty()) {
@@ -415,12 +417,18 @@ static bool common_params_parse_ex(int argc, char ** argv, common_params_context
// only download mmproj if the current example is using it
for (auto & ex : mmproj_examples) {
if (ctx_arg.ex == ex) {
- common_params_handle_model(params.mmproj, params.hf_token, "", params.offline);
+ common_params_handle_model(params.mmproj, params.hf_token, params.offline);
break;
}
}
- common_params_handle_model(params.speculative.model, params.hf_token, "", params.offline);
- common_params_handle_model(params.vocoder.model, params.hf_token, "", params.offline);
+ common_params_handle_model(params.speculative.model, params.hf_token, params.offline);
+ common_params_handle_model(params.vocoder.model, params.hf_token, params.offline);
+ }
+
+ // model is required (except for server)
+ // TODO @ngxson : maybe show a list of available models in CLI in this case
+ if (params.model.path.empty() && ctx_arg.ex != LLAMA_EXAMPLE_SERVER) {
+ throw std::invalid_argument("error: --model is required\n");
}
if (params.escape) {
@@ -694,6 +702,12 @@ static bool is_autoy(const std::string & value) {
}
common_params_context common_params_parser_init(common_params & params, llama_example ex, void(*print_usage)(int, char **)) {
+ // default values specific to example
+ // note: we place it here instead of inside server.cpp to allow llama-gen-docs to pick it up
+ if (ex == LLAMA_EXAMPLE_SERVER) {
+ params.use_jinja = true;
+ }
+
// load dynamic backends
ggml_backend_load_all();
@@ -974,7 +988,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
[](common_params & params) {
params.kv_unified = true;
}
- ).set_env("LLAMA_ARG_KV_SPLIT"));
+ ).set_env("LLAMA_ARG_KV_UNIFIED"));
add_opt(common_arg(
{"--no-context-shift"},
string_format("disables context shift on infinite text generation (default: %s)", params.ctx_shift ? "disabled" : "enabled"),
@@ -1215,7 +1229,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
[](common_params & params) {
params.warmup = false;
}
- ).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_EMBEDDING, LLAMA_EXAMPLE_RETRIEVAL, LLAMA_EXAMPLE_PERPLEXITY}));
+ ).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_MTMD, LLAMA_EXAMPLE_EMBEDDING, LLAMA_EXAMPLE_RETRIEVAL, LLAMA_EXAMPLE_PERPLEXITY}));
add_opt(common_arg(
{"--spm-infill"},
string_format(
@@ -2084,11 +2098,8 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
add_opt(common_arg(
{"-m", "--model"}, "FNAME",
ex == LLAMA_EXAMPLE_EXPORT_LORA
- ? std::string("model path from which to load base model")
- : string_format(
- "model path (default: `models/$filename` with filename from `--hf-file` "
- "or `--model-url` if set, otherwise %s)", DEFAULT_MODEL_PATH
- ),
+ ? "model path from which to load base model"
+ : "model path to load",
[](common_params & params, const std::string & value) {
params.model.path = value;
}
@@ -2480,19 +2491,64 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
"path to save slot kv cache (default: disabled)",
[](common_params & params, const std::string & value) {
params.slot_save_path = value;
+ if (!fs_is_directory(params.slot_save_path)) {
+ throw std::invalid_argument("not a directory: " + value);
+ }
// if doesn't end with DIRECTORY_SEPARATOR, add it
if (!params.slot_save_path.empty() && params.slot_save_path[params.slot_save_path.size() - 1] != DIRECTORY_SEPARATOR) {
params.slot_save_path += DIRECTORY_SEPARATOR;
}
}
).set_examples({LLAMA_EXAMPLE_SERVER}));
+ add_opt(common_arg(
+ {"--media-path"}, "PATH",
+ "directory for loading local media files; files can be accessed via file:// URLs using relative paths (default: disabled)",
+ [](common_params & params, const std::string & value) {
+ params.media_path = value;
+ if (!fs_is_directory(params.media_path)) {
+ throw std::invalid_argument("not a directory: " + value);
+ }
+ // if doesn't end with DIRECTORY_SEPARATOR, add it
+ if (!params.media_path.empty() && params.media_path[params.media_path.size() - 1] != DIRECTORY_SEPARATOR) {
+ params.media_path += DIRECTORY_SEPARATOR;
+ }
+ }
+ ).set_examples({LLAMA_EXAMPLE_SERVER}));
+ add_opt(common_arg(
+ {"--models-dir"}, "PATH",
+ "directory containing models for the router server (default: disabled)",
+ [](common_params & params, const std::string & value) {
+ params.models_dir = value;
+ }
+ ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_MODELS_DIR"));
+ add_opt(common_arg(
+ {"--models-max"}, "N",
+ string_format("for router server, maximum number of models to load simultaneously (default: %d, 0 = unlimited)", params.models_max),
+ [](common_params & params, int value) {
+ params.models_max = value;
+ }
+ ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_MODELS_MAX"));
+ add_opt(common_arg(
+ {"--no-models-autoload"},
+ "disables automatic loading of models (default: enabled)",
+ [](common_params & params) {
+ params.models_autoload = false;
+ }
+ ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_NO_MODELS_AUTOLOAD"));
add_opt(common_arg(
{"--jinja"},
- "use jinja template for chat (default: disabled)",
+ string_format("use jinja template for chat (default: %s)\n", params.use_jinja ? "enabled" : "disabled"),
[](common_params & params) {
params.use_jinja = true;
}
).set_examples({LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_MTMD}).set_env("LLAMA_ARG_JINJA"));
+ add_opt(common_arg(
+ {"--no-jinja"},
+ string_format("disable jinja template for chat (default: %s)\n", params.use_jinja ? "enabled" : "disabled"),
+ [](common_params & params) {
+ params.use_jinja = false;
+ }
+ ).set_examples({LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_MTMD}).set_env("LLAMA_ARG_NO_JINJA"));
add_opt(common_arg(
{"--reasoning-format"}, "FORMAT",
"controls whether thought tags are allowed and/or extracted from the response, and in which format they're returned; one of:\n"
@@ -2626,7 +2682,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
[](common_params &, const std::string & value) {
common_log_set_file(common_log_main(), value.c_str());
}
- ));
+ ).set_env("LLAMA_LOG_FILE"));
add_opt(common_arg(
{"--log-colors"}, "[on|off|auto]",
"Set colored logging ('on', 'off', or 'auto', default: 'auto')\n"
@@ -2661,7 +2717,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
).set_env("LLAMA_OFFLINE"));
add_opt(common_arg(
{"-lv", "--verbosity", "--log-verbosity"}, "N",
- "Set the verbosity threshold. Messages with a higher verbosity will be ignored.",
+ string_format("Set the verbosity threshold. Messages with a higher verbosity will be ignored. Values:\n"
+ " - 0: generic output\n"
+ " - 1: error\n"
+ " - 2: warning\n"
+ " - 3: info\n"
+ " - 4: debug\n"
+ "(default: %d)\n", params.verbosity),
[](common_params & params, int value) {
params.verbosity = value;
common_log_set_verbosity_thold(value);
diff --git a/common/chat-parser.cpp b/common/chat-parser.cpp
index ff83102788..fe3e80037f 100644
--- a/common/chat-parser.cpp
+++ b/common/chat-parser.cpp
@@ -1,6 +1,8 @@
#include "chat-parser.h"
+#include "chat-peg-parser.h"
#include "common.h"
#include "log.h"
+#include "peg-parser.h"
#include "regex-partial.h"
#include
@@ -13,6 +15,120 @@
using json = nlohmann::ordered_json;
+static void parse_prefixed_json_tool_call_array(common_chat_msg_parser & builder,
+ const common_regex & prefix,
+ size_t rstrip_prefix = 0) {
+ static const std::vector> args_paths = { { "arguments" } };
+ if (auto res = builder.try_find_regex(prefix)) {
+ builder.move_back(rstrip_prefix);
+ auto tool_calls = builder.consume_json_with_dumped_args(args_paths);
+ if (!builder.add_tool_calls(tool_calls.value) || tool_calls.is_partial) {
+ throw common_chat_msg_partial_exception("incomplete tool call array");
+ }
+ } else {
+ builder.add_content(builder.consume_rest());
+ }
+}
+
+static std::string wrap_code_as_arguments(common_chat_msg_parser & builder, const std::string & code) {
+ std::string arguments;
+ if (builder.is_partial()) {
+ arguments = (json{
+ { "code", code + builder.healing_marker() }
+ })
+ .dump();
+ auto idx = arguments.find(builder.healing_marker());
+ if (idx != std::string::npos) {
+ arguments.resize(idx);
+ }
+ } else {
+ arguments = (json{
+ { "code", code }
+ })
+ .dump();
+ }
+ return arguments;
+}
+
+/**
+ * Takes a prefix regex that must have 1 group to capture the function name, a closing suffix, and expects json parameters in between.
+ * Aggregates the prefix, suffix and in-between text into the content.
+ */
+static void parse_json_tool_calls(
+ common_chat_msg_parser & builder,
+ const std::optional & block_open,
+ const std::optional & function_regex_start_only,
+ const std::optional & function_regex,
+ const common_regex & close_regex,
+ const std::optional & block_close,
+ bool allow_raw_python = false,
+ const std::function & get_function_name =
+ nullptr) {
+ auto parse_tool_calls = [&]() {
+ size_t from = std::string::npos;
+ auto first = true;
+ while (true) {
+ auto start_pos = builder.pos();
+ auto res = function_regex_start_only && first ? builder.try_consume_regex(*function_regex_start_only) :
+ function_regex ? builder.try_find_regex(*function_regex, from) :
+ std::nullopt;
+
+ if (res) {
+ std::string name;
+ if (get_function_name) {
+ name = get_function_name(*res);
+ } else {
+ GGML_ASSERT(res->groups.size() == 2);
+ name = builder.str(res->groups[1]);
+ }
+ first = false;
+ if (name.empty()) {
+ // get_function_name signalled us that we should skip this match and treat it as content.
+ from = res->groups[0].begin + 1;
+ continue;
+ }
+ from = std::string::npos;
+
+ auto maybe_raw_python = name == "python" && allow_raw_python;
+ if (builder.input()[builder.pos()] == '{' || !maybe_raw_python) {
+ if (auto arguments = builder.try_consume_json_with_dumped_args({ {} })) {
+ if (!builder.add_tool_call(name, "", arguments->value) || arguments->is_partial) {
+ throw common_chat_msg_partial_exception("incomplete tool call");
+ }
+ builder.consume_regex(close_regex);
+ }
+ continue;
+ }
+ if (maybe_raw_python) {
+ auto arguments = wrap_code_as_arguments(builder, builder.consume_rest());
+ if (!builder.add_tool_call(name, "", arguments)) {
+ throw common_chat_msg_partial_exception("incomplete tool call");
+ }
+ return;
+ }
+ throw common_chat_msg_partial_exception("incomplete tool call");
+ } else {
+ builder.move_to(start_pos);
+ }
+ break;
+ }
+ if (block_close) {
+ builder.consume_regex(*block_close);
+ }
+ builder.consume_spaces();
+ builder.add_content(builder.consume_rest());
+ };
+ if (block_open) {
+ if (auto res = builder.try_find_regex(*block_open)) {
+ parse_tool_calls();
+ } else {
+ builder.add_content(builder.consume_rest());
+ }
+ } else {
+ parse_tool_calls();
+ }
+}
+
common_chat_msg_parser::common_chat_msg_parser(const std::string & input, bool is_partial, const common_chat_syntax & syntax)
: input_(input), is_partial_(is_partial), syntax_(syntax)
{
@@ -532,3 +648,895 @@ std::optional common_chat_msg_parse
void common_chat_msg_parser::clear_tools() {
result_.tool_calls.clear();
}
+
+/**
+ * All common_chat_parse_* moved from chat.cpp to chat-parser.cpp below
+ * to reduce incremental compile time for parser changes.
+ */
+static void common_chat_parse_generic(common_chat_msg_parser & builder) {
+ if (!builder.syntax().parse_tool_calls) {
+ builder.add_content(builder.consume_rest());
+ return;
+ }
+ static const std::vector> content_paths = {
+ {"response"},
+ };
+ static const std::vector> args_paths = {
+ {"tool_call", "arguments"},
+ {"tool_calls", "arguments"},
+ };
+ auto data = builder.consume_json_with_dumped_args(args_paths, content_paths);
+ if (data.value.contains("tool_calls")) {
+ if (!builder.add_tool_calls(data.value.at("tool_calls")) || data.is_partial) {
+ throw common_chat_msg_partial_exception("incomplete tool calls");
+ }
+ } else if (data.value.contains("tool_call")) {
+ if (!builder.add_tool_call(data.value.at("tool_call")) || data.is_partial) {
+ throw common_chat_msg_partial_exception("incomplete tool call");
+ }
+ } else if (data.value.contains("response")) {
+ const auto & response = data.value.at("response");
+ builder.add_content(response.is_string() ? response.template get() : response.dump(2));
+ if (data.is_partial) {
+ throw common_chat_msg_partial_exception("incomplete response");
+ }
+ } else {
+ throw common_chat_msg_partial_exception("Expected 'tool_call', 'tool_calls' or 'response' in JSON");
+ }
+}
+
+static void common_chat_parse_mistral_nemo(common_chat_msg_parser & builder) {
+ if (!builder.syntax().parse_tool_calls) {
+ builder.add_content(builder.consume_rest());
+ return;
+ }
+
+ static const common_regex prefix(regex_escape("[TOOL_CALLS]"));
+ parse_prefixed_json_tool_call_array(builder, prefix);
+}
+
+static void common_chat_parse_magistral(common_chat_msg_parser & builder) {
+ builder.try_parse_reasoning("[THINK]", "[/THINK]");
+
+ if (!builder.syntax().parse_tool_calls) {
+ builder.add_content(builder.consume_rest());
+ return;
+ }
+
+ static const common_regex prefix(regex_escape("[TOOL_CALLS]"));
+ parse_prefixed_json_tool_call_array(builder, prefix);
+}
+
+static void common_chat_parse_command_r7b(common_chat_msg_parser & builder) {
+ builder.try_parse_reasoning("<|START_THINKING|>", "<|END_THINKING|>");
+
+ static const common_regex start_action_regex("<\\|START_ACTION\\|>");
+ static const common_regex end_action_regex("<\\|END_ACTION\\|>");
+ static const common_regex start_response_regex("<\\|START_RESPONSE\\|>");
+ static const common_regex end_response_regex("<\\|END_RESPONSE\\|>");
+
+ if (auto res = builder.try_find_regex(start_action_regex)) {
+ // If we didn't extract thoughts, prelude includes them.
+ auto tool_calls = builder.consume_json_with_dumped_args({{"parameters"}});
+ for (const auto & tool_call : tool_calls.value) {
+ std::string name = tool_call.contains("tool_name") ? tool_call.at("tool_name") : "";
+ std::string id = tool_call.contains("tool_call_id") ? tool_call.at("tool_call_id") : "";
+ std::string arguments = tool_call.contains("parameters") ? tool_call.at("parameters") : "";
+ if (!builder.add_tool_call(name, id, arguments) || tool_calls.is_partial) {
+ throw common_chat_msg_partial_exception("incomplete tool call");
+ }
+ }
+ if (tool_calls.is_partial) {
+ throw common_chat_msg_partial_exception("incomplete tool call");
+ }
+ builder.consume_regex(end_action_regex);
+ } else if (auto res = builder.try_find_regex(start_response_regex)) {
+ if (!builder.try_find_regex(end_response_regex)) {
+ builder.add_content(builder.consume_rest());
+ throw common_chat_msg_partial_exception(end_response_regex.str());
+ }
+ } else {
+ builder.add_content(builder.consume_rest());
+ }
+}
+
+static void common_chat_parse_llama_3_1(common_chat_msg_parser & builder, bool with_builtin_tools = false) {
+ builder.try_parse_reasoning("", "");
+
+ if (!builder.syntax().parse_tool_calls) {
+ builder.add_content(builder.consume_rest());
+ return;
+ }
+
+ static const common_regex function_regex(
+ "\\s*\\{\\s*(?:\"type\"\\s*:\\s*\"function\"\\s*,\\s*)?\"name\"\\s*:\\s*\"([^\"]+)\"\\s*,\\s*\"parameters\"\\s*: ");
+ static const common_regex close_regex("\\}\\s*");
+
+ static const common_regex function_name_regex("\\s*(\\w+)\\s*\\.\\s*call\\(");
+ static const common_regex arg_name_regex("\\s*(\\w+)\\s*=\\s*");
+
+ if (with_builtin_tools) {
+ static const common_regex builtin_call_regex("<\\|python_tag\\|>");
+ if (auto res = builder.try_find_regex(builtin_call_regex)) {
+ auto fun_res = builder.consume_regex(function_name_regex);
+ auto function_name = builder.str(fun_res.groups[1]);
+
+ common_healing_marker healing_marker;
+ json args = json::object();
+ while (true) {
+ if (auto arg_res = builder.try_consume_regex(arg_name_regex)) {
+ auto arg_name = builder.str(arg_res->groups[1]);
+ auto partial = builder.consume_json();
+ args[arg_name] = partial.json;
+ healing_marker.marker = partial.healing_marker.marker;
+ healing_marker.json_dump_marker = partial.healing_marker.json_dump_marker;
+ builder.consume_spaces();
+ if (!builder.try_consume_literal(",")) {
+ break;
+ }
+ } else {
+ break;
+ }
+ }
+ builder.consume_literal(")");
+ builder.consume_spaces();
+
+ auto arguments = args.dump();
+ if (!builder.add_tool_call(function_name, "", arguments)) {
+ throw common_chat_msg_partial_exception("Incomplete tool call");
+ }
+ return;
+ }
+ }
+ parse_json_tool_calls(
+ builder,
+ /* block_open= */ std::nullopt,
+ /* function_regex_start_only= */ function_regex,
+ /* function_regex= */ std::nullopt,
+ close_regex,
+ std::nullopt);
+
+}
+
+static void common_chat_parse_deepseek_r1(common_chat_msg_parser & builder) {
+ builder.try_parse_reasoning("", "");
+ if (!builder.syntax().parse_tool_calls) {
+ builder.add_content(builder.consume_rest());
+ return;
+ }
+
+ static const common_regex tool_calls_begin("(?:<|tool▁calls▁begin|>|<|tool_calls_begin|>|<|tool calls begin|>|<|tool\\\\_calls\\\\_begin|>|<|tool▁calls|>)");
+ static const common_regex tool_calls_end("<|tool▁calls▁end|>");
+ static const common_regex function_regex("(?:<|tool▁call▁begin|>)?function<|tool▁sep|>([^\n]+)\n```json\n");
+ static const common_regex close_regex("```[\\s\\r\\n]*<|tool▁call▁end|>");
+
+ parse_json_tool_calls(
+ builder,
+ /* block_open= */ tool_calls_begin,
+ /* function_regex_start_only= */ std::nullopt,
+ function_regex,
+ close_regex,
+ tool_calls_end);
+}
+
+static void common_chat_parse_deepseek_v3_1_content(common_chat_msg_parser & builder) {
+ static const common_regex function_regex("(?:<|tool▁call▁begin|>)?([^\\n<]+)(?:<|tool▁sep|>)");
+
+ static const common_regex close_regex("(?:[\\s]*)?<|tool▁call▁end|>");
+ static const common_regex tool_calls_begin("(?:<|tool▁calls▁begin|>|<|tool_calls_begin|>|<|tool calls begin|>|<|tool\\\\_calls\\\\_begin|>|<|tool▁calls|>)");
+ static const common_regex tool_calls_end("<|tool▁calls▁end|>");
+
+ if (!builder.syntax().parse_tool_calls) {
+ LOG_DBG("%s: not parse_tool_calls\n", __func__);
+ builder.add_content(builder.consume_rest());
+ return;
+ }
+
+ LOG_DBG("%s: parse_tool_calls\n", __func__);
+
+ parse_json_tool_calls(
+ builder,
+ /* block_open= */ tool_calls_begin,
+ /* function_regex_start_only= */ std::nullopt,
+ function_regex,
+ close_regex,
+ tool_calls_end);
+}
+
+static void common_chat_parse_deepseek_v3_1(common_chat_msg_parser & builder) {
+ // DeepSeek V3.1 outputs reasoning content between "" and "" tags, followed by regular content
+ // First try to parse using the standard reasoning parsing method
+ LOG_DBG("%s: thinking_forced_open: %s\n", __func__, std::to_string(builder.syntax().thinking_forced_open).c_str());
+
+ auto start_pos = builder.pos();
+ auto found_end_think = builder.try_find_literal("");
+ builder.move_to(start_pos);
+
+ if (builder.syntax().thinking_forced_open && !builder.is_partial() && !found_end_think) {
+ LOG_DBG("%s: no end_think, not partial, adding content\n", __func__);
+ common_chat_parse_deepseek_v3_1_content(builder);
+ } else if (builder.try_parse_reasoning("", "")) {
+ // If reasoning was parsed successfully, the remaining content is regular content
+ LOG_DBG("%s: parsed reasoning, adding content\n", __func__);
+ // <|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>NAME\n```json\nJSON\n```<|tool▁call▁end|><|tool▁calls▁end|>
+ common_chat_parse_deepseek_v3_1_content(builder);
+ } else {
+ if (builder.syntax().reasoning_format == COMMON_REASONING_FORMAT_NONE) {
+ LOG_DBG("%s: reasoning_format none, adding content\n", __func__);
+ common_chat_parse_deepseek_v3_1_content(builder);
+ return;
+ }
+ // If no reasoning tags found, check if we should treat everything as reasoning
+ if (builder.syntax().thinking_forced_open) {
+ // If thinking is forced open but no tags found, treat everything as reasoning
+ LOG_DBG("%s: thinking_forced_open, adding reasoning content\n", __func__);
+ builder.add_reasoning_content(builder.consume_rest());
+ } else {
+ LOG_DBG("%s: no thinking_forced_open, adding content\n", __func__);
+ // <|tool▁call▁begin|>NAME<|tool▁sep|>JSON<|tool▁call▁end|>
+ common_chat_parse_deepseek_v3_1_content(builder);
+ }
+ }
+}
+
+static void common_chat_parse_minimax_m2(common_chat_msg_parser & builder) {
+ static const xml_tool_call_format form {
+ /* form.scope_start = */ "",
+ /* form.tool_start = */ "",
+ /* form.key_start = */ "",
+ /* form.val_end = */ "",
+ /* form.tool_end = */ "",
+ /* form.scope_end = */ "",
+ };
+ builder.consume_reasoning_with_xml_tool_calls(form, "", "");
+}
+
+static void common_chat_parse_qwen3_coder_xml(common_chat_msg_parser & builder) {
+ static const xml_tool_call_format form = ([]() {
+ xml_tool_call_format form {};
+ form.scope_start = "";
+ form.tool_start = "", "");
+}
+
+static void common_chat_parse_apriel_1_5(common_chat_msg_parser & builder) {
+ static const xml_tool_call_format form = ([]() {
+ xml_tool_call_format form {};
+ form.scope_start = "[";
+ form.tool_start = "{\"name\": \"";
+ form.tool_sep = "\", \"arguments\": {";
+ form.key_start = "\"";
+ form.key_val_sep = "\": ";
+ form.val_end = ", ";
+ form.tool_end = "}, ";
+ form.scope_end = "]";
+ form.raw_argval = false;
+ form.last_val_end = "";
+ form.last_tool_end = "}";
+ return form;
+ })();
+ builder.consume_reasoning_with_xml_tool_calls(form, "", "");
+}
+
+static void common_chat_parse_xiaomi_mimo(common_chat_msg_parser & builder) {
+ static const xml_tool_call_format form = ([]() {
+ xml_tool_call_format form {};
+ form.scope_start = "";
+ form.tool_start = "\n{\"name\": \"";
+ form.tool_sep = "\", \"arguments\": {";
+ form.key_start = "\"";
+ form.key_val_sep = "\": ";
+ form.val_end = ", ";
+ form.tool_end = "}\n";
+ form.scope_end = "";
+ form.raw_argval = false;
+ form.last_val_end = "";
+ return form;
+ })();
+ builder.consume_reasoning_with_xml_tool_calls(form);
+}
+
+static void common_chat_parse_gpt_oss(common_chat_msg_parser & builder) {
+ static const std::string constraint = "(?: (<\\|constrain\\|>)?([a-zA-Z0-9_-]+))";
+ static const std::string recipient("(?: to=functions\\.([^<\\s]+))");
+
+ static const common_regex start_regex("<\\|start\\|>assistant");
+ static const common_regex analysis_regex("<\\|channel\\|>analysis");
+ static const common_regex final_regex("<\\|channel\\|>final" + constraint + "?");
+ static const common_regex preamble_regex("<\\|channel\\|>commentary");
+ static const common_regex tool_call1_regex(recipient + "<\\|channel\\|>(analysis|commentary)" + constraint + "?");
+ static const common_regex tool_call2_regex("<\\|channel\\|>(analysis|commentary)" + recipient + constraint + "?");
+
+ auto consume_end = [&](bool include_end = false) {
+ if (auto res = builder.try_find_literal("<|end|>")) {
+ return res->prelude + (include_end ? builder.str(res->groups[0]) : "");
+ }
+ return builder.consume_rest();
+ };
+
+ auto handle_tool_call = [&](const std::string & name) {
+ if (auto args = builder.try_consume_json_with_dumped_args({{}})) {
+ if (builder.syntax().parse_tool_calls) {
+ if (!builder.add_tool_call(name, "", args->value) || args->is_partial) {
+ throw common_chat_msg_partial_exception("incomplete tool call");
+ }
+ } else if (args->is_partial) {
+ throw common_chat_msg_partial_exception("incomplete tool call");
+ }
+ }
+ };
+
+ auto regex_match = [](const common_regex & regex, const std::string & input) -> std::optional {
+ auto match = regex.search(input, 0, true);
+ if (match.type == COMMON_REGEX_MATCH_TYPE_FULL) {
+ return match;
+ }
+ return std::nullopt;
+ };
+
+ do {
+ auto header_start_pos = builder.pos();
+ auto content_start = builder.try_find_literal("<|message|>");
+ if (!content_start) {
+ throw common_chat_msg_partial_exception("incomplete header");
+ }
+
+ auto header = content_start->prelude;
+
+ if (auto match = regex_match(tool_call1_regex, header)) {
+ auto group = match->groups[1];
+ auto name = header.substr(group.begin, group.end - group.begin);
+ handle_tool_call(name);
+ continue;
+ }
+
+ if (auto match = regex_match(tool_call2_regex, header)) {
+ auto group = match->groups[2];
+ auto name = header.substr(group.begin, group.end - group.begin);
+ handle_tool_call(name);
+ continue;
+ }
+
+ if (regex_match(analysis_regex, header)) {
+ builder.move_to(header_start_pos);
+ if (builder.syntax().reasoning_format == COMMON_REASONING_FORMAT_NONE || builder.syntax().reasoning_in_content) {
+ builder.add_content(consume_end(true));
+ } else {
+ builder.try_parse_reasoning("<|channel|>analysis<|message|>", "<|end|>");
+ }
+ continue;
+ }
+
+ if(regex_match(final_regex, header) || regex_match(preamble_regex, header)) {
+ builder.add_content(consume_end());
+ continue;
+ }
+
+ // Possibly a malformed message, attempt to recover by rolling
+ // back to pick up the next <|start|>
+ LOG_DBG("%s: unknown header from message: %s\n", __func__, header.c_str());
+ builder.move_to(header_start_pos);
+ } while (builder.try_find_regex(start_regex, std::string::npos, false));
+
+ auto remaining = builder.consume_rest();
+ if (!remaining.empty()) {
+ LOG_DBG("%s: content after last message: %s\n", __func__, remaining.c_str());
+ }
+}
+
+static void common_chat_parse_glm_4_5(common_chat_msg_parser & builder) {
+ static const xml_tool_call_format form {
+ /* form.scope_start = */ "",
+ /* form.tool_start = */ "",
+ /* form.tool_sep = */ "",
+ /* form.key_start = */ "",
+ /* form.key_val_sep = */ "",
+ /* form.val_end = */ "",
+ /* form.tool_end = */ "",
+ /* form.scope_end = */ "",
+ /* form.key_val_sep2 = */ "",
+ };
+ builder.consume_reasoning_with_xml_tool_calls(form, "", "");
+}
+
+static void common_chat_parse_firefunction_v2(common_chat_msg_parser & builder) {
+ if (!builder.syntax().parse_tool_calls) {
+ builder.add_content(builder.consume_rest());
+ return;
+ }
+ static const common_regex prefix(regex_escape(" functools["));
+ parse_prefixed_json_tool_call_array(builder, prefix, /* rstrip_prefix= */ 1);
+}
+
+static void common_chat_parse_functionary_v3_2(common_chat_msg_parser & builder) {
+ static const common_regex function_regex_start_only(R"((\w+\n\{|python\n|all\n))");
+ static const common_regex function_regex(R"(>>>(\w+\n\{|python\n|all\n))");
+ static const common_regex close_regex(R"(\s*)");
+
+ parse_json_tool_calls(
+ builder,
+ std::nullopt,
+ function_regex_start_only,
+ function_regex,
+ close_regex,
+ std::nullopt,
+ /* allow_raw_python= */ true,
+ /* get_function_name= */ [&](const auto & res) -> std::string {
+ auto at_start = res.groups[0].begin == 0;
+ auto name = builder.str(res.groups[1]);
+ if (!name.empty() && name.back() == '{') {
+ // Unconsume the opening brace '{' to ensure the JSON parsing goes well.
+ builder.move_back(1);
+ }
+ auto idx = name.find_last_not_of("\n{");
+ name = name.substr(0, idx + 1);
+ if (at_start && name == "all") {
+ return "";
+ }
+ return name;
+ });
+}
+
+static void common_chat_parse_functionary_v3_1_llama_3_1(common_chat_msg_parser & builder) {
+ if (!builder.syntax().parse_tool_calls) {
+ builder.add_content(builder.consume_rest());
+ return;
+ }
+ // This version of Functionary still supports the llama 3.1 tool call format for the python tool.
+ static const common_regex python_tag_regex(regex_escape("<|python_tag|>"));
+
+ static const common_regex function_regex(R"()");
+ static const common_regex close_regex(R"()");
+
+ parse_json_tool_calls(
+ builder,
+ /* block_open= */ std::nullopt,
+ /* function_regex_start_only= */ std::nullopt,
+ function_regex,
+ close_regex,
+ std::nullopt);
+
+ if (auto res = builder.try_find_regex(python_tag_regex)) {
+ auto arguments = wrap_code_as_arguments(builder, builder.consume_rest());
+ builder.add_tool_call("python", "", arguments);
+ return;
+ }
+}
+
+static void common_chat_parse_hermes_2_pro(common_chat_msg_parser & builder) {
+ builder.try_parse_reasoning("<think>", "</think>");
+ if (!builder.syntax().parse_tool_calls) {
+ builder.add_content(builder.consume_rest());
+ return;
+ }
+
+ static const common_regex open_regex(
+ "(?:"
+ "(```(?:xml|json)?\\n\\s*)?" // match 1 (block_start)
+ "(" // match 2 (open_tag)
+ "<tool_call>"
+ "|<function_call>"
+ "|<tool>"
+ "|<tools>"
+ "|<response>"
+ "|<json>"
+ "|<xml>"
+ "|<JSON>"
+ ")?"
+ "(\\s*\\{\\s*\"name\")" // match 3 (named tool call)
+ ")"
+ "|<function=([^>]+)>" // match 4 (function name)
+ "|<function name=\"([^\"]+)\">" // match 5 (function name again)
+ );
+
+ while (auto res = builder.try_find_regex(open_regex)) {
+ const auto & block_start = res->groups[1];
+ std::string block_end = block_start.empty() ? "" : "```";
+
+ const auto & open_tag = res->groups[2];
+ std::string close_tag;
+
+ if (!res->groups[3].empty()) {
+ builder.move_to(res->groups[3].begin);
+ close_tag = open_tag.empty() ? "" : "</" + builder.str(open_tag).substr(1);
+
+ if (auto tool_call = builder.try_consume_json_with_dumped_args({{"arguments"}})) {
+ if (!builder.add_tool_call(tool_call->value) || tool_call->is_partial) {
+ throw common_chat_msg_partial_exception("incomplete tool call");
+ }
+ builder.consume_spaces();
+ builder.consume_literal(close_tag);
+ builder.consume_spaces();
+ if (!block_end.empty()) {
+ builder.consume_literal(block_end);
+ builder.consume_spaces();
+ }
+ } else {
+ throw common_chat_msg_partial_exception("failed to parse tool call");
+ }
+ } else {
+ auto function_name = builder.str(res->groups[4]);
+ if (function_name.empty()) {
+ function_name = builder.str(res->groups[5]);
+ }
+ GGML_ASSERT(!function_name.empty());
+
+ close_tag = "</function>";
+
+ if (auto arguments = builder.try_consume_json_with_dumped_args({{}})) {
+ if (!builder.add_tool_call(function_name, "", arguments->value) || arguments->is_partial) {
+ throw common_chat_msg_partial_exception("incomplete tool call");
+ }
+ builder.consume_spaces();
+ builder.consume_literal(close_tag);
+ builder.consume_spaces();
+ if (!block_end.empty()) {
+ builder.consume_literal(block_end);
+ builder.consume_spaces();
+ }
+ }
+ }
+ }
+
+ builder.add_content(builder.consume_rest());
+}
+
+static void common_chat_parse_granite(common_chat_msg_parser & builder) {
+ // Parse thinking tags
+ static const common_regex start_think_regex(regex_escape("<think>"));
+ static const common_regex end_think_regex(regex_escape("</think>"));
+ // Granite models output partial tokens such as "<" and "<think"; locate the
+ // full start tag (and its end tag) before handing off to try_parse_reasoning()
+ if (auto res = builder.try_find_regex(start_think_regex, 0, false)) {
+ builder.try_find_regex(end_think_regex, std::string::npos, false);
+ // Restore position for try_parse_reasoning()
+ builder.move_to(res->groups[0].begin);
+ }
+ builder.try_parse_reasoning("<think>", "</think>");
+
+ // Parse response tags
+ static const common_regex start_response_regex(regex_escape("<response>"));
+ static const common_regex end_response_regex(regex_escape("</response>"));
+ // Granite models output partial tokens such as "<" and "<response" as well
+ if (auto res = builder.try_find_regex(start_response_regex, 0, false)) {
+ builder.try_find_regex(end_response_regex, std::string::npos, false);
+ builder.move_to(res->groups[0].begin);
+ }
+
+ if (!builder.syntax().parse_tool_calls) {
+ builder.add_content(builder.consume_rest());
+ return;
+ }
+
+ // Look for tool calls
+ static const common_regex tool_call_regex(regex_escape("<|tool_call|>"));
+ if (auto res = builder.try_find_regex(tool_call_regex)) {
+ builder.move_to(res->groups[0].end);
+
+ // Expect JSON array of tool calls
+ if (auto tool_call = builder.try_consume_json_with_dumped_args({{{"arguments"}}})) {
+ if (!builder.add_tool_calls(tool_call->value) || tool_call->is_partial) {
+ throw common_chat_msg_partial_exception("incomplete tool call");
+ }
+ }
+ } else {
+ builder.add_content(builder.consume_rest());
+ }
+}
+
+static void common_chat_parse_nemotron_v2(common_chat_msg_parser & builder) {
+ // Parse thinking tags
+ builder.try_parse_reasoning("<think>", "</think>");
+ if (!builder.syntax().parse_tool_calls) {
+ builder.add_content(builder.consume_rest());
+ return;
+ }
+
+ // Look for tool calls
+ static const common_regex tool_call_regex(regex_escape("<TOOLCALL>"));
+ if (auto res = builder.try_find_regex(tool_call_regex)) {
+ builder.move_to(res->groups[0].end);
+
+ // Expect JSON array of tool calls
+ auto tool_calls_data = builder.consume_json();
+ if (tool_calls_data.json.is_array()) {
+ if (!builder.try_consume_literal("</TOOLCALL>")) {
+ throw common_chat_msg_partial_exception("Incomplete tool call");
+ }
+ builder.add_tool_calls(tool_calls_data.json);
+ } else {
+ throw common_chat_msg_partial_exception("Incomplete tool call");
+ }
+ }
+ builder.add_content(builder.consume_rest());
+}
+
+static void common_chat_parse_apertus(common_chat_msg_parser & builder) {
+ // Parse thinking tags
+ builder.try_parse_reasoning("<|inner_prefix|>", "<|inner_suffix|>");
+ if (!builder.syntax().parse_tool_calls) {
+ builder.add_content(builder.consume_rest());
+ return;
+ }
+
+ // Look for tool calls
+ static const common_regex tool_call_regex(regex_escape("<|tools_prefix|>"));
+ if (auto res = builder.try_find_regex(tool_call_regex)) {
+ builder.move_to(res->groups[0].end);
+
+ auto tool_calls_data = builder.consume_json();
+ if (tool_calls_data.json.is_array()) {
+ builder.consume_spaces();
+ if (!builder.try_consume_literal("<|tools_suffix|>")) {
+ throw common_chat_msg_partial_exception("Incomplete tool call");
+ }
+ for (const auto & value : tool_calls_data.json) {
+ if (value.is_object()) {
+ builder.add_tool_call_short_form(value);
+ }
+ }
+ } else {
+ throw common_chat_msg_partial_exception("Incomplete tool call");
+ }
+ }
+ builder.add_content(builder.consume_rest());
+}
+
+
+static void common_chat_parse_lfm2(common_chat_msg_parser & builder) {
+ if (!builder.syntax().parse_tool_calls) {
+ builder.add_content(builder.consume_rest());
+ return;
+ }
+
+ // LFM2 format: <|tool_call_start|>[{"name": "get_current_time", "arguments": {"location": "Paris"}}]<|tool_call_end|>
+ static const common_regex tool_call_start_regex(regex_escape("<|tool_call_start|>"));
+ static const common_regex tool_call_end_regex(regex_escape("<|tool_call_end|>"));
+
+ // Loop through all tool calls
+ while (auto res = builder.try_find_regex(tool_call_start_regex, std::string::npos, /* add_prelude_to_content= */ true)) {
+ builder.move_to(res->groups[0].end);
+
+ // Parse JSON array format: [{"name": "...", "arguments": {...}}]
+ auto tool_calls_data = builder.consume_json();
+
+ // Consume end marker
+ builder.consume_spaces();
+ if (!builder.try_consume_regex(tool_call_end_regex)) {
+ throw common_chat_msg_partial_exception("Expected <|tool_call_end|>");
+ }
+
+ // Process each tool call in the array
+ if (tool_calls_data.json.is_array()) {
+ for (const auto & tool_call : tool_calls_data.json) {
+ if (!tool_call.is_object()) {
+ throw common_chat_msg_partial_exception("Tool call must be an object");
+ }
+
+ if (!tool_call.contains("name")) {
+ throw common_chat_msg_partial_exception("Tool call missing 'name' field");
+ }
+
+ std::string function_name = tool_call.at("name");
+ std::string arguments = "{}";
+
+ if (tool_call.contains("arguments")) {
+ if (tool_call.at("arguments").is_object()) {
+ arguments = tool_call.at("arguments").dump();
+ } else if (tool_call.at("arguments").is_string()) {
+ arguments = tool_call.at("arguments");
+ }
+ }
+
+ if (!builder.add_tool_call(function_name, "", arguments)) {
+ throw common_chat_msg_partial_exception("Incomplete tool call");
+ }
+ }
+ } else {
+ throw common_chat_msg_partial_exception("Expected JSON array for tool calls");
+ }
+
+ // Consume any trailing whitespace after this tool call
+ builder.consume_spaces();
+ }
+
+ // Consume any remaining content after all tool calls
+ auto remaining = builder.consume_rest();
+ if (!string_strip(remaining).empty()) {
+ builder.add_content(remaining);
+ }
+}
+
+static void common_chat_parse_seed_oss(common_chat_msg_parser & builder) {
+ static const xml_tool_call_format form {
+ /* form.scope_start = */ "<seed:tool_call>",
+ /* form.tool_start = */ "<function=",
+ /* form.key_start = */ "<parameter=",
+ /* form.val_end = */ "</parameter>",
+ /* form.tool_end = */ "</function>",
+ /* form.scope_end = */ "</seed:tool_call>",
+ };
+ builder.consume_reasoning_with_xml_tool_calls(form, "<seed:think>", "</seed:think>");
+}
+
+static void common_chat_parse_content_only(common_chat_msg_parser & builder) {
+ builder.try_parse_reasoning("<think>", "</think>");
+ builder.add_content(builder.consume_rest());
+}
+
+static void common_chat_parse(common_chat_msg_parser & builder) {
+ LOG_DBG("Parsing input with format %s: %s\n", common_chat_format_name(builder.syntax().format), builder.input().c_str());
+
+ switch (builder.syntax().format) {
+ case COMMON_CHAT_FORMAT_CONTENT_ONLY:
+ common_chat_parse_content_only(builder);
+ break;
+ case COMMON_CHAT_FORMAT_GENERIC:
+ common_chat_parse_generic(builder);
+ break;
+ case COMMON_CHAT_FORMAT_MISTRAL_NEMO:
+ common_chat_parse_mistral_nemo(builder);
+ break;
+ case COMMON_CHAT_FORMAT_MAGISTRAL:
+ common_chat_parse_magistral(builder);
+ break;
+ case COMMON_CHAT_FORMAT_LLAMA_3_X:
+ common_chat_parse_llama_3_1(builder);
+ break;
+ case COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS:
+ common_chat_parse_llama_3_1(builder, /* with_builtin_tools= */ true);
+ break;
+ case COMMON_CHAT_FORMAT_DEEPSEEK_R1:
+ common_chat_parse_deepseek_r1(builder);
+ break;
+ case COMMON_CHAT_FORMAT_DEEPSEEK_V3_1:
+ common_chat_parse_deepseek_v3_1(builder);
+ break;
+ case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2:
+ common_chat_parse_functionary_v3_2(builder);
+ break;
+ case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1:
+ common_chat_parse_functionary_v3_1_llama_3_1(builder);
+ break;
+ case COMMON_CHAT_FORMAT_HERMES_2_PRO:
+ common_chat_parse_hermes_2_pro(builder);
+ break;
+ case COMMON_CHAT_FORMAT_FIREFUNCTION_V2:
+ common_chat_parse_firefunction_v2(builder);
+ break;
+ case COMMON_CHAT_FORMAT_COMMAND_R7B:
+ common_chat_parse_command_r7b(builder);
+ break;
+ case COMMON_CHAT_FORMAT_GRANITE:
+ common_chat_parse_granite(builder);
+ break;
+ case COMMON_CHAT_FORMAT_GPT_OSS:
+ common_chat_parse_gpt_oss(builder);
+ break;
+ case COMMON_CHAT_FORMAT_SEED_OSS:
+ common_chat_parse_seed_oss(builder);
+ break;
+ case COMMON_CHAT_FORMAT_NEMOTRON_V2:
+ common_chat_parse_nemotron_v2(builder);
+ break;
+ case COMMON_CHAT_FORMAT_APERTUS:
+ common_chat_parse_apertus(builder);
+ break;
+ case COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS:
+ common_chat_parse_lfm2(builder);
+ break;
+ case COMMON_CHAT_FORMAT_MINIMAX_M2:
+ common_chat_parse_minimax_m2(builder);
+ break;
+ case COMMON_CHAT_FORMAT_GLM_4_5:
+ common_chat_parse_glm_4_5(builder);
+ break;
+ case COMMON_CHAT_FORMAT_KIMI_K2:
+ common_chat_parse_kimi_k2(builder);
+ break;
+ case COMMON_CHAT_FORMAT_QWEN3_CODER_XML:
+ common_chat_parse_qwen3_coder_xml(builder);
+ break;
+ case COMMON_CHAT_FORMAT_APRIEL_1_5:
+ common_chat_parse_apriel_1_5(builder);
+ break;
+ case COMMON_CHAT_FORMAT_XIAOMI_MIMO:
+ common_chat_parse_xiaomi_mimo(builder);
+ break;
+ default:
+ throw std::runtime_error(std::string("Unsupported format: ") + common_chat_format_name(builder.syntax().format));
+ }
+ builder.finish();
+}
+
+common_chat_msg common_chat_parse(const std::string & input, bool is_partial, const common_chat_syntax & syntax) {
+ if (syntax.format == COMMON_CHAT_FORMAT_PEG_SIMPLE ||
+ syntax.format == COMMON_CHAT_FORMAT_PEG_NATIVE ||
+ syntax.format == COMMON_CHAT_FORMAT_PEG_CONSTRUCTED) {
+ return common_chat_peg_parse(syntax.parser, input, is_partial, syntax);
+ }
+ common_chat_msg_parser builder(input, is_partial, syntax);
+ try {
+ common_chat_parse(builder);
+ } catch (const common_chat_msg_partial_exception & ex) {
+ LOG_DBG("Partial parse: %s\n", ex.what());
+ if (!is_partial) {
+ builder.clear_tools();
+ builder.move_to(0);
+ common_chat_parse_content_only(builder);
+ }
+ }
+ auto msg = builder.result();
+ if (!is_partial) {
+ LOG_DBG("Parsed message: %s\n", common_chat_msgs_to_json_oaicompat({msg}).at(0).dump().c_str());
+ }
+ return msg;
+}
+
+common_chat_msg common_chat_peg_parse(const common_peg_arena & parser, const std::string & input, bool is_partial, const common_chat_syntax & syntax) {
+ if (parser.empty()) {
+ throw std::runtime_error("Failed to parse due to missing parser definition.");
+ }
+
+ LOG_DBG("Parsing input with format %s: %s\n", common_chat_format_name(syntax.format), input.c_str());
+
+ common_peg_parse_context ctx(input, is_partial);
+ auto result = parser.parse(ctx);
+ if (result.fail()) {
+ throw std::runtime_error(std::string("Failed to parse input at pos ") + std::to_string(result.end));
+ }
+
+ common_chat_msg msg;
+ msg.role = "assistant";
+
+ if (syntax.format == COMMON_CHAT_FORMAT_PEG_NATIVE) {
+ auto mapper = common_chat_peg_native_mapper(msg);
+ mapper.from_ast(ctx.ast, result);
+ } else if (syntax.format == COMMON_CHAT_FORMAT_PEG_CONSTRUCTED) {
+ auto mapper = common_chat_peg_constructed_mapper(msg);
+ mapper.from_ast(ctx.ast, result);
+ } else {
+ // Generic mapper
+ auto mapper = common_chat_peg_mapper(msg);
+ mapper.from_ast(ctx.ast, result);
+ }
+ if (!is_partial) {
+ LOG_DBG("Parsed message: %s\n", common_chat_msgs_to_json_oaicompat({msg}).at(0).dump().c_str());
+ }
+ return msg;
+}
diff --git a/common/chat-peg-parser.cpp b/common/chat-peg-parser.cpp
new file mode 100644
index 0000000000..74a7b6a46d
--- /dev/null
+++ b/common/chat-peg-parser.cpp
@@ -0,0 +1,114 @@
+#include "chat-peg-parser.h"
+
+#include <nlohmann/json.hpp>
+
+using json = nlohmann::json;
+
+static std::string_view trim_trailing_space(std::string_view sv) {
+ while (!sv.empty() && std::isspace(static_cast<unsigned char>(sv.back()))) {
+ sv.remove_suffix(1);
+ }
+ return sv;
+}
+
+void common_chat_peg_mapper::from_ast(const common_peg_ast_arena & arena, const common_peg_parse_result & result) {
+ arena.visit(result, [this](const common_peg_ast_node & node) {
+ map(node);
+ });
+}
+
+void common_chat_peg_mapper::map(const common_peg_ast_node & node) {
+ bool is_reasoning = node.tag == common_chat_peg_builder::REASONING;
+ bool is_content = node.tag == common_chat_peg_builder::CONTENT;
+
+ if (is_reasoning) {
+ result.reasoning_content = std::string(trim_trailing_space(node.text));
+ }
+
+ if (is_content) {
+ result.content = std::string(trim_trailing_space(node.text));
+ }
+}
+
+void common_chat_peg_native_mapper::map(const common_peg_ast_node & node) {
+ common_chat_peg_mapper::map(node);
+
+ bool is_tool_open = node.tag == common_chat_peg_native_builder::TOOL_OPEN;
+ bool is_tool_name = node.tag == common_chat_peg_native_builder::TOOL_NAME;
+ bool is_tool_id = node.tag == common_chat_peg_native_builder::TOOL_ID;
+ bool is_tool_args = node.tag == common_chat_peg_native_builder::TOOL_ARGS;
+
+ if (is_tool_open) {
+ result.tool_calls.emplace_back();
+ current_tool = &result.tool_calls.back();
+ }
+
+ if (is_tool_id && current_tool) {
+ current_tool->id = std::string(trim_trailing_space(node.text));
+ }
+
+ if (is_tool_name && current_tool) {
+ current_tool->name = std::string(trim_trailing_space(node.text));
+ }
+
+ if (is_tool_args && current_tool) {
+ current_tool->arguments = std::string(trim_trailing_space(node.text));
+ }
+}
+
+void common_chat_peg_constructed_mapper::map(const common_peg_ast_node & node) {
+ common_chat_peg_mapper::map(node);
+
+ bool is_tool_open = node.tag == common_chat_peg_constructed_builder::TOOL_OPEN;
+ bool is_tool_name = node.tag == common_chat_peg_constructed_builder::TOOL_NAME;
+ bool is_tool_close = node.tag == common_chat_peg_constructed_builder::TOOL_CLOSE;
+ bool is_arg_open = node.tag == common_chat_peg_constructed_builder::TOOL_ARG_OPEN;
+ bool is_arg_close = node.tag == common_chat_peg_constructed_builder::TOOL_ARG_CLOSE;
+ bool is_arg_name = node.tag == common_chat_peg_constructed_builder::TOOL_ARG_NAME;
+ bool is_arg_string = node.tag == common_chat_peg_constructed_builder::TOOL_ARG_STRING_VALUE;
+ bool is_arg_json = node.tag == common_chat_peg_constructed_builder::TOOL_ARG_JSON_VALUE;
+
+ if (is_tool_open) {
+ result.tool_calls.emplace_back();
+ current_tool = &result.tool_calls.back();
+ arg_count = 0;
+ }
+
+ if (is_tool_name) {
+ current_tool->name = std::string(node.text);
+ current_tool->arguments = "{";
+ }
+
+ if (is_arg_open) {
+ needs_closing_quote = false;
+ }
+
+ if (is_arg_name && current_tool) {
+ if (arg_count > 0) {
+ current_tool->arguments += ",";
+ }
+ current_tool->arguments += json(trim_trailing_space(node.text)).dump() + ":";
+ ++arg_count;
+ }
+
+ if (is_arg_string && current_tool) {
+ // Serialize to JSON, but exclude the end quote
+ std::string dumped = json(node.text).dump();
+ current_tool->arguments += dumped.substr(0, dumped.size() - 1);
+ needs_closing_quote = true;
+ }
+
+ if (is_arg_close && current_tool) {
+ if (needs_closing_quote) {
+ current_tool->arguments += "\"";
+ }
+ }
+
+ if (is_arg_json && current_tool) {
+ current_tool->arguments += std::string(trim_trailing_space(node.text));
+ }
+
+ if (is_tool_close && current_tool) {
+ current_tool->arguments += "}";
+ }
+}
diff --git a/common/chat-peg-parser.h b/common/chat-peg-parser.h
new file mode 100644
index 0000000000..b84cbed206
--- /dev/null
+++ b/common/chat-peg-parser.h
@@ -0,0 +1,105 @@
+#pragma once
+
+#include "chat.h"
+#include "peg-parser.h"
+
+class common_chat_peg_builder : public common_peg_parser_builder {
+ public:
+ static constexpr const char * REASONING_BLOCK = "reasoning-block";
+ static constexpr const char * REASONING = "reasoning";
+ static constexpr const char * CONTENT = "content";
+
+ common_peg_parser reasoning_block(const common_peg_parser & p) { return tag(REASONING_BLOCK, p); }
+ common_peg_parser reasoning(const common_peg_parser & p) { return tag(REASONING, p); }
+ common_peg_parser content(const common_peg_parser & p) { return tag(CONTENT, p); }
+};
+
+inline common_peg_arena build_chat_peg_parser(const std::function<common_peg_parser(common_chat_peg_builder &)> & fn) {
+ common_chat_peg_builder builder;
+ builder.set_root(fn(builder));
+ return builder.build();
+}
+
+class common_chat_peg_mapper {
+ public:
+ common_chat_msg & result;
+
+ common_chat_peg_mapper(common_chat_msg & msg) : result(msg) {}
+
+ virtual void from_ast(const common_peg_ast_arena & arena, const common_peg_parse_result & result);
+ virtual void map(const common_peg_ast_node & node);
+};
+
+class common_chat_peg_native_builder : public common_chat_peg_builder {
+ public:
+ static constexpr const char * TOOL = "tool";
+ static constexpr const char * TOOL_OPEN = "tool-open";
+ static constexpr const char * TOOL_CLOSE = "tool-close";
+ static constexpr const char * TOOL_ID = "tool-id";
+ static constexpr const char * TOOL_NAME = "tool-name";
+ static constexpr const char * TOOL_ARGS = "tool-args";
+
+ common_peg_parser tool(const common_peg_parser & p) { return tag(TOOL, p); }
+ common_peg_parser tool_open(const common_peg_parser & p) { return atomic(tag(TOOL_OPEN, p)); }
+ common_peg_parser tool_close(const common_peg_parser & p) { return atomic(tag(TOOL_CLOSE, p)); }
+ common_peg_parser tool_id(const common_peg_parser & p) { return atomic(tag(TOOL_ID, p)); }
+ common_peg_parser tool_name(const common_peg_parser & p) { return atomic(tag(TOOL_NAME, p)); }
+ common_peg_parser tool_args(const common_peg_parser & p) { return tag(TOOL_ARGS, p); }
+};
+
+class common_chat_peg_native_mapper : public common_chat_peg_mapper {
+ common_chat_tool_call * current_tool;
+
+ public:
+ common_chat_peg_native_mapper(common_chat_msg & msg) : common_chat_peg_mapper(msg) {}
+
+ void map(const common_peg_ast_node & node) override;
+};
+
+inline common_peg_arena build_chat_peg_native_parser(const std::function<common_peg_parser(common_chat_peg_native_builder &)> & fn) {
+ common_chat_peg_native_builder builder;
+ builder.set_root(fn(builder));
+ return builder.build();
+}
+
+class common_chat_peg_constructed_builder : public common_chat_peg_builder {
+ public:
+ static constexpr const char * TOOL = "tool";
+ static constexpr const char * TOOL_OPEN = "tool-open";
+ static constexpr const char * TOOL_CLOSE = "tool-close";
+ static constexpr const char * TOOL_NAME = "tool-name";
+ static constexpr const char * TOOL_ARG = "tool-arg";
+ static constexpr const char * TOOL_ARG_OPEN = "tool-arg-open";
+ static constexpr const char * TOOL_ARG_CLOSE = "tool-arg-close";
+ static constexpr const char * TOOL_ARG_NAME = "tool-arg-name";
+ static constexpr const char * TOOL_ARG_STRING_VALUE = "tool-arg-string-value";
+ static constexpr const char * TOOL_ARG_JSON_VALUE = "tool-arg-json-value";
+
+ common_peg_parser tool(const common_peg_parser & p) { return tag(TOOL, p); }
+ common_peg_parser tool_open(const common_peg_parser & p) { return atomic(tag(TOOL_OPEN, p)); }
+ common_peg_parser tool_close(const common_peg_parser & p) { return atomic(tag(TOOL_CLOSE, p)); }
+ common_peg_parser tool_name(const common_peg_parser & p) { return atomic(tag(TOOL_NAME, p)); }
+ common_peg_parser tool_arg(const common_peg_parser & p) { return tag(TOOL_ARG, p); }
+ common_peg_parser tool_arg_open(const common_peg_parser & p) { return atomic(tag(TOOL_ARG_OPEN, p)); }
+ common_peg_parser tool_arg_close(const common_peg_parser & p) { return atomic(tag(TOOL_ARG_CLOSE, p)); }
+ common_peg_parser tool_arg_name(const common_peg_parser & p) { return atomic(tag(TOOL_ARG_NAME, p)); }
+ common_peg_parser tool_arg_string_value(const common_peg_parser & p) { return tag(TOOL_ARG_STRING_VALUE, p); }
+ common_peg_parser tool_arg_json_value(const common_peg_parser & p) { return tag(TOOL_ARG_JSON_VALUE, p); }
+};
+
+class common_chat_peg_constructed_mapper : public common_chat_peg_mapper {
+ common_chat_tool_call * current_tool;
+ int arg_count = 0;
+ bool needs_closing_quote = false;
+
+ public:
+ common_chat_peg_constructed_mapper(common_chat_msg & msg) : common_chat_peg_mapper(msg) {}
+
+ void map(const common_peg_ast_node & node) override;
+};
+
+inline common_peg_arena build_chat_peg_constructed_parser(const std::function<common_peg_parser(common_chat_peg_constructed_builder &)> & fn) {
+ common_chat_peg_constructed_builder builder;
+ builder.set_root(fn(builder));
+ return builder.build();
+}
diff --git a/common/chat.cpp b/common/chat.cpp
index 6fa05a6041..41a5bb42d5 100644
--- a/common/chat.cpp
+++ b/common/chat.cpp
@@ -85,29 +85,36 @@ json common_chat_msg::to_json_oaicompat() const
return message;
}
-std::vector<common_chat_msg_diff> common_chat_msg_diff::compute_diffs(const common_chat_msg & previous_msg, const common_chat_msg & new_msg) {
+std::vector<common_chat_msg_diff> common_chat_msg_diff::compute_diffs(const common_chat_msg & msg_prv, const common_chat_msg & msg_new) {
std::vector<common_chat_msg_diff> diffs;
- if (previous_msg.reasoning_content != new_msg.reasoning_content) {
- auto & diff = diffs.emplace_back();
- diff.reasoning_content_delta = string_diff(previous_msg.reasoning_content, new_msg.reasoning_content);
- }
- if (previous_msg.content != new_msg.content) {
- auto & diff = diffs.emplace_back();
- diff.content_delta = string_diff(previous_msg.content, new_msg.content);
+ if (msg_new.tool_calls.size() > msg_prv.tool_calls.size()) {
+ diffs.reserve(msg_new.tool_calls.size() - msg_prv.tool_calls.size() + 3);
+ } else {
+ diffs.reserve(3);
}
- if (new_msg.tool_calls.size() < previous_msg.tool_calls.size()) {
+ // TODO: these can become expensive for long messages - how to optimize?
+ if (msg_prv.reasoning_content != msg_new.reasoning_content) {
+ auto & diff = diffs.emplace_back();
+ diff.reasoning_content_delta = string_diff(msg_prv.reasoning_content, msg_new.reasoning_content);
+ }
+ if (msg_prv.content != msg_new.content) {
+ auto & diff = diffs.emplace_back();
+ diff.content_delta = string_diff(msg_prv.content, msg_new.content);
+ }
+
+ if (msg_new.tool_calls.size() < msg_prv.tool_calls.size()) {
throw std::runtime_error("Invalid diff: now finding less tool calls!");
}
- if (!previous_msg.tool_calls.empty()) {
- auto idx = previous_msg.tool_calls.size() - 1;
- const auto & pref = previous_msg.tool_calls[idx];
- const auto & newf = new_msg.tool_calls[idx];
+ if (!msg_prv.tool_calls.empty()) {
+ const auto idx = msg_prv.tool_calls.size() - 1;
+ const auto & pref = msg_prv.tool_calls[idx];
+ const auto & newf = msg_new.tool_calls[idx];
if (pref.name != newf.name) {
throw std::runtime_error("Invalid diff: tool call mismatch!");
}
- auto args_diff = string_diff(pref.arguments, newf.arguments);
+ const auto args_diff = string_diff(pref.arguments, newf.arguments);
if (!args_diff.empty() || pref.id != newf.id) {
auto & diff = diffs.emplace_back();
diff.tool_call_index = idx;
@@ -118,11 +125,12 @@ std::vector common_chat_msg_diff::compute_diffs(const comm
diff.tool_call_delta.arguments = args_diff;
}
}
- for (size_t idx = previous_msg.tool_calls.size(); idx < new_msg.tool_calls.size(); ++idx) {
+ for (size_t idx = msg_prv.tool_calls.size(); idx < msg_new.tool_calls.size(); ++idx) {
auto & diff = diffs.emplace_back();
diff.tool_call_index = idx;
- diff.tool_call_delta = new_msg.tool_calls[idx];
+ diff.tool_call_delta = msg_new.tool_calls[idx];
}
+
return diffs;
}
@@ -163,7 +171,7 @@ common_chat_tool_choice common_chat_tool_choice_parse_oaicompat(const std::strin
if (tool_choice == "required") {
return COMMON_CHAT_TOOL_CHOICE_REQUIRED;
}
- throw std::runtime_error("Invalid tool_choice: " + tool_choice);
+ throw std::invalid_argument("Invalid tool_choice: " + tool_choice);
}
bool common_chat_templates_support_enable_thinking(const common_chat_templates * chat_templates) {
@@ -186,17 +194,17 @@ std::vector common_chat_msgs_parse_oaicompat(const json & messa
try {
if (!messages.is_array()) {
- throw std::runtime_error("Expected 'messages' to be an array, got " + messages.dump());
+ throw std::invalid_argument("Expected 'messages' to be an array, got " + messages.dump());
}
for (const auto & message : messages) {
if (!message.is_object()) {
- throw std::runtime_error("Expected 'message' to be an object, got " + message.dump());
+ throw std::invalid_argument("Expected 'message' to be an object, got " + message.dump());
}
common_chat_msg msg;
if (!message.contains("role")) {
- throw std::runtime_error("Missing 'role' in message: " + message.dump());
+ throw std::invalid_argument("Missing 'role' in message: " + message.dump());
}
msg.role = message.at("role");
@@ -209,11 +217,11 @@ std::vector common_chat_msgs_parse_oaicompat(const json & messa
} else if (content.is_array()) {
for (const auto & part : content) {
if (!part.contains("type")) {
- throw std::runtime_error("Missing content part type: " + part.dump());
+ throw std::invalid_argument("Missing content part type: " + part.dump());
}
const auto & type = part.at("type");
if (type != "text") {
- throw std::runtime_error("Unsupported content part type: " + type.dump());
+ throw std::invalid_argument("Unsupported content part type: " + type.dump());
}
common_chat_msg_content_part msg_part;
msg_part.type = type;
@@ -221,25 +229,25 @@ std::vector common_chat_msgs_parse_oaicompat(const json & messa
msg.content_parts.push_back(msg_part);
}
} else if (!content.is_null()) {
- throw std::runtime_error("Invalid 'content' type: expected string or array, got " + content.dump() + " (ref: https://github.com/ggml-org/llama.cpp/issues/8367)");
+ throw std::invalid_argument("Invalid 'content' type: expected string or array, got " + content.dump() + " (ref: https://github.com/ggml-org/llama.cpp/issues/8367)");
}
}
if (has_tool_calls) {
for (const auto & tool_call : message.at("tool_calls")) {
common_chat_tool_call tc;
if (!tool_call.contains("type")) {
- throw std::runtime_error("Missing tool call type: " + tool_call.dump());
+ throw std::invalid_argument("Missing tool call type: " + tool_call.dump());
}
const auto & type = tool_call.at("type");
if (type != "function") {
- throw std::runtime_error("Unsupported tool call type: " + tool_call.dump());
+ throw std::invalid_argument("Unsupported tool call type: " + tool_call.dump());
}
if (!tool_call.contains("function")) {
- throw std::runtime_error("Missing tool call function: " + tool_call.dump());
+ throw std::invalid_argument("Missing tool call function: " + tool_call.dump());
}
const auto & fc = tool_call.at("function");
if (!fc.contains("name")) {
- throw std::runtime_error("Missing tool call name: " + tool_call.dump());
+ throw std::invalid_argument("Missing tool call name: " + tool_call.dump());
}
tc.name = fc.at("name");
tc.arguments = fc.at("arguments");
@@ -250,7 +258,7 @@ std::vector common_chat_msgs_parse_oaicompat(const json & messa
}
}
if (!has_content && !has_tool_calls) {
- throw std::runtime_error("Expected 'content' or 'tool_calls' (ref: https://github.com/ggml-org/llama.cpp/issues/8367 & https://github.com/ggml-org/llama.cpp/issues/12279)");
+ throw std::invalid_argument("Expected 'content' or 'tool_calls' (ref: https://github.com/ggml-org/llama.cpp/issues/8367 & https://github.com/ggml-org/llama.cpp/issues/12279)");
}
if (message.contains("reasoning_content")) {
msg.reasoning_content = message.at("reasoning_content");
@@ -353,18 +361,18 @@ std::vector common_chat_tools_parse_oaicompat(const json & too
try {
if (!tools.is_null()) {
if (!tools.is_array()) {
- throw std::runtime_error("Expected 'tools' to be an array, got " + tools.dump());
+ throw std::invalid_argument("Expected 'tools' to be an array, got " + tools.dump());
}
for (const auto & tool : tools) {
if (!tool.contains("type")) {
- throw std::runtime_error("Missing tool type: " + tool.dump());
+ throw std::invalid_argument("Missing tool type: " + tool.dump());
}
const auto & type = tool.at("type");
if (!type.is_string() || type != "function") {
- throw std::runtime_error("Unsupported tool type: " + tool.dump());
+ throw std::invalid_argument("Unsupported tool type: " + tool.dump());
}
if (!tool.contains("function")) {
- throw std::runtime_error("Missing tool function: " + tool.dump());
+ throw std::invalid_argument("Missing tool function: " + tool.dump());
}
const auto & function = tool.at("function");
@@ -649,6 +657,9 @@ const char * common_chat_format_name(common_chat_format format) {
case COMMON_CHAT_FORMAT_QWEN3_CODER_XML: return "Qwen3 Coder";
case COMMON_CHAT_FORMAT_APRIEL_1_5: return "Apriel 1.5";
case COMMON_CHAT_FORMAT_XIAOMI_MIMO: return "Xiaomi MiMo";
+ case COMMON_CHAT_FORMAT_PEG_SIMPLE: return "peg-simple";
+ case COMMON_CHAT_FORMAT_PEG_NATIVE: return "peg-native";
+ case COMMON_CHAT_FORMAT_PEG_CONSTRUCTED: return "peg-constructed";
default:
throw std::runtime_error("Unknown chat format");
}
@@ -678,114 +689,6 @@ common_reasoning_format common_reasoning_format_from_name(const std::string & fo
throw std::runtime_error("Unknown reasoning format: " + format);
}
-static std::string wrap_code_as_arguments(common_chat_msg_parser & builder, const std::string & code) {
- std::string arguments;
- if (builder.is_partial()) {
- arguments = (json {{"code", code + builder.healing_marker()}}).dump();
- auto idx = arguments.find(builder.healing_marker());
- if (idx != std::string::npos) {
- arguments.resize(idx);
- }
- } else {
- arguments = (json {{"code", code}}).dump();
- }
- return arguments;
-}
-
-/**
- * Takes a prefix regex that must have 1 group to capture the function name, a closing suffix, and expects json parameters in between.
- * Aggregates the prefix, suffix and in-between text into the content.
- */
-static void parse_json_tool_calls(
- common_chat_msg_parser & builder,
- const std::optional<common_regex> & block_open,
- const std::optional<common_regex> & function_regex_start_only,
- const std::optional<common_regex> & function_regex,
- const common_regex & close_regex,
- const std::optional<common_regex> & block_close,
- bool allow_raw_python = false,
- const std::function<std::string(const common_chat_msg_parser::find_regex_result & res)> & get_function_name = nullptr) {
-
- auto parse_tool_calls = [&]() {
- size_t from = std::string::npos;
- auto first = true;
- while (true) {
- auto start_pos = builder.pos();
- auto res = function_regex_start_only && first
- ? builder.try_consume_regex(*function_regex_start_only)
- : function_regex
- ? builder.try_find_regex(*function_regex, from)
- : std::nullopt;
-
- if (res) {
- std::string name;
- if (get_function_name) {
- name = get_function_name(*res);
- } else {
- GGML_ASSERT(res->groups.size() == 2);
- name = builder.str(res->groups[1]);
- }
- first = false;
- if (name.empty()) {
- // get_function_name signalled us that we should skip this match and treat it as content.
- from = res->groups[0].begin + 1;
- continue;
- }
- from = std::string::npos;
-
- auto maybe_raw_python = name == "python" && allow_raw_python;
- if (builder.input()[builder.pos()] == '{' || !maybe_raw_python) {
- if (auto arguments = builder.try_consume_json_with_dumped_args({{}})) {
- if (!builder.add_tool_call(name, "", arguments->value) || arguments->is_partial) {
- throw common_chat_msg_partial_exception("incomplete tool call");
- }
- builder.consume_regex(close_regex);
- }
- continue;
- }
- if (maybe_raw_python) {
- auto arguments = wrap_code_as_arguments(builder, builder.consume_rest());
- if (!builder.add_tool_call(name, "", arguments)) {
- throw common_chat_msg_partial_exception("incomplete tool call");
- }
- return;
- }
- throw common_chat_msg_partial_exception("incomplete tool call");
- } else {
- builder.move_to(start_pos);
- }
- break;
- }
- if (block_close) {
- builder.consume_regex(*block_close);
- }
- builder.consume_spaces();
- builder.add_content(builder.consume_rest());
- };
- if (block_open) {
- if (auto res = builder.try_find_regex(*block_open)) {
- parse_tool_calls();
- } else {
- builder.add_content(builder.consume_rest());
- }
- } else {
- parse_tool_calls();
- }
-}
-
-static void parse_prefixed_json_tool_call_array(common_chat_msg_parser & builder, const common_regex & prefix, size_t rstrip_prefix = 0) {
- static const std::vector<std::vector<std::string>> args_paths = {{"arguments"}};
- if (auto res = builder.try_find_regex(prefix)) {
- builder.move_back(rstrip_prefix);
- auto tool_calls = builder.consume_json_with_dumped_args(args_paths);
- if (!builder.add_tool_calls(tool_calls.value) || tool_calls.is_partial) {
- throw common_chat_msg_partial_exception("incomplete tool call array");
- }
- } else {
- builder.add_content(builder.consume_rest());
- }
-}
-
static void foreach_function(const json & tools, const std::function & fn) {
for (const auto & tool : tools) {
if (!tool.contains("type") || tool.at("type") != "function" || !tool.contains("function")) {
@@ -918,37 +821,6 @@ static common_chat_params common_chat_params_init_generic(const common_chat_temp
data.format = COMMON_CHAT_FORMAT_GENERIC;
return data;
}
-static void common_chat_parse_generic(common_chat_msg_parser & builder) {
- if (!builder.syntax().parse_tool_calls) {
- builder.add_content(builder.consume_rest());
- return;
- }
- static const std::vector> content_paths = {
- {"response"},
- };
- static const std::vector> args_paths = {
- {"tool_call", "arguments"},
- {"tool_calls", "arguments"},
- };
- auto data = builder.consume_json_with_dumped_args(args_paths, content_paths);
- if (data.value.contains("tool_calls")) {
- if (!builder.add_tool_calls(data.value.at("tool_calls")) || data.is_partial) {
- throw common_chat_msg_partial_exception("incomplete tool calls");
- }
- } else if (data.value.contains("tool_call")) {
- if (!builder.add_tool_call(data.value.at("tool_call")) || data.is_partial) {
- throw common_chat_msg_partial_exception("incomplete tool call");
- }
- } else if (data.value.contains("response")) {
- const auto & response = data.value.at("response");
- builder.add_content(response.is_string() ? response.template get() : response.dump(2));
- if (data.is_partial) {
- throw common_chat_msg_partial_exception("incomplete response");
- }
- } else {
- throw common_chat_msg_partial_exception("Expected 'tool_call', 'tool_calls' or 'response' in JSON");
- }
-}
static common_chat_params common_chat_params_init_mistral_nemo(const common_chat_template & tmpl, const struct templates_params & inputs) {
common_chat_params data;
@@ -1173,28 +1045,6 @@ static common_chat_params common_chat_params_init_magistral(const common_chat_te
return data;
}
-static void common_chat_parse_mistral_nemo(common_chat_msg_parser & builder) {
- if (!builder.syntax().parse_tool_calls) {
- builder.add_content(builder.consume_rest());
- return;
- }
-
- static const common_regex prefix(regex_escape("[TOOL_CALLS]"));
- parse_prefixed_json_tool_call_array(builder, prefix);
-}
-
-static void common_chat_parse_magistral(common_chat_msg_parser & builder) {
- builder.try_parse_reasoning("[THINK]", "[/THINK]");
-
- if (!builder.syntax().parse_tool_calls) {
- builder.add_content(builder.consume_rest());
- return;
- }
-
- static const common_regex prefix(regex_escape("[TOOL_CALLS]"));
- parse_prefixed_json_tool_call_array(builder, prefix);
-}
-
static common_chat_params common_chat_params_init_command_r7b(const common_chat_template & tmpl, const struct templates_params & inputs) {
common_chat_params data;
@@ -1275,39 +1125,6 @@ static common_chat_params common_chat_params_init_command_r7b(const common_chat_
return data;
}
-static void common_chat_parse_command_r7b(common_chat_msg_parser & builder) {
- builder.try_parse_reasoning("<|START_THINKING|>", "<|END_THINKING|>");
-
- static const common_regex start_action_regex("<\\|START_ACTION\\|>");
- static const common_regex end_action_regex("<\\|END_ACTION\\|>");
- static const common_regex start_response_regex("<\\|START_RESPONSE\\|>");
- static const common_regex end_response_regex("<\\|END_RESPONSE\\|>");
-
- if (auto res = builder.try_find_regex(start_action_regex)) {
- // If we didn't extract thoughts, prelude includes them.
- auto tool_calls = builder.consume_json_with_dumped_args({{"parameters"}});
- for (const auto & tool_call : tool_calls.value) {
- std::string name = tool_call.contains("tool_name") ? tool_call.at("tool_name") : "";
- std::string id = tool_call.contains("tool_call_id") ? tool_call.at("tool_call_id") : "";
- std::string arguments = tool_call.contains("parameters") ? tool_call.at("parameters") : "";
- if (!builder.add_tool_call(name, id, arguments) || tool_calls.is_partial) {
- throw common_chat_msg_partial_exception("incomplete tool call");
- }
- }
- if (tool_calls.is_partial) {
- throw common_chat_msg_partial_exception("incomplete tool call");
- }
- builder.consume_regex(end_action_regex);
- } else if (auto res = builder.try_find_regex(start_response_regex)) {
- if (!builder.try_find_regex(end_response_regex)) {
- builder.add_content(builder.consume_rest());
- throw common_chat_msg_partial_exception(end_response_regex.str());
- }
- } else {
- builder.add_content(builder.consume_rest());
- }
-}
-
static void expect_tool_parameters(const std::string & name, const json & parameters, const std::vector & expected_properties) {
if (!parameters.is_object() || !parameters.contains("type") || parameters.at("type") != "object" || !parameters.contains("properties") || !parameters.contains("required")) {
throw std::runtime_error("Parameters of tool " + name + " must be an object w/ required properties");
@@ -1536,63 +1353,6 @@ static common_chat_params common_chat_params_init_apertus(const common_chat_temp
}
return data;
}
-static void common_chat_parse_llama_3_1(common_chat_msg_parser & builder, bool with_builtin_tools = false) {
- builder.try_parse_reasoning("", "");
-
- if (!builder.syntax().parse_tool_calls) {
- builder.add_content(builder.consume_rest());
- return;
- }
-
- static const common_regex function_regex(
- "\\s*\\{\\s*(?:\"type\"\\s*:\\s*\"function\"\\s*,\\s*)?\"name\"\\s*:\\s*\"([^\"]+)\"\\s*,\\s*\"parameters\"\\s*: ");
- static const common_regex close_regex("\\}\\s*");
-
- static const common_regex function_name_regex("\\s*(\\w+)\\s*\\.\\s*call\\(");
- static const common_regex arg_name_regex("\\s*(\\w+)\\s*=\\s*");
-
- if (with_builtin_tools) {
- static const common_regex builtin_call_regex("<\\|python_tag\\|>");
- if (auto res = builder.try_find_regex(builtin_call_regex)) {
- auto fun_res = builder.consume_regex(function_name_regex);
- auto function_name = builder.str(fun_res.groups[1]);
-
- common_healing_marker healing_marker;
- json args = json::object();
- while (true) {
- if (auto arg_res = builder.try_consume_regex(arg_name_regex)) {
- auto arg_name = builder.str(arg_res->groups[1]);
- auto partial = builder.consume_json();
- args[arg_name] = partial.json;
- healing_marker.marker = partial.healing_marker.marker;
- healing_marker.json_dump_marker = partial.healing_marker.json_dump_marker;
- builder.consume_spaces();
- if (!builder.try_consume_literal(",")) {
- break;
- }
- } else {
- break;
- }
- }
- builder.consume_literal(")");
- builder.consume_spaces();
-
- auto arguments = args.dump();
- if (!builder.add_tool_call(function_name, "", arguments)) {
- throw common_chat_msg_partial_exception("Incomplete tool call");
- }
- return;
- }
- }
- parse_json_tool_calls(
- builder,
- /* block_open= */ std::nullopt,
- /* function_regex_start_only= */ function_regex,
- /* function_regex= */ std::nullopt,
- close_regex,
- std::nullopt);
-
-}
static common_chat_params common_chat_params_init_deepseek_r1(const common_chat_template & tmpl, const struct templates_params & inputs) {
common_chat_params data;
@@ -1732,88 +1492,6 @@ static common_chat_params common_chat_params_init_deepseek_v3_1(const common_cha
return data;
}
-static void common_chat_parse_deepseek_r1(common_chat_msg_parser & builder) {
- builder.try_parse_reasoning("", "");
- if (!builder.syntax().parse_tool_calls) {
- builder.add_content(builder.consume_rest());
- return;
- }
-
- static const common_regex tool_calls_begin("(?:<|tool▁calls▁begin|>|<|tool_calls_begin|>|<|tool calls begin|>|<|tool\\\\_calls\\\\_begin|>|<|tool▁calls|>)");
- static const common_regex tool_calls_end("<|tool▁calls▁end|>");
- static const common_regex function_regex("(?:<|tool▁call▁begin|>)?function<|tool▁sep|>([^\n]+)\n```json\n");
- static const common_regex close_regex("```[\\s\\r\\n]*<|tool▁call▁end|>");
-
- parse_json_tool_calls(
- builder,
- /* block_open= */ tool_calls_begin,
- /* function_regex_start_only= */ std::nullopt,
- function_regex,
- close_regex,
- tool_calls_end);
-}
-
-static void common_chat_parse_deepseek_v3_1_content(common_chat_msg_parser & builder) {
- static const common_regex function_regex("(?:<|tool▁call▁begin|>)?([^\\n<]+)(?:<|tool▁sep|>)");
-
- static const common_regex close_regex("(?:[\\s]*)?<|tool▁call▁end|>");
- static const common_regex tool_calls_begin("(?:<|tool▁calls▁begin|>|<|tool_calls_begin|>|<|tool calls begin|>|<|tool\\\\_calls\\\\_begin|>|<|tool▁calls|>)");
- static const common_regex tool_calls_end("<|tool▁calls▁end|>");
-
- if (!builder.syntax().parse_tool_calls) {
- LOG_DBG("%s: not parse_tool_calls\n", __func__);
- builder.add_content(builder.consume_rest());
- return;
- }
-
- LOG_DBG("%s: parse_tool_calls\n", __func__);
-
- parse_json_tool_calls(
- builder,
- /* block_open= */ tool_calls_begin,
- /* function_regex_start_only= */ std::nullopt,
- function_regex,
- close_regex,
- tool_calls_end);
-}
-
-static void common_chat_parse_deepseek_v3_1(common_chat_msg_parser & builder) {
- // DeepSeek V3.1 outputs reasoning content between "" and "" tags, followed by regular content
- // First try to parse using the standard reasoning parsing method
- LOG_DBG("%s: thinking_forced_open: %s\n", __func__, std::to_string(builder.syntax().thinking_forced_open).c_str());
-
- auto start_pos = builder.pos();
- auto found_end_think = builder.try_find_literal("");
- builder.move_to(start_pos);
-
- if (builder.syntax().thinking_forced_open && !builder.is_partial() && !found_end_think) {
- LOG_DBG("%s: no end_think, not partial, adding content\n", __func__);
- common_chat_parse_deepseek_v3_1_content(builder);
- } else if (builder.try_parse_reasoning("", "")) {
- // If reasoning was parsed successfully, the remaining content is regular content
- LOG_DBG("%s: parsed reasoning, adding content\n", __func__);
- // <|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>NAME\n```json\nJSON\n```<|tool▁call▁end|><|tool▁calls▁end|>
- common_chat_parse_deepseek_v3_1_content(builder);
- } else {
- if (builder.syntax().reasoning_format == COMMON_REASONING_FORMAT_NONE) {
- LOG_DBG("%s: reasoning_format none, adding content\n", __func__);
- common_chat_parse_deepseek_v3_1_content(builder);
- return;
- }
- // If no reasoning tags found, check if we should treat everything as reasoning
- if (builder.syntax().thinking_forced_open) {
- // If thinking is forced open but no tags found, treat everything as reasoning
- LOG_DBG("%s: thinking_forced_open, adding reasoning content\n", __func__);
- builder.add_reasoning_content(builder.consume_rest());
- } else {
- LOG_DBG("%s: no thinking_forced_open, adding content\n", __func__);
- // <|tool▁call▁begin|>NAME<|tool▁sep|>JSON<|tool▁call▁end|>
- common_chat_parse_deepseek_v3_1_content(builder);
- }
- }
-}
-
-
static common_chat_params common_chat_params_init_minimax_m2(const common_chat_template & tmpl, const struct templates_params & params) {
common_chat_params data;
data.grammar_lazy = params.tools.is_array() && !params.tools.empty() && params.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
@@ -1856,20 +1534,6 @@ static common_chat_params common_chat_params_init_minimax_m2(const common_chat_t
return data;
}
-static void common_chat_parse_minimax_m2(common_chat_msg_parser & builder) {
- static const xml_tool_call_format form {
- /* form.scope_start = */ "",
- /* form.tool_start = */ "",
- /* form.key_start = */ "",
- /* form.val_end = */ "",
- /* form.tool_end = */ "",
- /* form.scope_end = */ "",
- };
- builder.consume_reasoning_with_xml_tool_calls(form, "", "");
-}
-
static common_chat_params common_chat_params_init_qwen3_coder_xml(const common_chat_template & tmpl, const struct templates_params & params) {
common_chat_params data;
data.grammar_lazy = params.tools.is_array() && !params.tools.empty() && params.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
@@ -1902,23 +1566,6 @@ static common_chat_params common_chat_params_init_qwen3_coder_xml(const common_c
return data;
}
-static void common_chat_parse_qwen3_coder_xml(common_chat_msg_parser & builder) {
- static const xml_tool_call_format form = ([]() {
- xml_tool_call_format form {};
- form.scope_start = "";
- form.tool_start = "", "");
-}
-
static common_chat_params common_chat_params_init_apriel_1_5(const common_chat_template & tmpl, const struct templates_params & params) {
common_chat_params data;
data.grammar_lazy = params.tools.is_array() && !params.tools.empty() && params.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
@@ -2016,25 +1645,6 @@ static common_chat_params common_chat_params_init_apriel_1_5(const common_chat_t
return data;
}
-static void common_chat_parse_apriel_1_5(common_chat_msg_parser & builder) {
- static const xml_tool_call_format form = ([]() {
- xml_tool_call_format form {};
- form.scope_start = "[";
- form.tool_start = "{\"name\": \"";
- form.tool_sep = "\", \"arguments\": {";
- form.key_start = "\"";
- form.key_val_sep = "\": ";
- form.val_end = ", ";
- form.tool_end = "}, ";
- form.scope_end = "]";
- form.raw_argval = false;
- form.last_val_end = "";
- form.last_tool_end = "}";
- return form;
- })();
- builder.consume_reasoning_with_xml_tool_calls(form, "", "");
-}
-
static common_chat_params common_chat_params_init_xiaomi_mimo(const common_chat_template & tmpl, const struct templates_params & params) {
common_chat_params data;
data.grammar_lazy = params.tools.is_array() && !params.tools.empty() && params.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
@@ -2067,24 +1677,6 @@ static common_chat_params common_chat_params_init_xiaomi_mimo(const common_chat_
return data;
}
-static void common_chat_parse_xiaomi_mimo(common_chat_msg_parser & builder) {
- static const xml_tool_call_format form = ([]() {
- xml_tool_call_format form {};
- form.scope_start = "";
- form.tool_start = "\n{\"name\": \"";
- form.tool_sep = "\", \"arguments\": {";
- form.key_start = "\"";
- form.key_val_sep = "\": ";
- form.val_end = ", ";
- form.tool_end = "}\n";
- form.scope_end = "";
- form.raw_argval = false;
- form.last_val_end = "";
- return form;
- })();
- builder.consume_reasoning_with_xml_tool_calls(form);
-}
-
static common_chat_params common_chat_params_init_gpt_oss(const common_chat_template & tmpl, const struct templates_params & inputs) {
common_chat_params data;
@@ -2231,93 +1823,6 @@ static common_chat_params common_chat_params_init_gpt_oss(const common_chat_temp
return data;
}
-static void common_chat_parse_gpt_oss(common_chat_msg_parser & builder) {
- static const std::string constraint = "(?: (<\\|constrain\\|>)?([a-zA-Z0-9_-]+))";
- static const std::string recipient("(?: to=functions\\.([^<\\s]+))");
-
- static const common_regex start_regex("<\\|start\\|>assistant");
- static const common_regex analysis_regex("<\\|channel\\|>analysis");
- static const common_regex final_regex("<\\|channel\\|>final" + constraint + "?");
- static const common_regex preamble_regex("<\\|channel\\|>commentary");
- static const common_regex tool_call1_regex(recipient + "<\\|channel\\|>(analysis|commentary)" + constraint + "?");
- static const common_regex tool_call2_regex("<\\|channel\\|>(analysis|commentary)" + recipient + constraint + "?");
-
- auto consume_end = [&](bool include_end = false) {
- if (auto res = builder.try_find_literal("<|end|>")) {
- return res->prelude + (include_end ? builder.str(res->groups[0]) : "");
- }
- return builder.consume_rest();
- };
-
- auto handle_tool_call = [&](const std::string & name) {
- if (auto args = builder.try_consume_json_with_dumped_args({{}})) {
- if (builder.syntax().parse_tool_calls) {
- if (!builder.add_tool_call(name, "", args->value) || args->is_partial) {
- throw common_chat_msg_partial_exception("incomplete tool call");
- }
- } else if (args->is_partial) {
- throw common_chat_msg_partial_exception("incomplete tool call");
- }
- }
- };
-
- auto regex_match = [](const common_regex & regex, const std::string & input) -> std::optional {
- auto match = regex.search(input, 0, true);
- if (match.type == COMMON_REGEX_MATCH_TYPE_FULL) {
- return match;
- }
- return std::nullopt;
- };
-
- do {
- auto header_start_pos = builder.pos();
- auto content_start = builder.try_find_literal("<|message|>");
- if (!content_start) {
- throw common_chat_msg_partial_exception("incomplete header");
- }
-
- auto header = content_start->prelude;
-
- if (auto match = regex_match(tool_call1_regex, header)) {
- auto group = match->groups[1];
- auto name = header.substr(group.begin, group.end - group.begin);
- handle_tool_call(name);
- continue;
- }
-
- if (auto match = regex_match(tool_call2_regex, header)) {
- auto group = match->groups[2];
- auto name = header.substr(group.begin, group.end - group.begin);
- handle_tool_call(name);
- continue;
- }
-
- if (regex_match(analysis_regex, header)) {
- builder.move_to(header_start_pos);
- if (builder.syntax().reasoning_format == COMMON_REASONING_FORMAT_NONE || builder.syntax().reasoning_in_content) {
- builder.add_content(consume_end(true));
- } else {
- builder.try_parse_reasoning("<|channel|>analysis<|message|>", "<|end|>");
- }
- continue;
- }
-
- if(regex_match(final_regex, header) || regex_match(preamble_regex, header)) {
- builder.add_content(consume_end());
- continue;
- }
-
- // Possibly a malformed message, attempt to recover by rolling
- // back to pick up the next <|start|>
- LOG_DBG("%s: unknown header from message: %s\n", __func__, header.c_str());
- builder.move_to(header_start_pos);
- } while (builder.try_find_regex(start_regex, std::string::npos, false));
-
- auto remaining = builder.consume_rest();
- if (!remaining.empty()) {
- LOG_DBG("%s: content after last message: %s\n", __func__, remaining.c_str());
- }
-}
static common_chat_params common_chat_params_init_glm_4_5(const common_chat_template & tmpl, const struct templates_params & inputs) {
common_chat_params data;
@@ -2398,21 +1903,6 @@ static common_chat_params common_chat_params_init_glm_4_5(const common_chat_temp
return data;
}
-static void common_chat_parse_glm_4_5(common_chat_msg_parser & builder) {
- static const xml_tool_call_format form {
- /* form.scope_start = */ "",
- /* form.tool_start = */ "",
- /* form.tool_sep = */ "",
- /* form.key_start = */ "",
- /* form.key_val_sep = */ "",
- /* form.val_end = */ "",
- /* form.tool_end = */ "",
- /* form.scope_end = */ "",
- /* form.key_val_sep2 = */ "",
- };
- builder.consume_reasoning_with_xml_tool_calls(form, "", "");
-}
-
static common_chat_params common_chat_params_init_firefunction_v2(const common_chat_template & tmpl, const struct templates_params & inputs) {
LOG_DBG("%s\n", __func__);
common_chat_params data;
@@ -2460,14 +1950,6 @@ static common_chat_params common_chat_params_init_firefunction_v2(const common_c
}
return data;
}
-static void common_chat_parse_firefunction_v2(common_chat_msg_parser & builder) {
- if (!builder.syntax().parse_tool_calls) {
- builder.add_content(builder.consume_rest());
- return;
- }
- static const common_regex prefix(regex_escape(" functools["));
- parse_prefixed_json_tool_call_array(builder, prefix, /* rstrip_prefix= */ 1);
-}
static common_chat_params common_chat_params_init_functionary_v3_2(const common_chat_template & tmpl, const struct templates_params & inputs) {
// >>>all\nlet's call functions>>>fn1\n{"arg1": 1...}\n>>>fn2\n{"arg1": 1...}...
@@ -2518,34 +2000,6 @@ static common_chat_params common_chat_params_init_functionary_v3_2(const common_
}
return data;
}
-static void common_chat_parse_functionary_v3_2(common_chat_msg_parser & builder) {
- static const common_regex function_regex_start_only(R"((\w+\n\{|python\n|all\n))");
- static const common_regex function_regex(R"(>>>(\w+\n\{|python\n|all\n))");
- static const common_regex close_regex(R"(\s*)");
-
- parse_json_tool_calls(
- builder,
- std::nullopt,
- function_regex_start_only,
- function_regex,
- close_regex,
- std::nullopt,
- /* allow_raw_python= */ true,
- /* get_function_name= */ [&](const auto & res) -> std::string {
- auto at_start = res.groups[0].begin == 0;
- auto name = builder.str(res.groups[1]);
- if (!name.empty() && name.back() == '{') {
- // Unconsume the opening brace '{' to ensure the JSON parsing goes well.
- builder.move_back(1);
- }
- auto idx = name.find_last_not_of("\n{");
- name = name.substr(0, idx + 1);
- if (at_start && name == "all") {
- return "";
- }
- return name;
- });
-}
static common_chat_params common_chat_params_init_functionary_v3_1_llama_3_1(const common_chat_template & tmpl, const struct templates_params & inputs) {
// https://github.com/MeetKai/functionary/blob/main/tests/prompt_test_v3-llama3.1.txt
@@ -2605,31 +2059,6 @@ static common_chat_params common_chat_params_init_functionary_v3_1_llama_3_1(con
// TODO: if (has_raw_python)
return data;
}
-static void common_chat_parse_functionary_v3_1_llama_3_1(common_chat_msg_parser & builder) {
- if (!builder.syntax().parse_tool_calls) {
- builder.add_content(builder.consume_rest());
- return;
- }
- // This version of Functionary still supports the llama 3.1 tool call format for the python tool.
- static const common_regex python_tag_regex(regex_escape("<|python_tag|>"));
-
- static const common_regex function_regex(R"()");
- static const common_regex close_regex(R"()");
-
- parse_json_tool_calls(
- builder,
- /* block_open= */ std::nullopt,
- /* function_regex_start_only= */ std::nullopt,
- function_regex,
- close_regex,
- std::nullopt);
-
- if (auto res = builder.try_find_regex(python_tag_regex)) {
- auto arguments = wrap_code_as_arguments(builder, builder.consume_rest());
- builder.add_tool_call("python", "", arguments);
- return;
- }
-}
static common_chat_params common_chat_params_init_hermes_2_pro(const common_chat_template & tmpl, const struct templates_params & inputs) {
common_chat_params data;
@@ -2746,83 +2175,6 @@ static common_chat_params common_chat_params_init_hermes_2_pro(const common_chat
return data;
}
-static void common_chat_parse_hermes_2_pro(common_chat_msg_parser & builder) {
- builder.try_parse_reasoning("", "");
- if (!builder.syntax().parse_tool_calls) {
- builder.add_content(builder.consume_rest());
- return;
- }
-
- static const common_regex open_regex(
- "(?:"
- "(```(?:xml|json)?\\n\\s*)?" // match 1 (block_start)
- "(" // match 2 (open_tag)
- ""
- "|"
- "|"
- "|"
- "|"
- "|"
- "|"
- "|"
- ")?"
- "(\\s*\\{\\s*\"name\")" // match 3 (named tool call)
- ")"
- "|]+)>" // match 4 (function name)
- "|" // match 5 (function name again)
- );
-
- while (auto res = builder.try_find_regex(open_regex)) {
- const auto & block_start = res->groups[1];
- std::string block_end = block_start.empty() ? "" : "```";
-
- const auto & open_tag = res->groups[2];
- std::string close_tag;
-
- if (!res->groups[3].empty()) {
- builder.move_to(res->groups[3].begin);
- close_tag = open_tag.empty() ? "" : "" + builder.str(open_tag).substr(1);
-
- if (auto tool_call = builder.try_consume_json_with_dumped_args({{"arguments"}})) {
- if (!builder.add_tool_call(tool_call->value) || tool_call->is_partial) {
- throw common_chat_msg_partial_exception("incomplete tool call");
- }
- builder.consume_spaces();
- builder.consume_literal(close_tag);
- builder.consume_spaces();
- if (!block_end.empty()) {
- builder.consume_literal(block_end);
- builder.consume_spaces();
- }
- } else {
- throw common_chat_msg_partial_exception("failed to parse tool call");
- }
- } else {
- auto function_name = builder.str(res->groups[4]);
- if (function_name.empty()) {
- function_name = builder.str(res->groups[5]);
- }
- GGML_ASSERT(!function_name.empty());
-
- close_tag = "";
-
- if (auto arguments = builder.try_consume_json_with_dumped_args({{}})) {
- if (!builder.add_tool_call(function_name, "", arguments->value) || arguments->is_partial) {
- throw common_chat_msg_partial_exception("incomplete tool call");
- }
- builder.consume_spaces();
- builder.consume_literal(close_tag);
- builder.consume_spaces();
- if (!block_end.empty()) {
- builder.consume_literal(block_end);
- builder.consume_spaces();
- }
- }
- }
- }
-
- builder.add_content(builder.consume_rest());
-}
static common_chat_params common_chat_params_init_granite(const common_chat_template & tmpl, const struct templates_params & inputs) {
common_chat_params data;
@@ -2905,190 +2257,6 @@ static common_chat_params common_chat_params_init_granite(const common_chat_temp
return data;
}
-static void common_chat_parse_granite(common_chat_msg_parser & builder) {
- // Parse thinking tags
- static const common_regex start_think_regex(regex_escape(""));
- static const common_regex end_think_regex(regex_escape(""));
- // Granite models output partial tokens such as "<" and "groups[0].begin);
- builder.try_find_regex(end_think_regex, std::string::npos, false);
- // Restore position for try_parse_reasoning()
- builder.move_to(res->groups[0].begin);
- }
- builder.try_parse_reasoning("", "");
-
- // Parse response tags
- static const common_regex start_response_regex(regex_escape(""));
- static const common_regex end_response_regex(regex_escape(""));
- // Granite models output partial tokens such as "<" and ""));
- if (auto res = builder.try_find_regex(tool_call_regex)) {
- builder.move_to(res->groups[0].end);
-
- // Expect JSON array of tool calls
- if (auto tool_call = builder.try_consume_json_with_dumped_args({{{"arguments"}}})) {
- if (!builder.add_tool_calls(tool_call->value) || tool_call->is_partial) {
- throw common_chat_msg_partial_exception("incomplete tool call");
- }
- }
- } else {
- builder.add_content(builder.consume_rest());
- }
-}
-
-static void common_chat_parse_nemotron_v2(common_chat_msg_parser & builder) {
- // Parse thinking tags
- builder.try_parse_reasoning("", "");
- if (!builder.syntax().parse_tool_calls) {
- builder.add_content(builder.consume_rest());
- return;
- }
-
- // Look for tool calls
- static const common_regex tool_call_regex(regex_escape(""));
- if (auto res = builder.try_find_regex(tool_call_regex)) {
- builder.move_to(res->groups[0].end);
-
- // Expect JSON array of tool calls
- auto tool_calls_data = builder.consume_json();
- if (tool_calls_data.json.is_array()) {
- if (!builder.try_consume_literal("")) {
- throw common_chat_msg_partial_exception("Incomplete tool call");
- }
- builder.add_tool_calls(tool_calls_data.json);
- } else {
- throw common_chat_msg_partial_exception("Incomplete tool call");
- }
- }
- builder.add_content(builder.consume_rest());
-}
-
-static void common_chat_parse_apertus(common_chat_msg_parser & builder) {
- // Parse thinking tags
- builder.try_parse_reasoning("<|inner_prefix|>", "<|inner_suffix|>");
- if (!builder.syntax().parse_tool_calls) {
- builder.add_content(builder.consume_rest());
- return;
- }
-
- // Look for tool calls
- static const common_regex tool_call_regex(regex_escape("<|tools_prefix|>"));
- if (auto res = builder.try_find_regex(tool_call_regex)) {
- builder.move_to(res->groups[0].end);
-
- auto tool_calls_data = builder.consume_json();
- if (tool_calls_data.json.is_array()) {
- builder.consume_spaces();
- if (!builder.try_consume_literal("<|tools_suffix|>")) {
- throw common_chat_msg_partial_exception("Incomplete tool call");
- }
- for (const auto & value : tool_calls_data.json) {
- if (value.is_object()) {
- builder.add_tool_call_short_form(value);
- }
- }
- } else {
- throw common_chat_msg_partial_exception("Incomplete tool call");
- }
- }
- builder.add_content(builder.consume_rest());
-}
-
-
-static void common_chat_parse_lfm2(common_chat_msg_parser & builder) {
- if (!builder.syntax().parse_tool_calls) {
- builder.add_content(builder.consume_rest());
- return;
- }
-
- // LFM2 format: <|tool_call_start|>[{"name": "get_current_time", "arguments": {"location": "Paris"}}]<|tool_call_end|>
- static const common_regex tool_call_start_regex(regex_escape("<|tool_call_start|>"));
- static const common_regex tool_call_end_regex(regex_escape("<|tool_call_end|>"));
-
- // Loop through all tool calls
- while (auto res = builder.try_find_regex(tool_call_start_regex, std::string::npos, /* add_prelude_to_content= */ true)) {
- builder.move_to(res->groups[0].end);
-
- // Parse JSON array format: [{"name": "...", "arguments": {...}}]
- auto tool_calls_data = builder.consume_json();
-
- // Consume end marker
- builder.consume_spaces();
- if (!builder.try_consume_regex(tool_call_end_regex)) {
- throw common_chat_msg_partial_exception("Expected <|tool_call_end|>");
- }
-
- // Process each tool call in the array
- if (tool_calls_data.json.is_array()) {
- for (const auto & tool_call : tool_calls_data.json) {
- if (!tool_call.is_object()) {
- throw common_chat_msg_partial_exception("Tool call must be an object");
- }
-
- if (!tool_call.contains("name")) {
- throw common_chat_msg_partial_exception("Tool call missing 'name' field");
- }
-
- std::string function_name = tool_call.at("name");
- std::string arguments = "{}";
-
- if (tool_call.contains("arguments")) {
- if (tool_call.at("arguments").is_object()) {
- arguments = tool_call.at("arguments").dump();
- } else if (tool_call.at("arguments").is_string()) {
- arguments = tool_call.at("arguments");
- }
- }
-
- if (!builder.add_tool_call(function_name, "", arguments)) {
- throw common_chat_msg_partial_exception("Incomplete tool call");
- }
- }
- } else {
- throw common_chat_msg_partial_exception("Expected JSON array for tool calls");
- }
-
- // Consume any trailing whitespace after this tool call
- builder.consume_spaces();
- }
-
- // Consume any remaining content after all tool calls
- auto remaining = builder.consume_rest();
- if (!string_strip(remaining).empty()) {
- builder.add_content(remaining);
- }
-}
-
-static void common_chat_parse_seed_oss(common_chat_msg_parser & builder) {
- static const xml_tool_call_format form {
- /* form.scope_start = */ "",
- /* form.tool_start = */ "",
- /* form.key_start = */ "",
- /* form.val_end = */ "",
- /* form.tool_end = */ "",
- /* form.scope_end = */ "",
- };
- builder.consume_reasoning_with_xml_tool_calls(form, "", "");
-}
-
static common_chat_params common_chat_params_init_without_tools(const common_chat_template & tmpl, const struct templates_params & inputs) {
common_chat_params data;
data.prompt = apply(tmpl, inputs);
@@ -3428,112 +2596,3 @@ common_chat_params common_chat_templates_apply(
? common_chat_templates_apply_jinja(tmpls, inputs)
: common_chat_templates_apply_legacy(tmpls, inputs);
}
-
-static void common_chat_parse_content_only(common_chat_msg_parser & builder) {
- builder.try_parse_reasoning("", "");
- builder.add_content(builder.consume_rest());
-}
-
-static void common_chat_parse(common_chat_msg_parser & builder) {
- LOG_DBG("Parsing input with format %s: %s\n", common_chat_format_name(builder.syntax().format), builder.input().c_str());
-
- switch (builder.syntax().format) {
- case COMMON_CHAT_FORMAT_CONTENT_ONLY:
- common_chat_parse_content_only(builder);
- break;
- case COMMON_CHAT_FORMAT_GENERIC:
- common_chat_parse_generic(builder);
- break;
- case COMMON_CHAT_FORMAT_MISTRAL_NEMO:
- common_chat_parse_mistral_nemo(builder);
- break;
- case COMMON_CHAT_FORMAT_MAGISTRAL:
- common_chat_parse_magistral(builder);
- break;
- case COMMON_CHAT_FORMAT_LLAMA_3_X:
- common_chat_parse_llama_3_1(builder);
- break;
- case COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS:
- common_chat_parse_llama_3_1(builder, /* with_builtin_tools= */ true);
- break;
- case COMMON_CHAT_FORMAT_DEEPSEEK_R1:
- common_chat_parse_deepseek_r1(builder);
- break;
- case COMMON_CHAT_FORMAT_DEEPSEEK_V3_1:
- common_chat_parse_deepseek_v3_1(builder);
- break;
- case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2:
- common_chat_parse_functionary_v3_2(builder);
- break;
- case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1:
- common_chat_parse_functionary_v3_1_llama_3_1(builder);
- break;
- case COMMON_CHAT_FORMAT_HERMES_2_PRO:
- common_chat_parse_hermes_2_pro(builder);
- break;
- case COMMON_CHAT_FORMAT_FIREFUNCTION_V2:
- common_chat_parse_firefunction_v2(builder);
- break;
- case COMMON_CHAT_FORMAT_COMMAND_R7B:
- common_chat_parse_command_r7b(builder);
- break;
- case COMMON_CHAT_FORMAT_GRANITE:
- common_chat_parse_granite(builder);
- break;
- case COMMON_CHAT_FORMAT_GPT_OSS:
- common_chat_parse_gpt_oss(builder);
- break;
- case COMMON_CHAT_FORMAT_SEED_OSS:
- common_chat_parse_seed_oss(builder);
- break;
- case COMMON_CHAT_FORMAT_NEMOTRON_V2:
- common_chat_parse_nemotron_v2(builder);
- break;
- case COMMON_CHAT_FORMAT_APERTUS:
- common_chat_parse_apertus(builder);
- break;
- case COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS:
- common_chat_parse_lfm2(builder);
- break;
- case COMMON_CHAT_FORMAT_MINIMAX_M2:
- common_chat_parse_minimax_m2(builder);
- break;
- case COMMON_CHAT_FORMAT_GLM_4_5:
- common_chat_parse_glm_4_5(builder);
- break;
- case COMMON_CHAT_FORMAT_KIMI_K2:
- common_chat_parse_kimi_k2(builder);
- break;
- case COMMON_CHAT_FORMAT_QWEN3_CODER_XML:
- common_chat_parse_qwen3_coder_xml(builder);
- break;
- case COMMON_CHAT_FORMAT_APRIEL_1_5:
- common_chat_parse_apriel_1_5(builder);
- break;
- case COMMON_CHAT_FORMAT_XIAOMI_MIMO:
- common_chat_parse_xiaomi_mimo(builder);
- break;
- default:
- throw std::runtime_error(std::string("Unsupported format: ") + common_chat_format_name(builder.syntax().format));
- }
- builder.finish();
-}
-
-common_chat_msg common_chat_parse(const std::string & input, bool is_partial, const common_chat_syntax & syntax) {
- common_chat_msg_parser builder(input, is_partial, syntax);
- try {
- common_chat_parse(builder);
- } catch (const common_chat_msg_partial_exception & ex) {
- LOG_DBG("Partial parse: %s\n", ex.what());
- if (!is_partial) {
- builder.clear_tools();
- builder.move_to(0);
- common_chat_parse_content_only(builder);
- }
- }
- auto msg = builder.result();
- if (!is_partial) {
- LOG_DBG("Parsed message: %s\n", common_chat_msgs_to_json_oaicompat({msg}).at(0).dump().c_str());
- }
- return msg;
-}
diff --git a/common/chat.h b/common/chat.h
index 754c411e23..6085510a40 100644
--- a/common/chat.h
+++ b/common/chat.h
@@ -3,6 +3,7 @@
#pragma once
#include "common.h"
+#include "peg-parser.h"
#include
#include
#include
@@ -76,7 +77,7 @@ struct common_chat_msg_diff {
size_t tool_call_index = std::string::npos;
common_chat_tool_call tool_call_delta;
- static std::vector compute_diffs(const common_chat_msg & previous_msg, const common_chat_msg & new_msg);
+ static std::vector compute_diffs(const common_chat_msg & msg_prv, const common_chat_msg & msg_new);
bool operator==(const common_chat_msg_diff & other) const {
return content_delta == other.content_delta
@@ -124,6 +125,11 @@ enum common_chat_format {
COMMON_CHAT_FORMAT_APRIEL_1_5,
COMMON_CHAT_FORMAT_XIAOMI_MIMO,
+ // These are intended to be parsed by the PEG parser
+ COMMON_CHAT_FORMAT_PEG_SIMPLE,
+ COMMON_CHAT_FORMAT_PEG_NATIVE,
+ COMMON_CHAT_FORMAT_PEG_CONSTRUCTED,
+
COMMON_CHAT_FORMAT_COUNT, // Not a format, just the # formats
};
@@ -154,6 +160,7 @@ struct common_chat_params {
std::vector grammar_triggers;
std::vector preserved_tokens;
std::vector additional_stops;
+ std::string parser;
};
struct common_chat_syntax {
@@ -163,6 +170,7 @@ struct common_chat_syntax {
bool reasoning_in_content = false;
bool thinking_forced_open = false;
bool parse_tool_calls = true;
+ common_peg_arena parser = {};
};
// Check if the template supplied via "--chat-template" is supported or not. Returns true if it's valid
@@ -206,6 +214,7 @@ const char* common_chat_format_name(common_chat_format format);
const char* common_reasoning_format_name(common_reasoning_format format);
common_reasoning_format common_reasoning_format_from_name(const std::string & format);
common_chat_msg common_chat_parse(const std::string & input, bool is_partial, const common_chat_syntax & syntax);
+common_chat_msg common_chat_peg_parse(const common_peg_arena & parser, const std::string & input, bool is_partial, const common_chat_syntax & syntax);
common_chat_tool_choice common_chat_tool_choice_parse_oaicompat(const std::string & tool_choice);
diff --git a/common/common.cpp b/common/common.cpp
index 0d7fd9a937..f07af1d862 100644
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -694,7 +694,7 @@ bool string_parse_kv_override(const char * data, std::vector= 0xD800 && c <= 0xDFFF) // UTF-16 surrogate pairs
|| c == 0xFFFD // Replacement Character (UTF-8)
|| c == 0xFEFF // Byte Order Mark (BOM)
- || c == '/' || c == '\\' || c == ':' || c == '*' // Illegal characters
+ || c == ':' || c == '*' // Illegal characters
|| c == '?' || c == '"' || c == '<' || c == '>' || c == '|') {
return false;
}
+ if (!allow_subdirs && (c == '/' || c == '\\')) {
+ // Subdirectories not allowed, reject path separators
+ return false;
+ }
}
// Reject any leading or trailing ' ', or any trailing '.', these are stripped on Windows and will cause a different filename
@@ -859,6 +863,11 @@ bool fs_create_directory_with_parents(const std::string & path) {
#endif // _WIN32
}
+bool fs_is_directory(const std::string & path) {
+ std::filesystem::path dir(path);
+ return std::filesystem::exists(dir) && std::filesystem::is_directory(dir);
+}
+
std::string fs_get_cache_directory() {
std::string cache_directory = "";
auto ensure_trailing_slash = [](std::string p) {
@@ -893,6 +902,8 @@ std::string fs_get_cache_directory() {
cache_directory = std::getenv("HOME") + std::string("/Library/Caches/");
#elif defined(_WIN32)
cache_directory = std::getenv("LOCALAPPDATA");
+#elif defined(__EMSCRIPTEN__)
+ GGML_ABORT("not implemented on this platform");
#else
# error Unknown architecture
#endif
@@ -912,7 +923,7 @@ std::string fs_get_cache_file(const std::string & filename) {
return cache_directory + filename;
}
-std::vector fs_list_files(const std::string & path) {
+std::vector fs_list(const std::string & path, bool include_directories) {
std::vector files;
if (path.empty()) return files;
@@ -927,14 +938,22 @@ std::vector fs_list_files(const std::string & path) {
const auto & p = entry.path();
if (std::filesystem::is_regular_file(p)) {
common_file_info info;
- info.path = p.string();
- info.name = p.filename().string();
+ info.path = p.string();
+ info.name = p.filename().string();
+ info.is_dir = false;
try {
info.size = static_cast(std::filesystem::file_size(p));
} catch (const std::filesystem::filesystem_error &) {
info.size = 0;
}
files.push_back(std::move(info));
+ } else if (include_directories && std::filesystem::is_directory(p)) {
+ common_file_info info;
+ info.path = p.string();
+ info.name = p.filename().string();
+ info.size = 0; // Directories have no size
+ info.is_dir = true;
+ files.push_back(std::move(info));
}
} catch (const std::filesystem::filesystem_error &) {
// skip entries we cannot inspect
diff --git a/common/common.h b/common/common.h
index 2f23d0baa8..179113a4db 100644
--- a/common/common.h
+++ b/common/common.h
@@ -12,6 +12,10 @@
#include
#include