diff --git a/.github/actions/windows-setup-cuda/action.yml b/.github/actions/windows-setup-cuda/action.yml
index 5575caeca3..6ad61582a5 100644
--- a/.github/actions/windows-setup-cuda/action.yml
+++ b/.github/actions/windows-setup-cuda/action.yml
@@ -65,3 +65,34 @@ runs:
echo "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\libnvvp" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
echo "CUDA_PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
echo "CUDA_PATH_V12_4=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
+
+ - name: Install Cuda Toolkit 13.1
+ if: ${{ inputs.cuda_version == '13.1' }}
+ shell: pwsh
+ run: |
+ mkdir -p "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.1"
+ choco install unzip -y
+ curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_crt/windows-x86_64/cuda_crt-windows-x86_64-13.1.80-archive.zip"
+ curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_cudart/windows-x86_64/cuda_cudart-windows-x86_64-13.1.80-archive.zip"
+ curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvcc/windows-x86_64/cuda_nvcc-windows-x86_64-13.1.80-archive.zip"
+ curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvrtc/windows-x86_64/cuda_nvrtc-windows-x86_64-13.1.80-archive.zip"
+ curl -O "https://developer.download.nvidia.com/compute/cuda/redist/libcublas/windows-x86_64/libcublas-windows-x86_64-13.2.0.9-archive.zip"
+ curl -O "https://developer.download.nvidia.com/compute/cuda/redist/libnvvm/windows-x86_64/libnvvm-windows-x86_64-13.1.80-archive.zip"
+ curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvtx/windows-x86_64/cuda_nvtx-windows-x86_64-13.1.68-archive.zip"
+ curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_profiler_api/windows-x86_64/cuda_profiler_api-windows-x86_64-13.1.80-archive.zip"
+ curl -O "https://developer.download.nvidia.com/compute/cuda/redist/visual_studio_integration/windows-x86_64/visual_studio_integration-windows-x86_64-13.1.68-archive.zip"
+ curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_cccl/windows-x86_64/cuda_cccl-windows-x86_64-13.1.78-archive.zip"
+ unzip '*.zip' -d "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.1"
+ xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.1\cuda_crt-windows-x86_64-13.1.80-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.1" /E /I /H /Y
+ xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.1\cuda_cudart-windows-x86_64-13.1.80-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.1" /E /I /H /Y
+ xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.1\cuda_nvcc-windows-x86_64-13.1.80-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.1" /E /I /H /Y
+ xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.1\cuda_nvrtc-windows-x86_64-13.1.80-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.1" /E /I /H /Y
+ xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.1\libcublas-windows-x86_64-13.2.0.9-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.1" /E /I /H /Y
+ xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.1\libnvvm-windows-x86_64-13.1.80-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.1" /E /I /H /Y
+ xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.1\cuda_nvtx-windows-x86_64-13.1.68-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.1" /E /I /H /Y
+ xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.1\cuda_profiler_api-windows-x86_64-13.1.80-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.1" /E /I /H /Y
+ xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.1\visual_studio_integration-windows-x86_64-13.1.68-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.1" /E /I /H /Y
+ xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.1\cuda_cccl-windows-x86_64-13.1.78-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.1" /E /I /H /Y
+ echo "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.1\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
+ echo "CUDA_PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.1" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
+ echo "CUDA_PATH_V13_1=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.1" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
diff --git a/.github/workflows/build-linux-cross.yml b/.github/workflows/build-linux-cross.yml
index 36201281f0..c2c6ea12ae 100644
--- a/.github/workflows/build-linux-cross.yml
+++ b/.github/workflows/build-linux-cross.yml
@@ -291,6 +291,7 @@ jobs:
-DGGML_RVV=ON \
-DGGML_RV_ZFH=ON \
-DGGML_RV_ZICBOP=ON \
+ -DGGML_RV_ZIHINTPAUSE=ON \
-DRISCV64_SPACEMIT_IME_SPEC=RISCV64_SPACEMIT_IME1 \
-DCMAKE_TOOLCHAIN_FILE=${PWD}/cmake/riscv64-spacemit-linux-gnu-gcc.cmake
diff --git a/.github/workflows/build-riscv-native.yml b/.github/workflows/build-riscv-native.yml
deleted file mode 100644
index a3a0b0d663..0000000000
--- a/.github/workflows/build-riscv-native.yml
+++ /dev/null
@@ -1,120 +0,0 @@
-name: Build on RISCV Linux Machine by Cloud-V
-on:
- pull_request:
- workflow_dispatch:
- workflow_call:
-
-jobs:
- debian-13-riscv64-native: # Bianbu 2.2
- runs-on: [self-hosted, RISCV64]
-
- steps:
- - name: Install prerequisites
- run: |
- sudo apt-get update || true
- sudo apt-get install -y libatomic1
- - uses: actions/checkout@v4
- - name: Setup Riscv
- run: |
- sudo apt-get update || true
- sudo apt-get install -y --no-install-recommends \
- build-essential \
- gcc-14-riscv64-linux-gnu \
- g++-14-riscv64-linux-gnu \
- ccache \
- cmake
-
- - name: Setup ccache
- run: |
- mkdir -p $HOME/.ccache
- ccache -M 5G -d $HOME/.ccache
- export CCACHE_LOGFILE=/home/runneruser/ccache_debug/ccache.log
- export CCACHE_DEBUGDIR="/home/runneruser/ccache_debug"
- echo "$GITHUB_WORKSPACE"
- echo "CCACHE_LOGFILE=$CCACHE_LOGFILE" >> $GITHUB_ENV
- echo "CCACHE_DEBUGDIR=$CCACHE_DEBUGDIR" >> $GITHUB_ENV
- echo "CCACHE_BASEDIR=$GITHUB_WORKSPACE" >> $GITHUB_ENV
- echo "CCACHE_DIR=$HOME/.ccache" >> $GITHUB_ENV
-
- - name: Build
- run: |
- cmake -B build \
- -DLLAMA_CURL=OFF \
- -DCMAKE_BUILD_TYPE=Release \
- -DGGML_OPENMP=OFF \
- -DLLAMA_BUILD_EXAMPLES=ON \
- -DLLAMA_BUILD_TOOLS=ON \
- -DLLAMA_BUILD_TESTS=OFF \
- -DCMAKE_SYSTEM_NAME=Linux \
- -DCMAKE_SYSTEM_PROCESSOR=riscv64 \
- -DCMAKE_C_COMPILER=riscv64-linux-gnu-gcc-14 \
- -DCMAKE_CXX_COMPILER=riscv64-linux-gnu-g++-14 \
- -DCMAKE_C_COMPILER_LAUNCHER=ccache \
- -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
- -DCMAKE_POSITION_INDEPENDENT_CODE=ON \
- -DCMAKE_FIND_ROOT_PATH=/usr/lib/riscv64-linux-gnu \
- -DCMAKE_FIND_ROOT_PATH_MODE_PROGRAM=NEVER \
- -DCMAKE_FIND_ROOT_PATH_MODE_LIBRARY=ONLY \
- -DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=BOTH
-
- cmake --build build --config Release -j $(nproc)
-
- # debian-13-riscv64-spacemit-ime-native: # Bianbu 2.2
- # runs-on: [self-hosted, RISCV64]
-
- # steps:
- # - name: Install prerequisites
- # run: |
- # sudo apt-get update || true
- # sudo apt-get install -y libatomic1
- # - uses: actions/checkout@v4
- # - name: Setup Riscv
- # run: |
- # sudo apt-get update || true
- # sudo apt-get install -y --no-install-recommends \
- # build-essential \
- # gcc-14-riscv64-linux-gnu \
- # g++-14-riscv64-linux-gnu \
- # ccache \
- # cmake
- # sudo apt-get upgrade binutils -y
-
- # - name: Setup ccache
- # run: |
- # mkdir -p $HOME/.ccache
- # ccache -M 5G -d $HOME/.ccache
- # export CCACHE_LOGFILE=/home/runneruser/ccache_debug/ccache.log
- # export CCACHE_DEBUGDIR="/home/runneruser/ccache_debug"
- # echo "$GITHUB_WORKSPACE"
- # echo "CCACHE_LOGFILE=$CCACHE_LOGFILE" >> $GITHUB_ENV
- # echo "CCACHE_DEBUGDIR=$CCACHE_DEBUGDIR" >> $GITHUB_ENV
- # echo "CCACHE_BASEDIR=$GITHUB_WORKSPACE" >> $GITHUB_ENV
- # echo "CCACHE_DIR=$HOME/.ccache" >> $GITHUB_ENV
-
- # - name: Build
- # run: |
- # cmake -B build \
- # -DLLAMA_CURL=OFF \
- # -DCMAKE_BUILD_TYPE=Release \
- # -DGGML_OPENMP=OFF \
- # -DLLAMA_BUILD_EXAMPLES=ON \
- # -DLLAMA_BUILD_TOOLS=ON \
- # -DLLAMA_BUILD_TESTS=OFF \
- # -DCMAKE_SYSTEM_NAME=Linux \
- # -DCMAKE_SYSTEM_PROCESSOR=riscv64 \
- # -DCMAKE_C_COMPILER=riscv64-linux-gnu-gcc-14 \
- # -DCMAKE_CXX_COMPILER=riscv64-linux-gnu-g++-14 \
- # -DCMAKE_C_COMPILER_LAUNCHER=ccache \
- # -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
- # -DCMAKE_POSITION_INDEPENDENT_CODE=ON \
- # -DCMAKE_FIND_ROOT_PATH=/usr/lib/riscv64-linux-gnu \
- # -DCMAKE_FIND_ROOT_PATH_MODE_PROGRAM=NEVER \
- # -DCMAKE_FIND_ROOT_PATH_MODE_LIBRARY=ONLY \
- # -DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=BOTH \
- # -DGGML_RVV=ON \
- # -DGGML_RV_ZFH=ON \
- # -DGGML_RV_ZICBOP=ON \
- # -DGGML_CPU_RISCV64_SPACEMIT=ON \
- # -DRISCV64_SPACEMIT_IME_SPEC=RISCV64_SPACEMIT_IME1
-
- # cmake --build build --config Release -j $(nproc)
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index eee42759fc..ad205f3ec9 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -547,6 +547,46 @@ jobs:
# This is using llvmpipe and runs slower than other backends
ctest -L main --verbose --timeout 3600
+ ubuntu-24-wasm-webgpu:
+ runs-on: ubuntu-24.04
+
+ steps:
+ - name: Clone
+ id: checkout
+ uses: actions/checkout@v4
+
+ - name: ccache
+ uses: ggml-org/ccache-action@v1.2.16
+ with:
+ key: ubuntu-latest-wasm-webgpu
+ evict-old-files: 1d
+
+ - name: Install Emscripten
+ run: |
+ git clone https://github.com/emscripten-core/emsdk.git
+ cd emsdk
+ ./emsdk install latest
+ ./emsdk activate latest
+
+ - name: Fetch emdawnwebgpu
+ run: |
+ DAWN_TAG="v20251027.212519"
+ EMDAWN_PKG="emdawnwebgpu_pkg-${DAWN_TAG}.zip"
+ echo "Downloading ${EMDAWN_PKG}"
+ curl -L -o emdawn.zip \
+ "https://github.com/google/dawn/releases/download/${DAWN_TAG}/${EMDAWN_PKG}"
+ unzip emdawn.zip
+
+ - name: Build WASM WebGPU
+ run: |
+ source emsdk/emsdk_env.sh
+ emcmake cmake -B build-wasm \
+ -DGGML_WEBGPU=ON \
+ -DLLAMA_CURL=OFF \
+ -DEMDAWNWEBGPU_DIR=emdawnwebgpu_pkg
+
+ cmake --build build-wasm --target test-backend-ops -j $(nproc)
+
ubuntu-22-cmake-hip:
runs-on: ubuntu-22.04
container: rocm/dev-ubuntu-22.04:6.1.2
@@ -1562,33 +1602,33 @@ jobs:
run: |
bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
- ggml-ci-x64-amd-vulkan:
- runs-on: [self-hosted, Linux, X64, AMD]
+ # ggml-ci-x64-amd-vulkan:
+ # runs-on: [self-hosted, Linux, X64, AMD]
- steps:
- - name: Clone
- id: checkout
- uses: actions/checkout@v4
+ # steps:
+ # - name: Clone
+ # id: checkout
+ # uses: actions/checkout@v4
- - name: Test
- id: ggml-ci
- run: |
- vulkaninfo --summary
- GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
+ # - name: Test
+ # id: ggml-ci
+ # run: |
+ # vulkaninfo --summary
+ # GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
- ggml-ci-x64-amd-rocm:
- runs-on: [self-hosted, Linux, X64, AMD]
+ # ggml-ci-x64-amd-rocm:
+ # runs-on: [self-hosted, Linux, X64, AMD]
- steps:
- - name: Clone
- id: checkout
- uses: actions/checkout@v4
+ # steps:
+ # - name: Clone
+ # id: checkout
+ # uses: actions/checkout@v4
- - name: Test
- id: ggml-ci
- run: |
- amd-smi static
- GG_BUILD_ROCM=1 GG_BUILD_AMDGPU_TARGETS="gfx1101" bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
+ # - name: Test
+ # id: ggml-ci
+ # run: |
+ # amd-smi static
+ # GG_BUILD_ROCM=1 GG_BUILD_AMDGPU_TARGETS="gfx1101" bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
ggml-ci-mac-metal:
runs-on: [self-hosted, macOS, ARM64]
@@ -1642,6 +1682,337 @@ jobs:
run: |
GG_BUILD_KLEIDIAI=1 GG_BUILD_EXTRA_TESTS_0=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
+ ubuntu-cpu-cmake-riscv64-native:
+ runs-on: RISCV64
+
+ steps:
+ - name: Install dependencies
+ run: |
+ sudo apt-get update
+
+ # Install necessary packages
+ sudo apt-get install -y libatomic1 libtsan2 gcc-14 g++-14 rustup cmake build-essential libssl-dev wget ccache
+
+ # Set gcc-14 and g++-14 as the default compilers
+ sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-14 100
+ sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-14 100
+ sudo ln -sf /usr/bin/gcc-14 /usr/bin/gcc
+ sudo ln -sf /usr/bin/g++-14 /usr/bin/g++
+
+ # Install Rust stable version
+ rustup install stable
+ rustup default stable
+
+ - name: Clone
+ id: checkout
+ uses: actions/checkout@v4
+
+ - name: Check environment
+ run: |
+ uname -a
+ gcc --version
+ g++ --version
+ ldd --version
+ cmake --version
+ rustc --version
+
+ - name: Setup ccache
+ run: |
+ # Set unique cache directory for this job
+ export CCACHE_DIR="$HOME/.ccache/cpu-cmake-rv64-native"
+ mkdir -p "$CCACHE_DIR"
+
+ # Configure ccache for optimal performance
+ ccache --set-config=max_size=5G
+ ccache --set-config=compression=true
+ ccache --set-config=compression_level=6
+ ccache --set-config=cache_dir="$CCACHE_DIR"
+
+ # Enable more aggressive caching
+ ccache --set-config=sloppiness=file_macro,time_macros,include_file_mtime,include_file_ctime
+ ccache --set-config=hash_dir=false
+
+ # Export for subsequent steps
+ echo "CCACHE_DIR=$CCACHE_DIR" >> $GITHUB_ENV
+ echo "PATH=/usr/lib/ccache:$PATH" >> $GITHUB_ENV
+
+ - name: Build
+ id: cmake_build
+ run: |
+ cmake -B build \
+ -DLLAMA_CURL=OFF \
+ -DLLAMA_OPENSSL=ON \
+ -DCMAKE_BUILD_TYPE=Release \
+ -DGGML_OPENMP=OFF \
+ -DLLAMA_BUILD_EXAMPLES=ON \
+ -DLLAMA_BUILD_TOOLS=ON \
+ -DLLAMA_BUILD_TESTS=ON \
+ -DCMAKE_C_COMPILER_LAUNCHER=ccache \
+ -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
+ -DGGML_RPC=ON \
+ -DCMAKE_C_COMPILER=riscv64-linux-gnu-gcc-14 \
+ -DCMAKE_CXX_COMPILER=riscv64-linux-gnu-g++-14
+
+ cmake --build build --config Release -j $(nproc)
+
+ - name: Test
+ id: cmake_test
+ run: |
+ cd build
+ ctest -L 'main|curl' --verbose --timeout 900
+
+ - name: Test llama2c conversion
+ id: llama2c_test
+ run: |
+ cd build
+ echo "Fetch tokenizer"
+ wget https://huggingface.co/karpathy/tinyllamas/resolve/main/stories260K/tok512.bin
+ echo "Fetch llama2c model"
+ wget https://huggingface.co/karpathy/tinyllamas/resolve/main/stories260K/stories260K.bin
+ ./bin/llama-convert-llama2c-to-ggml --copy-vocab-from-model ./tok512.bin --llama2c-model stories260K.bin --llama2c-output-model stories260K.gguf
+ ./bin/llama-cli -m stories260K.gguf -p "One day, Lily met a Shoggoth" -n 500 -c 256
+
+ ubuntu-cmake-sanitizer-riscv64-native:
+ runs-on: RISCV64
+
+ continue-on-error: true
+
+ strategy:
+ matrix:
+ sanitizer: [ADDRESS, THREAD, UNDEFINED]
+ build_type: [Debug]
+
+ steps:
+ - name: Install dependencies
+ run: |
+ sudo apt-get update
+
+ # Install necessary packages
+ sudo apt-get install -y libatomic1 libtsan2 gcc-14 g++-14 rustup cmake build-essential wget ccache
+
+ # Set gcc-14 and g++-14 as the default compilers
+ sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-14 100
+ sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-14 100
+ sudo ln -sf /usr/bin/gcc-14 /usr/bin/gcc
+ sudo ln -sf /usr/bin/g++-14 /usr/bin/g++
+
+ # Install Rust stable version
+ rustup install stable
+ rustup default stable
+
+ - name: GCC version check
+ run: |
+ gcc --version
+ g++ --version
+
+ - name: Clone
+ id: checkout
+ uses: actions/checkout@v4
+
+ - name: Setup ccache
+ run: |
+ # Unique cache directory per matrix combination
+ export CCACHE_DIR="$HOME/.ccache/sanitizer-${{ matrix.sanitizer }}-${{ matrix.build_type }}"
+ mkdir -p "$CCACHE_DIR"
+
+ # Configure ccache
+ ccache --set-config=max_size=5G
+ ccache --set-config=compression=true
+ ccache --set-config=compression_level=6
+ ccache --set-config=cache_dir="$CCACHE_DIR"
+ ccache --set-config=sloppiness=file_macro,time_macros,include_file_mtime,include_file_ctime
+ ccache --set-config=hash_dir=false
+
+ # Export for subsequent steps
+ echo "CCACHE_DIR=$CCACHE_DIR" >> $GITHUB_ENV
+ echo "PATH=/usr/lib/ccache:$PATH" >> $GITHUB_ENV
+
+ - name: Build
+ id: cmake_build
+ if: ${{ matrix.sanitizer != 'THREAD' }}
+ run: |
+ cmake -B build \
+ -DLLAMA_CURL=OFF \
+ -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \
+ -DGGML_OPENMP=ON \
+ -DLLAMA_BUILD_EXAMPLES=ON \
+ -DLLAMA_BUILD_TOOLS=ON \
+ -DLLAMA_BUILD_TESTS=OFF \
+ -DCMAKE_C_COMPILER_LAUNCHER=ccache \
+ -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
+ -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON \
+ -DCMAKE_C_COMPILER=riscv64-linux-gnu-gcc-14 \
+ -DCMAKE_CXX_COMPILER=riscv64-linux-gnu-g++-14
+
+ cmake --build build --config ${{ matrix.build_type }} -j $(nproc)
+
+ - name: Build (no OpenMP)
+ id: cmake_build_no_openmp
+ if: ${{ matrix.sanitizer == 'THREAD' }}
+ run: |
+ cmake -B build \
+ -DLLAMA_CURL=OFF \
+ -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \
+ -DGGML_OPENMP=OFF \
+ -DLLAMA_BUILD_EXAMPLES=ON \
+ -DLLAMA_BUILD_TOOLS=ON \
+ -DLLAMA_BUILD_TESTS=OFF \
+ -DCMAKE_C_COMPILER_LAUNCHER=ccache \
+ -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
+ -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON \
+ -DCMAKE_C_COMPILER=riscv64-linux-gnu-gcc-14 \
+ -DCMAKE_CXX_COMPILER=riscv64-linux-gnu-g++-14
+
+ cmake --build build --config ${{ matrix.build_type }} -j $(nproc)
+
+ - name: Test
+ id: cmake_test
+ run: |
+ cd build
+ ctest -L main --verbose --timeout 900
+
+
+ ubuntu-llguidance-riscv64-native:
+ runs-on: RISCV64
+ steps:
+ - name: Install dependencies
+ run: |
+ sudo apt-get update
+
+ # Install necessary packages
+ sudo apt-get install -y libatomic1 libtsan2 gcc-14 g++-14 rustup cmake build-essential wget ccache
+
+ # Set gcc-14 and g++-14 as the default compilers
+ sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-14 100
+ sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-14 100
+ sudo ln -sf /usr/bin/gcc-14 /usr/bin/gcc
+ sudo ln -sf /usr/bin/g++-14 /usr/bin/g++
+
+ # Install Rust stable version
+ rustup install stable
+ rustup default stable
+
+ - name: GCC version check
+ run: |
+ gcc --version
+ g++ --version
+
+ - name: Clone
+ id: checkout
+ uses: actions/checkout@v4
+
+ - name: Setup ccache
+ run: |
+ export CCACHE_DIR="$HOME/.ccache/llguidance-riscv64"
+ mkdir -p "$CCACHE_DIR"
+
+ ccache --set-config=max_size=5G
+ ccache --set-config=compression=true
+ ccache --set-config=compression_level=6
+ ccache --set-config=cache_dir="$CCACHE_DIR"
+ ccache --set-config=sloppiness=file_macro,time_macros,include_file_mtime,include_file_ctime
+ ccache --set-config=hash_dir=false
+
+ echo "CCACHE_DIR=$CCACHE_DIR" >> $GITHUB_ENV
+ echo "PATH=/usr/lib/ccache:$PATH" >> $GITHUB_ENV
+
+ - name: Build
+ id: cmake_build
+ run: |
+ cmake -B build \
+ -DLLAMA_CURL=OFF \
+ -DCMAKE_BUILD_TYPE=Release \
+ -DGGML_OPENMP=OFF \
+ -DLLAMA_BUILD_EXAMPLES=ON \
+ -DLLAMA_BUILD_TOOLS=ON \
+ -DLLAMA_BUILD_TESTS=OFF \
+ -DCMAKE_C_COMPILER_LAUNCHER=ccache \
+ -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
+ -DLLAMA_LLGUIDANCE=ON \
+ -DCMAKE_C_COMPILER=riscv64-linux-gnu-gcc-14 \
+ -DCMAKE_CXX_COMPILER=riscv64-linux-gnu-g++-14
+
+ cmake --build build --config Release -j $(nproc)
+
+ - name: Test
+ id: cmake_test
+ run: |
+ cd build
+ ctest -L main --verbose --timeout 900
+
+
+ ubuntu-cmake-rpc-riscv64-native:
+ runs-on: RISCV64
+
+ continue-on-error: true
+
+ steps:
+ - name: Install dependencies
+ run: |
+ sudo apt-get update
+
+ # Install necessary packages
+ sudo apt-get install -y libatomic1 libtsan2 gcc-14 g++-14 rustup cmake build-essential libssl-dev wget ccache
+
+ # Set gcc-14 and g++-14 as the default compilers
+ sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-14 100
+ sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-14 100
+ sudo ln -sf /usr/bin/gcc-14 /usr/bin/gcc
+ sudo ln -sf /usr/bin/g++-14 /usr/bin/g++
+
+ # Install Rust stable version
+ rustup install stable
+ rustup default stable
+
+ - name: GCC version check
+ run: |
+ gcc --version
+ g++ --version
+
+ - name: Clone
+ id: checkout
+ uses: actions/checkout@v4
+
+ - name: Setup ccache
+ run: |
+ export CCACHE_DIR="$HOME/.ccache/rpc-riscv64"
+ mkdir -p "$CCACHE_DIR"
+
+ ccache --set-config=max_size=5G
+ ccache --set-config=compression=true
+ ccache --set-config=compression_level=6
+ ccache --set-config=cache_dir="$CCACHE_DIR"
+ ccache --set-config=sloppiness=file_macro,time_macros,include_file_mtime,include_file_ctime
+ ccache --set-config=hash_dir=false
+
+ echo "CCACHE_DIR=$CCACHE_DIR" >> $GITHUB_ENV
+ echo "PATH=/usr/lib/ccache:$PATH" >> $GITHUB_ENV
+
+ - name: Build
+ id: cmake_build
+ run: |
+ cmake -B build \
+ -DLLAMA_CURL=OFF \
+ -DLLAMA_OPENSSL=ON \
+ -DCMAKE_BUILD_TYPE=Release \
+ -DGGML_OPENMP=OFF \
+ -DLLAMA_BUILD_EXAMPLES=ON \
+ -DLLAMA_BUILD_TOOLS=ON \
+ -DLLAMA_BUILD_TESTS=ON \
+ -DCMAKE_C_COMPILER_LAUNCHER=ccache \
+ -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
+ -DCMAKE_C_COMPILER=riscv64-linux-gnu-gcc-14 \
+ -DCMAKE_CXX_COMPILER=riscv64-linux-gnu-g++-14 \
+ -DGGML_RPC=ON
+
+ cmake --build build --config Release -j $(nproc)
+
+ - name: Test
+ id: cmake_test
+ run: |
+ cd build
+ ctest -L main --verbose
+
ggml-ci-arm64-graviton4-kleidiai:
runs-on: ah-ubuntu_22_04-c8g_8x
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 0d5739c24b..77aec20c11 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -66,14 +66,21 @@ jobs:
id: pack_artifacts
run: |
cp LICENSE ./build/bin/
- zip -r llama-${{ steps.tag.outputs.name }}-bin-macos-arm64.zip ./build/bin/*
+ zip -y -r llama-${{ steps.tag.outputs.name }}-bin-macos-arm64.zip ./build/bin/*
+ tar -czvf llama-${{ steps.tag.outputs.name }}-bin-macos-arm64.tar.gz -s ",./,llama-${{ steps.tag.outputs.name }}/," -C ./build/bin .
- - name: Upload artifacts
+ - name: Upload artifacts (zip)
uses: actions/upload-artifact@v4
with:
path: llama-${{ steps.tag.outputs.name }}-bin-macos-arm64.zip
name: llama-bin-macos-arm64.zip
+ - name: Upload artifacts (tar)
+ uses: actions/upload-artifact@v4
+ with:
+ path: llama-${{ steps.tag.outputs.name }}-bin-macos-arm64.tar.gz
+ name: llama-bin-macos-arm64.tar.gz
+
macOS-x64:
runs-on: macos-15-intel
@@ -120,14 +127,21 @@ jobs:
id: pack_artifacts
run: |
cp LICENSE ./build/bin/
- zip -r llama-${{ steps.tag.outputs.name }}-bin-macos-x64.zip ./build/bin/*
+ zip -y -r llama-${{ steps.tag.outputs.name }}-bin-macos-x64.zip ./build/bin/*
+ tar -czvf llama-${{ steps.tag.outputs.name }}-bin-macos-x64.tar.gz -s ",./,llama-${{ steps.tag.outputs.name }}/," -C ./build/bin .
- - name: Upload artifacts
+ - name: Upload artifacts (zip)
uses: actions/upload-artifact@v4
with:
path: llama-${{ steps.tag.outputs.name }}-bin-macos-x64.zip
name: llama-bin-macos-x64.zip
+ - name: Upload artifacts (tar)
+ uses: actions/upload-artifact@v4
+ with:
+ path: llama-${{ steps.tag.outputs.name }}-bin-macos-x64.tar.gz
+ name: llama-bin-macos-x64.tar.gz
+
ubuntu-22-cpu:
strategy:
matrix:
@@ -182,14 +196,21 @@ jobs:
id: pack_artifacts
run: |
cp LICENSE ./build/bin/
- zip -r llama-${{ steps.tag.outputs.name }}-bin-ubuntu-${{ matrix.build }}.zip ./build/bin/*
+ zip -y -r llama-${{ steps.tag.outputs.name }}-bin-ubuntu-${{ matrix.build }}.zip ./build/bin/*
+ tar -czvf llama-${{ steps.tag.outputs.name }}-bin-ubuntu-${{ matrix.build }}.tar.gz --transform "s,./,llama-${{ steps.tag.outputs.name }}/," -C ./build/bin .
- - name: Upload artifacts
+ - name: Upload artifacts (zip)
uses: actions/upload-artifact@v4
with:
path: llama-${{ steps.tag.outputs.name }}-bin-ubuntu-${{ matrix.build }}.zip
name: llama-bin-ubuntu-${{ matrix.build }}.zip
+ - name: Upload artifacts (tar)
+ uses: actions/upload-artifact@v4
+ with:
+ path: llama-${{ steps.tag.outputs.name }}-bin-ubuntu-${{ matrix.build }}.tar.gz
+ name: llama-bin-ubuntu-${{ matrix.build }}.tar.gz
+
ubuntu-22-vulkan:
runs-on: ubuntu-22.04
@@ -235,14 +256,21 @@ jobs:
id: pack_artifacts
run: |
cp LICENSE ./build/bin/
- zip -r llama-${{ steps.tag.outputs.name }}-bin-ubuntu-vulkan-x64.zip ./build/bin/*
+ zip -y -r llama-${{ steps.tag.outputs.name }}-bin-ubuntu-vulkan-x64.zip ./build/bin/*
+ tar -czvf llama-${{ steps.tag.outputs.name }}-bin-ubuntu-vulkan-x64.tar.gz --transform "s,./,llama-${{ steps.tag.outputs.name }}/," -C ./build/bin .
- - name: Upload artifacts
+ - name: Upload artifacts (zip)
uses: actions/upload-artifact@v4
with:
path: llama-${{ steps.tag.outputs.name }}-bin-ubuntu-vulkan-x64.zip
name: llama-bin-ubuntu-vulkan-x64.zip
+ - name: Upload artifacts (tar)
+ uses: actions/upload-artifact@v4
+ with:
+ path: llama-${{ steps.tag.outputs.name }}-bin-ubuntu-vulkan-x64.tar.gz
+ name: llama-bin-ubuntu-vulkan-x64.tar.gz
+
windows-cpu:
runs-on: windows-2025
@@ -298,7 +326,7 @@ jobs:
run: |
Copy-Item $env:CURL_PATH\bin\libcurl-${{ matrix.arch }}.dll .\build\bin\Release\
Copy-Item "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Redist\MSVC\14.44.35112\debug_nonredist\${{ matrix.arch }}\Microsoft.VC143.OpenMP.LLVM\libomp140.${{ matrix.arch == 'x64' && 'x86_64' || 'aarch64' }}.dll" .\build\bin\Release\
- 7z a llama-bin-win-cpu-${{ matrix.arch }}.zip .\build\bin\Release\*
+ 7z a -snl llama-bin-win-cpu-${{ matrix.arch }}.zip .\build\bin\Release\*
- name: Upload artifacts
uses: actions/upload-artifact@v4
@@ -380,7 +408,7 @@ jobs:
- name: Pack artifacts
id: pack_artifacts
run: |
- 7z a llama-bin-win-${{ matrix.backend }}-${{ matrix.arch }}.zip .\build\bin\Release\${{ matrix.target }}.dll
+ 7z a -snl llama-bin-win-${{ matrix.backend }}-${{ matrix.arch }}.zip .\build\bin\Release\${{ matrix.target }}.dll
- name: Upload artifacts
uses: actions/upload-artifact@v4
@@ -393,7 +421,7 @@ jobs:
strategy:
matrix:
- cuda: ['12.4']
+ cuda: ['12.4', '13.1']
steps:
- name: Clone
@@ -434,7 +462,7 @@ jobs:
- name: Pack artifacts
id: pack_artifacts
run: |
- 7z a llama-bin-win-cuda-${{ matrix.cuda }}-x64.zip .\build\bin\Release\ggml-cuda.dll
+ 7z a -snl llama-bin-win-cuda-${{ matrix.cuda }}-x64.zip .\build\bin\Release\ggml-cuda.dll
- name: Upload artifacts
uses: actions/upload-artifact@v4
@@ -448,6 +476,7 @@ jobs:
$dst='.\build\bin\cudart\'
robocopy "${{env.CUDA_PATH}}\bin" $dst cudart64_*.dll cublas64_*.dll cublasLt64_*.dll
robocopy "${{env.CUDA_PATH}}\lib" $dst cudart64_*.dll cublas64_*.dll cublasLt64_*.dll
+ robocopy "${{env.CUDA_PATH}}\bin\x64" $dst cudart64_*.dll cublas64_*.dll cublasLt64_*.dll
7z a cudart-llama-bin-win-cuda-${{ matrix.cuda }}-x64.zip $dst\*
- name: Upload Cuda runtime
@@ -517,6 +546,8 @@ jobs:
cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/libmmd.dll" ./build/bin
cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/libiomp5md.dll" ./build/bin
cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/sycl-ls.exe" ./build/bin
+ cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/libsycl-fallback-bfloat16.spv" ./build/bin
+ cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/libsycl-native-bfloat16.spv" ./build/bin
cp "${{ env.ONEAPI_ROOT }}/dnnl/latest/bin/dnnl.dll" ./build/bin
cp "${{ env.ONEAPI_ROOT }}/tbb/latest/bin/tbb12.dll" ./build/bin
@@ -526,7 +557,7 @@ jobs:
cp "${{ env.ONEAPI_ROOT }}/umf/latest/bin/umf.dll" ./build/bin
echo "cp oneAPI running time dll files to ./build/bin done"
- 7z a llama-bin-win-sycl-x64.zip ./build/bin/*
+ 7z a -snl llama-bin-win-sycl-x64.zip ./build/bin/*
- name: Upload the release package
uses: actions/upload-artifact@v4
@@ -632,7 +663,7 @@ jobs:
- name: Pack artifacts
id: pack_artifacts
run: |
- 7z a llama-bin-win-hip-${{ matrix.name }}-x64.zip .\build\bin\*
+ 7z a -snl llama-bin-win-hip-${{ matrix.name }}-x64.zip .\build\bin\*
- name: Upload artifacts
uses: actions/upload-artifact@v4
@@ -685,58 +716,20 @@ jobs:
- name: Pack artifacts
id: pack_artifacts
run: |
- zip --symlinks -r llama-${{ steps.tag.outputs.name }}-xcframework.zip build-apple/llama.xcframework
+ zip -y -r llama-${{ steps.tag.outputs.name }}-xcframework.zip build-apple/llama.xcframework
+ tar -czvf llama-${{ steps.tag.outputs.name }}-xcframework.tar.gz -C build-apple llama.xcframework
- - name: Upload artifacts
+ - name: Upload artifacts (zip)
uses: actions/upload-artifact@v4
with:
path: llama-${{ steps.tag.outputs.name }}-xcframework.zip
- name: llama-${{ steps.tag.outputs.name }}-xcframework
+ name: llama-${{ steps.tag.outputs.name }}-xcframework.zip
- openEuler-cann:
- strategy:
- matrix:
- arch: [x86, aarch64]
- chip_type: ['910b', '310p']
- build: ['Release']
- runs-on: ${{ matrix.arch == 'aarch64' && 'ubuntu-24.04-arm' || 'ubuntu-24.04' }}
- container: ascendai/cann:${{ matrix.chip_type == '910b' && '8.3.rc1.alpha001-910b-openeuler22.03-py3.11' || '8.2.rc1-310p-openeuler22.03-py3.11' }}
- steps:
- - name: Checkout
- uses: actions/checkout@v4
- with:
- fetch-depth: 0
-
- - name: Dependencies
- run: |
- yum update -y
- yum install -y git gcc gcc-c++ make cmake libcurl-devel
- git config --global --add safe.directory "$GITHUB_WORKSPACE"
-
- - name: Build
- run: |
- export LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:${ASCEND_TOOLKIT_HOME}/$(uname -m)-linux/devlib/:${LD_LIBRARY_PATH}
-
- cmake -S . -B build \
- -DCMAKE_BUILD_TYPE=${{ matrix.build }} \
- -DGGML_CANN=on \
- -DSOC_TYPE=ascend${{ matrix.chip_type }}
- cmake --build build -j $(nproc)
-
- - name: Determine tag name
- id: tag
- uses: ./.github/actions/get-tag-name
-
- - name: Pack artifacts
- run: |
- cp LICENSE ./build/bin/
- zip -r llama-${{ steps.tag.outputs.name }}-bin-${{ matrix.chip_type }}-openEuler-${{ matrix.arch }}.zip ./build/bin/*
-
- - name: Upload artifacts
+ - name: Upload artifacts (tar)
uses: actions/upload-artifact@v4
with:
- path: llama-${{ steps.tag.outputs.name }}-bin-${{ matrix.chip_type }}-openEuler-${{ matrix.arch }}.zip
- name: llama-bin-${{ matrix.chip_type }}-openEuler-${{ matrix.arch }}.zip
+ path: llama-${{ steps.tag.outputs.name }}-xcframework.tar.gz
+ name: llama-${{ steps.tag.outputs.name }}-xcframework.tar.gz
release:
if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
@@ -759,7 +752,6 @@ jobs:
- macOS-arm64
- macOS-x64
- ios-xcode-build
- - openEuler-cann
steps:
- name: Clone
@@ -814,6 +806,7 @@ jobs:
echo "Moving other artifacts..."
mv -v artifact/*.zip release
+ mv -v artifact/*.tar.gz release
- name: Create release
id: create_release
@@ -822,6 +815,34 @@ jobs:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
with:
tag_name: ${{ steps.tag.outputs.name }}
+ body: |
+ > [!WARNING]
+ > **Release Format Update**: Linux releases will soon use .tar.gz archives instead of .zip. Please make the necessary changes to your deployment scripts.
+
+
+
+ ${{ github.event.head_commit.message }}
+
+
+
+ **macOS/iOS:**
+ - [macOS Apple Silicon (arm64)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-macos-arm64.tar.gz)
+ - [macOS Intel (x64)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-macos-x64.tar.gz)
+ - [iOS XCFramework](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-xcframework.tar.gz)
+
+ **Linux:**
+ - [Ubuntu x64 (CPU)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-ubuntu-x64.tar.gz)
+ - [Ubuntu x64 (Vulkan)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-ubuntu-vulkan-x64.tar.gz)
+ - [Ubuntu s390x (CPU)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-ubuntu-s390x.tar.gz)
+
+ **Windows:**
+ - [Windows x64 (CPU)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-win-cpu-x64.zip)
+ - [Windows arm64 (CPU)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-win-cpu-arm64.zip)
+ - [Windows x64 (CUDA 12)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-win-cuda-12.4-x64.zip)
+ - [Windows x64 (CUDA 13)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-win-cuda-13.1-x64.zip)
+ - [Windows x64 (Vulkan)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-win-vulkan-x64.zip)
+ - [Windows x64 (SYCL)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-win-sycl-x64.zip)
+ - [Windows x64 (HIP)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-win-hip-radeon-x64.zip)
- name: Upload release
id: upload_release
@@ -833,7 +854,7 @@ jobs:
const fs = require('fs');
const release_id = '${{ steps.create_release.outputs.id }}';
for (let file of await fs.readdirSync('./release')) {
- if (path.extname(file) === '.zip') {
+ if (path.extname(file) === '.zip' || file.endsWith('.tar.gz')) {
console.log('uploadReleaseAsset', file);
await github.repos.uploadReleaseAsset({
owner: context.repo.owner,
diff --git a/.github/workflows/winget.yml b/.github/workflows/winget.yml
index 5c28615595..d3d9be23ce 100644
--- a/.github/workflows/winget.yml
+++ b/.github/workflows/winget.yml
@@ -9,6 +9,7 @@ jobs:
update:
name: Update Winget Package
runs-on: ubuntu-latest
+ if: github.repository_owner == 'ggml-org'
steps:
- name: Install cargo binstall
diff --git a/.gitignore b/.gitignore
index 8575a141c4..428f084110 100644
--- a/.gitignore
+++ b/.gitignore
@@ -134,3 +134,5 @@ poetry.toml
# IDE
/*.code-workspace
/.windsurf/
+# emscripten
+a.out.*
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 3278c4a72c..c231ec0e3f 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -33,10 +33,24 @@ endif()
option(LLAMA_USE_SYSTEM_GGML "Use system libggml" OFF)
+option(LLAMA_WASM_MEM64 "llama: use 64-bit memory in WASM builds" ON)
+
if (EMSCRIPTEN)
set(BUILD_SHARED_LIBS_DEFAULT OFF)
- option(LLAMA_WASM_SINGLE_FILE "llama: embed WASM inside the generated llama.js" ON)
+ # Use 64-bit memory to support backend_get_memory queries
+ # TODO: analyze performance impact, see https://spidermonkey.dev/blog/2025/01/15/is-memory64-actually-worth-using
+ if (LLAMA_WASM_MEM64)
+ add_compile_options("-sMEMORY64=1")
+ add_link_options("-sMEMORY64=1")
+ endif()
+ add_link_options("-sALLOW_MEMORY_GROWTH=1")
+
+ option(LLAMA_WASM_SINGLE_FILE "llama: embed WASM inside the generated llama.js" OFF)
+ option(LLAMA_BUILD_HTML "llama: build HTML file" ON)
+ if (LLAMA_BUILD_HTML)
+ set(CMAKE_EXECUTABLE_SUFFIX ".html")
+ endif()
else()
if (MINGW)
set(BUILD_SHARED_LIBS_DEFAULT OFF)
@@ -58,6 +72,12 @@ if (MSVC)
add_compile_options("$<$:/bigobj>")
endif()
+if (LLAMA_STANDALONE)
+ # enable parallel builds for msbuild
+ list(APPEND CMAKE_VS_GLOBALS UseMultiToolTask=true)
+ list(APPEND CMAKE_VS_GLOBALS EnforceProcessCountAcrossBuilds=true)
+endif()
+
if (CMAKE_SYSTEM_NAME STREQUAL "iOS")
set(LLAMA_TOOLS_INSTALL_DEFAULT OFF)
else()
@@ -179,11 +199,6 @@ if (NOT TARGET ggml AND NOT LLAMA_USE_SYSTEM_GGML)
# ... otherwise assume ggml is added by a parent CMakeLists.txt
endif()
-if (MINGW)
- # Target Windows 8 for PrefetchVirtualMemory
- add_compile_definitions(_WIN32_WINNT=${GGML_WIN_VER})
-endif()
-
#
# build the library
#
diff --git a/CODEOWNERS b/CODEOWNERS
index 6ef6c0489f..8e62a36e81 100644
--- a/CODEOWNERS
+++ b/CODEOWNERS
@@ -7,16 +7,20 @@
/ci/ @ggerganov
/cmake/ @ggerganov
/common/CMakeLists.txt @ggerganov
-/common/arg.* @ggerganov @ericcurtin
+/common/arg.* @ggerganov
/common/base64.hpp.* @ggerganov
/common/build-info.* @ggerganov
+/common/chat.* @pwilkin
+/common/chat-peg-parser.* @aldehir
/common/common.* @ggerganov
/common/console.* @ggerganov
/common/http.* @angt
/common/llguidance.* @ggerganov
/common/log.* @ggerganov
+/common/peg-parser.* @aldehir
/common/sampling.* @ggerganov
/common/speculative.* @ggerganov
+/common/unicode.* @aldehir
/convert_*.py @CISC
/examples/batched.swift/ @ggerganov
/examples/batched/ @ggerganov
@@ -81,14 +85,14 @@
/src/llama-vocab.* @CISC
/src/models/ @CISC
/tests/ @ggerganov
+/tests/test-chat-.* @pwilkin
/tools/batched-bench/ @ggerganov
/tools/main/ @ggerganov
/tools/mtmd/ @ngxson
/tools/perplexity/ @ggerganov
/tools/quantize/ @ggerganov
/tools/rpc/ @rgerganov
-/tools/run/ @ericcurtin
-/tools/server/* @ngxson @ggerganov @ericcurtin # no subdir
+/tools/server/* @ngxson @ggerganov # no subdir
/tools/server/webui/ @allozaur
/tools/tokenize/ @ggerganov
/tools/tts/ @ggerganov
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 875eb766f3..e4f05258db 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -16,7 +16,7 @@ The project differentiates between 3 levels of contributors:
- If you modified a `ggml` operator or added a new one, add the corresponding test cases to `test-backend-ops`
- Create separate PRs for each feature or fix. Avoid combining unrelated changes in a single PR
- Consider allowing write access to your branch for faster reviews, as reviewers can push commits directly
-- If your PR becomes stale, don't hesitate to ping the maintainers in the comments
+- If your PR becomes stale, rebase it on top of the latest `master` to get the maintainers' attention
- Maintainers will rely on your insights and approval when making a final decision to approve and merge a PR
- Consider adding yourself to [CODEOWNERS](CODEOWNERS) to indicate your availability for reviewing related PRs
- Using AI to generate PRs is permitted. However, you must (1) explicitly disclose how AI was used and (2) conduct a thorough manual review before publishing the PR. Note that trivial tab autocompletions do not require disclosure.
diff --git a/README.md b/README.md
index 2e44ae7d0c..7dd2bfd8a1 100644
--- a/README.md
+++ b/README.md
@@ -61,7 +61,7 @@ range of hardware - locally and in the cloud.
- Plain C/C++ implementation without any dependencies
- Apple silicon is a first-class citizen - optimized via ARM NEON, Accelerate and Metal frameworks
- AVX, AVX2, AVX512 and AMX support for x86 architectures
-- RVV, ZVFH, ZFH and ZICBOP support for RISC-V architectures
+- RVV, ZVFH, ZFH, ZICBOP and ZIHINTPAUSE support for RISC-V architectures
- 1.5-bit, 2-bit, 3-bit, 4-bit, 5-bit, 6-bit, and 8-bit integer quantization for faster inference and reduced memory use
- Custom CUDA kernels for running LLMs on NVIDIA GPUs (support for AMD GPUs via HIP and Moore Threads GPUs via MUSA)
- Vulkan and SYCL backend support
@@ -276,6 +276,7 @@ Instructions for adding support for new models: [HOWTO-add-model.md](docs/develo
| [MUSA](docs/build.md#musa) | Moore Threads GPU |
| [CUDA](docs/build.md#cuda) | Nvidia GPU |
| [HIP](docs/build.md#hip) | AMD GPU |
+| [ZenDNN](docs/build.md#zendnn) | AMD CPU |
| [Vulkan](docs/build.md#vulkan) | GPU |
| [CANN](docs/build.md#cann) | Ascend NPU |
| [OpenCL](docs/backend/OPENCL.md) | Adreno GPU |
diff --git a/ci/run.sh b/ci/run.sh
index 1dd65adeaa..83b2603e82 100755
--- a/ci/run.sh
+++ b/ci/run.sh
@@ -45,7 +45,7 @@ sd=`dirname $0`
cd $sd/../
SRC=`pwd`
-CMAKE_EXTRA="-DLLAMA_FATAL_WARNINGS=ON -DLLAMA_CURL=ON -DGGML_SCHED_NO_REALLOC=ON"
+CMAKE_EXTRA="-DLLAMA_FATAL_WARNINGS=${LLAMA_FATAL_WARNINGS:-ON} -DLLAMA_CURL=ON -DGGML_SCHED_NO_REALLOC=ON"
if [ ! -z ${GG_BUILD_METAL} ]; then
CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_METAL=ON"
diff --git a/cmake/build-info.cmake b/cmake/build-info.cmake
index 75c78222f2..c7005950c5 100644
--- a/cmake/build-info.cmake
+++ b/cmake/build-info.cmake
@@ -39,26 +39,10 @@ if(Git_FOUND)
endif()
endif()
-if(MSVC)
- set(BUILD_COMPILER "${CMAKE_C_COMPILER_ID} ${CMAKE_C_COMPILER_VERSION}")
- if (CMAKE_VS_PLATFORM_NAME)
- set(BUILD_TARGET ${CMAKE_VS_PLATFORM_NAME})
- else()
- set(BUILD_TARGET "${CMAKE_SYSTEM_NAME} ${CMAKE_SYSTEM_PROCESSOR}")
- endif()
-else()
- execute_process(
- COMMAND ${CMAKE_C_COMPILER} --version
- OUTPUT_VARIABLE OUT
- OUTPUT_STRIP_TRAILING_WHITESPACE
- )
- string(REGEX REPLACE " *\n.*" "" OUT "${OUT}")
- set(BUILD_COMPILER ${OUT})
+set(BUILD_COMPILER "${CMAKE_C_COMPILER_ID} ${CMAKE_C_COMPILER_VERSION}")
- execute_process(
- COMMAND ${CMAKE_C_COMPILER} -dumpmachine
- OUTPUT_VARIABLE OUT
- OUTPUT_STRIP_TRAILING_WHITESPACE
- )
- set(BUILD_TARGET ${OUT})
+if(CMAKE_VS_PLATFORM_NAME)
+ set(BUILD_TARGET ${CMAKE_VS_PLATFORM_NAME})
+else()
+ set(BUILD_TARGET "${CMAKE_SYSTEM_NAME} ${CMAKE_SYSTEM_PROCESSOR}")
endif()
diff --git a/common/CMakeLists.txt b/common/CMakeLists.txt
index bb168e8358..377b26846b 100644
--- a/common/CMakeLists.txt
+++ b/common/CMakeLists.txt
@@ -52,6 +52,8 @@ add_library(${TARGET} STATIC
chat-parser.h
chat-parser-xml-toolcall.h
chat-parser-xml-toolcall.cpp
+ chat-peg-parser.cpp
+ chat-peg-parser.h
chat.cpp
chat.h
common.cpp
@@ -69,12 +71,16 @@ add_library(${TARGET} STATIC
log.h
ngram-cache.cpp
ngram-cache.h
+ peg-parser.cpp
+ peg-parser.h
regex-partial.cpp
regex-partial.h
sampling.cpp
sampling.h
speculative.cpp
speculative.h
+ unicode.cpp
+ unicode.h
)
if (BUILD_SHARED_LIBS)
diff --git a/common/arg.cpp b/common/arg.cpp
index 52094e3f10..4203da4a0a 100644
--- a/common/arg.cpp
+++ b/common/arg.cpp
@@ -30,6 +30,7 @@
 #include <thread> // for hardware_concurrency
#include
+#ifndef __EMSCRIPTEN__
#ifdef __linux__
 #include <linux/limits.h>
#elif defined(_WIN32)
@@ -41,6 +42,8 @@
#else
 #include <sys/syslimits.h>
#endif
+#endif
+
#define LLAMA_MAX_URL_LENGTH 2084 // Maximum URL Length in Chrome: 2083
using json = nlohmann::ordered_json;
@@ -424,7 +427,7 @@ static bool common_params_parse_ex(int argc, char ** argv, common_params_context
// model is required (except for server)
// TODO @ngxson : maybe show a list of available models in CLI in this case
- if (params.model.path.empty() && ctx_arg.ex != LLAMA_EXAMPLE_SERVER) {
+ if (params.model.path.empty() && ctx_arg.ex != LLAMA_EXAMPLE_SERVER && !params.usage) {
throw std::invalid_argument("error: --model is required\n");
}
@@ -705,6 +708,8 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
params.use_jinja = true;
}
+ params.use_color = tty_can_use_colors();
+
// load dynamic backends
ggml_backend_load_all();
@@ -787,10 +792,20 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
}
).set_examples({LLAMA_EXAMPLE_MAIN}));
add_opt(common_arg(
- {"-co", "--color"},
- string_format("colorise output to distinguish prompt and user input from generations (default: %s)", params.use_color ? "true" : "false"),
- [](common_params & params) {
- params.use_color = true;
+ {"-co", "--color"}, "[on|off|auto]",
+ "Colorize output to distinguish prompt and user input from generations ('on', 'off', or 'auto', default: 'auto')\n"
+ "'auto' enables colors when output is to a terminal",
+ [](common_params & params, const std::string & value) {
+ if (is_truthy(value)) {
+ params.use_color = true;
+ } else if (is_falsey(value)) {
+ params.use_color = false;
+ } else if (is_autoy(value)) {
+ params.use_color = tty_can_use_colors();
+ } else {
+ throw std::invalid_argument(
+ string_format("error: unknown value for --color: '%s'\n", value.c_str()));
+ }
}
).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_LOOKUP}));
add_opt(common_arg(
@@ -1019,7 +1034,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
params.flash_attn_type = LLAMA_FLASH_ATTN_TYPE_AUTO;
} else {
throw std::runtime_error(
- string_format("error: unkown value for --flash-attn: '%s'\n", value.c_str()));
+ string_format("error: unknown value for --flash-attn: '%s'\n", value.c_str()));
}
}).set_env("LLAMA_ARG_FLASH_ATTN"));
add_opt(common_arg(
@@ -1226,7 +1241,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
[](common_params & params) {
params.warmup = false;
}
- ).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_EMBEDDING, LLAMA_EXAMPLE_RETRIEVAL, LLAMA_EXAMPLE_PERPLEXITY}));
+ ).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_MTMD, LLAMA_EXAMPLE_EMBEDDING, LLAMA_EXAMPLE_RETRIEVAL, LLAMA_EXAMPLE_PERPLEXITY}));
add_opt(common_arg(
{"--spm-infill"},
string_format(
@@ -2488,12 +2503,29 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
"path to save slot kv cache (default: disabled)",
[](common_params & params, const std::string & value) {
params.slot_save_path = value;
+ if (!fs_is_directory(params.slot_save_path)) {
+ throw std::invalid_argument("not a directory: " + value);
+ }
// if doesn't end with DIRECTORY_SEPARATOR, add it
if (!params.slot_save_path.empty() && params.slot_save_path[params.slot_save_path.size() - 1] != DIRECTORY_SEPARATOR) {
params.slot_save_path += DIRECTORY_SEPARATOR;
}
}
).set_examples({LLAMA_EXAMPLE_SERVER}));
+ add_opt(common_arg(
+ {"--media-path"}, "PATH",
+ "directory for loading local media files; files can be accessed via file:// URLs using relative paths (default: disabled)",
+ [](common_params & params, const std::string & value) {
+ params.media_path = value;
+ if (!fs_is_directory(params.media_path)) {
+ throw std::invalid_argument("not a directory: " + value);
+ }
+ // if doesn't end with DIRECTORY_SEPARATOR, add it
+ if (!params.media_path.empty() && params.media_path[params.media_path.size() - 1] != DIRECTORY_SEPARATOR) {
+ params.media_path += DIRECTORY_SEPARATOR;
+ }
+ }
+ ).set_examples({LLAMA_EXAMPLE_SERVER}));
add_opt(common_arg(
{"--models-dir"}, "PATH",
"directory containing models for the router server (default: disabled)",
@@ -2676,7 +2708,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
common_log_set_colors(common_log_main(), LOG_COLORS_AUTO);
} else {
throw std::invalid_argument(
- string_format("error: unkown value for --log-colors: '%s'\n", value.c_str()));
+ string_format("error: unknown value for --log-colors: '%s'\n", value.c_str()));
}
}
).set_env("LLAMA_LOG_COLORS"));
diff --git a/common/chat-parser.cpp b/common/chat-parser.cpp
index b4e3a9c1f3..fa7c3134da 100644
--- a/common/chat-parser.cpp
+++ b/common/chat-parser.cpp
@@ -1,6 +1,8 @@
#include "chat-parser.h"
+#include "chat-peg-parser.h"
#include "common.h"
#include "log.h"
+#include "peg-parser.h"
#include "regex-partial.h"
#include
@@ -1505,6 +1507,11 @@ static void common_chat_parse(common_chat_msg_parser & builder) {
}
common_chat_msg common_chat_parse(const std::string & input, bool is_partial, const common_chat_syntax & syntax) {
+ if (syntax.format == COMMON_CHAT_FORMAT_PEG_SIMPLE ||
+ syntax.format == COMMON_CHAT_FORMAT_PEG_NATIVE ||
+ syntax.format == COMMON_CHAT_FORMAT_PEG_CONSTRUCTED) {
+ return common_chat_peg_parse(syntax.parser, input, is_partial, syntax);
+ }
common_chat_msg_parser builder(input, is_partial, syntax);
try {
common_chat_parse(builder);
@@ -1522,3 +1529,36 @@ common_chat_msg common_chat_parse(const std::string & input, bool is_partial, co
}
return msg;
}
+
+common_chat_msg common_chat_peg_parse(const common_peg_arena & parser, const std::string & input, bool is_partial, const common_chat_syntax & syntax) {
+ if (parser.empty()) {
+ throw std::runtime_error("Failed to parse due to missing parser definition.");
+ }
+
+ LOG_DBG("Parsing input with format %s: %s\n", common_chat_format_name(syntax.format), input.c_str());
+
+ common_peg_parse_context ctx(input, is_partial);
+ auto result = parser.parse(ctx);
+ if (result.fail()) {
+ throw std::runtime_error(std::string("Failed to parse input at pos ") + std::to_string(result.end));
+ }
+
+ common_chat_msg msg;
+ msg.role = "assistant";
+
+ if (syntax.format == COMMON_CHAT_FORMAT_PEG_NATIVE) {
+ auto mapper = common_chat_peg_native_mapper(msg);
+ mapper.from_ast(ctx.ast, result);
+ } else if (syntax.format == COMMON_CHAT_FORMAT_PEG_CONSTRUCTED) {
+ auto mapper = common_chat_peg_constructed_mapper(msg);
+ mapper.from_ast(ctx.ast, result);
+ } else {
+ // Generic mapper
+ auto mapper = common_chat_peg_mapper(msg);
+ mapper.from_ast(ctx.ast, result);
+ }
+ if (!is_partial) {
+ LOG_DBG("Parsed message: %s\n", common_chat_msgs_to_json_oaicompat({msg}).at(0).dump().c_str());
+ }
+ return msg;
+}
diff --git a/common/chat-peg-parser.cpp b/common/chat-peg-parser.cpp
new file mode 100644
index 0000000000..74a7b6a46d
--- /dev/null
+++ b/common/chat-peg-parser.cpp
@@ -0,0 +1,114 @@
+#include "chat-peg-parser.h"
+
+#include <nlohmann/json.hpp>
+
+using json = nlohmann::json;
+
+static std::string_view trim_trailing_space(std::string_view sv) {
+ while (!sv.empty() && std::isspace(static_cast<unsigned char>(sv.back()))) {
+ sv.remove_suffix(1);
+ }
+ return sv;
+}
+
+void common_chat_peg_mapper::from_ast(const common_peg_ast_arena & arena, const common_peg_parse_result & result) {
+ arena.visit(result, [this](const common_peg_ast_node & node) {
+ map(node);
+ });
+}
+
+void common_chat_peg_mapper::map(const common_peg_ast_node & node) {
+ bool is_reasoning = node.tag == common_chat_peg_builder::REASONING;
+ bool is_content = node.tag == common_chat_peg_builder::CONTENT;
+
+ if (is_reasoning) {
+ result.reasoning_content = std::string(trim_trailing_space(node.text));
+ }
+
+ if (is_content) {
+ result.content = std::string(trim_trailing_space(node.text));
+ }
+}
+
+void common_chat_peg_native_mapper::map(const common_peg_ast_node & node) {
+ common_chat_peg_mapper::map(node);
+
+ bool is_tool_open = node.tag == common_chat_peg_native_builder::TOOL_OPEN;
+ bool is_tool_name = node.tag == common_chat_peg_native_builder::TOOL_NAME;
+ bool is_tool_id = node.tag == common_chat_peg_native_builder::TOOL_ID;
+ bool is_tool_args = node.tag == common_chat_peg_native_builder::TOOL_ARGS;
+
+ if (is_tool_open) {
+ result.tool_calls.emplace_back();
+ current_tool = &result.tool_calls.back();
+ }
+
+ if (is_tool_id && current_tool) {
+ current_tool->id = std::string(trim_trailing_space(node.text));
+ }
+
+ if (is_tool_name && current_tool) {
+ current_tool->name = std::string(trim_trailing_space(node.text));
+ }
+
+ if (is_tool_args && current_tool) {
+ current_tool->arguments = std::string(trim_trailing_space(node.text));
+ }
+}
+
+void common_chat_peg_constructed_mapper::map(const common_peg_ast_node & node) {
+ common_chat_peg_mapper::map(node);
+
+ bool is_tool_open = node.tag == common_chat_peg_constructed_builder::TOOL_OPEN;
+ bool is_tool_name = node.tag == common_chat_peg_constructed_builder::TOOL_NAME;
+ bool is_tool_close = node.tag == common_chat_peg_constructed_builder::TOOL_CLOSE;
+ bool is_arg_open = node.tag == common_chat_peg_constructed_builder::TOOL_ARG_OPEN;
+ bool is_arg_close = node.tag == common_chat_peg_constructed_builder::TOOL_ARG_CLOSE;
+ bool is_arg_name = node.tag == common_chat_peg_constructed_builder::TOOL_ARG_NAME;
+ bool is_arg_string = node.tag == common_chat_peg_constructed_builder::TOOL_ARG_STRING_VALUE;
+ bool is_arg_json = node.tag == common_chat_peg_constructed_builder::TOOL_ARG_JSON_VALUE;
+
+ if (is_tool_open) {
+ result.tool_calls.emplace_back();
+ current_tool = &result.tool_calls.back();
+ arg_count = 0;
+ }
+
+ if (is_tool_name) {
+ current_tool->name = std::string(node.text);
+ current_tool->arguments = "{";
+ }
+
+ if (is_arg_open) {
+ needs_closing_quote = false;
+ }
+
+ if (is_arg_name && current_tool) {
+ if (arg_count > 0) {
+ current_tool->arguments += ",";
+ }
+ current_tool->arguments += json(trim_trailing_space(node.text)).dump() + ":";
+ ++arg_count;
+ }
+
+ if (is_arg_string && current_tool) {
+ // Serialize to JSON, but exclude the end quote
+ std::string dumped = json(node.text).dump();
+ current_tool->arguments += dumped.substr(0, dumped.size() - 1);
+ needs_closing_quote = true;
+ }
+
+ if (is_arg_close && current_tool) {
+ if (needs_closing_quote) {
+ current_tool->arguments += "\"";
+ }
+ }
+
+ if (is_arg_json && current_tool) {
+ current_tool->arguments += std::string(trim_trailing_space(node.text));
+ }
+
+ if (is_tool_close && current_tool) {
+ current_tool->arguments += "}";
+ }
+}
diff --git a/common/chat-peg-parser.h b/common/chat-peg-parser.h
new file mode 100644
index 0000000000..b84cbed206
--- /dev/null
+++ b/common/chat-peg-parser.h
@@ -0,0 +1,105 @@
+#pragma once
+
+#include "chat.h"
+#include "peg-parser.h"
+
+class common_chat_peg_builder : public common_peg_parser_builder {
+ public:
+ static constexpr const char * REASONING_BLOCK = "reasoning-block";
+ static constexpr const char * REASONING = "reasoning";
+ static constexpr const char * CONTENT = "content";
+
+ common_peg_parser reasoning_block(const common_peg_parser & p) { return tag(REASONING_BLOCK, p); }
+ common_peg_parser reasoning(const common_peg_parser & p) { return tag(REASONING, p); }
+ common_peg_parser content(const common_peg_parser & p) { return tag(CONTENT, p); }
+};
+
+inline common_peg_arena build_chat_peg_parser(const std::function<common_peg_parser(common_chat_peg_builder &)> & fn) {
+ common_chat_peg_builder builder;
+ builder.set_root(fn(builder));
+ return builder.build();
+}
+
+class common_chat_peg_mapper {
+ public:
+ common_chat_msg & result;
+
+ common_chat_peg_mapper(common_chat_msg & msg) : result(msg) {}
+
+ virtual void from_ast(const common_peg_ast_arena & arena, const common_peg_parse_result & result);
+ virtual void map(const common_peg_ast_node & node);
+};
+
+class common_chat_peg_native_builder : public common_chat_peg_builder {
+ public:
+ static constexpr const char * TOOL = "tool";
+ static constexpr const char * TOOL_OPEN = "tool-open";
+ static constexpr const char * TOOL_CLOSE = "tool-close";
+ static constexpr const char * TOOL_ID = "tool-id";
+ static constexpr const char * TOOL_NAME = "tool-name";
+ static constexpr const char * TOOL_ARGS = "tool-args";
+
+ common_peg_parser tool(const common_peg_parser & p) { return tag(TOOL, p); }
+ common_peg_parser tool_open(const common_peg_parser & p) { return atomic(tag(TOOL_OPEN, p)); }
+ common_peg_parser tool_close(const common_peg_parser & p) { return atomic(tag(TOOL_CLOSE, p)); }
+ common_peg_parser tool_id(const common_peg_parser & p) { return atomic(tag(TOOL_ID, p)); }
+ common_peg_parser tool_name(const common_peg_parser & p) { return atomic(tag(TOOL_NAME, p)); }
+ common_peg_parser tool_args(const common_peg_parser & p) { return tag(TOOL_ARGS, p); }
+};
+
+class common_chat_peg_native_mapper : public common_chat_peg_mapper {
+ common_chat_tool_call * current_tool = nullptr;
+
+ public:
+ common_chat_peg_native_mapper(common_chat_msg & msg) : common_chat_peg_mapper(msg) {}
+
+ void map(const common_peg_ast_node & node) override;
+};
+
+inline common_peg_arena build_chat_peg_native_parser(const std::function<common_peg_parser(common_chat_peg_native_builder &)> & fn) {
+ common_chat_peg_native_builder builder;
+ builder.set_root(fn(builder));
+ return builder.build();
+}
+
+class common_chat_peg_constructed_builder : public common_chat_peg_builder {
+ public:
+ static constexpr const char * TOOL = "tool";
+ static constexpr const char * TOOL_OPEN = "tool-open";
+ static constexpr const char * TOOL_CLOSE = "tool-close";
+ static constexpr const char * TOOL_NAME = "tool-name";
+ static constexpr const char * TOOL_ARG = "tool-arg";
+ static constexpr const char * TOOL_ARG_OPEN = "tool-arg-open";
+ static constexpr const char * TOOL_ARG_CLOSE = "tool-arg-close";
+ static constexpr const char * TOOL_ARG_NAME = "tool-arg-name";
+ static constexpr const char * TOOL_ARG_STRING_VALUE = "tool-arg-string-value";
+ static constexpr const char * TOOL_ARG_JSON_VALUE = "tool-arg-json-value";
+
+ common_peg_parser tool(const common_peg_parser & p) { return tag(TOOL, p); }
+ common_peg_parser tool_open(const common_peg_parser & p) { return atomic(tag(TOOL_OPEN, p)); }
+ common_peg_parser tool_close(const common_peg_parser & p) { return atomic(tag(TOOL_CLOSE, p)); }
+ common_peg_parser tool_name(const common_peg_parser & p) { return atomic(tag(TOOL_NAME, p)); }
+ common_peg_parser tool_arg(const common_peg_parser & p) { return tag(TOOL_ARG, p); }
+ common_peg_parser tool_arg_open(const common_peg_parser & p) { return atomic(tag(TOOL_ARG_OPEN, p)); }
+ common_peg_parser tool_arg_close(const common_peg_parser & p) { return atomic(tag(TOOL_ARG_CLOSE, p)); }
+ common_peg_parser tool_arg_name(const common_peg_parser & p) { return atomic(tag(TOOL_ARG_NAME, p)); }
+ common_peg_parser tool_arg_string_value(const common_peg_parser & p) { return tag(TOOL_ARG_STRING_VALUE, p); }
+ common_peg_parser tool_arg_json_value(const common_peg_parser & p) { return tag(TOOL_ARG_JSON_VALUE, p); }
+};
+
+class common_chat_peg_constructed_mapper : public common_chat_peg_mapper {
+ common_chat_tool_call * current_tool = nullptr;
+ int arg_count = 0;
+ bool needs_closing_quote = false;
+
+ public:
+ common_chat_peg_constructed_mapper(common_chat_msg & msg) : common_chat_peg_mapper(msg) {}
+
+ void map(const common_peg_ast_node & node) override;
+};
+
+inline common_peg_arena build_chat_peg_constructed_parser(const std::function<common_peg_parser(common_chat_peg_constructed_builder &)> & fn) {
+ common_chat_peg_constructed_builder builder;
+ builder.set_root(fn(builder));
+ return builder.build();
+}
diff --git a/common/chat.cpp b/common/chat.cpp
index aba64a23a3..d370b57703 100644
--- a/common/chat.cpp
+++ b/common/chat.cpp
@@ -85,29 +85,36 @@ json common_chat_msg::to_json_oaicompat() const
return message;
}
-std::vector<common_chat_msg_diff> common_chat_msg_diff::compute_diffs(const common_chat_msg & previous_msg, const common_chat_msg & new_msg) {
+std::vector<common_chat_msg_diff> common_chat_msg_diff::compute_diffs(const common_chat_msg & msg_prv, const common_chat_msg & msg_new) {
std::vector diffs;
- if (previous_msg.reasoning_content != new_msg.reasoning_content) {
- auto & diff = diffs.emplace_back();
- diff.reasoning_content_delta = string_diff(previous_msg.reasoning_content, new_msg.reasoning_content);
- }
- if (previous_msg.content != new_msg.content) {
- auto & diff = diffs.emplace_back();
- diff.content_delta = string_diff(previous_msg.content, new_msg.content);
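+ // Reserve space for up to three non-indexed deltas (reasoning, content, and an
+ // update to the last pre-existing tool call) plus one delta per newly added tool call.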
+ if (msg_new.tool_calls.size() > msg_prv.tool_calls.size()) {
+ diffs.reserve(msg_new.tool_calls.size() - msg_prv.tool_calls.size() + 3);
+ } else {
+ diffs.reserve(3);
}
- if (new_msg.tool_calls.size() < previous_msg.tool_calls.size()) {
+ // TODO: these can become expensive for long messages - how to optimize?
+ if (msg_prv.reasoning_content != msg_new.reasoning_content) {
+ auto & diff = diffs.emplace_back();
+ diff.reasoning_content_delta = string_diff(msg_prv.reasoning_content, msg_new.reasoning_content);
+ }
+ if (msg_prv.content != msg_new.content) {
+ auto & diff = diffs.emplace_back();
+ diff.content_delta = string_diff(msg_prv.content, msg_new.content);
+ }
+
+ if (msg_new.tool_calls.size() < msg_prv.tool_calls.size()) {
throw std::runtime_error("Invalid diff: now finding less tool calls!");
}
- if (!previous_msg.tool_calls.empty()) {
- auto idx = previous_msg.tool_calls.size() - 1;
- const auto & pref = previous_msg.tool_calls[idx];
- const auto & newf = new_msg.tool_calls[idx];
+ if (!msg_prv.tool_calls.empty()) {
+ const auto idx = msg_prv.tool_calls.size() - 1;
+ const auto & pref = msg_prv.tool_calls[idx];
+ const auto & newf = msg_new.tool_calls[idx];
if (pref.name != newf.name) {
throw std::runtime_error("Invalid diff: tool call mismatch!");
}
- auto args_diff = string_diff(pref.arguments, newf.arguments);
+ const auto args_diff = string_diff(pref.arguments, newf.arguments);
if (!args_diff.empty() || pref.id != newf.id) {
auto & diff = diffs.emplace_back();
diff.tool_call_index = idx;
@@ -118,11 +125,12 @@ std::vector<common_chat_msg_diff> common_chat_msg_diff::compute_diffs(const comm
diff.tool_call_delta.arguments = args_diff;
}
}
- for (size_t idx = previous_msg.tool_calls.size(); idx < new_msg.tool_calls.size(); ++idx) {
+ for (size_t idx = msg_prv.tool_calls.size(); idx < msg_new.tool_calls.size(); ++idx) {
auto & diff = diffs.emplace_back();
diff.tool_call_index = idx;
- diff.tool_call_delta = new_msg.tool_calls[idx];
+ diff.tool_call_delta = msg_new.tool_calls[idx];
}
+
return diffs;
}
@@ -163,7 +171,7 @@ common_chat_tool_choice common_chat_tool_choice_parse_oaicompat(const std::strin
if (tool_choice == "required") {
return COMMON_CHAT_TOOL_CHOICE_REQUIRED;
}
- throw std::runtime_error("Invalid tool_choice: " + tool_choice);
+ throw std::invalid_argument("Invalid tool_choice: " + tool_choice);
}
bool common_chat_templates_support_enable_thinking(const common_chat_templates * chat_templates) {
@@ -186,17 +194,17 @@ std::vector<common_chat_msg> common_chat_msgs_parse_oaicompat(const json & messa
try {
if (!messages.is_array()) {
- throw std::runtime_error("Expected 'messages' to be an array, got " + messages.dump());
+ throw std::invalid_argument("Expected 'messages' to be an array, got " + messages.dump());
}
for (const auto & message : messages) {
if (!message.is_object()) {
- throw std::runtime_error("Expected 'message' to be an object, got " + message.dump());
+ throw std::invalid_argument("Expected 'message' to be an object, got " + message.dump());
}
common_chat_msg msg;
if (!message.contains("role")) {
- throw std::runtime_error("Missing 'role' in message: " + message.dump());
+ throw std::invalid_argument("Missing 'role' in message: " + message.dump());
}
msg.role = message.at("role");
@@ -209,11 +217,11 @@ std::vector<common_chat_msg> common_chat_msgs_parse_oaicompat(const json & messa
} else if (content.is_array()) {
for (const auto & part : content) {
if (!part.contains("type")) {
- throw std::runtime_error("Missing content part type: " + part.dump());
+ throw std::invalid_argument("Missing content part type: " + part.dump());
}
const auto & type = part.at("type");
if (type != "text") {
- throw std::runtime_error("Unsupported content part type: " + type.dump());
+ throw std::invalid_argument("Unsupported content part type: " + type.dump());
}
common_chat_msg_content_part msg_part;
msg_part.type = type;
@@ -221,25 +229,25 @@ std::vector<common_chat_msg> common_chat_msgs_parse_oaicompat(const json & messa
msg.content_parts.push_back(msg_part);
}
} else if (!content.is_null()) {
- throw std::runtime_error("Invalid 'content' type: expected string or array, got " + content.dump() + " (ref: https://github.com/ggml-org/llama.cpp/issues/8367)");
+ throw std::invalid_argument("Invalid 'content' type: expected string or array, got " + content.dump() + " (ref: https://github.com/ggml-org/llama.cpp/issues/8367)");
}
}
if (has_tool_calls) {
for (const auto & tool_call : message.at("tool_calls")) {
common_chat_tool_call tc;
if (!tool_call.contains("type")) {
- throw std::runtime_error("Missing tool call type: " + tool_call.dump());
+ throw std::invalid_argument("Missing tool call type: " + tool_call.dump());
}
const auto & type = tool_call.at("type");
if (type != "function") {
- throw std::runtime_error("Unsupported tool call type: " + tool_call.dump());
+ throw std::invalid_argument("Unsupported tool call type: " + tool_call.dump());
}
if (!tool_call.contains("function")) {
- throw std::runtime_error("Missing tool call function: " + tool_call.dump());
+ throw std::invalid_argument("Missing tool call function: " + tool_call.dump());
}
const auto & fc = tool_call.at("function");
if (!fc.contains("name")) {
- throw std::runtime_error("Missing tool call name: " + tool_call.dump());
+ throw std::invalid_argument("Missing tool call name: " + tool_call.dump());
}
tc.name = fc.at("name");
tc.arguments = fc.at("arguments");
@@ -250,7 +258,7 @@ std::vector<common_chat_msg> common_chat_msgs_parse_oaicompat(const json & messa
}
}
if (!has_content && !has_tool_calls) {
- throw std::runtime_error("Expected 'content' or 'tool_calls' (ref: https://github.com/ggml-org/llama.cpp/issues/8367 & https://github.com/ggml-org/llama.cpp/issues/12279)");
+ throw std::invalid_argument("Expected 'content' or 'tool_calls' (ref: https://github.com/ggml-org/llama.cpp/issues/8367 & https://github.com/ggml-org/llama.cpp/issues/12279)");
}
if (message.contains("reasoning_content")) {
msg.reasoning_content = message.at("reasoning_content");
@@ -353,18 +361,18 @@ std::vector<common_chat_tool> common_chat_tools_parse_oaicompat(const json & too
try {
if (!tools.is_null()) {
if (!tools.is_array()) {
- throw std::runtime_error("Expected 'tools' to be an array, got " + tools.dump());
+ throw std::invalid_argument("Expected 'tools' to be an array, got " + tools.dump());
}
for (const auto & tool : tools) {
if (!tool.contains("type")) {
- throw std::runtime_error("Missing tool type: " + tool.dump());
+ throw std::invalid_argument("Missing tool type: " + tool.dump());
}
const auto & type = tool.at("type");
if (!type.is_string() || type != "function") {
- throw std::runtime_error("Unsupported tool type: " + tool.dump());
+ throw std::invalid_argument("Unsupported tool type: " + tool.dump());
}
if (!tool.contains("function")) {
- throw std::runtime_error("Missing tool function: " + tool.dump());
+ throw std::invalid_argument("Missing tool function: " + tool.dump());
}
const auto & function = tool.at("function");
@@ -649,6 +657,9 @@ const char * common_chat_format_name(common_chat_format format) {
case COMMON_CHAT_FORMAT_QWEN3_CODER_XML: return "Qwen3 Coder";
case COMMON_CHAT_FORMAT_APRIEL_1_5: return "Apriel 1.5";
case COMMON_CHAT_FORMAT_XIAOMI_MIMO: return "Xiaomi MiMo";
+ case COMMON_CHAT_FORMAT_PEG_SIMPLE: return "peg-simple";
+ case COMMON_CHAT_FORMAT_PEG_NATIVE: return "peg-native";
+ case COMMON_CHAT_FORMAT_PEG_CONSTRUCTED: return "peg-constructed";
case COMMON_CHAT_FORMAT_DEEPSEEK_V3_2: return "DeepSeek V3.2";
default:
throw std::runtime_error("Unknown chat format");
diff --git a/common/chat.h b/common/chat.h
index 36f81cdca2..7e8e99e9a9 100644
--- a/common/chat.h
+++ b/common/chat.h
@@ -3,6 +3,7 @@
#pragma once
#include "common.h"
+#include "peg-parser.h"
#include
#include
#include
@@ -76,7 +77,7 @@ struct common_chat_msg_diff {
size_t tool_call_index = std::string::npos;
common_chat_tool_call tool_call_delta;
- static std::vector<common_chat_msg_diff> compute_diffs(const common_chat_msg & previous_msg, const common_chat_msg & new_msg);
+ static std::vector<common_chat_msg_diff> compute_diffs(const common_chat_msg & msg_prv, const common_chat_msg & msg_new);
bool operator==(const common_chat_msg_diff & other) const {
return content_delta == other.content_delta
@@ -125,6 +126,11 @@ enum common_chat_format {
COMMON_CHAT_FORMAT_XIAOMI_MIMO,
COMMON_CHAT_FORMAT_DEEPSEEK_V3_2,
+ // These are intended to be parsed by the PEG parser
+ COMMON_CHAT_FORMAT_PEG_SIMPLE,
+ COMMON_CHAT_FORMAT_PEG_NATIVE,
+ COMMON_CHAT_FORMAT_PEG_CONSTRUCTED,
+
COMMON_CHAT_FORMAT_COUNT, // Not a format, just the # formats
};
@@ -155,6 +161,7 @@ struct common_chat_params {
std::vector grammar_triggers;
std::vector preserved_tokens;
std::vector additional_stops;
+ std::string parser;
};
struct common_chat_syntax {
@@ -164,6 +171,7 @@ struct common_chat_syntax {
bool reasoning_in_content = false;
bool thinking_forced_open = false;
bool parse_tool_calls = true;
+ common_peg_arena parser = {};
};
// Check if the template supplied via "--chat-template" is supported or not. Returns true if it's valid
@@ -207,6 +215,7 @@ const char* common_chat_format_name(common_chat_format format);
const char* common_reasoning_format_name(common_reasoning_format format);
common_reasoning_format common_reasoning_format_from_name(const std::string & format);
common_chat_msg common_chat_parse(const std::string & input, bool is_partial, const common_chat_syntax & syntax);
+common_chat_msg common_chat_peg_parse(const common_peg_arena & parser, const std::string & input, bool is_partial, const common_chat_syntax & syntax);
common_chat_tool_choice common_chat_tool_choice_parse_oaicompat(const std::string & tool_choice);
diff --git a/common/common.cpp b/common/common.cpp
index 10001f5469..0497f90a28 100644
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -694,7 +694,7 @@ bool string_parse_kv_override(const char * data, std::vector<llama_model_kv_override> & overrides) {
 || (c >= 0xD800 && c <= 0xDFFF) // UTF-16 surrogate pairs
|| c == 0xFFFD // Replacement Character (UTF-8)
|| c == 0xFEFF // Byte Order Mark (BOM)
- || c == '/' || c == '\\' || c == ':' || c == '*' // Illegal characters
+ || c == ':' || c == '*' // Illegal characters
|| c == '?' || c == '"' || c == '<' || c == '>' || c == '|') {
return false;
}
+ if (!allow_subdirs && (c == '/' || c == '\\')) {
+ // Subdirectories not allowed, reject path separators
+ return false;
+ }
}
// Reject any leading or trailing ' ', or any trailing '.', these are stripped on Windows and will cause a different filename
@@ -782,11 +786,29 @@ bool fs_validate_filename(const std::string & filename) {
#include
+#ifdef _WIN32
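+// Convert a UTF-8 string to UTF-16 via the Win32 API, replacing the deprecated
+// std::wstring_convert/codecvt conversion used previously.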
+static std::wstring utf8_to_wstring(const std::string & str) {
+ if (str.empty()) {
+ return std::wstring();
+ }
+
+ int size = MultiByteToWideChar(CP_UTF8, 0, str.c_str(), (int)str.size(), NULL, 0);
+
+ if (size <= 0) {
+ return std::wstring();
+ }
+
+ std::wstring wstr(size, 0);
+ MultiByteToWideChar(CP_UTF8, 0, str.c_str(), (int)str.size(), &wstr[0], size);
+
+ return wstr;
+}
+#endif
+
// returns true if successful, false otherwise
bool fs_create_directory_with_parents(const std::string & path) {
#ifdef _WIN32
- std::wstring_convert> converter;
- std::wstring wpath = converter.from_bytes(path);
+ std::wstring wpath = utf8_to_wstring(path);
// if the path already exists, check whether it's a directory
const DWORD attributes = GetFileAttributesW(wpath.c_str());
@@ -859,6 +881,11 @@ bool fs_create_directory_with_parents(const std::string & path) {
#endif // _WIN32
}
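+// Returns true if `path` exists and refers to a directory.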
+bool fs_is_directory(const std::string & path) {
+ std::filesystem::path dir(path);
+ return std::filesystem::exists(dir) && std::filesystem::is_directory(dir);
+}
+
std::string fs_get_cache_directory() {
std::string cache_directory = "";
auto ensure_trailing_slash = [](std::string p) {
@@ -893,6 +920,8 @@ std::string fs_get_cache_directory() {
cache_directory = std::getenv("HOME") + std::string("/Library/Caches/");
#elif defined(_WIN32)
cache_directory = std::getenv("LOCALAPPDATA");
+#elif defined(__EMSCRIPTEN__)
+ GGML_ABORT("not implemented on this platform");
#else
# error Unknown architecture
#endif
@@ -953,6 +982,32 @@ std::vector fs_list(const std::string & path, bool include_dir
return files;
}
+//
+// TTY utils
+//
+
+bool tty_can_use_colors() {
+ // Check NO_COLOR environment variable (https://no-color.org/)
+ if (const char * no_color = std::getenv("NO_COLOR")) {
+ if (no_color[0] != '\0') {
+ return false;
+ }
+ }
+
+ // Check TERM environment variable
+ if (const char * term = std::getenv("TERM")) {
+ if (std::strcmp(term, "dumb") == 0) {
+ return false;
+ }
+ }
+
+ // Check if stdout and stderr are connected to a terminal
+ // We check both because log messages can go to either
+ bool stdout_is_tty = isatty(fileno(stdout));
+ bool stderr_is_tty = isatty(fileno(stderr));
+
+ return stdout_is_tty || stderr_is_tty;
+}
//
// Model utils
diff --git a/common/common.h b/common/common.h
index cdca5e26a2..d28e48991c 100644
--- a/common/common.h
+++ b/common/common.h
@@ -12,6 +12,10 @@
#include
#include